diff --git a/recipes/dawn.recipe b/recipes/dawn.recipe
index 3d4ab42a07..d242d5c65b 100644
--- a/recipes/dawn.recipe
+++ b/recipes/dawn.recipe
@@ -16,22 +16,19 @@ class DawnRecipe(BasicNewsRecipe):
remove_empty_feeds = True
oldest_article = 2
max_articles_per_feed = 100
+ #auto_cleanup = True
+ #auto_cleanup_keep = '//div[@class="slideshow"]'
+
no_stylesheets = True
remove_javascript = True
encoding = 'utf-8'
+ keep_only_tags = [dict(name='div', attrs={'class':'push-half--sides push--top'}),
+ dict(name='article', attrs={'class':'story story--single push-half'})]
# Feeds from http://www.dawn.com/wps/wcm/connect/dawn-content-library/dawn/services/rss
feeds = []
- feeds.append((u'Latest News', u'http://feedproxy.google.com/Dawn-All-News'))
- feeds.append((u'Pakistan News', u'http://feeds2.feedburner.com/dawn/news/pakistan'))
- feeds.append((u'World News', u'http://feeds2.feedburner.com/dawn/news/world'))
- feeds.append((u'Business News', u'http://feeds2.feedburner.com/dawn/news/business'))
- feeds.append((u'Sport News', u'http://feeds2.feedburner.com/dawn/news/sport'))
- feeds.append((u'Cricket News', u'http://feeds2.feedburner.com/dawn/news/cricket'))
- feeds.append((u'Sci-tech News', u'http://feeds2.feedburner.com/dawn/news/technology'))
- feeds.append((u'Entertainment News', u'http://feeds2.feedburner.com/dawn/news/entertainment'))
- feeds.append((u'Columnists', u'http://feeds2.feedburner.com/dawn/news/columnists'))
+ feeds.append((u'Latest News', u'http://feeds.feedburner.com/dawn-news'))
#feeds.append((u'', u''))
conversion_options = {'comments': description, 'tags': category, 'language': 'en',
@@ -45,48 +42,50 @@ class DawnRecipe(BasicNewsRecipe):
span.news_byline {font-size: x-small; color: #696969; margin-top: 1em;}
'''
- def print_version(self, url):
- return url + '?pagedesign=Dawn_PrintlyFriendlyPage'
+ #def print_version(self, url):
+ #url = url.split('?')[0] + '/print'
+ #print(url)
+ #return url
- def preprocess_html(self, soup):
- newBody = Tag(soup, 'body')
+ #def preprocess_html(self, soup):
+ #newBody = Tag(soup, 'body')
- for cl in ['page_title', 'news_headline', 'news_byline']:
- tag = soup.find('span', attrs = {'class': cl})
- if tag:
- # They like their <br> tags; I don't: does not work well on small screens.
- if tag['class'] == 'news_byline':
- for br in tag.findAll('br'):
- br.extract()
+ #for cl in ['page_title', 'news_headline', 'news_byline']:
+ #tag = soup.find('span', attrs = {'class': cl})
+ #if tag:
+ ## They like their <br> tags; I don't: does not work well on small screens.
+ #if tag['class'] == 'news_byline':
+ #for br in tag.findAll('br'):
+ #br.extract()
- newBody.append(tag)
+ #newBody.append(tag)
- table = soup.find('table', attrs = {'id': 'body table'})
- if table:
- for td in table.findAll('td', attrs = {'class': 'news_story'}):
- for tag in td.findAll(True):
- if tag.has_key('id') and tag['id'] == 'banner-img_slide':
- tag.extract()
- elif tag.has_key('style'):
- del tag['style']
- elif tag.name == 'script':
- tag.extract()
+ #table = soup.find('table', attrs = {'id': 'body table'})
+ #if table:
+ #for td in table.findAll('td', attrs = {'class': 'news_story'}):
+ #for tag in td.findAll(True):
+ #if tag.has_key('id') and tag['id'] == 'banner-img_slide':
+ #tag.extract()
+ #elif tag.has_key('style'):
+ #del tag['style']
+ #elif tag.name == 'script':
+ #tag.extract()
- # They like their <br> tags; I don't: does not work well on small screens.
- center = td.find('center')
- if center:
- for br in center.findNextSiblings('br'):
- br.extract()
- for br in center.findPreviousSiblings('br'):
- br.extract()
+ ## They like their <br> tags; I don't: does not work well on small screens.
+ #center = td.find('center')
+ #if center:
+ #for br in center.findNextSiblings('br'):
+ #br.extract()
+ #for br in center.findPreviousSiblings('br'):
+ #br.extract()
- for attr in ['align', 'valign']:
- if td.has_key(attr):
- del td[attr]
+ #for attr in ['align', 'valign']:
+ #if td.has_key(attr):
+ #del td[attr]
- td.name = 'div'
- newBody.append(td)
+ #td.name = 'div'
+ #newBody.append(td)
- soup.body.replaceWith(newBody)
+ #soup.body.replaceWith(newBody)
- return soup
+ #return soup