Update Dawn

2025-07-09 03:04:10 -04:00 · 2014-09-07 17:31:52 +05:30 · 2014-09-07 17:31:52 +05:30 · 20e861dfaf
commit 20e861dfaf
parent 82f03dca32
1 changed file with 44 additions and 45 deletions
--- a/recipes/dawn.recipe
+++ b/recipes/dawn.recipe
@@ -16,22 +16,19 @@ class DawnRecipe(BasicNewsRecipe):
    remove_empty_feeds = True
    oldest_article = 2
    max_articles_per_feed = 100
    #auto_cleanup = True
    #auto_cleanup_keep = '//dix[@class="slideshow"]'
    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'
    keep_only_tags = [dict(name='div', attrs={'class':'push-half--sides  push--top'}),
 		      dict(name='article', attrs={'class':'story  story--single  push-half'})]
    # Feeds from http://www.dawn.com/wps/wcm/connect/dawn-content-library/dawn/services/rss
    feeds = []
-    feeds.append((u'Latest News', u'http://feedproxy.google.com/Dawn-All-News'))
+    feeds.append((u'Latest News', u'http://feeds.feedburner.com/dawn-news'))
    feeds.append((u'Pakistan News', u'http://feeds2.feedburner.com/dawn/news/pakistan'))
    feeds.append((u'World News', u'http://feeds2.feedburner.com/dawn/news/world'))
    feeds.append((u'Business News', u'http://feeds2.feedburner.com/dawn/news/business'))
    feeds.append((u'Sport News', u'http://feeds2.feedburner.com/dawn/news/sport'))
    feeds.append((u'Cricket News', u'http://feeds2.feedburner.com/dawn/news/cricket'))
    feeds.append((u'Sci-tech News', u'http://feeds2.feedburner.com/dawn/news/technology'))
    feeds.append((u'Entertainment News', u'http://feeds2.feedburner.com/dawn/news/entertainment'))
    feeds.append((u'Columnists', u'http://feeds2.feedburner.com/dawn/news/columnists'))
    #feeds.append((u'', u''))
    conversion_options = {'comments': description, 'tags': category, 'language': 'en',
@@ -45,48 +42,50 @@ class DawnRecipe(BasicNewsRecipe):
                span.news_byline {font-size: x-small; color: #696969; margin-top: 1em;}
                '''
-    def print_version(self, url):
+    #def print_version(self, url):
-        return url + '?pagedesign=Dawn_PrintlyFriendlyPage'
+        #url = url.split('?')[0] + '/print'
        #print(url)
        #return url
-    def preprocess_html(self, soup):
+    #def preprocess_html(self, soup):
-        newBody = Tag(soup, 'body')
+        #newBody = Tag(soup, 'body')
-        for cl in ['page_title', 'news_headline', 'news_byline']:
+        #for cl in ['page_title', 'news_headline', 'news_byline']:
-            tag = soup.find('span', attrs = {'class': cl})
+            #tag = soup.find('span', attrs = {'class': cl})
-            if tag:
+            #if tag:
-                # They like their <br> tags; I don't: does not work well on small screens.
+                ## They like their <br> tags; I don't: does not work well on small screens.
-                if tag['class'] == 'news_byline':
+                #if tag['class'] == 'news_byline':
-                    for br in tag.findAll('br'):
+                    #for br in tag.findAll('br'):
-                        br.extract()
+                        #br.extract()
-                newBody.append(tag)
+                #newBody.append(tag)
-        table = soup.find('table', attrs = {'id': 'body table'})
+        #table = soup.find('table', attrs = {'id': 'body table'})
-        if table:
+        #if table:
-            for td in table.findAll('td', attrs = {'class': 'news_story'}):
+            #for td in table.findAll('td', attrs = {'class': 'news_story'}):
-                for tag in td.findAll(True):
+                #for tag in td.findAll(True):
-                    if tag.has_key('id') and tag['id'] == 'banner-img_slide':
+                    #if tag.has_key('id') and tag['id'] == 'banner-img_slide':
-                        tag.extract()
+                        #tag.extract()
-                    elif tag.has_key('style'):
+                    #elif tag.has_key('style'):
-                        del tag['style']
+                        #del tag['style']
-                    elif tag.name == 'script':
+                    #elif tag.name == 'script':
-                        tag.extract()
+                        #tag.extract()
-                # They like their <br> tags; I don't: does not work well on small screens.
+                ## They like their <br> tags; I don't: does not work well on small screens.
-                center = td.find('center')
+                #center = td.find('center')
-                if center:
+                #if center:
-                    for br in center.findNextSiblings('br'):
+                    #for br in center.findNextSiblings('br'):
-                        br.extract()
+                        #br.extract()
-                    for br in center.findPreviousSiblings('br'):
+                    #for br in center.findPreviousSiblings('br'):
-                        br.extract()
+                        #br.extract()
-                for attr in ['align', 'valign']:
+                #for attr in ['align', 'valign']:
-                    if td.has_key(attr):
+                    #if td.has_key(attr):
-                        del td[attr]
+                        #del td[attr]
-                td.name = 'div'
+                #td.name = 'div'
-                newBody.append(td)
+                #newBody.append(td)
-            soup.body.replaceWith(newBody)
+            #soup.body.replaceWith(newBody)
-            return soup
+            #return soup