merge from my lp branch

2025-07-09 03:04:10 -04:00 · 2011-04-18 15:59:20 +00:00 · 2011-04-18 15:59:20 +00:00 · 0744534b2d
commit 0744534b2d
parent 7281c8de79 faa64884d6
2 changed files with 63 additions and 8 deletions
--- a/recipes/daily_mail_fast.recipe
+++ b/recipes/daily_mail_fast.recipe
@@ -0,0 +1,52 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TheDailyMail(BasicNewsRecipe):
+    title          = u'The Daily Mail (fast)'
+    oldest_article = 2
+    language = 'en_GB'
+
+    author = 'RufusA and Sujata Raman'
+    description = 'Faster and smaller version of the Daily Mail that does not download pictures'
+    simultaneous_downloads= 1
+    max_articles_per_feed = 50
+
+    extra_css = '''#js-article-text{font-family:Arial,Helvetica,sans-serif;}
+                    h1{font-size:x-large; font-weight:bold;}
+                    a.author{color:#003580;}
+                    .js-article-text{font-size:50%;}
+                    .imageCaption{font-size:x-small; font-weight:bold}
+
+
+                '''
+
+    remove_tags = [ dict(name='div', attrs={'class':['article-icon-links-container','print-or-mail-links cleared',
+                                                     'social-links cleared','explore-links','relatedItems','intellicrumbs box','travel','moduleHalf']}),
+                    dict(name='div', attrs={'id':['link-unit-wrapper','pushdown-ad','reader-comments','googleAds',]}),
+                    dict(name='h3', attrs={'class':['social-links-title']}),
+                     dict(name='span', attrs={'class':['clickToEnlargeTop']}),
+                     dict(name=['img']),
+                    ]
+    #remove_tags_before  = dict(name='div', attrs={'id':'content'})
+    keep_only_tags = [dict(name='div', attrs={'id':'js-article-text'})]
+
+    no_stylesheets = True
+
+    feeds          = [
+	(u'Home', u'http://www.dailymail.co.uk/home/index.rss'),
+	(u'News', u'http://www.dailymail.co.uk/news/index.rss'),
+	(u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'),
+	(u'TV and Showbiz', u'http://www.dailymail.co.uk/tvshowbiz/index.rss'),
+	(u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'),
+	(u'Health', u'http://www.dailymail.co.uk/health/index.rss'),
+	(u'Science and Technology', u'http://www.dailymail.co.uk/sciencetech/index.rss'),
+	(u'Money', u'http://www.dailymail.co.uk/money/index.rss'),
+	(u'Property', u'http://www.dailymail.co.uk/property/index.rss'),
+	(u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'),
+	(u'Travel', u'http://www.dailymail.co.uk/travel/index.rss')
+        ]
+
+    #def print_version(self, url):
+    #    main = url.partition('?')[0]
+    #    return main + '?printingPage=true'
+
+
--- a/recipes/guardian.recipe
+++ b/recipes/guardian.recipe
@@ -87,7 +87,13 @@ class Guardian(BasicNewsRecipe):
        idx = soup.find('div', id='book-index')
        for s in idx.findAll('strong', attrs={'class':'book'}):
            a = s.find('a', href=True)
-            yield (self.tag_to_string(a), a['href'])
+            section_title = self.tag_to_string(a)
+            if not section_title in self.ignore_sections:
+                prefix = ''
+                if section_title != 'Main section':
+                    prefix = section_title + ': '
+                for subsection in s.parent.findAll('a', attrs={'class':'book-section'}):
+                    yield (prefix + self.tag_to_string(subsection), subsection['href'])
    
    def find_articles(self, url):
        soup = self.index_to_soup(url)
@@ -114,10 +120,7 @@ class Guardian(BasicNewsRecipe):
        try:
            feeds = []
            for title, href in self.find_sections():
-                if not title in self.ignore_sections:
-                    feeds.append((title, list(self.find_articles(href))))
+                feeds.append((title, list(self.find_articles(href))))
            return feeds
        except:
            raise NotImplementedError
-
-