From f567ee214cfaa223bce69b4d9a4b893b98c1a456 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 13 Dec 2011 08:53:08 +0530 Subject: [PATCH] Update WSJ, nytime, telegraph, guardian, globe and mail and independent to use toc_thumbnail --- recipes/globe_and_mail.recipe | 7 +++++++ recipes/guardian.recipe | 6 ++++++ recipes/independent.recipe | 7 +++++++ recipes/nytimes.recipe | 12 +++++++++++- recipes/nytimes_sub.recipe | 10 ++++++++++ recipes/telegraph_uk.recipe | 5 +++++ recipes/wsj.recipe | 6 ++++++ recipes/wsj_free.recipe | 6 ++++++ 8 files changed, 58 insertions(+), 1 deletion(-) diff --git a/recipes/globe_and_mail.recipe b/recipes/globe_and_mail.recipe index 03061a2329..a7c78887c5 100644 --- a/recipes/globe_and_mail.recipe +++ b/recipes/globe_and_mail.recipe @@ -51,6 +51,13 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe): {'class':['articleTools', 'pagination', 'Ads', 'topad', 'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}] + def populate_article_metadata(self, article, soup, first): + if first and hasattr(self, 'add_toc_thumbnail'): + picdiv = soup.find('img') + if picdiv is not None: + self.add_toc_thumbnail(article,picdiv['src']) + + #Use the mobile version rather than the web version def print_version(self, url): return url.rpartition('?')[0] + '?service=mobile' diff --git a/recipes/guardian.recipe b/recipes/guardian.recipe index 840e8302af..8bff4f9be8 100644 --- a/recipes/guardian.recipe +++ b/recipes/guardian.recipe @@ -79,6 +79,12 @@ class Guardian(BasicNewsRecipe): url = None return url + def populate_article_metadata(self, article, soup, first): + if first and hasattr(self, 'add_toc_thumbnail'): + picdiv = soup.find('img') + if picdiv is not None: + self.add_toc_thumbnail(article,picdiv['src']) + def preprocess_html(self, soup): # multiple html sections in soup, useful stuff in the first diff --git a/recipes/independent.recipe b/recipes/independent.recipe index ebe0a30fd2..db89a07264 100644 --- a/recipes/independent.recipe +++ b/recipes/independent.recipe @@ -104,6 +104,13 @@ class TheIndependentNew(BasicNewsRecipe): url = None return url + def populate_article_metadata(self, article, soup, first): + if first and hasattr(self, 'add_toc_thumbnail'): + picdiv = soup.find('img') + if picdiv is not None: + self.add_toc_thumbnail(article,picdiv['src']) + + def preprocess_html(self, soup): #remove 'advertorial articles' diff --git a/recipes/nytimes.recipe b/recipes/nytimes.recipe index 0a5c310af4..3876c1428c 100644 --- a/recipes/nytimes.recipe +++ b/recipes/nytimes.recipe @@ -1,5 +1,5 @@ #!/usr/bin/env python - +# -*- coding: utf-8 -*- __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' ''' @@ -707,6 +707,16 @@ class NYTimes(BasicNewsRecipe): return soup def populate_article_metadata(self, article, soup, first): + if first and hasattr(self, 'add_toc_thumbnail'): + idxdiv = soup.find('div',attrs={'class':'articleSpanImage'}) + if idxdiv is not None: + if idxdiv.img: + self.add_toc_thumbnail(article, idxdiv.img['src']) + else: + img = soup.find('img') + if img is not None: + self.add_toc_thumbnail(article, img['src']) + shortparagraph = "" try: if len(article.text_summary.strip()) == 0: diff --git a/recipes/nytimes_sub.recipe b/recipes/nytimes_sub.recipe index d24307c887..7c59b2fc16 100644 --- a/recipes/nytimes_sub.recipe +++ b/recipes/nytimes_sub.recipe @@ -855,6 +855,16 @@ class NYTimes(BasicNewsRecipe): return soup def populate_article_metadata(self, article, soup, first): + if first and hasattr(self, 'add_toc_thumbnail'): + idxdiv = soup.find('div',attrs={'class':'articleSpanImage'}) + if idxdiv is not None: + if idxdiv.img: + self.add_toc_thumbnail(article, idxdiv.img['src']) + else: + img = soup.find('img') + if img is not None: + self.add_toc_thumbnail(article, img['src']) + shortparagraph = "" try: if len(article.text_summary.strip()) == 0: diff --git a/recipes/telegraph_uk.recipe b/recipes/telegraph_uk.recipe index 157cfa99e9..347352d424 100644 --- a/recipes/telegraph_uk.recipe +++ b/recipes/telegraph_uk.recipe @@ -59,6 +59,11 @@ class TelegraphUK(BasicNewsRecipe): ,(u'Travel' , u'http://www.telegraph.co.uk/travel/rss' ) ,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' ) ] + def populate_article_metadata(self, article, soup, first): + if first and hasattr(self, 'add_toc_thumbnail'): + picdiv = soup.find('img') + if picdiv is not None: + self.add_toc_thumbnail(article,picdiv['src']) def get_article_url(self, article): url = article.get('link', None) diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe index f01e7ae858..0e6f40de26 100644 --- a/recipes/wsj.recipe +++ b/recipes/wsj.recipe @@ -57,6 +57,12 @@ class WallStreetJournal(BasicNewsRecipe): 'username and password') return br + def populate_article_metadata(self, article, soup, first): + if first and hasattr(self, 'add_toc_thumbnail'): + picdiv = soup.find('img') + if picdiv is not None: + self.add_toc_thumbnail(article,picdiv['src']) + def postprocess_html(self, soup, first): for tag in soup.findAll(name=['table', 'tr', 'td']): tag.name = 'div' diff --git a/recipes/wsj_free.recipe b/recipes/wsj_free.recipe index 42d791294a..deea63aa64 100644 --- a/recipes/wsj_free.recipe +++ b/recipes/wsj_free.recipe @@ -44,6 +44,12 @@ class WallStreetJournal(BasicNewsRecipe): ] remove_tags_after = [dict(id="article_story_body"), {'class':"article story"},] + def populate_article_metadata(self, article, soup, first): + if first and hasattr(self, 'add_toc_thumbnail'): + picdiv = soup.find('img') + if picdiv is not None: + self.add_toc_thumbnail(article,picdiv['src']) + def postprocess_html(self, soup, first): for tag in soup.findAll(name=['table', 'tr', 'td']): tag.name = 'div'