Update the WSJ, NYTimes, Telegraph, Guardian, Globe and Mail, and Independent recipes to use toc_thumbnail

This commit is contained in:
Kovid Goyal 2011-12-13 08:53:08 +05:30
parent 09b5ece8e7
commit f567ee214c
8 changed files with 58 additions and 1 deletions

View File

@ -51,6 +51,13 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe):
{'class':['articleTools', 'pagination', 'Ads', 'topad', {'class':['articleTools', 'pagination', 'Ads', 'topad',
'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}] 'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}]
def populate_article_metadata(self, article, soup, first):
    '''
    Register the first image found in ``soup`` as the TOC thumbnail of
    ``article``.

    Nothing is done unless ``first`` is true and this recipe object has an
    ``add_toc_thumbnail`` method (the hasattr check keeps the recipe usable
    where that method is absent).
    '''
    if not first or not hasattr(self, 'add_toc_thumbnail'):
        return
    img = soup.find('img')
    if img is None:
        return
    # img['src'] raises KeyError for an <img> tag without a src attribute;
    # look it up with get() so such a page is skipped instead of failing.
    src = img.get('src')
    if src:
        self.add_toc_thumbnail(article, src)
# Use the mobile version rather than the web version
def print_version(self, url):
    '''
    Return ``url`` with its query string replaced by ``?service=mobile``.

    Everything after the last ``?`` is dropped. A URL without any ``?`` is
    kept whole and simply gets the mobile query appended.
    '''
    base, sep, _query = url.rpartition('?')
    if not sep:
        # rpartition() puts the whole string in the last slot when no '?'
        # exists, which would otherwise leave an empty base URL.
        base = url
    return base + '?service=mobile'

View File

@ -79,6 +79,12 @@ class Guardian(BasicNewsRecipe):
url = None url = None
return url return url
def populate_article_metadata(self, article, soup, first):
    '''
    Use the first ``<img>`` in ``soup`` as the TOC thumbnail for ``article``.

    Runs only when ``first`` is true and the recipe object exposes
    ``add_toc_thumbnail``; otherwise it is a no-op.
    '''
    if not (first and hasattr(self, 'add_toc_thumbnail')):
        return
    first_img = soup.find('img')
    if first_img is None:
        return
    # Indexing with ['src'] raises KeyError when the tag has no src
    # attribute, so fetch it defensively and ignore missing/empty values.
    thumb_src = first_img.get('src')
    if thumb_src:
        self.add_toc_thumbnail(article, thumb_src)
def preprocess_html(self, soup): def preprocess_html(self, soup):
# multiple html sections in soup, useful stuff in the first # multiple html sections in soup, useful stuff in the first

View File

@ -104,6 +104,13 @@ class TheIndependentNew(BasicNewsRecipe):
url = None url = None
return url return url
def populate_article_metadata(self, article, soup, first):
    '''
    Pass the ``src`` of the first image in ``soup`` to
    ``self.add_toc_thumbnail`` for ``article``.

    Skipped when ``first`` is false, when the recipe object lacks
    ``add_toc_thumbnail``, or when no usable image source is found.
    '''
    if first and hasattr(self, 'add_toc_thumbnail'):
        image = soup.find('img')
        # get() instead of ['src']: an <img> lacking a src attribute would
        # otherwise raise KeyError here.
        source = image.get('src') if image is not None else None
        if source:
            self.add_toc_thumbnail(article, source)
def preprocess_html(self, soup): def preprocess_html(self, soup):
#remove 'advertorial articles' #remove 'advertorial articles'

View File

@ -1,5 +1,5 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' '''
@ -707,6 +707,16 @@ class NYTimes(BasicNewsRecipe):
return soup return soup
def populate_article_metadata(self, article, soup, first): def populate_article_metadata(self, article, soup, first):
if first and hasattr(self, 'add_toc_thumbnail'):
idxdiv = soup.find('div',attrs={'class':'articleSpanImage'})
if idxdiv is not None:
if idxdiv.img:
self.add_toc_thumbnail(article, idxdiv.img['src'])
else:
img = soup.find('img')
if img is not None:
self.add_toc_thumbnail(article, img['src'])
shortparagraph = "" shortparagraph = ""
try: try:
if len(article.text_summary.strip()) == 0: if len(article.text_summary.strip()) == 0:

View File

@ -855,6 +855,16 @@ class NYTimes(BasicNewsRecipe):
return soup return soup
def populate_article_metadata(self, article, soup, first): def populate_article_metadata(self, article, soup, first):
if first and hasattr(self, 'add_toc_thumbnail'):
idxdiv = soup.find('div',attrs={'class':'articleSpanImage'})
if idxdiv is not None:
if idxdiv.img:
self.add_toc_thumbnail(article, idxdiv.img['src'])
else:
img = soup.find('img')
if img is not None:
self.add_toc_thumbnail(article, img['src'])
shortparagraph = "" shortparagraph = ""
try: try:
if len(article.text_summary.strip()) == 0: if len(article.text_summary.strip()) == 0:

View File

@ -59,6 +59,11 @@ class TelegraphUK(BasicNewsRecipe):
,(u'Travel' , u'http://www.telegraph.co.uk/travel/rss' ) ,(u'Travel' , u'http://www.telegraph.co.uk/travel/rss' )
,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' ) ,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' )
] ]
def populate_article_metadata(self, article, soup, first):
    '''
    Set the TOC thumbnail of ``article`` from the first image in ``soup``.

    Acts only if ``first`` is true and ``self`` has ``add_toc_thumbnail``.
    '''
    if not first or not hasattr(self, 'add_toc_thumbnail'):
        return
    tag = soup.find('img')
    if tag is None:
        return
    # tag['src'] would raise KeyError on an image tag with no src
    # attribute; treat that case as "no thumbnail available".
    src = tag.get('src')
    if not src:
        return
    self.add_toc_thumbnail(article, src)
def get_article_url(self, article): def get_article_url(self, article):
url = article.get('link', None) url = article.get('link', None)

View File

@ -57,6 +57,12 @@ class WallStreetJournal(BasicNewsRecipe):
'username and password') 'username and password')
return br return br
def populate_article_metadata(self, article, soup, first):
    '''
    Register the first image of ``soup`` as the TOC thumbnail for
    ``article``.

    Does nothing unless ``first`` is true and the recipe object provides
    ``add_toc_thumbnail``.
    '''
    if not first or not hasattr(self, 'add_toc_thumbnail'):
        return
    img = soup.find('img')
    if img is None:
        return
    # Avoid KeyError from img['src'] when the tag carries no src attribute.
    src = img.get('src')
    if src:
        self.add_toc_thumbnail(article, src)
def postprocess_html(self, soup, first): def postprocess_html(self, soup, first):
for tag in soup.findAll(name=['table', 'tr', 'td']): for tag in soup.findAll(name=['table', 'tr', 'td']):
tag.name = 'div' tag.name = 'div'

View File

@ -44,6 +44,12 @@ class WallStreetJournal(BasicNewsRecipe):
] ]
remove_tags_after = [dict(id="article_story_body"), {'class':"article story"},] remove_tags_after = [dict(id="article_story_body"), {'class':"article story"},]
def populate_article_metadata(self, article, soup, first):
    '''
    Use the first image in ``soup`` as the TOC thumbnail of ``article``.

    Only runs when ``first`` is true and ``add_toc_thumbnail`` exists on
    the recipe object.
    '''
    if not (first and hasattr(self, 'add_toc_thumbnail')):
        return
    picture = soup.find('img')
    if picture is None:
        return
    # An image tag without a src attribute makes picture['src'] raise
    # KeyError, so read the attribute with get() and skip empty results.
    picture_src = picture.get('src')
    if picture_src:
        self.add_toc_thumbnail(article, picture_src)
def postprocess_html(self, soup, first): def postprocess_html(self, soup, first):
for tag in soup.findAll(name=['table', 'tr', 'td']): for tag in soup.findAll(name=['table', 'tr', 'td']):
tag.name = 'div' tag.name = 'div'