mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update WSJ, nytime, telegraph, guardian, globe and mail and independent to use toc_thumbnail
This commit is contained in:
parent
09b5ece8e7
commit
f567ee214c
@ -51,6 +51,13 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe):
|
|||||||
{'class':['articleTools', 'pagination', 'Ads', 'topad',
|
{'class':['articleTools', 'pagination', 'Ads', 'topad',
|
||||||
'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}]
|
'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}]
|
||||||
|
|
||||||
|
def populate_article_metadata(self, article, soup, first):
|
||||||
|
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||||
|
picdiv = soup.find('img')
|
||||||
|
if picdiv is not None:
|
||||||
|
self.add_toc_thumbnail(article,picdiv['src'])
|
||||||
|
|
||||||
|
|
||||||
#Use the mobile version rather than the web version
|
#Use the mobile version rather than the web version
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.rpartition('?')[0] + '?service=mobile'
|
return url.rpartition('?')[0] + '?service=mobile'
|
||||||
|
@ -79,6 +79,12 @@ class Guardian(BasicNewsRecipe):
|
|||||||
url = None
|
url = None
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
def populate_article_metadata(self, article, soup, first):
|
||||||
|
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||||
|
picdiv = soup.find('img')
|
||||||
|
if picdiv is not None:
|
||||||
|
self.add_toc_thumbnail(article,picdiv['src'])
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
|
||||||
# multiple html sections in soup, useful stuff in the first
|
# multiple html sections in soup, useful stuff in the first
|
||||||
|
@ -104,6 +104,13 @@ class TheIndependentNew(BasicNewsRecipe):
|
|||||||
url = None
|
url = None
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
def populate_article_metadata(self, article, soup, first):
|
||||||
|
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||||
|
picdiv = soup.find('img')
|
||||||
|
if picdiv is not None:
|
||||||
|
self.add_toc_thumbnail(article,picdiv['src'])
|
||||||
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
|
||||||
#remove 'advertorial articles'
|
#remove 'advertorial articles'
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
'''
|
'''
|
||||||
@ -707,6 +707,16 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
return soup
|
return soup
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
|
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||||
|
idxdiv = soup.find('div',attrs={'class':'articleSpanImage'})
|
||||||
|
if idxdiv is not None:
|
||||||
|
if idxdiv.img:
|
||||||
|
self.add_toc_thumbnail(article, idxdiv.img['src'])
|
||||||
|
else:
|
||||||
|
img = soup.find('img')
|
||||||
|
if img is not None:
|
||||||
|
self.add_toc_thumbnail(article, img['src'])
|
||||||
|
|
||||||
shortparagraph = ""
|
shortparagraph = ""
|
||||||
try:
|
try:
|
||||||
if len(article.text_summary.strip()) == 0:
|
if len(article.text_summary.strip()) == 0:
|
||||||
|
@ -855,6 +855,16 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
|
|
||||||
return soup
|
return soup
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
|
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||||
|
idxdiv = soup.find('div',attrs={'class':'articleSpanImage'})
|
||||||
|
if idxdiv is not None:
|
||||||
|
if idxdiv.img:
|
||||||
|
self.add_toc_thumbnail(article, idxdiv.img['src'])
|
||||||
|
else:
|
||||||
|
img = soup.find('img')
|
||||||
|
if img is not None:
|
||||||
|
self.add_toc_thumbnail(article, img['src'])
|
||||||
|
|
||||||
shortparagraph = ""
|
shortparagraph = ""
|
||||||
try:
|
try:
|
||||||
if len(article.text_summary.strip()) == 0:
|
if len(article.text_summary.strip()) == 0:
|
||||||
|
@ -59,6 +59,11 @@ class TelegraphUK(BasicNewsRecipe):
|
|||||||
,(u'Travel' , u'http://www.telegraph.co.uk/travel/rss' )
|
,(u'Travel' , u'http://www.telegraph.co.uk/travel/rss' )
|
||||||
,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' )
|
,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' )
|
||||||
]
|
]
|
||||||
|
def populate_article_metadata(self, article, soup, first):
|
||||||
|
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||||
|
picdiv = soup.find('img')
|
||||||
|
if picdiv is not None:
|
||||||
|
self.add_toc_thumbnail(article,picdiv['src'])
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
url = article.get('link', None)
|
url = article.get('link', None)
|
||||||
|
@ -57,6 +57,12 @@ class WallStreetJournal(BasicNewsRecipe):
|
|||||||
'username and password')
|
'username and password')
|
||||||
return br
|
return br
|
||||||
|
|
||||||
|
def populate_article_metadata(self, article, soup, first):
|
||||||
|
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||||
|
picdiv = soup.find('img')
|
||||||
|
if picdiv is not None:
|
||||||
|
self.add_toc_thumbnail(article,picdiv['src'])
|
||||||
|
|
||||||
def postprocess_html(self, soup, first):
|
def postprocess_html(self, soup, first):
|
||||||
for tag in soup.findAll(name=['table', 'tr', 'td']):
|
for tag in soup.findAll(name=['table', 'tr', 'td']):
|
||||||
tag.name = 'div'
|
tag.name = 'div'
|
||||||
|
@ -44,6 +44,12 @@ class WallStreetJournal(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
remove_tags_after = [dict(id="article_story_body"), {'class':"article story"},]
|
remove_tags_after = [dict(id="article_story_body"), {'class':"article story"},]
|
||||||
|
|
||||||
|
def populate_article_metadata(self, article, soup, first):
|
||||||
|
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||||
|
picdiv = soup.find('img')
|
||||||
|
if picdiv is not None:
|
||||||
|
self.add_toc_thumbnail(article,picdiv['src'])
|
||||||
|
|
||||||
def postprocess_html(self, soup, first):
|
def postprocess_html(self, soup, first):
|
||||||
for tag in soup.findAll(name=['table', 'tr', 'td']):
|
for tag in soup.findAll(name=['table', 'tr', 'td']):
|
||||||
tag.name = 'div'
|
tag.name = 'div'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user