mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Updated Economist recipe for new website layout
This commit is contained in:
parent
efe64efe25
commit
0ed7568ae1
@ -24,9 +24,10 @@ class Economist(BasicNewsRecipe):
|
|||||||
oldest_article = 7.0
|
oldest_article = 7.0
|
||||||
cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
|
cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
|
||||||
remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
|
remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
|
||||||
dict(attrs={'class':['dblClkTrk']})]
|
dict(attrs={'class':['dblClkTrk', 'ec-article-info']})]
|
||||||
remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
|
keep_only_tags = [dict(id='ec-article-body')]
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
|
no_stylesheets = True
|
||||||
preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
|
preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
|
||||||
lambda x:'</html>')]
|
lambda x:'</html>')]
|
||||||
|
|
||||||
@ -87,7 +88,7 @@ class Economist(BasicNewsRecipe):
|
|||||||
continue
|
continue
|
||||||
a = tag.find('a', href=True)
|
a = tag.find('a', href=True)
|
||||||
if a is not None:
|
if a is not None:
|
||||||
url=a['href'].replace('displaystory', 'PrinterFriendly').strip()
|
url=a['href'].split('?')[0]+'/print'
|
||||||
if url.startswith('Printer'):
|
if url.startswith('Printer'):
|
||||||
url = '/'+url
|
url = '/'+url
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
|
@ -17,8 +17,9 @@ class Economist(BasicNewsRecipe):
|
|||||||
oldest_article = 7.0
|
oldest_article = 7.0
|
||||||
cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
|
cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
|
||||||
remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
|
remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
|
||||||
dict(attrs={'class':['dblClkTrk']})]
|
dict(attrs={'class':['dblClkTrk', 'ec-article-info']})]
|
||||||
remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
|
keep_only_tags = [dict(id='ec-article-body')]
|
||||||
|
no_stylesheets = True
|
||||||
preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
|
preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
|
||||||
lambda x:'</html>')]
|
lambda x:'</html>')]
|
||||||
|
|
||||||
@ -88,19 +89,20 @@ class Economist(BasicNewsRecipe):
|
|||||||
br = browser()
|
br = browser()
|
||||||
ret = br.open(url)
|
ret = br.open(url)
|
||||||
raw = ret.read()
|
raw = ret.read()
|
||||||
url = br.geturl().replace('displaystory', 'PrinterFriendly').strip()
|
url = br.geturl().split('?')[0]+'/print'
|
||||||
root = html.fromstring(raw)
|
root = html.fromstring(raw)
|
||||||
matches = root.xpath('//*[@class = "article-section"]')
|
matches = root.xpath('//*[@class = "ec-article-info"]')
|
||||||
feedtitle = 'Miscellaneous'
|
feedtitle = 'Miscellaneous'
|
||||||
if matches:
|
if matches:
|
||||||
feedtitle = string.capwords(html.tostring(matches[0], method='text',
|
feedtitle = string.capwords(html.tostring(matches[-1], method='text',
|
||||||
encoding=unicode))
|
encoding=unicode).split('|')[-1].strip())
|
||||||
return (i, feedtitle, url, title, description, author, published)
|
return (i, feedtitle, url, title, description, author, published)
|
||||||
|
|
||||||
def eco_article_found(self, req, result):
|
def eco_article_found(self, req, result):
|
||||||
from calibre.web.feeds import Article
|
from calibre.web.feeds import Article
|
||||||
i, feedtitle, link, title, description, author, published = result
|
i, feedtitle, link, title, description, author, published = result
|
||||||
self.log('Found print version for article:', title)
|
self.log('Found print version for article:', title, 'in', feedtitle,
|
||||||
|
'at', link)
|
||||||
|
|
||||||
a = Article(i, title, link, author, description, published, '')
|
a = Article(i, title, link, author, description, published, '')
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user