mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix Globe and Mail recipe for updated site
This commit is contained in:
parent
e6728649be
commit
cd648cad29
@ -8,46 +8,37 @@ globeandmail.com
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
|
||||||
|
|
||||||
class GlobeAndMail(BasicNewsRecipe):
|
class GlobeAndMail(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'Globe and Mail'
|
title = 'Globe and Mail'
|
||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal'
|
||||||
language = _('English')
|
language = _('English')
|
||||||
|
oldest_article = 2.0
|
||||||
|
no_stylesheets = True
|
||||||
description = 'Canada\'s national newspaper'
|
description = 'Canada\'s national newspaper'
|
||||||
keep_only_tags = [dict(id='content')]
|
remove_tags_before = dict(id="article-top")
|
||||||
remove_tags = [dict(attrs={'class':'nav'}), dict(id=['related', 'TPphoto', 'secondaryNav', 'articleBottomToolsHolder'])]
|
remove_tags = [
|
||||||
|
{'id':['util', 'article-tabs', 'comments', 'article-relations',
|
||||||
|
'gallery-controls', 'video', 'galleryLoading']},
|
||||||
|
]
|
||||||
|
remove_tags_after = dict(id='article-content')
|
||||||
|
|
||||||
def parse_index(self):
|
feeds = [
|
||||||
src = self.browser.open('http://www.theglobeandmail.com/frontpage/').read()
|
('Latest headlines', 'http://www.theglobeandmail.com/?service=rss'),
|
||||||
soup = BeautifulSoup(src)
|
('Top stories', 'http://www.theglobeandmail.com/?service=rss&feed=topstories'),
|
||||||
|
('National', 'http://www.theglobeandmail.com/news/national/?service=rss'),
|
||||||
feeds = []
|
('Politics', 'http://www.theglobeandmail.com/news/politics/?service=rss'),
|
||||||
articles = []
|
('World', 'http://www.theglobeandmail.com/news/world/?service=rss'),
|
||||||
feed = 'Front Page'
|
('Business', 'http://www.theglobeandmail.com/report-on-business/?service=rss'),
|
||||||
for tag in soup.findAll(['h3', 'h4']):
|
('Opinions', 'http://www.theglobeandmail.com/news/opinions/?service=rss'),
|
||||||
if tag.name == 'h3':
|
('Columnists', 'http://www.theglobeandmail.com/news/opinions/columnists/?service=rss'),
|
||||||
a = tag.find('a', href=True)
|
('Globe Investor', 'http://www.theglobeandmail.com/globe-investor/?service=rss'),
|
||||||
if a is not None:
|
('Sports', 'http://www.theglobeandmail.com/sports/?service=rss'),
|
||||||
href = 'http://www.theglobeandmail.com' + a['href'].strip()
|
('Technology', 'http://www.theglobeandmail.com/news/technology/?service=rss'),
|
||||||
text = a.find(text=True)
|
('Arts', 'http://www.theglobeandmail.com/news/arts/?service=rss'),
|
||||||
if text:
|
('Life', 'http://www.theglobeandmail.com/life/?service=rss'),
|
||||||
text = text.strip()
|
('Blogs', 'http://www.theglobeandmail.com/blogs/?service=rss'),
|
||||||
desc = ''
|
('Real Estate', 'http://www.theglobeandmail.com/real-estate/?service=rss'),
|
||||||
summary = tag.findNextSiblings('p', attrs={'class':'summary'}, limit=1)
|
('Auto', 'http://www.theglobeandmail.com/auto/?service=rss'),
|
||||||
if summary:
|
]
|
||||||
desc = self.tag_to_string(summary[0], False)
|
|
||||||
articles.append({
|
|
||||||
'title': text,
|
|
||||||
'url' : href,
|
|
||||||
'desc' : desc,
|
|
||||||
'date' : '',
|
|
||||||
})
|
|
||||||
elif tag.name == 'h4':
|
|
||||||
if articles:
|
|
||||||
feeds.append((feed, articles))
|
|
||||||
articles = []
|
|
||||||
feed = self.tag_to_string(tag, False)
|
|
||||||
|
|
||||||
return feeds
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user