mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Globe and Mail. Fix #405 (New news feed)
This commit is contained in:
parent
7e6c93504c
commit
d5462c8d00
@ -26,31 +26,12 @@ class GlobeAndMail(BasicNewsRecipe):
|
|||||||
#credit {margin-top:0px;}
|
#credit {margin-top:0px;}
|
||||||
.tag {font-size: 22pt;}'''
|
.tag {font-size: 22pt;}'''
|
||||||
description = 'Canada\'s national newspaper'
|
description = 'Canada\'s national newspaper'
|
||||||
remove_tags_before = dict(id="article-top")
|
keep_only_tags = [dict(name='article')]
|
||||||
remove_tags = [
|
remove_tags = [dict(name='aside'),
|
||||||
{'id':['util', 'article-tabs', 'comments', 'article-relations',
|
dict(name='footer'),
|
||||||
'gallery-controls', 'video', 'galleryLoading','deck','header',
|
dict(name='div', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articlecommentcountholder' in x.split(' '))}),
|
||||||
'toolsBottom'] },
|
dict(name='ul', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articletoolbar' in x.split(' '))}),
|
||||||
{'class':['credit','inline-img-caption','tab-pointer'] },
|
]
|
||||||
dict(name='div', attrs={'id':['lead-photo', 'most-popular-story']}),
|
|
||||||
dict(name='div', attrs={'class':'right'}),
|
|
||||||
dict(name='div', attrs={'id':'footer'}),
|
|
||||||
dict(name='div', attrs={'id':'beta-msg'}),
|
|
||||||
dict(name='img', attrs={'class':'headshot'}),
|
|
||||||
dict(name='div', attrs={'class':'brand'}),
|
|
||||||
dict(name='div', attrs={'id':'nav-wrap'}),
|
|
||||||
dict(name='div', attrs={'id':'featureTopics'}),
|
|
||||||
dict(name='div', attrs={'id':'videoNav'}),
|
|
||||||
dict(name='div', attrs={'id':'blog-header'}),
|
|
||||||
dict(name='div', attrs={'id':'right-rail'}),
|
|
||||||
dict(name='div', attrs={'id':'group-footer-container'}),
|
|
||||||
dict(name=['iframe', 'style'])
|
|
||||||
]
|
|
||||||
remove_attributes = ['style']
|
|
||||||
remove_tags_after = [{'id':['article-content']},
|
|
||||||
{'class':['pull','inline-img'] },
|
|
||||||
dict(name='img', attrs={'class':'inline-media-embed'}),
|
|
||||||
]
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Latest headlines', u'http://www.theglobeandmail.com/?service=rss'),
|
(u'Latest headlines', u'http://www.theglobeandmail.com/?service=rss'),
|
||||||
(u'Top stories', u'http://www.theglobeandmail.com/?service=rss&feed=topstories'),
|
(u'Top stories', u'http://www.theglobeandmail.com/?service=rss&feed=topstories'),
|
||||||
|
@ -11,6 +11,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
on 10/10/10 to include function to grab print version of articles
|
on 10/10/10 to include function to grab print version of articles
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
from datetime import date
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
'''
|
'''
|
||||||
added by Tony Stegall
|
added by Tony Stegall
|
||||||
@ -27,7 +28,6 @@ class AdvancedUserRecipe1249039563(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
language = 'nl'
|
language = 'nl'
|
||||||
|
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
body{font-family:Arial,Helvetica,sans-serif; font-size:small;}
|
body{font-family:Arial,Helvetica,sans-serif; font-size:small;}
|
||||||
h1{font-size:large;}
|
h1{font-size:large;}
|
||||||
@ -43,14 +43,16 @@ class AdvancedUserRecipe1249039563(BasicNewsRecipe):
|
|||||||
|
|
||||||
def get_obfuscated_article(self, url):
|
def get_obfuscated_article(self, url):
|
||||||
br = self.get_browser()
|
br = self.get_browser()
|
||||||
|
print 'THE CURRENT URL IS: ', url
|
||||||
br.open(url)
|
br.open(url)
|
||||||
|
year = date.today().year
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = br.follow_link(url_regex='.*?(2010)(\\/)(article)(\\/)(print)(\\/)', nr = 0)
|
response = br.follow_link(url_regex='.*?(%d)(\\/)(article)(\\/)(print)(\\/)'%year, nr = 0)
|
||||||
html = response.read()
|
html = response.read()
|
||||||
except:
|
except:
|
||||||
response = br.open(url)
|
response = br.open(url)
|
||||||
html = response.read()
|
html = response.read()
|
||||||
|
|
||||||
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
|
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
|
||||||
self.temp_files[-1].write(html)
|
self.temp_files[-1].write(html)
|
||||||
@ -59,19 +61,22 @@ class AdvancedUserRecipe1249039563(BasicNewsRecipe):
|
|||||||
|
|
||||||
###############################################################################################################
|
###############################################################################################################
|
||||||
|
|
||||||
feeds = [
|
'''
|
||||||
(u'Laatste Nieuws', u'http://volkskrant.nl/rss/laatstenieuws.rss'),
|
Change Log:
|
||||||
(u'Binnenlands nieuws', u'http://volkskrant.nl/rss/nederland.rss'),
|
Date: 10/15/2010
|
||||||
(u'Buitenlands nieuws', u'http://volkskrant.nl/rss/internationaal.rss'),
|
Feeds updated by Martin Tarenskeen
|
||||||
(u'Economisch nieuws', u'http://volkskrant.nl/rss/economie.rss'),
|
'''
|
||||||
(u'Sportnieuws', u'http://volkskrant.nl/rss/sport.rss'),
|
|
||||||
(u'Kunstnieuws', u'http://volkskrant.nl/rss/kunst.rss'),
|
feeds = [
|
||||||
|
(u'Laatste Nieuws', u'http://www.volkskrant.nl/rss/laatstenieuws.rss'),
|
||||||
|
(u'Binnenland', u'http://www.volkskrant.nl/rss/nederland.rss'),
|
||||||
|
(u'Buitenland', u'http://www.volkskrant.nl/rss/internationaal.rss'),
|
||||||
|
(u'Economie', u'http://www.volkskrant.nl/rss/economie.rss'),
|
||||||
|
(u'Sport', u'http://www.volkskrant.nl/rss/sport.rss'),
|
||||||
|
(u'Cultuur', u'http://www.volkskrant.nl/rss/kunst.rss'),
|
||||||
|
(u'Gezondheid & Wetenschap', u'http://www.volkskrant.nl/rss/wetenschap.rss'),
|
||||||
|
(u'Internet & Media', u'http://www.volkskrant.nl/rss/media.rss') ]
|
||||||
|
|
||||||
#both of these rss feeds link back to the main volksrant.nl url a.k.a Broken
|
|
||||||
#If someone happens to know the correct paths then they can put them in here
|
|
||||||
#(u'Wetenschapsnieuws', u'http://feeds.feedburner.com/DeVolkskrantWetenschap'),
|
|
||||||
#(u'Technologienieuws', u'http://feeds.feedburner.com/vkmedia')
|
|
||||||
]
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
example for formating
|
example for formating
|
||||||
|
Loading…
x
Reference in New Issue
Block a user