mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fixed Time magazine recipe for end-of-year issue
This commit is contained in:
parent
19c8d41c2e
commit
7c0a3e2e46
@ -16,7 +16,7 @@ class Guardian(BasicNewsRecipe):
|
||||
language = 'en_GB'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 25
|
||||
max_articles_per_feed = 50
|
||||
remove_javascript = True
|
||||
|
||||
timefmt = ' [%a, %d %b %Y]'
|
||||
|
@ -18,32 +18,37 @@ class Time(BasicNewsRecipe):
|
||||
language = 'en'
|
||||
remove_javascript = True
|
||||
|
||||
extra_css = ''' h1 {font-family:Arial,Sans-serif;}
|
||||
extra_css = ''' h1 {font-family:georgia,serif;color:#000000;}
|
||||
.mainHd{font-family:georgia,serif;color:#000000;}
|
||||
h2 {font-family:Arial,Sans-serif;}
|
||||
.name{font-family:Arial,Sans-serif; font-size:x-small; }
|
||||
.name{font-family:Arial,Sans-serif; font-size:x-small;font-weight:bold; }
|
||||
.date{font-family:Arial,Sans-serif; font-size:x-small ;color:#999999;}
|
||||
.byline{font-family:Arial,Sans-serif; font-size:x-small ;}
|
||||
.photoBkt{ font-size:x-small ;}
|
||||
.vertPhoto{font-size:x-small ;}
|
||||
.credits{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
|
||||
.credit{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
|
||||
.artTxt{font-family:georgia,serif;}
|
||||
#article{font-family:georgia,serif;}
|
||||
#content{font-family:georgia,serif;}
|
||||
.caption{font-family:georgia,serif; font-size:x-small;color:#333333;}
|
||||
.credit{font-family:georgia,serif; font-size:x-small;color:#999999;}
|
||||
a:link{color:#CC0000;}
|
||||
.breadcrumb{font-family:Arial,Sans-serif;font-size:x-small;}
|
||||
'''
|
||||
|
||||
|
||||
|
||||
keep_only_tags = [ dict(name ="div",attrs = {"id" :["article",]}) ,
|
||||
keep_only_tags = [ dict(name ="div",attrs = {"id" :["content"]}) ,
|
||||
dict(name ="div",attrs = {"class" :["artHd","artTxt","photoBkt","vertPhoto","image","copy"]}) ,]
|
||||
remove_tags = [ dict(name ="div",attrs = {'class':['articlePagination','nextUp',"rtCol","pagination","enlarge",]}),
|
||||
remove_tags = [ dict(name ="div",attrs = {'class':['articleFooterNav','listsByTopic','articleTools2','relatedContent','sideContent','topBannerWrap','articlePagination','nextUp',"rtCol","pagination","enlarge","contentTools2",]}),
|
||||
dict(name ="span",attrs = {'class':['see']}),
|
||||
dict(name ="div",attrs = {'id':['articleSideBar',"articleTools","articleFooter","cmBotLt","quigoPackage"]}),
|
||||
dict(name ="div",attrs = {'id':['header','articleSideBar',"articleTools","articleFooter","cmBotLt","quigoPackage"]}),
|
||||
dict(name ="a",attrs = {'class':['listLink']}),
|
||||
dict(name ="ul",attrs = {'id':['shareSocial','tabs']}),
|
||||
dict(name ="li",attrs = {'class':['back']}),
|
||||
dict(name ="ul",attrs = {'class':['navCount']}),
|
||||
]
|
||||
recursions = 1
|
||||
match_regexps = [r'/[0-9,]+-(2|3|4|5|6|7|8|9)(,\d+){0,1}.html']
|
||||
recursions = 10
|
||||
match_regexps = [r'/[0-9,]+-(2|3|4|5|6|7|8|9)(,\d+){0,1}.html',r'http://www.time.com/time/specials/packages/article/.*']
|
||||
|
||||
preprocess_regexps = [(re.compile(
|
||||
r'<meta .+/>'), lambda m:'')]
|
||||
@ -101,3 +106,8 @@ class Time(BasicNewsRecipe):
|
||||
else:
|
||||
ans.append(unicode(t))
|
||||
return u' '.join(ans).replace(u'\xa0', u'').strip()
|
||||
|
||||
def postprocess_html(self,soup,first):
|
||||
for tag in soup.findAll(attrs ={'class':['artPag','pagination']}):
|
||||
tag.extract()
|
||||
return soup
|
||||
|
Loading…
x
Reference in New Issue
Block a user