Fixed Time magazine recipe for end-of-year issue

This commit is contained in:
Kovid Goyal 2009-12-14 09:29:37 -07:00
parent 19c8d41c2e
commit 7c0a3e2e46
2 changed files with 20 additions and 10 deletions

View File

@ -16,7 +16,7 @@ class Guardian(BasicNewsRecipe):
language = 'en_GB'
oldest_article = 7
max_articles_per_feed = 25
max_articles_per_feed = 50
remove_javascript = True
timefmt = ' [%a, %d %b %Y]'

View File

@ -18,32 +18,37 @@ class Time(BasicNewsRecipe):
language = 'en'
remove_javascript = True
extra_css = ''' h1 {font-family:Arial,Sans-serif;}
extra_css = ''' h1 {font-family:georgia,serif;color:#000000;}
.mainHd{font-family:georgia,serif;color:#000000;}
h2 {font-family:Arial,Sans-serif;}
.name{font-family:Arial,Sans-serif; font-size:x-small; }
.name{font-family:Arial,Sans-serif; font-size:x-small;font-weight:bold; }
.date{font-family:Arial,Sans-serif; font-size:x-small ;color:#999999;}
.byline{font-family:Arial,Sans-serif; font-size:x-small ;}
.photoBkt{ font-size:x-small ;}
.vertPhoto{font-size:x-small ;}
.credits{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
.credit{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
.artTxt{font-family:georgia,serif;}
#article{font-family:georgia,serif;}
#content{font-family:georgia,serif;}
.caption{font-family:georgia,serif; font-size:x-small;color:#333333;}
.credit{font-family:georgia,serif; font-size:x-small;color:#999999;}
a:link{color:#CC0000;}
.breadcrumb{font-family:Arial,Sans-serif;font-size:x-small;}
'''
keep_only_tags = [ dict(name ="div",attrs = {"id" :["article",]}) ,
keep_only_tags = [ dict(name ="div",attrs = {"id" :["content"]}) ,
dict(name ="div",attrs = {"class" :["artHd","artTxt","photoBkt","vertPhoto","image","copy"]}) ,]
remove_tags = [ dict(name ="div",attrs = {'class':['articlePagination','nextUp',"rtCol","pagination","enlarge",]}),
remove_tags = [ dict(name ="div",attrs = {'class':['articleFooterNav','listsByTopic','articleTools2','relatedContent','sideContent','topBannerWrap','articlePagination','nextUp',"rtCol","pagination","enlarge","contentTools2",]}),
dict(name ="span",attrs = {'class':['see']}),
dict(name ="div",attrs = {'id':['articleSideBar',"articleTools","articleFooter","cmBotLt","quigoPackage"]}),
dict(name ="div",attrs = {'id':['header','articleSideBar',"articleTools","articleFooter","cmBotLt","quigoPackage"]}),
dict(name ="a",attrs = {'class':['listLink']}),
dict(name ="ul",attrs = {'id':['shareSocial','tabs']}),
dict(name ="li",attrs = {'class':['back']}),
dict(name ="ul",attrs = {'class':['navCount']}),
]
recursions = 1
match_regexps = [r'/[0-9,]+-(2|3|4|5|6|7|8|9)(,\d+){0,1}.html']
recursions = 10
match_regexps = [r'/[0-9,]+-(2|3|4|5|6|7|8|9)(,\d+){0,1}.html',r'http://www.time.com/time/specials/packages/article/.*']
preprocess_regexps = [(re.compile(
r'<meta .+/>'), lambda m:'')]
@ -101,3 +106,8 @@ class Time(BasicNewsRecipe):
else:
ans.append(unicode(t))
return u' '.join(ans).replace(u'\xa0', u'').strip()
def postprocess_html(self, soup, first):
    """Drop pagination elements from the parsed page and return the tree.

    Every element that ``soup.findAll`` matches for the class names
    ``artPag`` or ``pagination`` is detached from the tree via
    ``extract()``.

    :param soup: parsed document exposing ``findAll``; presumably a
        BeautifulSoup tree -- confirm against the recipe framework.
    :param first: accepted as part of the signature; not used here.
    :return: the same ``soup`` object, modified in place.
    """
    pager_classes = ['artPag', 'pagination']
    for node in soup.findAll(attrs={'class': pager_classes}):
        node.extract()
    return soup