Updated recipe for the Time Magazine

This commit is contained in:
Kovid Goyal 2009-10-18 12:24:21 -06:00
parent d28e84166a
commit 767657c460

View File

@ -17,18 +17,36 @@ class Time(BasicNewsRecipe):
# Recipe-level settings; the hunk header above places them in class Time.
# NOTE(review): this text looks like a rendered diff whose +/- markers and
# indentation were lost. extra_css and remove_tags are each assigned a second
# time further down in this hunk, and remove_tags_before / remove_tags_after /
# remove_tags reappear below as commented-out lines, so the assignments from
# extra_css through remove_tags here are presumably the pre-change (removed)
# side of the diff -- confirm against the commit before treating them as live.
no_stylesheets = True
language = 'en'
# Stylesheet text: large headlines, top padding on .fact, Arial for h1,
# blue xx-small bylines and gray x-small timestamps.
extra_css = '''.headline {font-size: large;}
.fact { padding-top: 10pt }
h1 {font-family:Arial,Sans-serif}
.byline{font-family:Arial,Sans-serif; font-size:xx-small ;color:blue}
.timestamp{font-family:Arial,Sans-serif; font-size:x-small ;color:gray}'''
# Boundary markers: the element with id "artHd" and the element with class
# "ltCol" (presumably content outside that range is dropped -- verify against
# the BasicNewsRecipe documentation).
remove_tags_before = dict(id="artHd")
remove_tags_after = {'class':"ltCol"}
# Three attribute selectors: a list of class names, a list of ids, and any
# element carrying target="_blank".
remove_tags = [
{'class':['articleTools', 'enlarge', 'search','socialtools','blogtools','moretools','page','nextUp','next','subnav','RSS','line2','first','ybuzz','articlePagination','chiclets','imgcont','createListLink','rlinks','tabsWrap','pagination']},
{'id':['quigoArticle', 'contentTools', 'articleSideBar', 'header', 'navTop','articleTools','feedmodule','feedmodule3','promos','footer','linksFooter','timeArchive','belt','relatedStories','packages','Features']},
{'target':'_blank'},
]
# Stylesheet text: Arial for h1/h2, .name, .date, .byline and .credits;
# Georgia for .artTxt, #article, .caption and .credit; x-small text for
# names, dates, bylines, photo buckets, captions and credits; links in #CC0000.
# NOTE(review): extra_css is also assigned earlier in this hunk; if both
# assignments were executed in order, this later one would be the value kept.
extra_css = ''' h1 {font-family:Arial,Sans-serif;}
h2 {font-family:Arial,Sans-serif;}
.name{font-family:Arial,Sans-serif; font-size:x-small; }
.date{font-family:Arial,Sans-serif; font-size:x-small ;color:#999999;}
.byline{font-family:Arial,Sans-serif; font-size:x-small ;}
.photoBkt{ font-size:x-small ;}
.vertPhoto{font-size:x-small ;}
.credits{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
.artTxt{font-family:georgia,serif;}
#article{font-family:georgia,serif;}
.caption{font-family:georgia,serif; font-size:x-small;color:#333333;}
.credit{font-family:georgia,serif; font-size:x-small;color:#999999;}
a:link{color:#CC0000;}
'''
# The before/after/remove_tags filtering below is disabled (commented out);
# the keep_only_tags / remove_tags pair that follows is what is active.
# remove_tags_before = dict(id="artHd")
# remove_tags_after = {'class':"ltCol"}
# remove_tags = [
# {'class':['articleTools', 'enlarge', 'search','socialtools','blogtools','moretools','page','nextUp','next','subnav','RSS','line2','first','ybuzz','articlePagination','chiclets','imgcont','createListLink','rlinks','tabsWrap','pagination']},
# {'id':['quigoArticle', 'contentTools', 'articleSideBar', 'header', 'navTop','articleTools','feedmodule','feedmodule3','promos','footer','linksFooter','timeArchive','belt','relatedStories','packages','Features']},
# {'target':'_blank'},
# ]
# Whitelist selectors: the <div id="article">, plus <div>s whose class is one
# of artHd, artTxt, photoBkt, vertPhoto, image or copy.
keep_only_tags = [ dict(name ="div",attrs = {"id" :["article",]}) ,
dict(name ="div",attrs = {"class" :["artHd","artTxt","photoBkt","vertPhoto","image","copy"]}) ,]
# Blacklist selectors: pagination / "next up" / right-column / enlarge <div>s,
# <span class="see">, sidebar/tools/footer/ad <div>s selected by id, and
# <a class="listLink">.
remove_tags = [ dict(name ="div",attrs = {'class':['articlePagination','nextUp',"rtCol","pagination","enlarge",]}),
dict(name ="span",attrs = {'class':['see']}),
dict(name ="div",attrs = {'id':['articleSideBar',"articleTools","articleFooter","cmBotLt","quigoPackage"]}),
dict(name ="a",attrs = {'class':['listLink']}),
]
# Link-following depth (calibre's BasicNewsRecipe.recursions setting).
recursions = 1
# Pattern for the links worth following: a path segment of digits/commas,
# a hyphen, a single digit 2-9, an optional ",<digits>" suffix and then a
# literal ".html" -- presumably pages 2..9 of a multi-page article.
# The dot is escaped so it no longer matches an arbitrary character, and both
# capture groups are kept so group numbering is unchanged.
match_regexps = [r'/[0-9,]+-([2-9])(,\d+)?\.html']
@ -81,20 +99,3 @@ class Time(BasicNewsRecipe):
else:
ans.append(unicode(t))
return u' '.join(ans).replace(u'\xa0', u'').strip()
def postprocess_html(self, soup, first_page):
    """Prune pagination and repeated page furniture from a parsed page.

    The first element with class ``artPag`` is always removed.  When
    ``first_page`` is false, the first ``photoBkt`` and ``artHd`` elements
    and the first ``<p>`` inside the ``artTxt`` element are removed as well
    (presumably because follow-up pages repeat them).

    :param soup: parsed document offering ``find``; found nodes offer
                 ``extract``.
    :param first_page: true when ``soup`` is the first page of the article.
    :return: the same ``soup`` object, modified in place.
    """
    def _discard(node):
        # find() yields None when nothing matched; only detach real nodes.
        if node is not None:
            node.extract()

    _discard(soup.find(attrs={'class':'artPag'}))
    if first_page:
        return soup

    for css_class in ('photoBkt', 'artHd'):
        _discard(soup.find(attrs={'class':css_class}))

    text_block = soup.find(attrs={'class':'artTxt'})
    if text_block is not None:
        _discard(text_block.find('p'))
    return soup