Improved recipe for Welt Online

This commit is contained in:
Kovid Goyal 2010-06-04 07:05:42 -06:00
parent 0c40b8e8ee
commit a60b414c7e

View File

@@ -21,12 +21,16 @@ class weltDe(BasicNewsRecipe):
no_stylesheets = True
remove_stylesheets = True
remove_javascript = True
encoding = 'iso-8859-1'
BasicNewsRecipe.summary_length = 200
encoding = 'utf-8'
html2epub_options = 'linearize_tables = True\nbase_font_size2=10'
BasicNewsRecipe.summary_length = 100
remove_tags = [dict(id='jumplinks'),
dict(id='ad1'),
dict(id='top'),
dict(id='header'),
dict(id='additionalNavWrapper'),
dict(id='fullimage_index'),
dict(id='additionalNav'),
dict(id='printMenu'),
@@ -35,6 +39,8 @@ class weltDe(BasicNewsRecipe):
dict(id='servicesBox'),
dict(id='servicesNav'),
dict(id='ad2'),
dict(id='banner_1'),
dict(id='ssoInfoTop'),
dict(id='brandingWrapper'),
dict(id='links-intern'),
dict(id='navigation'),
@@ -53,10 +59,22 @@ class weltDe(BasicNewsRecipe):
dict(id='xmsg_comment'),
dict(id='additionalNavWrapper'),
dict(id='imagebox'),
dict(id='footerContainer'),
#dict(id=''),
dict(name='span'),
dict(name='div', attrs={'class':'printURL'}),
dict(name='ul', attrs={'class':'clear mainNavigation inline'}),
dict(name='ul', attrs={'class':'inline'}),
dict(name='ul', attrs={'class':'ubar'}),
dict(name='hr', attrs={'class':'ubar'}),
dict(name='li', attrs={'class':'counter'}),
dict(name='li', attrs={'class':'browseBack'}),
dict(name='li', attrs={'class':'browseNext'}),
dict(name='li', attrs={'class':'selected'}),
dict(name='div', attrs={'class':'floatLeft'}),
dict(name='div', attrs={'class':'ad'}),
dict(name='div', attrs={'class':'ftBarLeft'}),
dict(name='div', attrs={'class':'clear additionalNav'}),
dict(name='div', attrs={'class':'inlineBox inlineFurtherLinks'}),
dict(name='div', attrs={'class':'inlineBox videoInlineBox'}),
dict(name='div', attrs={'class':'inlineGallery'}),
@@ -65,6 +83,23 @@ class weltDe(BasicNewsRecipe):
dict(name='div', attrs={'class':'articleOptions clear'}),
dict(name='div', attrs={'class':'noPrint galleryIndex'}),
dict(name='div', attrs={'class':'inlineBox inlineTagCloud'}),
dict(name='div', attrs={'class':'clear module writeComment bgColor1'}),
dict(name='div', attrs={'class':'clear module textGallery bgColor1'}),
dict(name='div', attrs={'class':'clear module socialMedia bgColor1'}),
dict(name='div', attrs={'class':'clear module continuativeLinks'}),
dict(name='div', attrs={'class':'moreArtH3'}),
dict(name='div', attrs={'class':'jqmWindow'}),
dict(name='div', attrs={'class':'clear gap4'}),
dict(name='div', attrs={'class':'hidden'}),
dict(name='div', attrs={'class':'advertising'}),
dict(name='div', attrs={'class':'ad adMarginBottom'}),
dict(name='div', attrs={'class':'ad'}),
dict(name='div', attrs={'class':'topLine'}),
dict(name='div', attrs={'class':'toplineH2'}),
dict(name='div', attrs={'class':'headLineH3'}),
dict(name='div', attrs={'class':'print'}),
dict(name='div', attrs={'class':'clear menu'}),
dict(name='div', attrs={'class':'clear galleryContent'}),
dict(name='p', attrs={'class':'jump'}),
dict(name='a', attrs={'class':'commentLink'}),
dict(name='h2', attrs={'class':'jumpHeading'}),
@@ -75,7 +110,7 @@ class weltDe(BasicNewsRecipe):
dict(name='table', attrs={'class':'textGallery'}),
dict(name='li', attrs={'class':'active'})]
remove_tags_after = [dict(id='tw_link_widget')]
remove_tags_after = [dict(name='div', attrs={'class':'clear departmentLine'})]
extra_css = '''
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;}
@ -87,7 +122,6 @@ class weltDe(BasicNewsRecipe):
.photo {font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #666666;} '''
feeds = [ ('Politik', 'http://welt.de/politik/?service=Rss'),
('Deutsche Dinge', 'http://www.welt.de/deutsche-dinge/?service=Rss'),
('Wirtschaft', 'http://welt.de/wirtschaft/?service=Rss'),
('Finanzen', 'http://welt.de/finanzen/?service=Rss'),
('Sport', 'http://welt.de/sport/?service=Rss'),
@@ -101,4 +135,5 @@ class weltDe(BasicNewsRecipe):
def print_version(self, url):
return url.replace ('.html', '.html?print=yes')
return url.replace ('.html', '.html?print=true')