From a60b414c7e088d5b45531e7cc198349f09a4a4b9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 4 Jun 2010 07:05:42 -0600 Subject: [PATCH] Improved recipe for Welt Online --- resources/recipes/welt.recipe | 45 +++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/resources/recipes/welt.recipe b/resources/recipes/welt.recipe index 59d900f53e..89e0d42c09 100644 --- a/resources/recipes/welt.recipe +++ b/resources/recipes/welt.recipe @@ -21,12 +21,16 @@ class weltDe(BasicNewsRecipe): no_stylesheets = True remove_stylesheets = True remove_javascript = True - encoding = 'iso-8859-1' - BasicNewsRecipe.summary_length = 200 + encoding = 'utf-8' + html2epub_options = 'linearize_tables = True\nbase_font_size2=10' + BasicNewsRecipe.summary_length = 100 remove_tags = [dict(id='jumplinks'), dict(id='ad1'), + dict(id='top'), + dict(id='header'), + dict(id='additionalNavWrapper'), dict(id='fullimage_index'), dict(id='additionalNav'), dict(id='printMenu'), @@ -35,6 +39,8 @@ class weltDe(BasicNewsRecipe): dict(id='servicesBox'), dict(id='servicesNav'), dict(id='ad2'), + dict(id='banner_1'), + dict(id='ssoInfoTop'), dict(id='brandingWrapper'), dict(id='links-intern'), dict(id='navigation'), @@ -53,10 +59,22 @@ class weltDe(BasicNewsRecipe): dict(id='xmsg_comment'), dict(id='additionalNavWrapper'), dict(id='imagebox'), + dict(id='footerContainer'), #dict(id=''), dict(name='span'), dict(name='div', attrs={'class':'printURL'}), + dict(name='ul', attrs={'class':'clear mainNavigation inline'}), + dict(name='ul', attrs={'class':'inline'}), + dict(name='ul', attrs={'class':'ubar'}), + dict(name='hr', attrs={'class':'ubar'}), + dict(name='li', attrs={'class':'counter'}), + dict(name='li', attrs={'class':'browseBack'}), + dict(name='li', attrs={'class':'browseNext'}), + dict(name='li', attrs={'class':'selected'}), + dict(name='div', attrs={'class':'floatLeft'}), dict(name='div', attrs={'class':'ad'}), + dict(name='div', attrs={'class':'ftBarLeft'}), + dict(name='div', attrs={'class':'clear additionalNav'}), dict(name='div', attrs={'class':'inlineBox inlineFurtherLinks'}), dict(name='div', attrs={'class':'inlineBox videoInlineBox'}), dict(name='div', attrs={'class':'inlineGallery'}), @@ -65,6 +83,23 @@ class weltDe(BasicNewsRecipe): dict(name='div', attrs={'class':'articleOptions clear'}), dict(name='div', attrs={'class':'noPrint galleryIndex'}), dict(name='div', attrs={'class':'inlineBox inlineTagCloud'}), + dict(name='div', attrs={'class':'clear module writeComment bgColor1'}), + dict(name='div', attrs={'class':'clear module textGallery bgColor1'}), + dict(name='div', attrs={'class':'clear module socialMedia bgColor1'}), + dict(name='div', attrs={'class':'clear module continuativeLinks'}), + dict(name='div', attrs={'class':'moreArtH3'}), + dict(name='div', attrs={'class':'jqmWindow'}), + dict(name='div', attrs={'class':'clear gap4'}), + dict(name='div', attrs={'class':'hidden'}), + dict(name='div', attrs={'class':'advertising'}), + dict(name='div', attrs={'class':'ad adMarginBottom'}), + dict(name='div', attrs={'class':'ad'}), + dict(name='div', attrs={'class':'topLine'}), + dict(name='div', attrs={'class':'toplineH2'}), + dict(name='div', attrs={'class':'headLineH3'}), + dict(name='div', attrs={'class':'print'}), + dict(name='div', attrs={'class':'clear menu'}), + dict(name='div', attrs={'class':'clear galleryContent'}), dict(name='p', attrs={'class':'jump'}), dict(name='a', attrs={'class':'commentLink'}), dict(name='h2', attrs={'class':'jumpHeading'}), @@ -75,7 +110,7 @@ class weltDe(BasicNewsRecipe): dict(name='table', attrs={'class':'textGallery'}), dict(name='li', attrs={'class':'active'})] - remove_tags_after = [dict(id='tw_link_widget')] + remove_tags_after = [dict(name='div', attrs={'class':'clear departmentLine'})] extra_css = ''' h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;} @@ -87,7 +122,6 @@ class weltDe(BasicNewsRecipe): .photo {font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #666666;} ''' feeds = [ ('Politik', 'http://welt.de/politik/?service=Rss'), - ('Deutsche Dinge', 'http://www.welt.de/deutsche-dinge/?service=Rss'), ('Wirtschaft', 'http://welt.de/wirtschaft/?service=Rss'), ('Finanzen', 'http://welt.de/finanzen/?service=Rss'), ('Sport', 'http://welt.de/sport/?service=Rss'), @@ -101,4 +135,5 @@ class weltDe(BasicNewsRecipe): def print_version(self, url): - return url.replace ('.html', '.html?print=yes') + return url.replace ('.html', '.html?print=true') +