__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal '

'''
Fetch Weltonline.
'''

from calibre.web.feeds.news import BasicNewsRecipe


class weltDe(BasicNewsRecipe):
    """Calibre news recipe for the German newspaper Weltonline (welt.de).

    Articles are collected from the section RSS feeds listed in ``feeds``
    and each article URL is rewritten by ``print_version`` before download.
    Site navigation, advertising and gallery widgets are stripped through
    ``remove_tags``.
    """

    title = 'Weltonline'
    description = 'german newspaper'
    language = 'de'
    __author__ = 'Oliver Niesner'

    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    linearize_tables = True
    no_stylesheets = True
    # NOTE(review): `remove_stylesheets` is not among the documented
    # BasicNewsRecipe options (`no_stylesheets` is); kept unchanged so the
    # class still exposes the attribute -- confirm whether it can be dropped.
    remove_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'
    # NOTE(review): presumably a legacy conversion option; confirm whether
    # current calibre versions still read it.
    html2epub_options = 'base_font_size=10'
    # Override as a class attribute so the value applies to this recipe only,
    # rather than assigning to BasicNewsRecipe itself (which would change the
    # setting for every recipe sharing that base class).
    summary_length = 100

    # Elements removed from every downloaded page. Each spec appears once;
    # listing the same spec twice has no additional effect.
    remove_tags = [
        # Matched by element id.
        dict(id='jumplinks'),
        dict(id='ad1'),
        dict(id='top'),
        dict(id='header'),
        dict(id='additionalNavWrapper'),
        dict(id='fullimage_index'),
        dict(id='additionalNav'),
        dict(id='printMenu'),
        dict(id='topteaser1'),
        dict(id='topteaser2'),
        dict(id='servicesBox'),
        dict(id='servicesNav'),
        dict(id='ad2'),
        dict(id='banner_1'),
        dict(id='ssoInfoTop'),
        dict(id='brandingWrapper'),
        dict(id='links-intern'),
        dict(id='navigation'),
        dict(id='subNav'),
        dict(id='branding'),
        dict(id='searchArea'),
        dict(id='toggleAdvancedSearch'),
        dict(id='mainNav'),
        dict(id='articleInlineMediaBox0'),
        dict(id='sectionSponsor'),
        dict(id='sprucharea'),
        dict(id='xmsg_recommendEmail'),
        dict(id='xmsg_recommendSms'),
        dict(id='xmsg_comment'),
        dict(id='imagebox'),
        dict(id='footerContainer'),
        # Matched by tag name (and CSS class where given).
        dict(name='span'),
        dict(name='div', attrs={'class': 'printURL'}),
        dict(name='ul', attrs={'class': 'clear mainNavigation inline'}),
        dict(name='ul', attrs={'class': 'inline'}),
        dict(name='ul', attrs={'class': 'ubar'}),
        dict(name='hr', attrs={'class': 'ubar'}),
        dict(name='li', attrs={'class': 'counter'}),
        dict(name='li', attrs={'class': 'browseBack'}),
        dict(name='li', attrs={'class': 'browseNext'}),
        dict(name='li', attrs={'class': 'selected'}),
        dict(name='div', attrs={'class': 'floatLeft'}),
        dict(name='div', attrs={'class': 'ad'}),
        dict(name='div', attrs={'class': 'ftBarLeft'}),
        dict(name='div', attrs={'class': 'clear additionalNav'}),
        dict(name='div', attrs={'class': 'inlineBox inlineFurtherLinks'}),
        dict(name='div', attrs={'class': 'inlineBox videoInlineBox'}),
        dict(name='div', attrs={'class': 'inlineGallery'}),
        dict(name='div', attrs={'class': 'ratingBox'}),
        dict(name='div', attrs={'class': 'socialBookmarks clear'}),
        dict(name='div', attrs={'class': 'articleOptions clear'}),
        dict(name='div', attrs={'class': 'noPrint galleryIndex'}),
        dict(name='div', attrs={'class': 'inlineBox inlineTagCloud'}),
        dict(name='div', attrs={
             'class': 'clear module imageGalleryBig bgColor1'}),
        dict(name='div', attrs={
             'class': 'clear module writeComment bgColor1'}),
        dict(name='div', attrs={
             'class': 'clear module textGallery bgColor1'}),
        dict(name='div', attrs={'class': 'moreArtH3'}),
        dict(name='div', attrs={'class': 'jqmWindow'}),
        dict(name='div', attrs={'class': 'clear gap4'}),
        dict(name='div', attrs={'class': 'hidden'}),
        dict(name='div', attrs={'class': 'advertising'}),
        dict(name='div', attrs={'class': 'ad adMarginBottom'}),
        dict(name='div', attrs={'class': 'topLine'}),
        dict(name='div', attrs={'class': 'toplineH2'}),
        dict(name='div', attrs={'class': 'headLineH3'}),
        dict(name='div', attrs={'class': 'print'}),
        dict(name='div', attrs={'class': 'clear menu'}),
        dict(name='div', attrs={'class': 'themenalarm'}),
        dict(name='p', attrs={'class': 'jump'}),
        dict(name='a', attrs={'class': 'commentLink'}),
        dict(name='h2', attrs={'class': 'jumpHeading'}),
        dict(name='ul', attrs={'class': 'optionsSubNav clear'}),
        dict(name='li', attrs={'class': 'next'}),
        dict(name='li', attrs={'class': 'prev'}),
        dict(name='li', attrs={'class': 'last'}),
        dict(name='table', attrs={'class': 'textGallery'}),
        dict(name='li', attrs={'class': 'active'}),
    ]

    # Cut-off marker: the "themenalarm" div.
    remove_tags_after = [dict(name='div', attrs={'class': 'themenalarm'})]

    extra_css = '''
                    h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;}
                    a{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-style:italic;}
                    .dachzeile p{font-family:Arial,Helvetica,sans-serif; font-size: x-small; }
                    h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
                    .artikelTeaser{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-weight:bold; }
                    body{font-family:Arial,Helvetica,sans-serif; }
                    .photo {font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #666666;}
                '''

    # (section title, RSS feed URL) pairs.
    feeds = [
        ('Politik', 'http://welt.de/politik/?service=Rss'),
        ('Deutsche Dinge', 'http://www.welt.de/deutsche-dinge/?service=Rss'),
        ('Wirtschaft', 'http://welt.de/wirtschaft/?service=Rss'),
        ('Finanzen', 'http://welt.de/finanzen/?service=Rss'),
        ('Sport', 'http://welt.de/sport/?service=Rss'),
        ('Webwelt', 'http://welt.de/webwelt/?service=Rss'),
        ('Kultur', 'http://welt.de/kultur/?service=Rss'),
        ('Literarische Welt',
         'http://welt.de/kultur/literarischewelt/?service=Rss'),
        ('Wissenschaft', 'http://welt.de/wissenschaft/?service=Rss'),
        ('Satire', 'http://welt.de/satire/?service=Rss'),
        ('Motor', 'http://welt.de/motor/?service=Rss'),
        ('Vermischtes', 'http://welt.de/vermischtes/?service=Rss'),
    ]

    def print_version(self, url):
        """Return the print-view URL for an article.

        Every ``.html`` in *url* is replaced with ``.html?print=true``; a URL
        without ``.html`` is returned unchanged.
        """
        return url.replace('.html', '.html?print=true')