diff --git a/src/calibre/web/feeds/recipes/recipe_hna.py b/src/calibre/web/feeds/recipes/recipe_hna.py index 40193336d1..c4faec94ba 100644 --- a/src/calibre/web/feeds/recipes/recipe_hna.py +++ b/src/calibre/web/feeds/recipes/recipe_hna.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' ''' -Fetch Hessisch Niedersächsische Allgemeine. +Fetch Hessisch Niedersachsische Allgemeine. ''' from calibre.web.feeds.news import BasicNewsRecipe diff --git a/src/calibre/web/feeds/recipes/recipe_linuxdevices.py b/src/calibre/web/feeds/recipes/recipe_linuxdevices.py index 04db6b02d5..cd914e96ad 100644 --- a/src/calibre/web/feeds/recipes/recipe_linuxdevices.py +++ b/src/calibre/web/feeds/recipes/recipe_linuxdevices.py @@ -1,80 +1,78 @@ -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' - -''' -Fetch Linuxdevices. -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Sueddeutsche(BasicNewsRecipe): - - title = u'Linuxdevices' - description = 'News about Linux driven Hardware' - __author__ = 'Oliver Niesner' - use_embedded_content = False - timefmt = ' [%a, %d %b %Y]' - language = _('English') - max_articles_per_feed = 50 - no_stylesheets = True - encoding = 'latin1' - - remove_tags_after = [dict(id='nointelliTXT')] - filter_regexps = [r'ad\.doubleclick\.net'] - - - remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}), - dict(name='div', attrs={'class':'bannerSky'}), - dict(name='div', attrs={'class':'footerLinks'}), - dict(name='div', attrs={'class':'seitenanfang'}), - dict(name='td', attrs={'class':'mar5'}), - dict(name='td', attrs={'class':'mar5'}), - dict(name='table', attrs={'class':'pageAktiv'}), - dict(name='table', attrs={'class':'xartable'}), - dict(name='table', attrs={'class':'wpnavi'}), - dict(name='table', attrs={'class':'bgcontent absatz'}), - dict(name='table', attrs={'class':'footer'}), - dict(name='table', attrs={'class':'artikelBox'}), - dict(name='table', attrs={'class':'kommentare'}), - dict(name='table', attrs={'class':'pageBoxBot'}), - #dict(name='table', attrs={'with':'100%'}), - dict(name='td', attrs={'nowrap':'nowrap'}), - dict(name='td', attrs={'valign':'middle'}), - dict(name='td', attrs={'align':'left'}), - dict(name='td', attrs={'align':'center'}), - dict(name='td', attrs={'height':'5'}), - dict(name='div', attrs={'class':'artikelBox navigatorBox'}), - dict(name='div', attrs={'class':'similar-article-box'}), - dict(name='div', attrs={'class':'videoBigHack'}), - dict(name='td', attrs={'class':'artikelDruckenRight'}), - dict(name='td', attrs={'class':'width="200"'}), - dict(name='a', attrs={'href':'/news'}), - dict(name='a', attrs={'href':'/'}), - dict(name='a', attrs={'href':'/articles'}), - dict(name='a', attrs={'href':'/cgi-bin/survey/survey.cgi'}), - dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}), - dict(name='iframe'), - dict(name='form'), - #dict(name='tr', attrs={'td':'Click here to learn'}), - dict(name='span', attrs={'class':'hidePrint'}), - dict(id='headerLBox'), - dict(id='nointelliTXT'), - dict(id='rechteSpalte'), - dict(id='newsticker-list-small'), - dict(id='ntop5'), - dict(id='ntop5send'), - dict(id='ntop5commented'), - dict(id='nnav-bgheader'), - dict(id='nnav-headerteaser'), - dict(id='nnav-head'), - dict(id='nnav-top'), - dict(id='nnav-logodiv'), - dict(id='nnav-logo'), - dict(id='nnav-oly'), - dict(id='readcomment')] - - - - feeds = [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ] - +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal ' + +''' +Fetch Linuxdevices. +''' + +from calibre.web.feeds.news import BasicNewsRecipe + + +class Sueddeutsche(BasicNewsRecipe): + + title = u'Linuxdevices' + description = 'News about Linux driven Hardware' + __author__ = 'Oliver Niesner' + use_embedded_content = False + timefmt = ' [%a %d %b %Y]' + max_articles_per_feed = 50 + no_stylesheets = True + html2epub_options = 'linearize_tables = True\nbase_font_size2=14' + encoding = 'latin1' + + + remove_tags_after = [dict(id='nointelliTXT')] + filter_regexps = [r'ad\.doubleclick\.net'] + + remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}), + dict(name='div', attrs={'class':'bannerSky'}), + dict(name='div', attrs={'class':'footerLinks'}), + dict(name='div', attrs={'class':'seitenanfang'}), + dict(name='td', attrs={'class':'mar5'}), + dict(name='td', attrs={'class':'mar5'}), + dict(name='table', attrs={'class':'pageAktiv'}), + dict(name='table', attrs={'class':'xartable'}), + dict(name='table', attrs={'class':'wpnavi'}), + dict(name='table', attrs={'class':'bgcontent absatz'}), + dict(name='table', attrs={'class':'footer'}), + dict(name='table', attrs={'class':'artikelBox'}), + dict(name='table', attrs={'class':'kommentare'}), + dict(name='table', attrs={'class':'pageBoxBot'}), + dict(name='td', attrs={'nowrap':'nowrap'}), + dict(name='td', attrs={'valign':'middle'}), + dict(name='td', attrs={'align':'left'}), + dict(name='td', attrs={'align':'center'}), + dict(name='td', attrs={'height':'5'}), + dict(name='div', attrs={'class':'artikelBox navigatorBox'}), + dict(name='div', attrs={'class':'similar-article-box'}), + dict(name='div', attrs={'class':'videoBigHack'}), + dict(name='td', attrs={'class':'artikelDruckenRight'}), + dict(name='td', attrs={'class':'width="200"'}), + dict(name='a', attrs={'href':'/news'}), + dict(name='a', attrs={'href':'/'}), + dict(name='a', attrs={'href':'/articles'}), + dict(name='a', attrs={'href':'/cgi-bin/survey/survey.cgi'}), + dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}), + dict(name='iframe'), + dict(name='form'), + dict(name='span', attrs={'class':'hidePrint'}), + dict(id='headerLBox'), + dict(id='nointelliTXT'), + dict(id='rechteSpalte'), + dict(id='newsticker-list-small'), + dict(id='ntop5'), + dict(id='ntop5send'), + dict(id='ntop5commented'), + dict(id='nnav-bgheader'), + dict(id='nnav-headerteaser'), + dict(id='nnav-head'), + dict(id='nnav-top'), + dict(id='nnav-logodiv'), + dict(id='nnav-logo'), + dict(id='nnav-oly'), + dict(id='readcomment')] + + + + feeds = [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ] + diff --git a/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py b/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py index 52f1583408..7ba656e1d5 100644 --- a/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py +++ b/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py @@ -8,26 +8,19 @@ Fetch tomshardware. from calibre.web.feeds.news import BasicNewsRecipe -class TomsHardwareDe(BasicNewsRecipe): - - title = 'Tom\'s Hardware German' - description = 'Computer news in german' +class cdnet(BasicNewsRecipe): + + title = 'tomshardware' + description = 'computer news in german' __author__ = 'Oliver Niesner' use_embedded_content = False timefmt = ' [%d %b %Y]' max_articles_per_feed = 50 - language = _('German') no_stylesheets = True + language = _('German') encoding = 'utf-8' - #preprocess_regexps = \ -# [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in -# [ -# (r'<84>', lambda match: ''), -# (r'<93>', lambda match: ''), -# ] -# ] - + remove_tags = [dict(id='outside-advert'), dict(id='advertRightWhite'), dict(id='header-advert'), @@ -36,9 +29,15 @@ class TomsHardwareDe(BasicNewsRecipe): dict(id='header-top'), dict(id='header-tools'), dict(id='nbComment'), + dict(id='commentTools'), dict(id='internalSidebar'), dict(id='header-news-infos'), + dict(id='header-news-tools'), dict(id='breadcrumbs'), + dict(id='emailTools'), + dict(id='bookmarkTools'), + dict(id='printTools'), + dict(id='header-nextNews'), dict(id=''), dict(name='div', attrs={'class':'pyjama'}), dict(name='href', attrs={'class':'comment'}), @@ -47,8 +46,10 @@ class TomsHardwareDe(BasicNewsRecipe): dict(name='div', attrs={'class':'greyBox clearfix'}), dict(id='')] #remove_tags_before = [dict(id='header-news-title')] - remove_tags_after = [dict(name='div', attrs={'class':'news-elm'})] + remove_tags_after = [dict(name='div', attrs={'class':'btmGreyTables'})] #remove_tags_after = [dict(name='div', attrs={'class':'intelliTXT'})] - - feeds = [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ] - + + feeds = [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ] + + +