Updated recipes for Linux devices and Tom's Hardware (German)

This commit is contained in:
Kovid Goyal 2009-04-15 13:00:01 -07:00
parent 9d8e8dd8b9
commit 332dbf4444
3 changed files with 97 additions and 98 deletions

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' '''
Fetch Hessisch Niedersächsische Allgemeine. Fetch Hessisch Niedersachsische Allgemeine.
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe

View File

@ -1,80 +1,78 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' '''
Fetch Linuxdevices. Fetch Linuxdevices.
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Sueddeutsche(BasicNewsRecipe): class Sueddeutsche(BasicNewsRecipe):
title = u'Linuxdevices' title = u'Linuxdevices'
description = 'News about Linux driven Hardware' description = 'News about Linux driven Hardware'
__author__ = 'Oliver Niesner' __author__ = 'Oliver Niesner'
use_embedded_content = False use_embedded_content = False
timefmt = ' [%a, %d %b %Y]' timefmt = ' [%a %d %b %Y]'
language = _('English') max_articles_per_feed = 50
max_articles_per_feed = 50 no_stylesheets = True
no_stylesheets = True html2epub_options = 'linearize_tables = True\nbase_font_size2=14'
encoding = 'latin1' encoding = 'latin1'
remove_tags_after = [dict(id='nointelliTXT')]
filter_regexps = [r'ad\.doubleclick\.net'] remove_tags_after = [dict(id='nointelliTXT')]
filter_regexps = [r'ad\.doubleclick\.net']
remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}), remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}),
dict(name='div', attrs={'class':'bannerSky'}), dict(name='div', attrs={'class':'bannerSky'}),
dict(name='div', attrs={'class':'footerLinks'}), dict(name='div', attrs={'class':'footerLinks'}),
dict(name='div', attrs={'class':'seitenanfang'}), dict(name='div', attrs={'class':'seitenanfang'}),
dict(name='td', attrs={'class':'mar5'}), dict(name='td', attrs={'class':'mar5'}),
dict(name='td', attrs={'class':'mar5'}), dict(name='td', attrs={'class':'mar5'}),
dict(name='table', attrs={'class':'pageAktiv'}), dict(name='table', attrs={'class':'pageAktiv'}),
dict(name='table', attrs={'class':'xartable'}), dict(name='table', attrs={'class':'xartable'}),
dict(name='table', attrs={'class':'wpnavi'}), dict(name='table', attrs={'class':'wpnavi'}),
dict(name='table', attrs={'class':'bgcontent absatz'}), dict(name='table', attrs={'class':'bgcontent absatz'}),
dict(name='table', attrs={'class':'footer'}), dict(name='table', attrs={'class':'footer'}),
dict(name='table', attrs={'class':'artikelBox'}), dict(name='table', attrs={'class':'artikelBox'}),
dict(name='table', attrs={'class':'kommentare'}), dict(name='table', attrs={'class':'kommentare'}),
dict(name='table', attrs={'class':'pageBoxBot'}), dict(name='table', attrs={'class':'pageBoxBot'}),
#dict(name='table', attrs={'with':'100%'}), dict(name='td', attrs={'nowrap':'nowrap'}),
dict(name='td', attrs={'nowrap':'nowrap'}), dict(name='td', attrs={'valign':'middle'}),
dict(name='td', attrs={'valign':'middle'}), dict(name='td', attrs={'align':'left'}),
dict(name='td', attrs={'align':'left'}), dict(name='td', attrs={'align':'center'}),
dict(name='td', attrs={'align':'center'}), dict(name='td', attrs={'height':'5'}),
dict(name='td', attrs={'height':'5'}), dict(name='div', attrs={'class':'artikelBox navigatorBox'}),
dict(name='div', attrs={'class':'artikelBox navigatorBox'}), dict(name='div', attrs={'class':'similar-article-box'}),
dict(name='div', attrs={'class':'similar-article-box'}), dict(name='div', attrs={'class':'videoBigHack'}),
dict(name='div', attrs={'class':'videoBigHack'}), dict(name='td', attrs={'class':'artikelDruckenRight'}),
dict(name='td', attrs={'class':'artikelDruckenRight'}), dict(name='td', attrs={'class':'width="200"'}),
dict(name='td', attrs={'class':'width="200"'}), dict(name='a', attrs={'href':'/news'}),
dict(name='a', attrs={'href':'/news'}), dict(name='a', attrs={'href':'/'}),
dict(name='a', attrs={'href':'/'}), dict(name='a', attrs={'href':'/articles'}),
dict(name='a', attrs={'href':'/articles'}), dict(name='a', attrs={'href':'/cgi-bin/survey/survey.cgi'}),
dict(name='a', attrs={'href':'/cgi-bin/survey/survey.cgi'}), dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}),
dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}), dict(name='iframe'),
dict(name='iframe'), dict(name='form'),
dict(name='form'), dict(name='span', attrs={'class':'hidePrint'}),
#dict(name='tr', attrs={'td':'Click here to learn'}), dict(id='headerLBox'),
dict(name='span', attrs={'class':'hidePrint'}), dict(id='nointelliTXT'),
dict(id='headerLBox'), dict(id='rechteSpalte'),
dict(id='nointelliTXT'), dict(id='newsticker-list-small'),
dict(id='rechteSpalte'), dict(id='ntop5'),
dict(id='newsticker-list-small'), dict(id='ntop5send'),
dict(id='ntop5'), dict(id='ntop5commented'),
dict(id='ntop5send'), dict(id='nnav-bgheader'),
dict(id='ntop5commented'), dict(id='nnav-headerteaser'),
dict(id='nnav-bgheader'), dict(id='nnav-head'),
dict(id='nnav-headerteaser'), dict(id='nnav-top'),
dict(id='nnav-head'), dict(id='nnav-logodiv'),
dict(id='nnav-top'), dict(id='nnav-logo'),
dict(id='nnav-logodiv'), dict(id='nnav-oly'),
dict(id='nnav-logo'), dict(id='readcomment')]
dict(id='nnav-oly'),
dict(id='readcomment')]
feeds = [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ]
feeds = [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ]

View File

@ -8,26 +8,19 @@ Fetch tomshardware.
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class TomsHardwareDe(BasicNewsRecipe): class cdnet(BasicNewsRecipe):
title = 'Tom\'s Hardware German' title = 'tomshardware'
description = 'Computer news in german' description = 'computer news in german'
__author__ = 'Oliver Niesner' __author__ = 'Oliver Niesner'
use_embedded_content = False use_embedded_content = False
timefmt = ' [%d %b %Y]' timefmt = ' [%d %b %Y]'
max_articles_per_feed = 50 max_articles_per_feed = 50
language = _('German')
no_stylesheets = True no_stylesheets = True
language = _('German')
encoding = 'utf-8' encoding = 'utf-8'
#preprocess_regexps = \
# [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
# [
# (r'<84>', lambda match: ''),
# (r'<93>', lambda match: ''),
# ]
# ]
remove_tags = [dict(id='outside-advert'), remove_tags = [dict(id='outside-advert'),
dict(id='advertRightWhite'), dict(id='advertRightWhite'),
dict(id='header-advert'), dict(id='header-advert'),
@ -36,9 +29,15 @@ class TomsHardwareDe(BasicNewsRecipe):
dict(id='header-top'), dict(id='header-top'),
dict(id='header-tools'), dict(id='header-tools'),
dict(id='nbComment'), dict(id='nbComment'),
dict(id='commentTools'),
dict(id='internalSidebar'), dict(id='internalSidebar'),
dict(id='header-news-infos'), dict(id='header-news-infos'),
dict(id='header-news-tools'),
dict(id='breadcrumbs'), dict(id='breadcrumbs'),
dict(id='emailTools'),
dict(id='bookmarkTools'),
dict(id='printTools'),
dict(id='header-nextNews'),
dict(id=''), dict(id=''),
dict(name='div', attrs={'class':'pyjama'}), dict(name='div', attrs={'class':'pyjama'}),
dict(name='href', attrs={'class':'comment'}), dict(name='href', attrs={'class':'comment'}),
@ -47,8 +46,10 @@ class TomsHardwareDe(BasicNewsRecipe):
dict(name='div', attrs={'class':'greyBox clearfix'}), dict(name='div', attrs={'class':'greyBox clearfix'}),
dict(id='')] dict(id='')]
#remove_tags_before = [dict(id='header-news-title')] #remove_tags_before = [dict(id='header-news-title')]
remove_tags_after = [dict(name='div', attrs={'class':'news-elm'})] remove_tags_after = [dict(name='div', attrs={'class':'btmGreyTables'})]
#remove_tags_after = [dict(name='div', attrs={'class':'intelliTXT'})] #remove_tags_after = [dict(name='div', attrs={'class':'intelliTXT'})]
feeds = [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ] feeds = [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ]