Updated recipes for Linuxdevices and Tom's Hardware (German)

This commit is contained in:
Kovid Goyal 2009-04-15 13:00:01 -07:00
parent 9d8e8dd8b9
commit 332dbf4444
3 changed files with 97 additions and 98 deletions

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Fetch Hessisch Niedersächsische Allgemeine.
Fetch Hessisch Niedersachsische Allgemeine.
'''
from calibre.web.feeds.news import BasicNewsRecipe

View File

@ -1,80 +1,78 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Fetch Linuxdevices.
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Sueddeutsche(BasicNewsRecipe):
    """Recipe for the Linuxdevices headline feed.

    The recipe is purely declarative: it sets class attributes that the
    BasicNewsRecipe base class reads; it defines no methods of its own.

    NOTE(review): the class is called ``Sueddeutsche`` although the title
    and feed are Linuxdevices. The name is left as-is here because it is
    the block's public identifier -- confirm whether it can be renamed.
    """

    # --- Metadata ---------------------------------------------------------
    title = u'Linuxdevices'
    description = 'News about Linux driven Hardware'
    __author__ = 'Oliver Niesner'
    language = _('English')

    # --- Download / conversion settings -----------------------------------
    use_embedded_content = False
    timefmt = ' [%a, %d %b %Y]'
    max_articles_per_feed = 50
    no_stylesheets = True
    encoding = 'latin1'

    # --- Page clean-up ----------------------------------------------------
    # The element with id 'nointelliTXT' is the cut-off marker; the same id
    # also appears in remove_tags below.
    remove_tags_after = [dict(id='nointelliTXT')]

    # Regular expression matching the literal host ad.doubleclick.net.
    filter_regexps = [r'ad\.doubleclick\.net']

    # Tag specifications to strip from every page. The original list named
    # the td/class='mar5' entry twice; it is listed once here.
    remove_tags = [
        dict(name='div', attrs={'class': 'bannerSuperBanner'}),
        dict(name='div', attrs={'class': 'bannerSky'}),
        dict(name='div', attrs={'class': 'footerLinks'}),
        dict(name='div', attrs={'class': 'seitenanfang'}),
        dict(name='td', attrs={'class': 'mar5'}),
        dict(name='table', attrs={'class': 'pageAktiv'}),
        dict(name='table', attrs={'class': 'xartable'}),
        dict(name='table', attrs={'class': 'wpnavi'}),
        dict(name='table', attrs={'class': 'bgcontent absatz'}),
        dict(name='table', attrs={'class': 'footer'}),
        dict(name='table', attrs={'class': 'artikelBox'}),
        dict(name='table', attrs={'class': 'kommentare'}),
        dict(name='table', attrs={'class': 'pageBoxBot'}),
        dict(name='td', attrs={'nowrap': 'nowrap'}),
        dict(name='td', attrs={'valign': 'middle'}),
        dict(name='td', attrs={'align': 'left'}),
        dict(name='td', attrs={'align': 'center'}),
        dict(name='td', attrs={'height': '5'}),
        dict(name='div', attrs={'class': 'artikelBox navigatorBox'}),
        dict(name='div', attrs={'class': 'similar-article-box'}),
        dict(name='div', attrs={'class': 'videoBigHack'}),
        dict(name='td', attrs={'class': 'artikelDruckenRight'}),
        # NOTE(review): this keys on a *class* attribute whose value is the
        # text width="200"; presumably attrs={'width': '200'} was intended.
        # Kept unchanged until that can be confirmed against real pages.
        dict(name='td', attrs={'class': 'width="200"'}),
        dict(name='a', attrs={'href': '/news'}),
        dict(name='a', attrs={'href': '/'}),
        dict(name='a', attrs={'href': '/articles'}),
        dict(name='a', attrs={'href': '/cgi-bin/survey/survey.cgi'}),
        dict(name='a', attrs={'href': '/cgi-bin/board/UltraBoard.pl'}),
        dict(name='iframe'),
        dict(name='form'),
        dict(name='span', attrs={'class': 'hidePrint'}),
        dict(id='headerLBox'),
        dict(id='nointelliTXT'),
        dict(id='rechteSpalte'),
        dict(id='newsticker-list-small'),
        dict(id='ntop5'),
        dict(id='ntop5send'),
        dict(id='ntop5commented'),
        dict(id='nnav-bgheader'),
        dict(id='nnav-headerteaser'),
        dict(id='nnav-head'),
        dict(id='nnav-top'),
        dict(id='nnav-logodiv'),
        dict(id='nnav-logo'),
        dict(id='nnav-oly'),
        dict(id='readcomment'),
    ]

    # --- Feeds: (title, URL) pairs ----------------------------------------
    feeds = [
        (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss'),
    ]
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Fetch Linuxdevices.
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Sueddeutsche(BasicNewsRecipe):
    """Recipe for the Linuxdevices headline feed.

    The recipe is purely declarative: it sets class attributes that the
    BasicNewsRecipe base class reads; it defines no methods of its own.

    NOTE(review): the class is called ``Sueddeutsche`` although the title
    and feed are Linuxdevices. The name is left as-is here because it is
    the block's public identifier -- confirm whether it can be renamed.
    """

    # --- Metadata ---------------------------------------------------------
    title = u'Linuxdevices'
    description = 'News about Linux driven Hardware'
    __author__ = 'Oliver Niesner'

    # --- Download / conversion settings -----------------------------------
    use_embedded_content = False
    timefmt = ' [%a %d %b %Y]'
    max_articles_per_feed = 50
    no_stylesheets = True
    # Two newline-separated option assignments passed through as one string.
    html2epub_options = 'linearize_tables = True\nbase_font_size2=14'
    encoding = 'latin1'

    # --- Page clean-up ----------------------------------------------------
    # The element with id 'nointelliTXT' is the cut-off marker; the same id
    # also appears in remove_tags below.
    remove_tags_after = [dict(id='nointelliTXT')]

    # Regular expression matching the literal host ad.doubleclick.net.
    filter_regexps = [r'ad\.doubleclick\.net']

    # Tag specifications to strip from every page. The original list named
    # the td/class='mar5' entry twice; it is listed once here.
    remove_tags = [
        dict(name='div', attrs={'class': 'bannerSuperBanner'}),
        dict(name='div', attrs={'class': 'bannerSky'}),
        dict(name='div', attrs={'class': 'footerLinks'}),
        dict(name='div', attrs={'class': 'seitenanfang'}),
        dict(name='td', attrs={'class': 'mar5'}),
        dict(name='table', attrs={'class': 'pageAktiv'}),
        dict(name='table', attrs={'class': 'xartable'}),
        dict(name='table', attrs={'class': 'wpnavi'}),
        dict(name='table', attrs={'class': 'bgcontent absatz'}),
        dict(name='table', attrs={'class': 'footer'}),
        dict(name='table', attrs={'class': 'artikelBox'}),
        dict(name='table', attrs={'class': 'kommentare'}),
        dict(name='table', attrs={'class': 'pageBoxBot'}),
        dict(name='td', attrs={'nowrap': 'nowrap'}),
        dict(name='td', attrs={'valign': 'middle'}),
        dict(name='td', attrs={'align': 'left'}),
        dict(name='td', attrs={'align': 'center'}),
        dict(name='td', attrs={'height': '5'}),
        dict(name='div', attrs={'class': 'artikelBox navigatorBox'}),
        dict(name='div', attrs={'class': 'similar-article-box'}),
        dict(name='div', attrs={'class': 'videoBigHack'}),
        dict(name='td', attrs={'class': 'artikelDruckenRight'}),
        # NOTE(review): this keys on a *class* attribute whose value is the
        # text width="200"; presumably attrs={'width': '200'} was intended.
        # Kept unchanged until that can be confirmed against real pages.
        dict(name='td', attrs={'class': 'width="200"'}),
        dict(name='a', attrs={'href': '/news'}),
        dict(name='a', attrs={'href': '/'}),
        dict(name='a', attrs={'href': '/articles'}),
        dict(name='a', attrs={'href': '/cgi-bin/survey/survey.cgi'}),
        dict(name='a', attrs={'href': '/cgi-bin/board/UltraBoard.pl'}),
        dict(name='iframe'),
        dict(name='form'),
        dict(name='span', attrs={'class': 'hidePrint'}),
        dict(id='headerLBox'),
        dict(id='nointelliTXT'),
        dict(id='rechteSpalte'),
        dict(id='newsticker-list-small'),
        dict(id='ntop5'),
        dict(id='ntop5send'),
        dict(id='ntop5commented'),
        dict(id='nnav-bgheader'),
        dict(id='nnav-headerteaser'),
        dict(id='nnav-head'),
        dict(id='nnav-top'),
        dict(id='nnav-logodiv'),
        dict(id='nnav-logo'),
        dict(id='nnav-oly'),
        dict(id='readcomment'),
    ]

    # --- Feeds: (title, URL) pairs ----------------------------------------
    feeds = [
        (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss'),
    ]

View File

@ -8,26 +8,19 @@ Fetch tomshardware.
from calibre.web.feeds.news import BasicNewsRecipe
class TomsHardwareDe(BasicNewsRecipe):
title = 'Tom\'s Hardware German'
description = 'Computer news in german'
class cdnet(BasicNewsRecipe):
title = 'tomshardware'
description = 'computer news in german'
__author__ = 'Oliver Niesner'
use_embedded_content = False
timefmt = ' [%d %b %Y]'
max_articles_per_feed = 50
language = _('German')
no_stylesheets = True
language = _('German')
encoding = 'utf-8'
#preprocess_regexps = \
# [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
# [
# (r'<84>', lambda match: ''),
# (r'<93>', lambda match: ''),
# ]
# ]
remove_tags = [dict(id='outside-advert'),
dict(id='advertRightWhite'),
dict(id='header-advert'),
@ -36,9 +29,15 @@ class TomsHardwareDe(BasicNewsRecipe):
dict(id='header-top'),
dict(id='header-tools'),
dict(id='nbComment'),
dict(id='commentTools'),
dict(id='internalSidebar'),
dict(id='header-news-infos'),
dict(id='header-news-tools'),
dict(id='breadcrumbs'),
dict(id='emailTools'),
dict(id='bookmarkTools'),
dict(id='printTools'),
dict(id='header-nextNews'),
dict(id=''),
dict(name='div', attrs={'class':'pyjama'}),
dict(name='href', attrs={'class':'comment'}),
@ -47,8 +46,10 @@ class TomsHardwareDe(BasicNewsRecipe):
dict(name='div', attrs={'class':'greyBox clearfix'}),
dict(id='')]
#remove_tags_before = [dict(id='header-news-title')]
remove_tags_after = [dict(name='div', attrs={'class':'news-elm'})]
remove_tags_after = [dict(name='div', attrs={'class':'btmGreyTables'})]
#remove_tags_after = [dict(name='div', attrs={'class':'intelliTXT'})]
feeds = [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ]
feeds = [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ]