Updated recipes for Linux devices and Tom's Hardware (German)

This commit is contained in:
Kovid Goyal 2009-04-15 13:00:01 -07:00
parent 9d8e8dd8b9
commit 332dbf4444
3 changed files with 97 additions and 98 deletions

View File

@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' '''
Fetch Hessisch Niedersächsische Allgemeine. Fetch Hessisch Niedersachsische Allgemeine.
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe

View File

@@ -14,16 +14,16 @@ class Sueddeutsche(BasicNewsRecipe):
description = 'News about Linux driven Hardware' description = 'News about Linux driven Hardware'
__author__ = 'Oliver Niesner' __author__ = 'Oliver Niesner'
use_embedded_content = False use_embedded_content = False
timefmt = ' [%a, %d %b %Y]' timefmt = ' [%a %d %b %Y]'
language = _('English')
max_articles_per_feed = 50 max_articles_per_feed = 50
no_stylesheets = True no_stylesheets = True
html2epub_options = 'linearize_tables = True\nbase_font_size2=14'
encoding = 'latin1' encoding = 'latin1'
remove_tags_after = [dict(id='nointelliTXT')] remove_tags_after = [dict(id='nointelliTXT')]
filter_regexps = [r'ad\.doubleclick\.net'] filter_regexps = [r'ad\.doubleclick\.net']
remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}), remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}),
dict(name='div', attrs={'class':'bannerSky'}), dict(name='div', attrs={'class':'bannerSky'}),
dict(name='div', attrs={'class':'footerLinks'}), dict(name='div', attrs={'class':'footerLinks'}),
@@ -38,7 +38,6 @@ class Sueddeutsche(BasicNewsRecipe):
dict(name='table', attrs={'class':'artikelBox'}), dict(name='table', attrs={'class':'artikelBox'}),
dict(name='table', attrs={'class':'kommentare'}), dict(name='table', attrs={'class':'kommentare'}),
dict(name='table', attrs={'class':'pageBoxBot'}), dict(name='table', attrs={'class':'pageBoxBot'}),
#dict(name='table', attrs={'with':'100%'}),
dict(name='td', attrs={'nowrap':'nowrap'}), dict(name='td', attrs={'nowrap':'nowrap'}),
dict(name='td', attrs={'valign':'middle'}), dict(name='td', attrs={'valign':'middle'}),
dict(name='td', attrs={'align':'left'}), dict(name='td', attrs={'align':'left'}),
@@ -56,7 +55,6 @@ class Sueddeutsche(BasicNewsRecipe):
dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}), dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}),
dict(name='iframe'), dict(name='iframe'),
dict(name='form'), dict(name='form'),
#dict(name='tr', attrs={'td':'Click here to learn'}),
dict(name='span', attrs={'class':'hidePrint'}), dict(name='span', attrs={'class':'hidePrint'}),
dict(id='headerLBox'), dict(id='headerLBox'),
dict(id='nointelliTXT'), dict(id='nointelliTXT'),

View File

@@ -8,25 +8,18 @@ Fetch tomshardware.
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class TomsHardwareDe(BasicNewsRecipe): class cdnet(BasicNewsRecipe):
title = 'Tom\'s Hardware German' title = 'tomshardware'
description = 'Computer news in german' description = 'computer news in german'
__author__ = 'Oliver Niesner' __author__ = 'Oliver Niesner'
use_embedded_content = False use_embedded_content = False
timefmt = ' [%d %b %Y]' timefmt = ' [%d %b %Y]'
max_articles_per_feed = 50 max_articles_per_feed = 50
language = _('German')
no_stylesheets = True no_stylesheets = True
language = _('German')
encoding = 'utf-8' encoding = 'utf-8'
#preprocess_regexps = \
# [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
# [
# (r'<84>', lambda match: ''),
# (r'<93>', lambda match: ''),
# ]
# ]
remove_tags = [dict(id='outside-advert'), remove_tags = [dict(id='outside-advert'),
dict(id='advertRightWhite'), dict(id='advertRightWhite'),
@@ -36,9 +29,15 @@ class TomsHardwareDe(BasicNewsRecipe):
dict(id='header-top'), dict(id='header-top'),
dict(id='header-tools'), dict(id='header-tools'),
dict(id='nbComment'), dict(id='nbComment'),
dict(id='commentTools'),
dict(id='internalSidebar'), dict(id='internalSidebar'),
dict(id='header-news-infos'), dict(id='header-news-infos'),
dict(id='header-news-tools'),
dict(id='breadcrumbs'), dict(id='breadcrumbs'),
dict(id='emailTools'),
dict(id='bookmarkTools'),
dict(id='printTools'),
dict(id='header-nextNews'),
dict(id=''), dict(id=''),
dict(name='div', attrs={'class':'pyjama'}), dict(name='div', attrs={'class':'pyjama'}),
dict(name='href', attrs={'class':'comment'}), dict(name='href', attrs={'class':'comment'}),
@@ -47,8 +46,10 @@ class TomsHardwareDe(BasicNewsRecipe):
dict(name='div', attrs={'class':'greyBox clearfix'}), dict(name='div', attrs={'class':'greyBox clearfix'}),
dict(id='')] dict(id='')]
#remove_tags_before = [dict(id='header-news-title')] #remove_tags_before = [dict(id='header-news-title')]
remove_tags_after = [dict(name='div', attrs={'class':'news-elm'})] remove_tags_after = [dict(name='div', attrs={'class':'btmGreyTables'})]
#remove_tags_after = [dict(name='div', attrs={'class':'intelliTXT'})] #remove_tags_after = [dict(name='div', attrs={'class':'intelliTXT'})]
feeds = [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ] feeds = [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ]