mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
81 lines
4.2 KiB
Plaintext
81 lines
4.2 KiB
Plaintext
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
|
|
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    """Calibre news recipe for Express.de (language ``'de'``).

    Fetches the RSS feeds listed in ``feeds`` and cleans each article page by
    dropping the header, navigation, search and "kalaydo" button elements
    named in ``remove_tags``, and by flattening text-only links.
    """

    title = u'Express.de'
    __author__ = 'schuster'
    oldest_article = 2
    max_articles_per_feed = 50
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    extra_css = '''
        h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
        h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
    '''
    remove_javascript = True

    # Must be spelled ``remove_tags_before`` to be picked up by
    # BasicNewsRecipe; the previous spelling (``remove_tags_befor``) was
    # silently ignored, so nothing ahead of the 'Datum' div was trimmed.
    remove_tags_before = [dict(name='div', attrs={'class': 'Datum'})]
    remove_tags_after = [dict(name='div', attrs={'class': 'MoreNews'})]

    # Elements stripped from every article page.
    remove_tags = [
        dict(id='kalaydo'),
        dict(id='Header'),
        dict(id='Searchline'),
        dict(id='MainNav'),
        dict(id='Logo'),
        dict(id='MainLinkSpacer'),
        dict(id='MainLinks'),
        dict(id='ContainerPfad'),  # new
        dict(title='Diese Seite Bookmarken'),

        dict(name='span'),
        dict(name='div', attrs={'class': 'spacer_leftneu'}),
        dict(name='div', attrs={'class': 'button kalaydologo'}),
        dict(name='div', attrs={'class': 'button stellenneu'}),
        dict(name='div', attrs={'class': 'button autoneu'}),
        dict(name='div', attrs={'class': 'button immobilienneu'}),
        dict(name='div', attrs={'class': 'button kleinanzeigen'}),
        dict(name='div', attrs={'class': 'button tiereneu'}),
        dict(name='div', attrs={'class': 'button ferienwohnungen'}),
        dict(name='div', attrs={'class': 'button inserierenneu'}),
        dict(name='div', attrs={'class': 'spacer_rightneu'}),
        dict(name='div', attrs={'class': 'spacer_rightcorner'}),
        dict(name='div', attrs={'class': 'HeaderMetaNav'}),
        dict(name='div', attrs={'class': 'HeaderSearchOption'}),
        dict(name='div', attrs={'class': 'HeaderSearch'}),
        dict(name='div', attrs={'class': 'sbutton'}),
        dict(name='div', attrs={'class': 'active'}),
        dict(name='div', attrs={'class': 'MoreNews'}),  # new
        dict(name='div', attrs={'class': 'ContentBoxSubline'}),  # new
    ]

    def preprocess_html(self, soup):
        """Replace each ``<a>`` whose ``.string`` is set with that string.

        Anchors whose ``.string`` is ``None`` are left untouched.

        :param soup: parsed article document exposing ``findAll``
        :return: the same ``soup`` object, modified in place
        """
        for alink in soup.findAll('a'):
            text = alink.string
            if text is not None:
                alink.replaceWith(text)
        return soup

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Top-Themen',
         u'http://www.express.de/home/-/2126/2126/-/view/asFeed/-/index.xml'),
        (u'Regional - Köln',
         u'http://www.express.de/regional/koeln/-/2856/2856/-/view/asFeed/-/index.xml'),
        (u'Regional - Bonn',
         u'http://www.express.de/regional/bonn/-/2860/2860/-/view/asFeed/-/index.xml'),
        (u'Regional - Düsseldorf',
         u'http://www.express.de/regional/duesseldorf/-/2858/2858/-/view/asFeed/-/index.xml'),
        (u'Regional - Region',
         u'http://www.express.de/regional/-/2178/2178/-/view/asFeed/-/index.xml'),
        (u'Sport-News',
         u'http://www.express.de/sport/-/2176/2176/-/view/asFeed/-/index.xml'),
        (u'Fussball-News',
         u'http://www.express.de/sport/fussball/-/3186/3186/-/view/asFeed/-/index.xml'),
        (u'1.FC Köln News',
         u'http://www.express.de/sport/fussball/fc-koeln/-/3192/3192/-/view/asFeed/-/index.xml'),
        (u'Alemannia Aachen News',
         u'http://www.express.de/sport/fussball/alemannia/-/3290/3290/-/view/asFeed/-/index.xml'),
        (u'Borussia M~Gladbach',
         u'http://www.express.de/sport/fussball/gladbach/-/3286/3286/-/view/asFeed/-/index.xml'),
        (u'Fortuna D~Dorf',
         u'http://www.express.de/sport/fussball/fortuna/-/3292/3292/-/view/asFeed/-/index.xml'),
        (u'Basketball News',
         u'http://www.express.de/sport/basketball/-/3190/3190/-/view/asFeed/-/index.xml'),
        (u'Big Brother',
         u'http://www.express.de/news/promi-show/big-brother/-/2402/2402/-/view/asFeed/-/index.xml'),
    ]
|