This commit is contained in:
Kovid Goyal 2011-05-15 08:34:10 -06:00
parent e149160e9a
commit 902dc7aad6
3 changed files with 97 additions and 0 deletions

46
recipes/bild_de.recipe Normal file
View File

@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    """Calibre news recipe for the Bild.de RSS feeds (rss.bild.de)."""

    # --- identification ---------------------------------------------------
    title = u'Bild.de'
    __author__ = 'schuster'
    language = 'de'

    # --- download limits and generic clean-up switches ---------------------
    oldest_article = 1
    max_articles_per_feed = 50
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    # Cover image is taken from a MySpace-hosted picture.
    cover_url = 'http://a3.l3-images.myspacecdn.com/images02/56/0232f842170b4d349779f8379c27e073/l.jpg'

    # Page region to keep: from the <h2 id="cover"> element up to the
    # <div class="back"> element.
    remove_tags_before = {'name': 'h2', 'attrs': {'id': 'cover'}}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'back'}}

    # Ad filter: URL pattern for the smartadserver.com ad host.
    filter_regexps = [r'.\.smartadserver\.com']

    # RSS sources published by www.bild.de, as (section title, feed URL).
    feeds = [
        (u'Überblick', u'http://rss.bild.de/bild.xml'),
        (u'News', u'http://rss.bild.de/bild-news.xml'),
        (u'Politik', u'http://rss.bild.de/bild-politik.xml'),
        (u'Unterhaltung', u'http://rss.bild.de/bild-unterhaltung.xml'),
        (u'Sport', u'http://rss.bild.de/bild-sport.xml'),
        (u'Lifestyle', u'http://rss.bild.de/bild-lifestyle.xml'),
        (u'Ratgeber', u'http://rss.bild.de/bild-ratgeber.xml'),
    ]

    def preprocess_html(self, soup):
        """Replace anchors by their text and return the same ``soup``.

        Thanks to kiklop74 for the code (see sticky thread -> Recipes -
        Re-usable code). Every ``<a>`` element whose ``string`` is not
        ``None`` is swapped for that string, which removes a lot of direct
        links. Anchors whose ``string`` is ``None`` are left untouched.
        """
        for anchor in soup.findAll('a'):
            link_text = anchor.string
            if link_text is None:
                continue
            anchor.replaceWith(link_text)
        return soup

    def skip_ad_pages(self, soup):
        """Always return ``None``; ``soup`` is not inspected."""
        return None

    def get_article_url(self, article):
        """Return the article's ``id``, else its ``guid``, else ``None``.

        Used to get the real URL behind the .feedsportal.com redirect so
        that the articles themselves are fetched.
        """
        guid_fallback = article.get('guid', None)
        return article.get('id', guid_fallback)

22
recipes/max_planck.recipe Normal file
View File

@ -0,0 +1,22 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    """Calibre news recipe for the 'Max-Planck-Inst.' research feed (www.mpg.de)."""

    # --- identification ---------------------------------------------------
    title = u'Max-Planck-Inst.'
    __author__ = 'schuster'
    language = 'de'

    # --- download limits and generic clean-up switches ---------------------
    oldest_article = 30
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    # Removal selectors, in order: by CSS class, by element id, by tag name.
    remove_tags = [
        {'attrs': {'class': [
            'clearfix',
            'lens',
            'col2_box_list',
            'col2_box_teaser group_ext no_print',
            'dotted_line',
            'col2_box_teaser',
            'box_image small',
            'bold',
            'col2_box_teaser no_print',
            'print_kontakt',
        ]}},
        {'id': ['ie_clearing', 'col2', 'col2_content']},
        {'name': ['script', 'noscript', 'style']},
    ]

    # Single RSS source, as (section title, feed URL).
    feeds = [(u'Forschung', u'http://www.mpg.de/de/forschung.rss')]

    def print_version(self, url):
        """Build the ``http://www.mpg.de/print/...`` address for ``url``.

        ``url`` is split on ``/`` and the piece at index 3 is appended to
        the print prefix. Raises ``IndexError`` when ``url`` has fewer than
        four ``/``-separated pieces.
        """
        path_piece = url.split("/")[3]
        return 'http://www.mpg.de/print/' + path_piece

29
recipes/ngz.recipe Normal file
View File

@ -0,0 +1,29 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    """Calibre news recipe for the NGZ-online regional RSS feeds."""

    # --- identification ---------------------------------------------------
    title = u'NGZ-online'
    __author__ = 'schuster'
    language = 'de'

    # --- download limits and generic clean-up switches ---------------------
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    cover_url = 'http://www.rhein-kreis-neuss-macht-sport.de/sport/includes/bilder/ngz_logo.jpg'

    # Page region to keep: from the element with id "bu" up to the element
    # with id "noblock".
    remove_tags_before = {'id': 'bu'}
    remove_tags_after = {'id': 'noblock'}

    # Removal selectors, in order: by CSS class, by element id, by tag name.
    # NOTE(review): many of the id values below read like link captions
    # rather than element ids - confirm against the site before pruning.
    remove_tags = [
        {'attrs': {'class': [
            'articleTools',
            'post-tools',
            'side_tool',
            'nextArticleLink clearfix',
            'liketext',
        ]}},
        {'id': [
            'footer', 'toolsRight', 'articleInline', 'navigation', 'archive',
            'side_search', 'blog_sidebar', 'side_tool', 'side_index',
            'Verlinken', 'vorheriger', 'LESERKOMMENTARE', 'bei facebook',
            'bei twitter', 'Schreiben Sie jetzt Ihre Meinung:', 'Thema',
            'Ihr Beitrag', 'Ihr Name',
            'Ich möchte über weitere Lesermeinungen zu diesem Artikel per E-Mail informiert werden.',
            'banneroben', 'bannerrechts', 'inserieren', 'stellen', 'auto',
            'immobilien', 'kleinanzeige', 'tiere', 'ferienwohnung',
            'NGZ Card', 'Mediengruppe RP', 'Werben', 'Newsletter', 'Wetter',
            'RSS', 'Abo', 'Anzeigen', 'Redaktion', 'Schulprojekte', 'Gast',
            'Mein NGZ', 'Nachrichten', 'Sport', 'Wirtschaft', 'Stadt-Infos',
            'Bilderserien', 'Bookmarken', 'del.icio.us', 'Mister Wong',
            'YiGG', 'Webnews', 'Shortnews', 'Twitter', 'Newsider',
            'Facebook', 'StudiVZ/MeinVZ', 'Versenden', 'Drucken',
        ]},
        {'name': ['script', 'noscript', 'style']},
    ]

    # Regional RSS sources, as (section title, feed URL).
    feeds = [
        (u'Grevenbroich', u'http://www.ngz-online.de/app/feed/rss/grevenbroich'),
        (u'Kreis Neuss', u'http://www.ngz-online.de/app/feed/rss/rheinkreisneuss'),
        (u'Dormagen', u'http://www.ngz-online.de/app/feed/rss/dormagen'),
        (u'J\xfcchen', u'http://www.ngz-online.de/app/feed/rss/juechen'),
        (u'Rommerskirchen', u'http://www.ngz-online.de/app/feed/rss/rommerskirchen'),
    ]

    def print_version(self, url):
        """Return ``url`` with the popup-layout query string appended.

        The suffix is added unconditionally; ``url`` is not checked for an
        existing query string.
        """
        popup_query = '?ot=de.circit.rpo.PopupPageLayout.ot'
        return url + popup_query