mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
...
This commit is contained in:
parent
e149160e9a
commit
902dc7aad6
46
recipes/bild_de.recipe
Normal file
46
recipes/bild_de.recipe
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    """Recipe that collects the RSS feeds published by www.bild.de."""

    # --- identification ---------------------------------------------------
    title = u'Bild.de'
    __author__ = 'schuster'
    language = 'de'

    # --- download settings ------------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 50
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    # Cover image is taken from the MySpace CDN.
    cover_url = 'http://a3.l3-images.myspacecdn.com/images02/56/0232f842170b4d349779f8379c27e073/l.jpg'

    # --- what to keep from each page ---------------------------------------
    # Content is delimited by the <h2 id="cover"> heading and the
    # <div class="back"> element.
    remove_tags_before = {'name': 'h2', 'attrs': {'id': 'cover'}}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'back'}}

    # --- ad removal --------------------------------------------------------
    # URL pattern of the ad server whose links should be filtered out.
    filter_regexps = [r'.\.smartadserver\.com']

    # --- RSS sources from www.bild.de ---------------------------------------
    feeds = [
        (u'Überblick', u'http://rss.bild.de/bild.xml'),
        (u'News', u'http://rss.bild.de/bild-news.xml'),
        (u'Politik', u'http://rss.bild.de/bild-politik.xml'),
        (u'Unterhaltung', u'http://rss.bild.de/bild-unterhaltung.xml'),
        (u'Sport', u'http://rss.bild.de/bild-sport.xml'),
        (u'Lifestyle', u'http://rss.bild.de/bild-lifestyle.xml'),
        (u'Ratgeber', u'http://rss.bild.de/bild-ratgeber.xml'),
    ]

    def preprocess_html(self, soup):
        """Strip direct links by replacing each <a> with its text.

        Only anchors whose ``.string`` is not None are replaced; all other
        anchors are left untouched. Returns the same ``soup`` object.

        Credit: kiklop74 (sticky thread -> Recipes - Re-usable code).
        """
        for anchor in soup.findAll('a'):
            link_text = anchor.string
            if link_text is None:
                continue
            anchor.replaceWith(link_text)
        return soup

    def skip_ad_pages(self, soup):
        """Always return None, i.e. no page is treated as an ad page here."""
        return None

    def get_article_url(self, article):
        """Return the article's ``id``, or its ``guid`` if ``id`` is absent.

        Used to get the real URL behind the .feedsportal.com redirect.
        The result is None when the feed entry carries neither key.
        """
        fallback = article.get('guid', None)
        return article.get('id', fallback)
|
22
recipes/max_planck.recipe
Normal file
22
recipes/max_planck.recipe
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    """Recipe that downloads the research ("Forschung") feed of www.mpg.de."""

    title = u'Max-Planck-Inst.'
    __author__ = 'schuster'

    # Elements stripped from every page, matched by CSS class, by id, and
    # by tag name respectively.
    remove_tags = [
        dict(attrs={'class': [
            'clearfix', 'lens', 'col2_box_list',
            'col2_box_teaser group_ext no_print', 'dotted_line',
            'col2_box_teaser', 'box_image small', 'bold',
            'col2_box_teaser no_print', 'print_kontakt',
        ]}),
        dict(id=['ie_clearing', 'col2', 'col2_content']),
        dict(name=['script', 'noscript', 'style']),
    ]

    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True

    def print_version(self, url):
        """Map an article URL to its ``http://www.mpg.de/print/<segment>`` form.

        ``<segment>`` is the first path component of ``url`` (the element at
        index 3 after splitting an absolute ``scheme://host/...`` URL on
        ``/``); presumably this is the article id -- verify against the feed.

        If ``url`` has no such component (for example a bare host URL, or a
        URL ending right after the host's slash), the URL is returned
        unchanged instead of raising ``IndexError`` or producing an empty
        ``/print/`` address.
        """
        parts = url.split('/')
        # parts == ['http:', '', '<host>', '<first path segment>', ...]
        if len(parts) < 4 or not parts[3]:
            return url
        return 'http://www.mpg.de/print/' + parts[3]

    feeds = [(u'Forschung', u'http://www.mpg.de/de/forschung.rss')]
|
||||||
|
|
29
recipes/ngz.recipe
Normal file
29
recipes/ngz.recipe
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    """Recipe that downloads the regional RSS feeds of www.ngz-online.de."""

    title = u'NGZ-online'
    __author__ = 'schuster'

    # Article content is delimited by the elements with id "bu" and
    # id "noblock".
    remove_tags_before = dict(id='bu')
    remove_tags_after = dict(id='noblock')

    # Elements stripped from every page, matched by CSS class, by id, and
    # by tag name respectively.
    # NOTE(review): many entries in the id list look like link captions
    # rather than element ids -- kept as-is; confirm against the site markup.
    remove_tags = [
        dict(attrs={'class': [
            'articleTools', 'post-tools', 'side_tool',
            'nextArticleLink clearfix', 'liketext',
        ]}),
        dict(id=[
            'footer', 'toolsRight', 'articleInline', 'navigation', 'archive',
            'side_search', 'blog_sidebar', 'side_tool', 'side_index',
            'Verlinken', 'vorheriger', 'LESERKOMMENTARE', 'bei facebook',
            'bei twitter', 'Schreiben Sie jetzt Ihre Meinung:', 'Thema',
            'Ihr Beitrag', 'Ihr Name',
            'Ich möchte über weitere Lesermeinungen zu diesem Artikel per E-Mail informiert werden.',
            'banneroben', 'bannerrechts', 'inserieren', 'stellen', 'auto',
            'immobilien', 'kleinanzeige', 'tiere', 'ferienwohnung',
            'NGZ Card', 'Mediengruppe RP', 'Werben', 'Newsletter', 'Wetter',
            'RSS', 'Abo', 'Anzeigen', 'Redaktion', 'Schulprojekte', 'Gast',
            'Mein NGZ', 'Nachrichten', 'Sport', 'Wirtschaft', 'Stadt-Infos',
            'Bilderserien', 'Bookmarken', 'del.icio.us', 'Mister Wong',
            'YiGG', 'Webnews', 'Shortnews', 'Twitter', 'Newsider',
            'Facebook', 'StudiVZ/MeinVZ', 'Versenden', 'Drucken',
        ]),
        dict(name=['script', 'noscript', 'style']),
    ]

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True
    cover_url = 'http://www.rhein-kreis-neuss-macht-sport.de/sport/includes/bilder/ngz_logo.jpg'

    def print_version(self, url):
        """Return ``url`` with the ``ot=de.circit.rpo.PopupPageLayout.ot``
        query parameter appended.

        The parameter is joined with ``?`` when ``url`` has no query string
        yet and with ``&`` when it already contains one, so the result never
        carries a second ``?``. Presumably the parameter selects the site's
        popup page layout -- confirm against the site.
        """
        separator = '&' if '?' in url else '?'
        return url + separator + 'ot=de.circit.rpo.PopupPageLayout.ot'

    feeds = [
        (u'Grevenbroich', u'http://www.ngz-online.de/app/feed/rss/grevenbroich'),
        (u'Kreis Neuss', u'http://www.ngz-online.de/app/feed/rss/rheinkreisneuss'),
        (u'Dormagen', u'http://www.ngz-online.de/app/feed/rss/dormagen'),
        (u'J\xfcchen', u'http://www.ngz-online.de/app/feed/rss/juechen'),
        (u'Rommerskirchen', u'http://www.ngz-online.de/app/feed/rss/rommerskirchen'),
    ]
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user