Heise Online by schuster and improved express.de and max planck

This commit is contained in:
Kovid Goyal 2011-06-05 08:09:52 -06:00
parent 022f28086c
commit 4018fb48a5
3 changed files with 61 additions and 9 deletions

View File

@ -1,5 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
title = u'Express.de'
@ -12,7 +11,6 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
extra_css = '''
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
'''
remove_javascript = True
remove_tags_befor = [dict(name='div', attrs={'class':'Datum'})]
@ -25,6 +23,7 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
dict(id='Logo'),
dict(id='MainLinkSpacer'),
dict(id='MainLinks'),
dict(id='ContainerPfad'), #neu
dict(title='Diese Seite Bookmarken'),
dict(name='span'),
@ -44,7 +43,8 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
dict(name='div', attrs={'class':'HeaderSearch'}),
dict(name='div', attrs={'class':'sbutton'}),
dict(name='div', attrs={'class':'active'}),
dict(name='div', attrs={'class':'MoreNews'}), #neu
dict(name='div', attrs={'class':'ContentBoxSubline'}) #neu
]
@ -68,7 +68,5 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
(u'Fortuna D~Dorf', u'http://www.express.de/sport/fussball/fortuna/-/3292/3292/-/view/asFeed/-/index.xml'),
(u'Basketball News', u'http://www.express.de/sport/basketball/-/3190/3190/-/view/asFeed/-/index.xml'),
(u'Big Brother', u'http://www.express.de/news/promi-show/big-brother/-/2402/2402/-/view/asFeed/-/index.xml'),
]
]

View File

@ -0,0 +1,52 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe(BasicNewsRecipe):
    """Calibre news recipe for heise online (feeds hosted on www.heise.de).

    The class body only assigns configuration attributes and overrides
    ``print_version``. The attribute names follow the calibre
    ``BasicNewsRecipe`` recipe API; consult that documentation for their
    exact semantics.
    """

    # Recipe metadata.
    title = 'Heise-online'
    description = 'News vom Heise-Verlag'
    __author__ = 'schuster'
    language = 'de'

    # Download / conversion settings.
    use_embedded_content = False
    oldest_article = 2
    max_articles_per_feed = 35
    rescale_images = True
    remove_empty_feeds = True
    timeout = 5
    no_stylesheets = True

    # Page clean-up: a marker element (<p class="editor">) and a list of
    # element ids (navigation, bookmarks, forum, ads, sitemap, ...).
    remove_tags_after = dict(name='p', attrs={'class': 'editor'})
    remove_tags = [
        dict(id='navi_top_container'),
        dict(id='navi_bottom'),
        dict(id='mitte_rechts'),
        dict(id='navigation'),
        dict(id='subnavi'),
        dict(id='social_bookmarks'),
        dict(id='permalink'),
        dict(id='content_foren'),
        dict(id='seiten_navi'),
        dict(id='adbottom'),
        dict(id='sitemap'),
    ]

    # (feed title, feed URL) pairs. Titles are kept exactly as originally
    # written, including trailing spaces.
    feeds = [
        ('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'),
        ('Auto', 'http://www.heise.de/autos/rss/news.rdf'),
        ('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
        ('Mac&i', 'http://www.heise.de/mac-and-i/news.rdf'),
        ('Mobile ', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
        ('Netz ', 'http://www.heise.de/netze/rss/netze-atom.xml'),
        ('Open ', 'http://www.heise.de/open/news/news-atom.xml'),
        ('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
        ('Security ', 'http://www.heise.de/security/news/news-atom.xml'),
        ('C`t', 'http://www.heise.de/ct/rss/artikel-atom.xml'),
        ('iX', 'http://www.heise.de/ix/news/news.rdf'),
        ('Mach-flott', 'http://www.heise.de/mach-flott/rss/mach-flott-atom.xml'),
        ('Blog: Babel-Bulletin', 'http://www.heise.de/developer/rss/babel-bulletin/blog.rdf'),
        ('Blog: Der Dotnet-Doktor', 'http://www.heise.de/developer/rss/dotnet-doktor/blog.rdf'),
        ('Blog: Bernds Management-Welt', 'http://www.heise.de/developer/rss/bernds-management-welt/blog.rdf'),
        ('Blog: IT conversation', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
        ('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf'),
    ]

    def print_version(self, url):
        """Return ``url`` with a ``view=print`` query parameter appended.

        When ``url`` already contains a query string the parameter is
        joined with ``&``; otherwise a new query string is started with
        ``?``. Blindly appending ``?view=print`` to a URL that already has
        a query would yield a malformed URL (``...?a=b?view=print``).
        """
        separator = '&' if '?' in url else '?'
        return url + separator + 'view=print'

View File

@ -3,9 +3,6 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
title = u'Max-Planck-Inst.'
__author__ = 'schuster'
remove_tags = [dict(attrs={'class':['clearfix', 'lens', 'col2_box_list', 'col2_box_teaser group_ext no_print', 'dotted_line', 'col2_box_teaser', 'box_image small', 'bold', 'col2_box_teaser no_print', 'print_kontakt']}),
dict(id=['ie_clearing', 'col2', 'col2_content']),
dict(name=['script', 'noscript', 'style'])]
oldest_article = 30
max_articles_per_feed = 100
no_stylesheets = True
@ -13,6 +10,11 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
language = 'de'
remove_javascript = True
remove_tags = [dict(attrs={'class':['box_url', 'print_kontakt']}),
dict(id=['skiplinks'])]
def print_version(self, url):
split_url = url.split("/")
print_url = 'http://www.mpg.de/print/' + split_url[3]