diff --git a/.bzrignore b/.bzrignore index d2a2d592dd..4dc2b4b9d4 100644 --- a/.bzrignore +++ b/.bzrignore @@ -14,6 +14,7 @@ resources/scripts.pickle resources/ebook-convert-complete.pickle resources/builtin_recipes.xml resources/builtin_recipes.zip +resources/template-functions.json setup/installer/windows/calibre/build.log src/calibre/translations/.errors src/cssutils/.svn/ diff --git a/recipes/brigitte_de.recipe b/recipes/brigitte_de.recipe new file mode 100644 index 0000000000..860d5176ac --- /dev/null +++ b/recipes/brigitte_de.recipe @@ -0,0 +1,36 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe(BasicNewsRecipe): + + title = u'Brigitte.de' + __author__ = 'schuster' + oldest_article = 14 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + language = 'de' + remove_javascript = True + remove_empty_feeds = True + timeout = 10 + cover_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg' + masthead_url = 'http://www.medienmilch.de/typo3temp/pics/Brigitte-Logo_d5feb4a6e4.jpg' + + + remove_tags = [dict(attrs={'class':['linklist', 'head', 'indent right relatedContent', 'artikel-meta segment', 'segment', 'comment commentFormWrapper segment borderBG', 'segment borderBG comments', 'segment borderBG box', 'center', 'segment nextPageLink', 'inCar']}), + dict(id=['header', 'artTools', 'context', 'interact', 'footer-navigation', 'bwNet', 'copy', 'keyboardNavigationHint']), + dict(name=['hjtrs', 'kud'])] + + feeds = [(u'Mode', u'http://www.brigitte.de/mode/feed.rss'), + (u'Beauty', u'http://www.brigitte.de/beauty/feed.rss'), + (u'Luxus', u'http://www.brigitte.de/luxus/feed.rss'), + (u'Figur', u'http://www.brigitte.de/figur/feed.rss'), + (u'Gesundheit', u'http://www.brigitte.de/gesundheit/feed.rss'), + (u'Liebe&Sex', u'http://www.brigitte.de/liebe-sex/feed.rss'), + (u'Gesellschaft', u'http://www.brigitte.de/gesellschaft/feed.rss'), + (u'Kultur', u'http://www.brigitte.de/kultur/feed.rss'), + (u'Reise', u'http://www.brigitte.de/reise/feed.rss'), + (u'Kochen', u'http://www.brigitte.de/kochen/feed.rss'), + (u'Wohnen', u'http://www.brigitte.de/wohnen/feed.rss'), + (u'Job', u'http://www.brigitte.de/job/feed.rss'), + (u'Erfahrungen', u'http://www.brigitte.de/erfahrungen/feed.rss'), +] diff --git a/recipes/express_de.recipe b/recipes/express_de.recipe index 255538b08e..10595b9d92 100644 --- a/recipes/express_de.recipe +++ b/recipes/express_de.recipe @@ -1,5 +1,4 @@ from calibre.web.feeds.news import BasicNewsRecipe - class AdvancedUserRecipe1303841067(BasicNewsRecipe): title = u'Express.de' @@ -12,7 +11,6 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe): extra_css = ''' h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;} h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;} - ''' remove_javascript = True remove_tags_befor = [dict(name='div', attrs={'class':'Datum'})] @@ -25,6 +23,7 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe): dict(id='Logo'), dict(id='MainLinkSpacer'), dict(id='MainLinks'), + dict(id='ContainerPfad'), #neu dict(title='Diese Seite Bookmarken'), dict(name='span'), @@ -44,7 +43,8 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe): dict(name='div', attrs={'class':'HeaderSearch'}), dict(name='div', attrs={'class':'sbutton'}), dict(name='div', attrs={'class':'active'}), - + dict(name='div', attrs={'class':'MoreNews'}), #neu + dict(name='div', attrs={'class':'ContentBoxSubline'}) #neu ] @@ -68,7 +68,5 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe): (u'Fortuna D~Dorf', u'http://www.express.de/sport/fussball/fortuna/-/3292/3292/-/view/asFeed/-/index.xml'), (u'Basketball News', u'http://www.express.de/sport/basketball/-/3190/3190/-/view/asFeed/-/index.xml'), (u'Big Brother', u'http://www.express.de/news/promi-show/big-brother/-/2402/2402/-/view/asFeed/-/index.xml'), + ] - - -] diff --git a/recipes/heise_online.recipe b/recipes/heise_online.recipe new file mode 100644 index 0000000000..f83ff8126b --- /dev/null +++ b/recipes/heise_online.recipe @@ -0,0 +1,52 @@ +from calibre.web.feeds.news import BasicNewsRecipe +class AdvancedUserRecipe(BasicNewsRecipe): + + title = 'Heise-online' + description = 'News vom Heise-Verlag' + __author__ = 'schuster' + use_embedded_content = False + language = 'de' + oldest_article = 2 + max_articles_per_feed = 35 + rescale_images = True + remove_empty_feeds = True + timeout = 5 + no_stylesheets = True + + + remove_tags_after = dict(name ='p', attrs={'class':'editor'}) + remove_tags = [dict(id='navi_top_container'), + dict(id='navi_bottom'), + dict(id='mitte_rechts'), + dict(id='navigation'), + dict(id='subnavi'), + dict(id='social_bookmarks'), + dict(id='permalink'), + dict(id='content_foren'), + dict(id='seiten_navi'), + dict(id='adbottom'), + dict(id='sitemap')] + + feeds = [ + ('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'), + ('Auto', 'http://www.heise.de/autos/rss/news.rdf'), + ('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'), + ('Mac&i', 'http://www.heise.de/mac-and-i/news.rdf'), + ('Mobile ', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'), + ('Netz ', 'http://www.heise.de/netze/rss/netze-atom.xml'), + ('Open ', 'http://www.heise.de/open/news/news-atom.xml'), + ('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'), + ('Security ', 'http://www.heise.de/security/news/news-atom.xml'), + ('C`t', 'http://www.heise.de/ct/rss/artikel-atom.xml'), + ('iX', 'http://www.heise.de/ix/news/news.rdf'), + ('Mach-flott', 'http://www.heise.de/mach-flott/rss/mach-flott-atom.xml'), + ('Blog: Babel-Bulletin', 'http://www.heise.de/developer/rss/babel-bulletin/blog.rdf'), + ('Blog: Der Dotnet-Doktor', 'http://www.heise.de/developer/rss/dotnet-doktor/blog.rdf'), + ('Blog: Bernds Management-Welt', 'http://www.heise.de/developer/rss/bernds-management-welt/blog.rdf'), + ('Blog: IT conversation', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'), + ('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf') +] + + def print_version(self, url): + return url + '?view=print' + diff --git a/recipes/max_planck.recipe b/recipes/max_planck.recipe index e9bf62008a..cf778a7374 100644 --- a/recipes/max_planck.recipe +++ b/recipes/max_planck.recipe @@ -3,9 +3,6 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe): title = u'Max-Planck-Inst.' __author__ = 'schuster' - remove_tags = [dict(attrs={'class':['clearfix', 'lens', 'col2_box_list', 'col2_box_teaser group_ext no_print', 'dotted_line', 'col2_box_teaser', 'box_image small', 'bold', 'col2_box_teaser no_print', 'print_kontakt']}), - dict(id=['ie_clearing', 'col2', 'col2_content']), - dict(name=['script', 'noscript', 'style'])] oldest_article = 30 max_articles_per_feed = 100 no_stylesheets = True @@ -13,6 +10,11 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe): language = 'de' remove_javascript = True + remove_tags = [dict(attrs={'class':['box_url', 'print_kontakt']}), + dict(id=['skiplinks'])] + + + def print_version(self, url): split_url = url.split("/") print_url = 'http://www.mpg.de/print/' + split_url[3] diff --git a/recipes/newsweek.recipe b/recipes/newsweek.recipe index a31706e257..0cae4275b0 100644 --- a/recipes/newsweek.recipe +++ b/recipes/newsweek.recipe @@ -69,7 +69,11 @@ class Newsweek(BasicNewsRecipe): for section, shref in self.newsweek_sections(): self.log('Processing section', section, shref) articles = [] - soups = [self.index_to_soup(shref)] + try: + soups = [self.index_to_soup(shref)] + except: + self.log.warn('Section %s not found, skipping'%section) + continue na = soups[0].find('a', rel='next') if na: soups.append(self.index_to_soup(self.BASE_URL+na['href'])) diff --git a/recipes/polizeipress_de.recipe b/recipes/polizeipress_de.recipe new file mode 100644 index 0000000000..15114881ea --- /dev/null +++ b/recipes/polizeipress_de.recipe @@ -0,0 +1,35 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe(BasicNewsRecipe): + + title = u'Polizeipresse - Deutschland' + __author__ = 'schuster' + description = 'Tagesaktuelle "Polizeiberichte" aus ganz Deutschland (bis auf Ortsebene).' 'Um deinen Ort/Stadt/Kreis usw. einzubinden, gehe auf "http://www.presseportal.de/polizeipresse/" und suche im oberen "Suchfeld" nach dem Namen.' 'Oberhalb der Suchergebnisse (Folgen:) auf den üblichen link zu den RSS-Feeds klicken und den RSS-link im Rezept unter "feeds" eintragen wie üblich.' 'Die Auswahl von Orten kann vereinfacht werden wenn man den Suchbegriff wie folgt eingibt:' '"Stadt-Ort".' + oldest_article = 21 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + language = 'de' + remove_javascript = True + masthead_url = 'http://www.alt-heliservice.de/images/34_BPOL_Logo_4C_g_schutzbereich.jpg' + cover_url = 'http://berlinstadtservice.de/buerger/Bundespolizei-Logo.png' + + remove_tags = [ + dict(name='div', attrs={'id':'logo'}), + dict(name='div', attrs={'id':'origin'}), + dict(name='pre', attrs={'class':'xml_contact'})] + + def print_version(self,url): + segments = url.split('/') + printURL = 'http://www.presseportal.de/print.htx?nr=' + '/'.join(segments[5:6]) + '&type=polizei' + return printURL + + feeds = [(u'Frimmerdorf', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Grevenbroich-frimmersdorf&w=public_service'), + (u'Neurath', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Grevenbroich-neurath&w=public_service'), + (u'Gustorf', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Grevenbroich-gustorf&w=public_service'), + (u'Neuenhausen', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Grevenbroich-neuenhausen&w=public_service'), + (u'Wevelinghoven', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Grevenbroich-Wevelinghoven&w=public_service'), + (u'Grevenbroich ges.', u'http://www.presseportal.de/rss/rss2_vts.htx?q=grevenbroich&w=public_service'), + (u'Kreis Neuss ges.', u'http://www.presseportal.de/rss/rss2_vts.htx?q=Rhein-Kreis+Neuss&w=public_service'), + ] + diff --git a/resources/content_server/browse/browse.html b/resources/content_server/browse/browse.html index de78e432d7..6a9697dc06 100644 --- a/resources/content_server/browse/browse.html +++ b/resources/content_server/browse/browse.html @@ -20,8 +20,8 @@ - - + +