New recipe for Der Standard by Gerhard Aigner

2026-01-07 20:50:20 -05:00 · 2009-04-13 09:09:07 -07:00 · 2009-04-13 09:09:07 -07:00 · 29c232c6ce
commit 29c232c6ce
parent b23b32a7ea
2 changed files with 43 additions and 1 deletions
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -39,7 +39,7 @@ recipe_modules = ['recipe_' + r for r in (
           'nacional_cro', '24sata', 'dnevni_avaz', 'glas_srpske', '24sata_rs',
           'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet',
           'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en',
-           'moneynews',
+           'moneynews', 'der_standard',
          )]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_der_standard.py
+++ b/src/calibre/web/feeds/recipes/recipe_der_standard.py
@@ -0,0 +1,42 @@
+
+''' http://www.derstandard.at - Austrian Newspaper '''
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DerStandardRecipe(BasicNewsRecipe):
+    '''Recipe for the derstandard.at RSS feeds listed in ``feeds``.'''
+    title          = u'derStandard'
+    __author__  = 'Gerhard Aigner'
+    oldest_article = 1
+    max_articles_per_feed = 100
+    feeds          = [(u'International', u'http://derstandard.at/?page=rss&ressort=internationalpolitik'),
+        (u'Inland', u'http://derstandard.at/?page=rss&ressort=innenpolitik'),
+        (u'Wirtschaft', u'http://derstandard.at/?page=rss&ressort=investor'),
+        (u'Web', u'http://derstandard.at/?page=rss&ressort=webstandard'),
+        (u'Sport', u'http://derstandard.at/?page=rss&ressort=sport'),
+        (u'Panorama', u'http://derstandard.at/?page=rss&ressort=panorama'),
+        (u'Etat', u'http://derstandard.at/?page=rss&ressort=etat'),
+        (u'Kultur', u'http://derstandard.at/?page=rss&ressort=kultur'),
+        (u'Wissenschaft', u'http://derstandard.at/?page=rss&ressort=wissenschaft'),
+        (u'Gesundheit', u'http://derstandard.at/?page=rss&ressort=gesundheit'),
+        (u'Bildung', u'http://derstandard.at/?page=rss&ressort=subildung')]
+    encoding = 'utf-8'
+    language = _('German')
+    recursions = 0
+    remove_tags = [dict(name='div'), dict(name='a'), dict(name='link'), dict(name='meta'),
+        dict(name='form',attrs={'name':'sitesearch'}), dict(name='hr')]
+    preprocess_regexps = [  # every match is replaced by the empty string
+        # \d* rather than the one-character class [\d*], so multi-digit markers like [12] are stripped too
+        (re.compile(r'\[\d*\]', re.DOTALL|re.IGNORECASE), lambda match: ''),
+        (re.compile(r'bgcolor="#\w{3,6}"', re.DOTALL|re.IGNORECASE), lambda match: '')]
+
+    def print_version(self, url):
+        '''Return url with 'txt/' inserted before each '?id=' (presumably the text-only page).'''
+        return url.replace('?id=', 'txt/?id=')
+
+    def get_article_url(self, article):
+        '''Return article.link, or None when the link points to an index page
+           (ressort) or the title marks a picture gallery (ansichtssache).'''
+        if 'ressort' in article.link or 'ansichtssache' in article.title.lower():
+            return None
+        return article.link