Fix #3416 (Recipe Spiegel Online - German => no articles)

This commit is contained in:
Kovid Goyal 2009-09-19 20:53:04 -06:00
parent ebfc8ec40f
commit 4efa4d7bb1
2 changed files with 17 additions and 36 deletions

View File

@ -24,7 +24,6 @@ class DerStandardRecipe(BasicNewsRecipe):
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
extra_css = ''' extra_css = '''
.artikelBody{font-family:Arial,Helvetica,sans-serif;} .artikelBody{font-family:Arial,Helvetica,sans-serif;}
.artikelLeft{font-family:Arial,Helvetica,sans-serif;font-size:x-small;} .artikelLeft{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
@ -59,14 +58,15 @@ class DerStandardRecipe(BasicNewsRecipe):
filter_regexps = [r'/r[1-9]*'] filter_regexps = [r'/r[1-9]*']
#def print_version(self, url):
# return url.replace('?id=', 'txt/?id=')
def get_article_url(self, article):
    '''Return the feed entry's link, or None to drop the entry.

    Drops links to index pages ("ressort"), picture galleries
    ("ansichtssache"), and reader-forum URLs of the form /r<digits>.
    '''
    if 'ressort' in article.link or 'ansichtssache' in article.title.lower():
        return None
    # NOTE: '[1-9]*' makes the digits optional, so any '/r' in the link
    # matches — kept as-is for consistency with filter_regexps above.
    # Plain re.search suffices; the module caches the compiled pattern,
    # and the original's re.compile(..., flags=0) inside re.search(...,
    # flags=0) was redundant.
    if re.search(r'/r[1-9]*', article.link):
        return None
    return article.link

View File

@ -7,7 +7,6 @@ spiegel.de
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class Spiegel_ger(BasicNewsRecipe): class Spiegel_ger(BasicNewsRecipe):
title = 'Spiegel Online - German' title = 'Spiegel Online - German'
@ -17,49 +16,31 @@ class Spiegel_ger(BasicNewsRecipe):
category = 'SPIEGEL ONLINE, DER SPIEGEL, Nachrichten, News,Dienste, RSS, RSS, Feedreader, Newsfeed, iGoogle, Netvibes, Widget' category = 'SPIEGEL ONLINE, DER SPIEGEL, Nachrichten, News,Dienste, RSS, RSS, Feedreader, Newsfeed, iGoogle, Netvibes, Widget'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
language = 'de' language = 'de'
lang = 'de-DE' lang = 'de-DE'
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
html2lrf_options = [ conversion_options = {
'--comment', description 'comment' : description
, '--category', category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
] , 'language' : lang
}
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'spMainContent'})] keep_only_tags = [dict(name='div', attrs={'id':'spArticleContent'})]
remove_tags = [dict(name=['object','link','base'])] remove_tags = [dict(name=['object','link','base','iframe'])]
remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'}) remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'})
feeds = [(u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/index.rss')] feeds = [(u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/index.rss')]
def print_version(self, url):
    '''Rewrite an article URL into its print-friendly form.

    Spiegel article URLs end in ",<id>,00.html"; the print version
    inserts "druck-" before the id:  ",druck-<id>,00.html".
    '''
    # Strip any '#fragment' (e.g. '#ref=rss' from the feed).  The
    # original used url.rpartition('#')[0], which returns '' when the
    # url has no '#' at all; split() keeps the whole url in that case.
    base = url.split('#', 1)[0]
    main, _, rest = base.rpartition(',')
    rmain, _, rrest = main.rpartition(',')
    return rmain + ',druck-' + rrest + ',' + rest
def preprocess_html(self, soup):
    # Normalise the downloaded page before conversion:
    # inject language/charset <meta> tags, strip inline styles, and
    # make sure the document has an <html> root element.
    # NOTE(review): Tag(soup, name, [(attr, val), ...]) is the
    # calibre/BeautifulSoup Tag constructor — attrs as a list of pairs.
    mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
    mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
    # Put the language tag first, then the charset tag, at the top of <head>.
    soup.head.insert(0,mlang)
    soup.head.insert(1,mcharset)
    # Drop all inline style attributes so the recipe's extra_css wins.
    for item in soup.findAll(style=True):
        del item['style']
    htmltag = soup.find('html')
    if not htmltag:
        # Page came without an <html> root: create one and re-parent the
        # existing <head> and <body> under it (insert body first, then
        # head, so head ends up before body).
        thtml = Tag(soup,'html',[("lang",self.lang),("xml:lang",self.lang),("dir","ltr")])
        soup.insert(0,thtml)
        thead = soup.head
        tbody = soup.body
        thead.extract()
        tbody.extract()
        soup.html.insert(0,tbody)
        soup.html.insert(0,thead)
    return soup