Fix #3416 (Recipe Spiegel Online - German => no articles)

2025-09-14 16:18:05 -04:00 · 2009-09-19 20:53:04 -06:00 · 2009-09-19 20:53:04 -06:00 · 4efa4d7bb1
commit 4efa4d7bb1
parent ebfc8ec40f
2 changed files with 17 additions and 36 deletions
--- a/src/calibre/web/feeds/recipes/recipe_der_standard.py
+++ b/src/calibre/web/feeds/recipes/recipe_der_standard.py
@@ -24,7 +24,6 @@ class DerStandardRecipe(BasicNewsRecipe):
    oldest_article = 1
    max_articles_per_feed = 100

-
    extra_css = '''
                .artikelBody{font-family:Arial,Helvetica,sans-serif;}
                .artikelLeft{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
@@ -59,14 +58,15 @@ class DerStandardRecipe(BasicNewsRecipe):

    filter_regexps = [r'/r[1-9]*']

-    #def print_version(self, url):
-    #    return url.replace('?id=', 'txt/?id=')
-
    def get_article_url(self, article):
        '''if the article links to a index page (ressort) or a picture gallery
           (ansichtssache), don't add it'''
        if ( article.link.count('ressort') > 0 or article.title.lower().count('ansichtssache') > 0 ):
            return None
+        matchObj = re.search( re.compile(r'/r'+'[1-9]*',flags=0), article.link,flags=0)
+
+        if matchObj:
+            return None

        return article.link

--- a/src/calibre/web/feeds/recipes/recipe_spiegelde.py
+++ b/src/calibre/web/feeds/recipes/recipe_spiegelde.py
@@ -7,7 +7,6 @@ spiegel.de
 '''

 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag

 class Spiegel_ger(BasicNewsRecipe):
    title                 = 'Spiegel Online - German'
@@ -17,49 +16,31 @@ class Spiegel_ger(BasicNewsRecipe):
    category              = 'SPIEGEL ONLINE, DER SPIEGEL, Nachrichten, News,Dienste, RSS, RSS, Feedreader, Newsfeed, iGoogle, Netvibes, Widget'
    oldest_article        = 7
    max_articles_per_feed = 100
-    language = 'de'
-
+    language              = 'de'
    lang                  = 'de-DE'
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'

-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : lang
+                        }

-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

-    keep_only_tags = [dict(name='div', attrs={'id':'spMainContent'})]
+    keep_only_tags = [dict(name='div', attrs={'id':'spArticleContent'})]

-    remove_tags = [dict(name=['object','link','base'])]
+    remove_tags = [dict(name=['object','link','base','iframe'])]

    remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'})

    feeds          = [(u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/index.rss')]

    def print_version(self, url):
-        main, sep, rest = url.rpartition(',')
+        rmt = url.rpartition('#')[0]
+        main, sep, rest = rmt.rpartition(',')
        rmain, rsep, rrest = main.rpartition(',')
-        return rmain + ',druck-' + rrest + ',' + rest
-
-    def preprocess_html(self, soup):
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
-        soup.head.insert(0,mlang)
-        soup.head.insert(1,mcharset)
-        for item in soup.findAll(style=True):
-            del item['style']
-        htmltag = soup.find('html')
-        if not htmltag:
-            thtml = Tag(soup,'html',[("lang",self.lang),("xml:lang",self.lang),("dir","ltr")])
-            soup.insert(0,thtml)
-            thead = soup.head
-            tbody = soup.body
-            thead.extract()
-            tbody.extract()
-            soup.html.insert(0,tbody)
-            soup.html.insert(0,thead)
-        return soup
+        purl = rmain + ',druck-' + rrest + ',' + rest
+        return purl