New recipe for Smashing Magazine by Darko Miletic and improved recipe for Die Zeit

2025-12-12 16:15:03 -05:00 · 2009-09-22 09:13:15 -06:00 · 2009-09-22 09:13:15 -06:00 · 4be28fb1fa
commit 4be28fb1fa
parent d0865b25fb
4 changed files with 96 additions and 9 deletions
--- a/resources/images/news/smashing.png
+++ b/resources/images/news/smashing.png
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@ -57,7 +57,7 @@ recipe_modules = ['recipe_' + r for r in (
           'monitor', 'republika', 'beta', 'beta_en', 'glasjavnosti',
           'esquire', 'livemint', 'thedgesingapore', 'darknet', 'rga',
           'intelligencer', 'theoldfoodie', 'hln_be', 'honvedelem',
-           'the_new_republic', 'philly', 'salon', 'tweakers',
+           'the_new_republic', 'philly', 'salon', 'tweakers', 'smashing',
          )]
--- a/src/calibre/web/feeds/recipes/recipe_smashing.py
+++ b/src/calibre/web/feeds/recipes/recipe_smashing.py
@ -0,0 +1,51 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.smashingmagazine.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import Tag
 class SmashingMagazine(BasicNewsRecipe):
     '''
     calibre news recipe for Smashing Magazine (www.smashingmagazine.com).

     Articles are taken from the single RSS feed listed in ``feeds``; the
     class attributes below configure the download and clean-up, and
     ``preprocess_html`` performs the remaining page-specific fixes.
     '''
     title                 = 'Smashing Magazine'
     __author__            = 'Darko Miletic'
     description           = 'We smash you with the information that will make your life easier, really'
     oldest_article        = 20
     language              = 'en'
     max_articles_per_feed = 100
     no_stylesheets        = True
     use_embedded_content  = False
     publisher             = 'Smashing Magazine'
     category              = 'news, web, IT, css, javascript, html'
     encoding              = 'utf-8'

     # Metadata handed to the conversion step; it reuses the values above.
     conversion_options = {
         'comments': description,
         'tags': category,
         'publisher': publisher,
     }

     # Page clean-up rules: the article lives in div#leftcolumn and
     # ul.social marks the end of the content that is kept.
     keep_only_tags = [dict(name='div', attrs={'id': 'leftcolumn'})]
     remove_tags_after = dict(name='ul', attrs={'class': 'social'})
     remove_tags = [
         dict(name=['link', 'object']),
         dict(name='h1', attrs={'class': 'logo'}),
         dict(name='div', attrs={'id': 'booklogosec'}),
         # Matches any tag whose src is this specific image URL.
         dict(attrs={'src': 'http://media2.smashingmagazine.com/wp-content/uploads/images/the-smashing-book/smbook6.gif'}),
     ]

     feeds = [(u'Articles', u'http://rss1.smashingmagazine.com/feed/')]

     def preprocess_html(self, soup):
         '''
         Tidy a downloaded article page before conversion.

         Removes every ``div.leftframe`` that contains no ``<h1>`` and
         renames each ``<a>`` that directly wraps an ``<img>`` to ``<div>``
         (presumably so images are no longer rendered as hyperlinks).

         :param soup: parsed page (BeautifulSoup tree).
         :return: the same ``soup`` object, modified in place.
         '''
         for frame in soup.findAll('div', attrs={'class': 'leftframe'}):
             # Identity test: "no <h1> found" is signalled by None.
             if frame.find('h1') is None:
                 frame.extract()
         for image in soup.findAll('img'):
             wrapper = image.parent
             if wrapper.name == 'a':
                 wrapper.name = 'div'
         return soup
--- a/src/calibre/web/feeds/recipes/recipe_zeitde.py
+++ b/src/calibre/web/feeds/recipes/recipe_zeitde.py
@ -13,18 +13,17 @@ class ZeitDe(BasicNewsRecipe):
    title = 'Die Zeit Nachrichten'
    description = 'Die Zeit - Online Nachrichten'
    language = 'de'
    lang = 'de_DE'
-    __author__ = 'Kovid Goyal and Martin Pitt'
+    __author__ = 'Martin Pitt and Suajta Raman'
    use_embedded_content   = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 40
    remove_empty_feeds = True
    no_stylesheets = True
-    encoding = 'utf8'
+    encoding = 'utf-8'
    remove_tags = [{'class': 'adwrap'}]
    keep_only_tags = [{'name': 'div', 'class': 'content'}]
-    feeds =  [ ('Kurznachrichten', 'http://newsfeed.zeit.de/index'),
+    feeds =  [
               ('Politik', 'http://newsfeed.zeit.de/politik/index'),
               ('Wirtschaft', 'http://newsfeed.zeit.de/wirtschaft/index'),
               ('Meinung', 'http://newsfeed.zeit.de/meinung/index'),
@ -33,6 +32,43 @@ class ZeitDe(BasicNewsRecipe):
               ('Wissen', 'http://newsfeed.zeit.de/wissen/index'),
             ]
-    def print_version(self,url):
+    extra_css = '''
-        return url.replace('http://www.zeit.de/', 'http://mobil.zeit.de/')
+                .supertitle{color:#990000; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
                .excerpt{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:large;}
                .title{font-family:Arial,Helvetica,sans-serif;font-size:large}
                .caption{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
                .copyright{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
                .article{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
                .headline iconportrait_inline{font-family:Arial,Helvetica,sans-serif;font-size:x-small}
                '''
    filter_regexps = [r'ad.de.doubleclick.net/']
    keep_only_tags = [
                        dict(name='div', attrs={'class':["article"]}) ,
                         ]
    remove_tags = [
                    dict(name='link'), dict(name='iframe'),dict(name='style'),
                    dict(name='div', attrs={'class':["pagination block","pagenav","inline link"] }),
                     dict(name='div', attrs={'id':["place_5","place_4"]})
                  ]
    def get_article_url(self, article):
        '''
        Return the URL to download for a feed entry, or None to reject it.

        The URL is taken from the entry's ``guid`` field. Entries whose
        guid contains ``video`` or ``quiz`` are rejected by returning None,
        as are entries that carry no guid at all.

        :param article: feed entry offering a dict-style ``get``.
        :return: the guid string, or None.
        '''
        url = article.get('guid', None)
        if url is None:
            # No guid in the feed entry: the substring tests below would
            # raise TypeError on None, so reject the entry explicitly.
            return None
        if 'video' in url or 'quiz' in url:
            return None
        return url
    def preprocess_html(self, soup):
        '''
        Stamp language and charset information onto a downloaded page.

        Sets both ``xml:lang`` and ``lang`` on the ``<html>`` element to
        ``self.lang`` and inserts a Content-Type ``<meta>`` string naming
        ``self.encoding`` as the first child of ``<head>``.

        :param soup: parsed page; assumed to have both ``html`` and ``head``
                     elements (TODO confirm for all feed pages).
        :return: the same ``soup`` object, modified in place.
        '''
        root = soup.html
        for attr_name in ('xml:lang', 'lang'):
            root[attr_name] = self.lang
        # The tag is inserted as a plain string, not as a Tag object.
        charset_meta = ''.join((
            '<meta http-equiv="Content-Type" content="text/html; charset=',
            self.encoding,
            '">',
        ))
        soup.head.insert(0, charset_meta)
        return soup
    #def print_version(self,url):
    #    return url.replace('http://www.zeit.de/', 'http://images.zeit.de/text/').replace('?from=rss', '')