From 4be28fb1fa978bbfdaa68b03b7e752ddb4911eba Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 22 Sep 2009 09:13:15 -0600
Subject: [PATCH] New recipe for Smashing Magazine by Darko Miletic and
 improved recipe for Die Zeit

---
 resources/images/news/smashing.png            | Bin 0 -> 843 bytes
 src/calibre/web/feeds/recipes/__init__.py     |   2 +-
 .../web/feeds/recipes/recipe_smashing.py      |  51 +++++++++++++++++
 .../web/feeds/recipes/recipe_zeitde.py        |  52 +++++++++++++++---
 4 files changed, 96 insertions(+), 9 deletions(-)
 create mode 100644 resources/images/news/smashing.png
 create mode 100644 src/calibre/web/feeds/recipes/recipe_smashing.py

diff --git a/resources/images/news/smashing.png b/resources/images/news/smashing.png
new file mode 100644
index 0000000000000000000000000000000000000000..05d029512fe7d16bc7f06b1eec4b2f31c241e6e8
GIT binary patch
literal 843
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87??^tT^vI!PM1zT=$%?Ba@>CZ{k`S)KHIv_
zPwZ`L5}dqZVsFrygo6zxE@xa$XLL-OnGkhNDeLLF*wtnS&m6iM@V{ZgYzOupC8adA
zCJ$ffvvIFJzpVa#@BZHB<xvs~<E#q8+TXXo|1Q(_vAVu<rR}+wmv;1CGY_2Zw39jd
z_0f_t#tAiFS#MsCS327>VMUC<?_Xj!AKBcx;TX%L=XzC`hgZ07Z-dPbxhn}ZT^}=b
zo}KaRh@PYVKao?o^?}T+BN=y`_@xq@zicRE$&(d&Br}KU#yz_&r43$e5+{O9`~p4}
z?C4J8yTlTD@Rz+gm*Am`oL=Gg4~1EM{ObG1Pd3+FF;l)Xw(A++5#bt}TlUhvdw7n`
zDY<CwVNoY|`!ma}JBRbv9{#8IiuDsu$b@AAHD4mkyxm(r{MpQ^{>*1)!VAeoE5kYG
z$vMV0{ePQYz-h;Svb?y-|Jbker~kIEIL&#kKk-n9(#eG1Y#n-gojzPKc(+{sWZhjh
z=|rh~8~saXChnb4P&4V2_4~l8mqsm$|HFUY;_Cf=-_`6Ir`?-Z^S<ixA5+R+t-}2|
z<-TrmLqdW;u$CypwrkE8{;QlkWRq5%6k)N?{{2N2M*Wt>=L7c^c7MuVFFws@+D7xw
zYGTDw`G!}umfiCFciQ=z{SMdccV&j|B7$)<jwJD<NZ!h8ezm`UK?2XRs8$W7eb(*w
zZ)&<(-fq2Wq9`M`(4>|%cl*V%*Duc9lRqlxI9Z}7yyT&+*z*-J^A~>mDZ9(=Z%@W2
zSD&rISDdFM?%us}PpxqEy8R2L+h2S5KKruLZ{EPrz|fDLIzMl9Ejj&NCH9xwr~Bdm
zXUo3-bFuBpdY_Z_8d?7FX0E3|iCMM8HKHUXu_V<hxhNG#F&G&bSn2|ikx__&rIn$9
zm4UIYfti(ofm{8WYbY9W^HVa@Dv>n+Rhw8?85=+}e2C|>0cv3IboFyt=akR{0664s
AumAu6

literal 0
HcmV?d00001

diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index ee5829948a..926580bba1 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -57,7 +57,7 @@ recipe_modules = ['recipe_' + r for r in (
            'monitor', 'republika', 'beta', 'beta_en', 'glasjavnosti',
            'esquire', 'livemint', 'thedgesingapore', 'darknet', 'rga',
            'intelligencer', 'theoldfoodie', 'hln_be', 'honvedelem',
-           'the_new_republic', 'philly', 'salon', 'tweakers',
+           'the_new_republic', 'philly', 'salon', 'tweakers', 'smashing',
           )]
 
 
diff --git a/src/calibre/web/feeds/recipes/recipe_smashing.py b/src/calibre/web/feeds/recipes/recipe_smashing.py
new file mode 100644
index 0000000000..cc4edd2c77
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_smashing.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.smashingmagazine.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class SmashingMagazine(BasicNewsRecipe):
+    # Recipe for www.smashingmagazine.com, driven by the RSS feed listed in `feeds`.
+    title                 = 'Smashing Magazine'
+    __author__            = 'Darko Miletic'
+    description           = 'We smash you with the information that will make your life easier, really'
+    oldest_article        = 20
+    language              = 'en'
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    publisher             = 'Smashing Magazine'
+    category              = 'news, web, IT, css, javascript, html'
+    encoding              = 'utf-8'
+
+    conversion_options = {
+        'comments': description,
+        'tags': category,
+        'publisher': publisher,
+    }
+
+    keep_only_tags = [dict(name='div', attrs={'id': 'leftcolumn'})]
+    remove_tags_after = dict(name='ul', attrs={'class': 'social'})
+    remove_tags = [
+        dict(name=['link', 'object']),
+        dict(name='h1', attrs={'class': 'logo'}),
+        dict(name='div', attrs={'id': 'booklogosec'}),
+        dict(attrs={'src': 'http://media2.smashingmagazine.com/wp-content/uploads/images/the-smashing-book/smbook6.gif'}),
+    ]
+    feeds = [(u'Articles', u'http://rss1.smashingmagazine.com/feed/')]
+
+    def preprocess_html(self, soup):
+        """Remove 'leftframe' divs that have no <h1>; rename <a> parents of <img> to <div>."""
+        for frame in soup.findAll('div', attrs={'class': 'leftframe'}):
+            if frame.find('h1') is None:
+                frame.extract()
+        for img in soup.findAll('img'):
+            wrapper = img.parent
+            if wrapper.name == 'a':
+                wrapper.name = 'div'
+        return soup
diff --git a/src/calibre/web/feeds/recipes/recipe_zeitde.py b/src/calibre/web/feeds/recipes/recipe_zeitde.py
index 1c00b74146..648e3f9148 100644
--- a/src/calibre/web/feeds/recipes/recipe_zeitde.py
+++ b/src/calibre/web/feeds/recipes/recipe_zeitde.py
@@ -13,18 +13,17 @@ class ZeitDe(BasicNewsRecipe):
     title = 'Die Zeit Nachrichten'
     description = 'Die Zeit - Online Nachrichten'
     language = 'de'
+    lang = 'de_DE'
 
-    __author__ = 'Kovid Goyal and Martin Pitt'
+    __author__ = 'Martin Pitt and Suajta Raman'
     use_embedded_content   = False
-    timefmt = ' [%d %b %Y]'
     max_articles_per_feed = 40
+    remove_empty_feeds = True
     no_stylesheets = True
-    encoding = 'utf8'
+    encoding = 'utf-8'
 
-    remove_tags = [{'class': 'adwrap'}]
-    keep_only_tags = [{'name': 'div', 'class': 'content'}]
 
-    feeds =  [ ('Kurznachrichten', 'http://newsfeed.zeit.de/index'),
+    feeds =  [
                ('Politik', 'http://newsfeed.zeit.de/politik/index'),
                ('Wirtschaft', 'http://newsfeed.zeit.de/wirtschaft/index'),
                ('Meinung', 'http://newsfeed.zeit.de/meinung/index'),
@@ -33,6 +32,43 @@ class ZeitDe(BasicNewsRecipe):
                ('Wissen', 'http://newsfeed.zeit.de/wissen/index'),
              ]
 
-    def print_version(self,url):
-        return url.replace('http://www.zeit.de/', 'http://mobil.zeit.de/')
+    extra_css = '''
+                .supertitle{color:#990000; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
+                .excerpt{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:large;}
+                .title{font-family:Arial,Helvetica,sans-serif;font-size:large}
+                .caption{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
+                .copyright{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
+                .article{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
+                .headline iconportrait_inline{font-family:Arial,Helvetica,sans-serif;font-size:x-small}
+                '''
+    filter_regexps = [r'ad.de.doubleclick.net/']
+    keep_only_tags = [
+                        dict(name='div', attrs={'class':["article"]}) ,
+                         ]
+    remove_tags = [
+                    dict(name='link'), dict(name='iframe'),dict(name='style'),
+                    dict(name='div', attrs={'class':["pagination block","pagenav","inline link"] }),
+                     dict(name='div', attrs={'id':["place_5","place_4"]})
+                  ]
+
+    def get_article_url(self, article):
+        """Return the article's guid as its URL, or None to skip the entry.
+
+        Entries with no guid, and video or quiz pages, yield None.
+        """
+        url = article.get('guid', None)
+        if not url or 'video' in url or 'quiz' in url:
+            return None
+        return url
+
+    def preprocess_html(self, soup):
+        """Set both language attributes on <html> and put a charset <meta> string first in <head>."""
+        for attr in ('xml:lang', 'lang'):
+            soup.html[attr] = self.lang
+        meta_open = '<meta http-equiv="Content-Type" content="text/html; charset='
+        soup.head.insert(0, meta_open + self.encoding + '">')
+        return soup
+
+    #def print_version(self,url):
+    #    return url.replace('http://www.zeit.de/', 'http://images.zeit.de/text/').replace('?from=rss', '')