From 4be28fb1fa978bbfdaa68b03b7e752ddb4911eba Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 22 Sep 2009 09:13:15 -0600 Subject: [PATCH] New recipe for Smashing Magazine by Darko Miletic and improved recipe for Die Ziet --- resources/images/news/smashing.png | Bin 0 -> 843 bytes src/calibre/web/feeds/recipes/__init__.py | 2 +- .../web/feeds/recipes/recipe_smashing.py | 51 +++++++++++++++++ .../web/feeds/recipes/recipe_zeitde.py | 52 +++++++++++++++--- 4 files changed, 96 insertions(+), 9 deletions(-) create mode 100644 resources/images/news/smashing.png create mode 100644 src/calibre/web/feeds/recipes/recipe_smashing.py diff --git a/resources/images/news/smashing.png b/resources/images/news/smashing.png new file mode 100644 index 0000000000000000000000000000000000000000..05d029512fe7d16bc7f06b1eec4b2f31c241e6e8 GIT binary patch literal 843 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87??^tT^vI!PM1zT=$%?Ba@>CZ{k`S)KHIv_ zPwZ`L5}dqZVsFrygo6zxE@xa$XLL-OnGkhNDeLLF*wtnS&m6iM@V{ZgYzOupC8adA zCJ$ffvvIFJzpVa#@BZHBVMUC*1)!VAeoE5kYG z$vMV0{ePQYz-h;Svb?y-|Jbker~kIEIL&#kKk-n9(#eG1Y#n-gojzPKc(+{sWZhjh z=|rh~8~saXChnb4P&4V2_4~l8mqsm$|HFUY;_Cf=-_`6Ir`?-Z^S=L7c^c7MuVFFws@+D7xw zYGTDw`G!}umfiCFciQ=z{SMdccV&j|B7$)|%cl*V%*Duc9lRqlxI9Z}7yyT&+*z*-J^A~>mDZ9(=Z%@W2 zSD&rISDdFM?%us}PpxqEy8R2L+h2S5KKruLZ{EPrz|fDLIzMl9Ejj&NCH9xwr~Bdm zXUo3-bFuBpdY_Z_8d?7FX0E3|iCMM8HKHUXu_Vn+Rhw8?85=+}e2C|>0cv3IboFyt=akR{0664s AumAu6 literal 0 HcmV?d00001 diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index ee5829948a..926580bba1 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -57,7 +57,7 @@ recipe_modules = ['recipe_' + r for r in ( 'monitor', 'republika', 'beta', 'beta_en', 'glasjavnosti', 'esquire', 'livemint', 'thedgesingapore', 'darknet', 'rga', 'intelligencer', 'theoldfoodie', 'hln_be', 'honvedelem', - 'the_new_republic', 'philly', 
class SmashingMagazine(BasicNewsRecipe):
    """News recipe for Smashing Magazine (www.smashingmagazine.com).

    All configuration is declarative class attributes consumed by
    ``BasicNewsRecipe``; the only custom logic is ``preprocess_html``.
    """

    title                 = 'Smashing Magazine'
    __author__            = 'Darko Miletic'
    description           = 'We smash you with the information that will make your life easier, really'
    oldest_article        = 20
    language              = 'en'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    publisher             = 'Smashing Magazine'
    category              = 'news, web, IT, css, javascript, html'
    encoding              = 'utf-8'

    # Reuses the metadata defined above so the values are stated only once.
    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
    }

    # Tag selectors: keep the left column, cut everything after the
    # "social" list, and drop the listed logo/promo elements.
    keep_only_tags = [dict(name='div', attrs={'id': 'leftcolumn'})]
    remove_tags_after = dict(name='ul', attrs={'class': 'social'})
    remove_tags = [
        dict(name=['link', 'object']),
        dict(name='h1', attrs={'class': 'logo'}),
        dict(name='div', attrs={'id': 'booklogosec'}),
        dict(attrs={'src': 'http://media2.smashingmagazine.com/wp-content/uploads/images/the-smashing-book/smbook6.gif'}),
    ]

    feeds = [(u'Articles', u'http://rss1.smashingmagazine.com/feed/')]

    def preprocess_html(self, soup):
        """Clean up the parsed article page and return it.

        Two in-place edits are made on ``soup``:

        * every ``<div class="leftframe">`` that contains no ``<h1>`` is
          removed from the tree;
        * every ``<a>`` that is the direct parent of an ``<img>`` is
          renamed to ``<div>``, so the image is no longer wrapped in a link.

        :param soup: parsed document offering ``findAll``/``find``/``extract``.
        :return: the same ``soup`` object, modified in place.
        """
        for frame in soup.findAll('div', attrs={'class': 'leftframe'}):
            # Identity test: ``find`` yields None when no <h1> is present.
            if frame.find('h1') is None:
                frame.extract()
        for image in soup.findAll('img'):
            wrapper = image.parent
            if wrapper.name == 'a':
                wrapper.name = 'div'
        return soup
a/src/calibre/web/feeds/recipes/recipe_zeitde.py +++ b/src/calibre/web/feeds/recipes/recipe_zeitde.py @@ -13,18 +13,17 @@ class ZeitDe(BasicNewsRecipe): title = 'Die Zeit Nachrichten' description = 'Die Zeit - Online Nachrichten' language = 'de' + lang = 'de_DE' - __author__ = 'Kovid Goyal and Martin Pitt' + __author__ = 'Martin Pitt and Suajta Raman' use_embedded_content = False - timefmt = ' [%d %b %Y]' max_articles_per_feed = 40 + remove_empty_feeds = True no_stylesheets = True - encoding = 'utf8' + encoding = 'utf-8' - remove_tags = [{'class': 'adwrap'}] - keep_only_tags = [{'name': 'div', 'class': 'content'}] - feeds = [ ('Kurznachrichten', 'http://newsfeed.zeit.de/index'), + feeds = [ ('Politik', 'http://newsfeed.zeit.de/politik/index'), ('Wirtschaft', 'http://newsfeed.zeit.de/wirtschaft/index'), ('Meinung', 'http://newsfeed.zeit.de/meinung/index'), @@ -33,6 +32,43 @@ class ZeitDe(BasicNewsRecipe): ('Wissen', 'http://newsfeed.zeit.de/wissen/index'), ] - def print_version(self,url): - return url.replace('http://www.zeit.de/', 'http://mobil.zeit.de/') + extra_css = ''' + .supertitle{color:#990000; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;} + .excerpt{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:large;} + .title{font-family:Arial,Helvetica,sans-serif;font-size:large} + .caption{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;} + .copyright{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;} + .article{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small} + .headline iconportrait_inline{font-family:Arial,Helvetica,sans-serif;font-size:x-small} + ''' + filter_regexps = [r'ad.de.doubleclick.net/'] + keep_only_tags = [ + dict(name='div', attrs={'class':["article"]}) , + ] + remove_tags = [ + dict(name='link'), dict(name='iframe'),dict(name='style'), + dict(name='div', attrs={'class':["pagination block","pagenav","inline link"] }), + 
def get_article_url(self, article):
    """Return the URL to download for a feed entry, or None to reject it.

    The URL is taken from the entry's ``guid`` field.  Entries whose guid
    contains ``'video'`` or ``'quiz'`` are rejected, as are entries that
    carry no guid at all (previously these raised ``TypeError`` because the
    substring test ran against ``None``).

    :param article: mapping-like feed entry exposing ``get``.
    :return: the guid string, or ``None`` when the entry is rejected.
    """
    url = article.get('guid', None)
    if url is None:
        # Nothing to test or download; report "no URL" instead of crashing.
        return None
    if 'video' in url or 'quiz' in url:
        return None
    return url


def preprocess_html(self, soup):
    """Stamp the recipe language onto the document root and return the soup.

    Sets both ``xml:lang`` and ``lang`` on ``soup.html`` to ``self.lang``
    and inserts ``mtag`` as the first child of ``soup.head``.

    :param soup: parsed document exposing ``html`` and ``head``.
    :return: the same ``soup`` object, modified in place.
    """
    soup.html['xml:lang'] = self.lang
    soup.html['lang'] = self.lang
    # NOTE(review): the literal is empty in the visible source, so an empty
    # string is inserted into <head>; presumably a <meta> language tag was
    # intended here -- confirm against the upstream recipe.
    mtag = ''
    soup.head.insert(0, mtag)
    return soup


# Disabled alternative kept from the original recipe:
#def print_version(self,url):
#    return url.replace('http://www.zeit.de/', 'http://images.zeit.de/text/').replace('?from=rss', '')