New recipe for Elektrolese by Oliver Niesner. Also updated recipes for Heise, Sueddeutsche and ZDNet.

2026-06-07 06:25:26 -04:00 · 2009-05-27 10:31:27 -07:00
parent e35acbf104
commit 857d114ab9
9 changed files with 70 additions and 32 deletions
@@ -352,7 +352,7 @@ License: other
 Liberation Fonts
 -----------------
 calibre includes a copy of the liberation fonts, available from
-https://fedorahosted.org/liberation-fonts
+https://calibre.kovidgoyal.net/downloads/liberation-fonts

 BSD License (for all the BSD licensed code indicated above)
 -----------------------------------------------------------
@@ -39,6 +39,7 @@ def get_linux_data(version='1.0.0'):
                        ('debian', 'Debian Sid'),
                        ('exherbo', 'Exherbo'),
                        ('foresight', 'Foresight 2.1'),
+                        ('gentoo', 'Gentoo'),
                        ('ubuntu', 'Ubuntu Jaunty Jackalope'),
                        ('linux_mint', 'Linux Mint Gloria'),
                        ]:
@@ -43,7 +43,7 @@ recipe_modules = ['recipe_' + r for r in (
           'seattle_times', 'scott_hanselman', 'coding_horror', 'twitchfilms',
           'stackoverflow', 'telepolis_artikel', 'zaobao', 'usnews',
           'straitstimes', 'index_hu', 'pcworld_hu', 'hrt', 'rts',
-           'h1', 'h2', 'h3', 'phd_comics', 'woz_die',
+           'h1', 'h2', 'h3', 'phd_comics', 'woz_die', 'elektrolese',
          )]

 import re, imp, inspect, time, os
@@ -0,0 +1,35 @@
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+
+'''
+Fetch elektrolese.
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class elektrolese(BasicNewsRecipe):
+
+    title = u'elektrolese'
+    description = 'News about electronic publishing'
+    __author__ = 'Oliver Niesner'
+    use_embedded_content   = False
+    timefmt = ' [%a %d %b %Y]'
+    language = _('German')
+    oldest_article = 14
+    max_articles_per_feed = 50
+    no_stylesheets = True
+    #html2epub_options = 'linearize_tables = True\nbase_font_size2=14'
+    encoding = 'utf-8'
+
+
+    remove_tags_after = [dict(id='comments')]
+    filter_regexps = [r'ad\.doubleclick\.net']
+
+    remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}),
+                   dict(id='comments')]
+
+
+
+    feeds =  [ (u'electrolese', u'http://elektrolese.blogspot.com/feeds/posts/default?alt=rss') ]
+
@@ -8,7 +8,7 @@ Fetch heise.
 from calibre.web.feeds.news import BasicNewsRecipe


-class HeiseDe(BasicNewsRecipe):
+class heiseDe(BasicNewsRecipe):

    title = 'heise'
    description = 'Computernews from Germany'
@@ -20,23 +20,26 @@ class HeiseDe(BasicNewsRecipe):
    no_stylesheets = True

    remove_tags = [dict(id='navi_top'),
-           dict(id='navi_bottom'),
-           dict(id='logo'),
-           dict(id='login_suche'),
-           dict(id='navi_login'),
-           dict(id='navigation'),
-           dict(id='breadcrumb'),
-           dict(id=''),
-           dict(id='sitemap'),
-           dict(id='bannerzone'),
-           dict(name='span', attrs={'class':'rsaquo'}),
-           dict(name='div', attrs={'class':'news_logo'}),
-           dict(name='p', attrs={'class':'news_option'}),
-           dict(name='p', attrs={'class':'news_navi'}),
-           dict(name='p', attrs={'class':'news_foren'})]
+		   dict(id='navi_bottom'),
+		   dict(id='logo'),
+		   dict(id='login_suche'),
+		   dict(id='navi_login'),
+		   dict(id='navigation'),
+		   dict(id='breadcrumb'),
+		   dict(id=''),
+		   dict(id='sitemap'),
+		   dict(id='bannerzone'),
+		   dict(name='span', attrs={'class':'rsaquo'}),
+		   dict(name='div', attrs={'class':'news_logo'}),
+		   dict(name='div', attrs={'class':'bcadv ISI_IGNORE'}),
+		   dict(name='p', attrs={'class':'news_option'}),
+		   dict(name='p', attrs={'class':'news_navi'}),
+		   dict(name='p', attrs={'class':'news_foren'})]
    remove_tags_after = [dict(name='p', attrs={'class':'news_foren'})]

    feeds =  [ ('heise', 'http://www.heise.de/newsticker/heise.rdf') ]



+
+
@@ -13,11 +13,11 @@ class Sueddeutsche(BasicNewsRecipe):
    description = 'News from Germany'
    __author__ = 'Oliver Niesner'
    use_embedded_content   = False
-    language = _('German')
    timefmt = ' [%d %b %Y]'
    oldest_article = 7
    max_articles_per_feed = 50
    no_stylesheets = True
+    language = _('German')
    encoding = 'iso-8859-15'
    remove_javascript = True

@@ -89,3 +89,5 @@ class Sueddeutsche(BasicNewsRecipe):
    def print_version(self, url):
        return url.replace('/text/', '/text/print.html')

+
+
@@ -6,11 +6,10 @@ Fetch zdnet.
 '''

 from calibre.web.feeds.news import BasicNewsRecipe
-import re


 class cdnet(BasicNewsRecipe):
-    
+
    title = 'zdnet'
    description = 'zdnet security'
    __author__ = 'Oliver Niesner'
@@ -19,16 +18,10 @@ class cdnet(BasicNewsRecipe):
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 40
    no_stylesheets = True
-    encoding = 'iso-8859-1'
+    encoding = 'latin1'
+
+

-    #preprocess_regexps = \
-#	[(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
-#		[
-#		(r'<84>', lambda match: ''),
-#		(r'<93>', lambda match: ''),
-#		]
-#	]
-    
    remove_tags = [dict(id='eyebrows'),
 		   dict(id='header'),
 		   dict(id='search'),
@@ -36,12 +29,16 @@ class cdnet(BasicNewsRecipe):
 		   dict(id=''),
 		   dict(name='div', attrs={'class':'banner'}),
 		   dict(name='p', attrs={'class':'tags'}),
+		   dict(name='a', attrs={'href':'http://www.twitter.com/ryanaraine'}),
 		   dict(name='div', attrs={'class':'special1'})]
    remove_tags_after = [dict(name='div', attrs={'class':'bloggerDesc clear'})]
-    
-    feeds =  [ ('zdnet', 'http://feeds.feedburner.com/zdnet/security') ] 
-    
+
+    feeds =  [ ('zdnet', 'http://feeds.feedburner.com/zdnet/security') ]


+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup