Updated recipes for Telepolis. The news and articles have now been split into two separate recipes, for better formatting.

2025-06-23 15:30:45 -04:00 · 2009-04-22 23:36:44 -07:00 · 2009-04-22 23:36:44 -07:00 · 2ee05f9414
commit 2ee05f9414
parent 7a029e3d0e
5 changed files with 94 additions and 36 deletions
--- a/src/calibre/gui2/images/news/telepolis.png
+++ b/src/calibre/gui2/images/news/telepolis.png
--- a/src/calibre/gui2/images/news/telepolis_artikel.png
+++ b/src/calibre/gui2/images/news/telepolis_artikel.png
--- a/src/calibre/web/feeds/recipes/init.py
+++ b/src/calibre/web/feeds/recipes/init.py
@ -41,7 +41,7 @@ recipe_modules = ['recipe_' + r for r in (
           'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en',
           'moneynews', 'der_standard', 'diepresse', 'nzz_ger', 'hna',
           'seattle_times', 'scott_hanselman', 'coding_horror',
-           'stackoverflow'
+           'stackoverflow', 'telepolis_artikel',
          )]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_telepolis.py
+++ b/src/calibre/web/feeds/recipes/recipe_telepolis.py
@ -1,35 +1,50 @@
-#!/usr/bin/env  python
+# -*- coding: utf-8 -*-

 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
-'''
-www.heise.de/tp
-'''
+__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'

+''' http://www.heise.de/tp - Telepolis (news feed) '''
+import re
 from calibre.web.feeds.news import BasicNewsRecipe

-class Telepolis(BasicNewsRecipe):
-    title                 = 'Telepolis'
-    __author__            = 'Darko Miletic'
-    description           = 'News from Germany in German'
-    oldest_article        = 2
-    max_articles_per_feed = 100
-    language = _('German')
-    no_stylesheets        = True
-    use_embedded_content  = False
-    encoding              = 'utf-8'
+class TelepolisNews(BasicNewsRecipe):
+	title          = u'Telepolis (News)'
+	__author__ = 'Gerhard Aigner'
+	publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
+	description = 'News from telepolis'
+	category = 'news'
+	oldest_article = 7
+	max_articles_per_feed = 100
+	recursion = 0
+	no_stylesheets = True
+	encoding = "utf-8"

-    html2lrf_options = [  '--comment'       , description
-                        , '--category'      , 'blog,news'
-                       ]
+	use_embedded_content = False
+	remove_empty_feeds = True

-    keep_only_tags = [
-                       dict(name='table', attrs={'class':'inhalt-table'})
-                      ,dict(name='table', attrs={'class':'blogtable'   })
-                     ]
-    remove_tags = [
-                     dict(name='table', attrs={'class':'img'    })
-                    ,dict(name='img'  , attrs={'src':'/tp/r4/icons/inline/extlink.gif'})
-                  ]
+	# Strip anchor tags (opening and closing) while leaving the link text in
+	# place. The opening pattern requires whitespace or '>' right after the
+	# 'a', so tags that merely start with "a" (<abbr>, <address>, <area>, ...)
+	# are no longer half-removed.
+	preprocess_regexps = [(re.compile(r'<a(?:\s[^>]*)?>', re.IGNORECASE), lambda match: ''),
+		(re.compile(r'</a>', re.IGNORECASE), lambda match: ''),]

-    feeds       = [(u'Telepolis Newsfeed', u'http://www.heise.de/tp/news.rdf')]
+	keep_only_tags = [dict(name = 'table',attrs={'class':'blogtable'})]
+	remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'})]
+
+	feeds          = [(u'News', u'http://www.heise.de/tp/news.rdf')]
+
+	html2lrf_options = [
+		'--comment'  , description
+		, '--category' , category
+		, '--publisher', publisher
+	]
+
+	html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+
+	def get_article_url(self, article):
+		'''Return the link of *article*, or None when the link contains
+		the substring 'artikel' (such entries are not taken by this recipe).'''
+		link = article.link
+		return None if 'artikel' in link else link
+
+	def preprocess_html(self, soup):
+		'''Insert a Content-Type <meta> declaring self.encoding at the start
+		of the document head and return the soup.
+
+		If the document has no <head> element the soup is returned unchanged
+		instead of failing with AttributeError on None.
+		'''
+		head = soup.head
+		if head is None:
+			# Nothing to attach the declaration to; leave the page as it is.
+			return soup
+		mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
+		# NOTE(review): the markup is inserted as a plain string, not a Tag;
+		# presumably the soup library emits it verbatim - confirm.
+		head.insert(0,mtag)
+		return soup
--- a/src/calibre/web/feeds/recipes/recipe_telepolis_artikel.py
+++ b/src/calibre/web/feeds/recipes/recipe_telepolis_artikel.py
@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
+
+''' http://www.heise.de/tp - Telepolis (Artikel feed) '''
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TelepolisArtikel(BasicNewsRecipe):
+	'''Recipe for the Telepolis "Artikel" feed (www.heise.de/tp).
+
+	Articles are requested through the site's print-mode CGI URL built by
+	print_version(); anchor tags are removed from the markup by
+	preprocess_regexps.
+	'''
+	title          = u'Telepolis (Artikel)'
+	__author__ = 'Gerhard Aigner'
+	publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
+	category = 'news'
+	description = 'Telepolis Artikel'
+	oldest_article = 7
+	max_articles_per_feed = 100
+	recursion = 0
+	no_stylesheets = True
+
+	use_embedded_content = False
+	remove_empty_feeds = True
+
+	# Tag filters handed to the recipe framework (presumably: drop everything
+	# before the first <h1> and all images - confirm against BasicNewsRecipe).
+	remove_tags_before = dict(name='h1')
+	remove_tags = [dict(name='img')]
+
+	feeds          = [(u'Artikel', u'http://www.heise.de/tp/rss/news-a.rdf')]
+
+	# Strip anchor tags (opening and closing) while leaving the link text in
+	# place. The opening pattern requires whitespace or '>' right after the
+	# 'a', so tags that merely start with "a" (<abbr>, <address>, <area>, ...)
+	# are no longer half-removed.
+	preprocess_regexps = [(re.compile(r'<a(?:\s[^>]*)?>', re.IGNORECASE), lambda match: ''),
+		(re.compile(r'</a>', re.IGNORECASE), lambda match: ''),]
+
+	html2lrf_options = [
+		'--comment'  , description
+		, '--category' , category
+		, '--publisher', publisher]
+
+	html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+
+	def print_version(self, url):
+		'''Return the print-mode URL for the article at *url*.
+
+		The article number is taken to be the first run of five digits in
+		*url*. When *url* contains no such run it is returned unchanged
+		rather than raising AttributeError on a failed match.
+		'''
+		m = re.search(r'\d{5}', url)
+		if m is None:
+			return url
+		return "http://www.heise.de/bin/tp/issue/r4/dl-artikel2.cgi?artikelnr="+ m.group() +"&mode=print"
+