MiDDay by calibre Periodicals

2025-11-27 00:35:00 -05:00 · 2010-03-30 19:37:38 +05:30 · 2010-03-30 19:37:38 +05:30 · 91a75aa0c9
commit 91a75aa0c9
parent 8767957773
10 changed files with 119 additions and 73 deletions
--- a/resources/recipes/heise.recipe
+++ b/resources/recipes/heise.recipe
@@ -9,15 +9,16 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class heiseDe(BasicNewsRecipe):
-    
+
    title = 'heise'
    description = 'Computernews from Germany'
    __author__ = 'Oliver Niesner'
    use_embedded_content   = False
    language = 'de'
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 40
    no_stylesheets = True
-    
+
    remove_tags = [dict(id='navi_top'),
 		   dict(id='navi_bottom'),
 		   dict(id='logo'),
@@ -35,8 +36,8 @@ class heiseDe(BasicNewsRecipe):
 		   dict(name='p', attrs={'class':'news_navi'}),
 		   dict(name='div', attrs={'class':'news_foren'})]
    remove_tags_after = [dict(name='div', attrs={'class':'news_foren'})]
-    
+
-    feeds =  [ ('heise', 'http://www.heise.de/newsticker/heise.rdf') ] 
+    feeds =  [ ('heise', 'http://www.heise.de/newsticker/heise.rdf') ]
-    
+
--- a/resources/recipes/iliteratura_cz.recipe
+++ b/resources/recipes/iliteratura_cz.recipe
@@ -4,7 +4,7 @@ import re
 class SmeRecipe(BasicNewsRecipe):
    __license__  = 'GPL v3'
    __author__ = 'Abelturd'
-    language = 'cz'
+    language = 'cs'
    version = 1
    title = u'iLiteratura.cz'
--- a/resources/recipes/johm.recipe
+++ b/resources/recipes/johm.recipe
@@ -9,6 +9,7 @@ class JournalofHospitalMedicine(BasicNewsRecipe):
    description = 'Medical news'
    timefmt = ' [%d %b, %Y]'
    needs_subscription = True
    language = 'en'
    no_stylesheets = True
    #remove_tags_before = dict(name='div', attrs={'align':'center'})
--- a/resources/recipes/midday.recipe
+++ b/resources/recipes/midday.recipe
@@ -0,0 +1,13 @@
 from calibre.web.feeds.news import CalibrePeriodical
 class MiDDay(CalibrePeriodical):
    title = 'MiDDay'
    calibre_periodicals_slug = 'midday'
    description = '''Get your dose of the latest news, views and fun - from the
        world of politics, sports and Bollywood to the cartoons, comics and games of
        the entertainment section - India’s leading tabloid has it all. To subscribe
        visit <a href="http://news.calibre-ebook.com/periodical/midday">calibre
        Periodicals</a>.'''
    language = 'en_IN'
--- a/resources/recipes/nursingtimes.recipe
+++ b/resources/recipes/nursingtimes.recipe
@@ -18,7 +18,7 @@ class NursingTimes(BasicNewsRecipe):
    encoding               = 'utf-8'
    publisher              = 'emap'
    category               = 'news, health, nursing, UK'
-    language               = 'en-UK'
+    language               = 'en_GB'
    needs_subscription     = True
    LOGIN                  = 'http://www.nursingtimes.net/sign-in'
--- a/resources/recipes/tanea.recipe
+++ b/resources/recipes/tanea.recipe
@@ -6,6 +6,7 @@ class TaNea(BasicNewsRecipe):
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets         = True
    language = 'el'
    remove_tags_before = dict(name='div',attrs={'id':'print-body'})
    remove_tags_after = dict(name='div',attrs={'id':'text'})
--- a/resources/recipes/telepolis.recipe
+++ b/resources/recipes/telepolis.recipe
@@ -8,43 +8,44 @@ import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class TelepolisNews(BasicNewsRecipe):
-	title          = u'Telepolis (News)'
+    title          = u'Telepolis (News)'
-	__author__ = 'Gerhard Aigner'
+    __author__ = 'Gerhard Aigner'
-	publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
+    publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
-	description = 'News from telepolis'
+    description = 'News from telepolis'
-	category = 'news'
+    category = 'news'
-	oldest_article = 7
+    oldest_article = 7
-	max_articles_per_feed = 100
+    max_articles_per_feed = 100
-	recursion = 0
+    recursion = 0
-	no_stylesheets = True
+    no_stylesheets = True
-	encoding = "utf-8"
+    encoding = "utf-8"
    language = 'de_AT'
-	use_embedded_content = False
+    use_embedded_content = False
-	remove_empty_feeds = True
+    remove_empty_feeds = True
-	preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
+    preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
-		(re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]
+        (re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]
-	keep_only_tags = [dict(name = 'table',attrs={'class':'blogtable'})]
+    keep_only_tags = [dict(name = 'table',attrs={'class':'blogtable'})]
-	remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'})]
+    remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'})]
-	feeds          = [(u'News', u'http://www.heise.de/tp/news.rdf')]
+    feeds          = [(u'News', u'http://www.heise.de/tp/news.rdf')]
-	html2lrf_options = [
+    html2lrf_options = [
-		'--comment'  , description
+        '--comment'  , description
-		, '--category' , category
+        , '--category' , category
-		, '--publisher', publisher
+        , '--publisher', publisher
-	]
+    ]
-	html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
-	def get_article_url(self, article):
+    def get_article_url(self, article):
-		'''if the linked article is of kind artikel don't take it'''
+        '''if the linked article is of kind artikel don't take it'''
-		if (article.link.count('artikel') > 0) :
+        if (article.link.count('artikel') > 0) :
-			return None
+            return None
-		return article.link
+        return article.link
-	def preprocess_html(self, soup):
+    def preprocess_html(self, soup):
-		mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
+        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
-		soup.head.insert(0,mtag)
+        soup.head.insert(0,mtag)
-		return soup
+        return soup
--- a/resources/recipes/telepolis_artikel.recipe
+++ b/resources/recipes/telepolis_artikel.recipe
@@ -8,36 +8,37 @@ import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class TelepolisArtikel(BasicNewsRecipe):
-	title          = u'Telepolis (Artikel)'
+    title          = u'Telepolis (Artikel)'
-	__author__ = 'Gerhard Aigner'
+    __author__ = 'Gerhard Aigner'
-	publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
+    publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
-	category = 'news'
+    category = 'news'
-	description = 'Telepolis Artikel'
+    description = 'Telepolis Artikel'
-	oldest_article = 7
+    language = 'de_AT'
-	max_articles_per_feed = 100
+    oldest_article = 7
-	recursion = 0
+    max_articles_per_feed = 100
-	no_stylesheets = True
+    recursion = 0
    no_stylesheets = True
-	use_embedded_content = False
+    use_embedded_content = False
-	remove_empty_feeds = True
+    remove_empty_feeds = True
-	remove_tags_before = dict(name='h1')
+    remove_tags_before = dict(name='h1')
-	remove_tags = [dict(name='img')]
+    remove_tags = [dict(name='img')]
-	feeds          = [(u'Artikel', u'http://www.heise.de/tp/rss/news-a.rdf')]
+    feeds          = [(u'Artikel', u'http://www.heise.de/tp/rss/news-a.rdf')]
-	preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
+    preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
-		(re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]
+        (re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]
-	html2lrf_options = [
+    html2lrf_options = [
-		'--comment'  , description
+        '--comment'  , description
-		, '--category' , category
+        , '--category' , category
-		, '--publisher', publisher]
+        , '--publisher', publisher]
-	html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
-	def print_version(self, url):
+    def print_version(self, url):
-		p = re.compile(r'\d{5}', re.DOTALL|re.IGNORECASE)
+        p = re.compile(r'\d{5}', re.DOTALL|re.IGNORECASE)
-		m = p.search(url)
+        m = p.search(url)
-		return "http://www.heise.de/bin/tp/issue/r4/dl-artikel2.cgi?artikelnr="+ m.group() +"&mode=print"
+        return "http://www.heise.de/bin/tp/issue/r4/dl-artikel2.cgi?artikelnr="+ m.group() +"&mode=print"
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -1359,25 +1359,51 @@ class AutomaticNewsRecipe(BasicNewsRecipe):
            self.web2disk_options.keep_only_tags = []
        return BasicNewsRecipe.fetch_embedded_article(self, article, dir, f, a, num_of_feeds)
-class DownloadedNewsRecipe(BasicNewsRecipe):
+class LoginFailed(ValueError):
    pass
-    def get_downloaded_recipe(self):
+class CalibrePeriodical(BasicNewsRecipe):
-        'Return path on local filesystem to downloaded recipe'
+
-        raise NotImplementedError
+    #: Set this to the slug for the calibre periodical
    calibre_periodicals_slug = None
    LOG_IN = 'http://news.calibre-ebook.com/accounts/login'
    needs_subscription = True
    __author__ = 'calibre Periodicals'
    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.LOG_IN)
        br.select_form(name='login')
        br['username'] = self.username
        br['password'] = self.password
        raw = br.submit().read()
        if 'href="/my-account"' not in raw:
            raise LoginFailed(
                    'Failed to log in, check your username and password for'
                    ' the calibre Periodicals service.')
        return br
    def download(self):
        import cStringIO
        self.log('Fetching downloaded recipe')
-        rpath = self.get_downloaded_recipe()
+        raw = self.browser.open_novisit(
            'http://news.calibre-ebook.com/subscribed_files/%s/0/temp.downloaded_recipe'
            % self.calibre_periodicals_slug
                ).read()
        f = cStringIO.StringIO(raw)
        from calibre.utils.zipfile import ZipFile
-        zf = ZipFile(rpath)
+        zf = ZipFile(f)
        zf.extractall()
        zf.close()
        from calibre.web.feeds.recipes import compile_recipe
        from glob import glob
        try:
-            recipe = compile_recipe(open(glob('*.downloaded_recipe')[0],
+            recipe = compile_recipe(open(glob('*.recipe')[0],
                'rb').read())
            self.conversion_options = recipe.conversion_options
        except:
            self.log.exception('Failed to compile downloaded recipe')
        return os.path.abspath('index.html')
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -5,14 +5,16 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 Builtin recipes.
 '''
 import re, imp, inspect, time, os
-from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, AutomaticNewsRecipe
+from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, \
    AutomaticNewsRecipe, CalibrePeriodical
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre import __appname__, english_sort
 BeautifulSoup, time, english_sort
-basic_recipes = (BasicNewsRecipe, AutomaticNewsRecipe, CustomIndexRecipe)
+basic_recipes = (BasicNewsRecipe, AutomaticNewsRecipe, CustomIndexRecipe,
        CalibrePeriodical)
 _tdir = None
 _crep = 0
 def compile_recipe(src):