Updated recipes for The Straits Times and MSDN Magazine

Kovid Goyal 2009-10-07 17:27:38 -06:00
parent 4267c8f85d
commit 64f0769b08
2 changed files with 93 additions and 119 deletions

@@ -1,62 +1,43 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
msdn.microsoft.com/en-us/magazine
'''

from calibre.web.feeds.news import BasicNewsRecipe

class MSDNMagazine_en(BasicNewsRecipe):
    title = 'MSDN Magazine'
    __author__ = 'Darko Miletic'
    description = 'The Microsoft Journal for Developers'
    publisher = 'Microsoft Press'
    category = 'news, IT, Microsoft, programming, windows'
    oldest_article = 31
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    current_issue = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'
    language = 'en'

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                       ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')]

    keep_only_tags = [dict(name='div', attrs={'class':'topic'})]

    remove_tags = [
                     dict(name=['object','link','base','table'])
                    ,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'})
                  ]

    def get_cover_url(self):
        cover_url = None
        soup = self.index_to_soup(self.current_issue)
        link_item = soup.find('span',attrs={'class':'ContentsImageSpacer'})
        if link_item:
            imgt = link_item.find('img')
            if imgt:
                cover_url = imgt['src']
        return cover_url

    def preprocess_html(self, soup):
        for item in soup.findAll('div',attrs={'class':['FeatureSmallHead','ColumnTypeSubTitle']}):
            item.name="h2"
        for item in soup.findAll('div',attrs={'class':['FeatureHeadline','ColumnTypeTitle']}):
            item.name="h1"
        for item in soup.findAll('div',attrs={'class':'ArticleTypeTitle'}):
            item.name="h3"
        return soup
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
msdn.microsoft.com/en-us/magazine
'''

from calibre.web.feeds.news import BasicNewsRecipe

class MSDNMagazine_en(BasicNewsRecipe):
    title = 'MSDN Magazine'
    __author__ = 'Darko Miletic'
    description = 'The Microsoft Journal for Developers'
    publisher = 'Microsoft Press'
    category = 'news, IT, Microsoft, programming, windows'
    oldest_article = 31
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'en'

    feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')]

    keep_only_tags = [dict(name='div', attrs={'class':'navpage'})]

    remove_tags = [
                     dict(name=['object','link','base','table'])
                    ,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'})
                  ]

    remove_tags_after = dict(name='div', attrs={'class':'navpage'})

    def preprocess_html(self, soup):
        for item in soup.findAll('div',attrs={'class':['FeatureSmallHead','ColumnTypeSubTitle']}):
            item.name="h2"
        for item in soup.findAll('div',attrs={'class':['FeatureHeadline','ColumnTypeTitle']}):
            item.name="h1"
        for item in soup.findAll('div',attrs={'class':'ArticleTypeTitle'}):
            item.name="h3"
        return soup
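
Note: the updated MSDN recipe above drops the deprecated html2lrf_options / html2epub_options pair without a replacement. If that publisher/comments/tags metadata is still wanted, it could be expressed through the conversion_options dict that the new Straits Times recipe below already uses; a minimal sketch of such a hypothetical class-body addition (not part of this commit):

    # Hypothetical, not in this commit: metadata from the removed
    # html2lrf/html2epub options carried via conversion_options,
    # mirroring the Straits Times recipe below.
    conversion_options = {
                           'comments' : description
                          ,'tags' : category
                          ,'language' : language
                          ,'publisher' : publisher
                         }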

@@ -1,57 +1,50 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.straitstimes.com
'''

from calibre.web.feeds.recipes import BasicNewsRecipe

class StraitsTimes(BasicNewsRecipe):
    title = 'The Straits Times'
    __author__ = 'Darko Miletic'
    description = 'Singapore newspaper'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'
    publisher = 'Singapore Press Holdings Ltd.'
    category = 'news, politics, singapore, asia'
    language = 'en'

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        , '--ignore-tables'
                       ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'

    remove_tags = [
                     dict(name=['object','link'])
                    ,dict(name='table', attrs={'width':'980'})
                    ,dict(name='td' , attrs={'class':'padlrt10'})
                  ]

    feeds = [
              (u'Singapore' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_singapore.xml' )
             ,(u'SE Asia' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sea.xml' )
             ,(u'Money' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_money.xml' )
             ,(u'Sport' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sport.xml' )
             ,(u'World' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_world.xml' )
             ,(u'Tech & Science' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_tech.xml' )
             ,(u'Lifestyle' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_lifestyle.xml' )
            ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def print_version(self, url):
        return url.replace('http://www.straitstimes.com','http://www.straitstimes.com/print')
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.straitstimes.com
'''

from calibre.web.feeds.recipes import BasicNewsRecipe

class StraitsTimes(BasicNewsRecipe):
    title = 'The Straits Times'
    __author__ = 'Darko Miletic'
    description = 'Singapore newspaper'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'
    publisher = 'Singapore Press Holdings Ltd.'
    category = 'news, politics, singapore, asia'
    language = 'en'
    extra_css = ' .top_headline{font-size: x-large; font-weight: bold} '

    conversion_options = {
                           'comments' : description
                          ,'tags' : category
                          ,'language' : language
                          ,'publisher' : publisher
                         }

    remove_tags = [dict(name=['object','link','map'])]

    keep_only_tags = [dict(name='div', attrs={'class':['top_headline','story_text']})]

    feeds = [
              (u'Singapore' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_singapore.xml' )
             ,(u'SE Asia' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sea.xml' )
             ,(u'Money' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_money.xml' )
             ,(u'Sport' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sport.xml' )
             ,(u'World' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_world.xml' )
             ,(u'Tech & Science' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_tech.xml' )
             ,(u'Lifestyle' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_lifestyle.xml' )
            ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
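
Note: both recipes clean the downloaded HTML in preprocess_html; the Straits Times version simply strips inline style attributes. A standalone sketch of that loop outside the recipe machinery (hypothetical; assumes the BeautifulSoup 3 package that provides the soup objects these recipes receive, and an invented sample html string):

    # Hypothetical standalone check of the style-stripping loop used in
    # preprocess_html above; the import and sample markup are assumptions,
    # not part of the commit.
    from BeautifulSoup import BeautifulSoup

    html = '<div class="story_text"><p style="color:red">Lead paragraph</p></div>'
    soup = BeautifulSoup(html)
    for item in soup.findAll(style=True):
        del item['style']
    print(soup)  # -> <div class="story_text"><p>Lead paragraph</p></div>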