IGN:Various improved recipes

2025-07-09 03:04:10 -04:00 · 2009-11-18 07:14:15 -07:00 · 2009-11-18 07:14:15 -07:00 · 05bc40b53f
commit 05bc40b53f
parent 2ce5dec5ee
3 changed files with 66 additions and 17 deletions
--- a/resources/recipes/hbr.recipe
+++ b/resources/recipes/hbr.recipe
@ -6,10 +6,10 @@ class HBR(BasicNewsRecipe):
    title = 'Harvard Business Review'
    description = 'To subscribe go to http://hbr.harvardbusiness.org'
    needs_subscription = True
-    __author__ = 'Kovid Goyal'
+    __author__ = 'Kovid Goyal and Sujata Raman'
    timefmt                = ' [%B %Y]'
-    no_stylesheets = True   
-    
+    no_stylesheets = True
+
    LOGIN_URL = 'http://hbr.harvardbusiness.org/login?request_url=/'
    INDEX = 'http://hbr.harvardbusiness.org/current'

@ -20,14 +20,14 @@ class HBR(BasicNewsRecipe):
                'contentRight', 'summaryLink']),
            dict(name='form'),
            ]
-    
+
    extra_css = '''
                a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
                .article{font-family:Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
                h2{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large; }
                h4{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:small;  }
                #articleAuthors{font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000;font-size:x-small;}
-                #summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}                
+                #summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}
                '''

    def get_browser(self):
@ -100,10 +100,10 @@ class HBR(BasicNewsRecipe):
        index = 'http://hbr.harvardbusiness.org/current'
        soup = self.index_to_soup(index)
        link_item = soup.find('img', alt=re.compile("HBR Cover Image"), src=True)
-        
+
        if link_item:
           cover_url = 'http://hbr.harvardbusiness.org' + link_item['src']
-           
+
        return cover_url
-    
-  
+
+
--- a/resources/recipes/kellog_insight.recipe
+++ b/resources/recipes/kellog_insight.recipe
@ -12,20 +12,29 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class KellogInsight(BasicNewsRecipe):

    title          = 'Kellog Insight'
-    __author__     = 'Kovid Goyal'
+    __author__     = 'Kovid Goyal and Sujata Raman'
    description    = 'Articles from the Kellog School of Management'
    no_stylesheets = True
    encoding       = 'utf-8'
    language = 'en'

    oldest_article = 60
-    remove_tags_before = {'name':'h1'}
-    remove_tags_after = {'class':'col-two-text'}

+    keep_only_tags = [dict(name='div', attrs={'id':['print_no_comments']})]

+    remove_tags = [dict(name='div', attrs={'class':'col-three'})]

-    feeds = [('Articles',
-        'http://insight.kellogg.northwestern.edu/index.php/Kellogg/RSS')]
+    extra_css = '''
+                h1{font-family:arial; font-size:medium; color:#333333;}
+                .col-one{font-family:arial; font-size:xx-small;}
+                .col-two{font-family:arial; font-size:x-small; }
+                h2{font-family:arial; font-size:small; color:#666666;}
+                h3{font-family:arial; font-size:small; color:#333333;text-transform: uppercase; font-weight:normal;}
+                h4{color:#660000;font-family:arial; font-size:x-small;}
+                .col-two-text{font-family:arial; font-size:x-small; color:#333333;}
+                '''
+
+    feeds = [('Articles', 'http://insight.kellogg.northwestern.edu/index.php/Kellogg/RSS')]

    def get_article_url(self, article):
        # Get only article not blog links
@ -34,3 +43,11 @@ class KellogInsight(BasicNewsRecipe):
            return link
        self.log('Skipping non-article', link)
        return None
+
+    def preprocess_html(self, soup):
+        # Rename the node following each <span> to <h4> (extra_css styles h4); returns soup.
+        for tag in soup.findAll(name=['span']):
+            if tag.nextSibling is not None:  # a <span> that is the last child has no sibling
+                tag.nextSibling.name = 'h4'
+        return soup
+
--- a/resources/recipes/science_news.recipe
+++ b/resources/recipes/science_news.recipe
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe

 class Sciencenews(BasicNewsRecipe):
    title                 = u'ScienceNews'
-    __author__            = u'Darko Miletic'
+    __author__            = u'Darko Miletic and Sujata Raman'
    description           = u"Science News is an award-winning weekly newsmagazine covering the most important research in all fields of science. Its 16 pages each week are packed with short, accurate articles that appeal to both general readers and scientists. Published since 1922, the magazine now reaches about 150,000 subscribers and more than 1 million readers. These are the latest News Items from Science News."
    oldest_article        = 30
    language = 'en'
@ -17,13 +17,45 @@ class Sciencenews(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
-    timefmt               = ' [%A, %d %B, %Y]' 
+    timefmt               = ' [%A, %d %B, %Y]'
+
+    extra_css = '''
+                .content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;}
+                .content_summary{font-family:georgia ;font-size:small ;color:#585858 ; font-weight:bold;}
+                .content_authors{font-family:helvetica,arial ;font-size: xx-small ;color:#14487E ;}
+                .content_edition{font-family:helvetica,arial ;font-size: xx-small ;}
+                .exclusive{color:#FF0000 ;}
+                .anonymous{color:#14487E ;}
+                .content_content{font-family:helvetica,arial ;font-size: x-small ; color:#000000;}
+                .description{color:#585858;font-family:helvetica,arial ;font-size: xx-small ;}
+                .credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
+                '''

    keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
    remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
    remove_tags = [
                     dict(name='ul', attrs={'id':'content_functions_bottom'})
-                    ,dict(name='div', attrs={'id':'content_functions_top'})
+                    ,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
+                    ,dict(name='img', attrs={'class':'icon'})
+                    ,dict(name='div', attrs={'class': 'embiggen'})
                  ]

    feeds       = [(u"Science News / News Items", u'http://sciencenews.org/view/feed/type/news/name/news.rss')]
+
+    def get_cover_url(self):
+        # Cover URL built from the home page's <img alt="issue"> tag, or None if absent.
+        cover_url = None
+        index = 'http://www.sciencenews.org/view/home'
+        soup = self.index_to_soup(index)
+        # src=True skips src-less tags, so link_item['src'] below cannot raise KeyError.
+        link_item = soup.find(name='img', alt="issue", src=True)
+        if link_item:
+            cover_url = 'http://www.sciencenews.org' + link_item['src'] + '.jpg'
+        return cover_url
+
+    def preprocess_html(self, soup):
+        # Rename every <span> to <div>; the soup is modified in place and returned.
+        spans = soup.findAll(name=['span'])
+        for span in spans:
+            span.name = 'div'
+        return soup