From 5cc6465941057f1a3e2820f37f0750cd48774d26 Mon Sep 17 00:00:00 2001
From: Tom Scholl
Date: Mon, 4 Apr 2011 16:35:09 +0000
Subject: [PATCH 1/7] Added fast, no-image version of the Daily Mail recipe

---
 recipes/daily_mail_fast.recipe | 52 ++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 recipes/daily_mail_fast.recipe

diff --git a/recipes/daily_mail_fast.recipe b/recipes/daily_mail_fast.recipe
new file mode 100644
index 0000000000..9ddb6f77f8
--- /dev/null
+++ b/recipes/daily_mail_fast.recipe
@@ -0,0 +1,52 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TheDailyMail(BasicNewsRecipe):
+    title = u'The Daily Mail (fast)'
+    oldest_article = 2
+    language = 'en_GB'
+
+    __author__ = 'RufusA and Sujata Raman'
+    description = 'Faster and smaller version of the Daily Mail that does not download pictures'
+    simultaneous_downloads = 1
+    max_articles_per_feed = 50
+
+    extra_css = '''#js-article-text{font-family:Arial,Helvetica,sans-serif;}
+                   h1{font-size:x-large; font-weight:bold;}
+                   a.author{color:#003580;}
+                   .js-article-text{font-size:50%;}
+                   .imageCaption{font-size:x-small; font-weight:bold}
+                '''
+
+    remove_tags = [ dict(name='div', attrs={'class':['article-icon-links-container','print-or-mail-links cleared',
+                        'social-links cleared','explore-links','relatedItems','intellicrumbs box','travel','moduleHalf']}),
+                    dict(name='div', attrs={'id':['link-unit-wrapper','pushdown-ad','reader-comments','googleAds']}),
+                    dict(name='h3', attrs={'class':['social-links-title']}),
+                    dict(name='span', attrs={'class':['clickToEnlargeTop']}),
+                    dict(name=['img']),
+                  ]
+    #remove_tags_before = dict(name='div', attrs={'id':'content'})
+    keep_only_tags = [dict(name='div', attrs={'id':'js-article-text'})]
+
+    no_stylesheets = True
+
+    feeds = [
+        (u'Home', u'http://www.dailymail.co.uk/home/index.rss'),
+        (u'News', u'http://www.dailymail.co.uk/news/index.rss'),
+        (u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'),
+        (u'TV and Showbiz', u'http://www.dailymail.co.uk/tvshowbiz/index.rss'),
+        (u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'),
+        (u'Health', u'http://www.dailymail.co.uk/health/index.rss'),
+        (u'Science and Technology', u'http://www.dailymail.co.uk/sciencetech/index.rss'),
+        (u'Money', u'http://www.dailymail.co.uk/money/index.rss'),
+        (u'Property', u'http://www.dailymail.co.uk/property/index.rss'),
+        (u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'),
+        (u'Travel', u'http://www.dailymail.co.uk/travel/index.rss')
+        ]
+
+    #def print_version(self, url):
+    #    main = url.partition('?')[0]
+    #    return main + '?printingPage=true'
+
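The speed win in the recipe above comes from the last remove_tags entry, dict(name=['img']), which strips every <img> element so calibre never downloads the pictures. A minimal sketch of the same pattern; the class name and feed URL below are hypothetical, for illustration only:

    from calibre.web.feeds.news import BasicNewsRecipe

    class FastTextOnly(BasicNewsRecipe):
        # Hypothetical example recipe, not part of this patch series
        title = u'Example (fast)'
        no_stylesheets = True
        # Dropping all <img> tags is what makes the download fast
        # and the resulting e-book small
        remove_tags = [dict(name=['img'])]
        feeds = [(u'Example', u'http://example.com/index.rss')]
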
From 43b57cb343edcf9f6d17f0b7013ec0c8e2b67d51 Mon Sep 17 00:00:00 2001
From: Tom Scholl
Date: Thu, 7 Apr 2011 22:16:12 +0000
Subject: [PATCH 2/7] More detailed sections for Guardian recipe

---
 recipes/guardian.recipe | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/recipes/guardian.recipe b/recipes/guardian.recipe
index 6211997b06..c5021cb91d 100644
--- a/recipes/guardian.recipe
+++ b/recipes/guardian.recipe
@@ -28,7 +28,7 @@ class Guardian(BasicNewsRecipe):
     # List of section titles to ignore
     # For example: ['Sport']
     ignore_sections = []
-    
+
     timefmt = ' [%a, %d %b %Y]'
     keep_only_tags = [
         dict(name='div', attrs={'id':["content","article_header","main-article-info",]}),
@@ -87,8 +87,14 @@ class Guardian(BasicNewsRecipe):
         idx = soup.find('div', id='book-index')
         for s in idx.findAll('strong', attrs={'class':'book'}):
             a = s.find('a', href=True)
-            yield (self.tag_to_string(a), a['href'])
-
+            section_title = self.tag_to_string(a)
+            if section_title not in self.ignore_sections:
+                prefix = ''
+                if section_title != 'Main section':
+                    prefix = section_title + ': '
+                for subsection in s.parent.findAll('a', attrs={'class':'book-section'}):
+                    yield (prefix + self.tag_to_string(subsection), subsection['href'])
+
     def find_articles(self, url):
         soup = self.index_to_soup(url)
         div = soup.find('div', attrs={'class':'book-index'})
@@ -109,15 +115,12 @@ class Guardian(BasicNewsRecipe):
             'title': title, 'url':url, 'description':desc,
             'date' : strftime('%a, %d %b'),
             }
-    
+
     def parse_index(self):
         try:
             feeds = []
             for title, href in self.find_sections():
-                if not title in self.ignore_sections:
-                    feeds.append((title, list(self.find_articles(href))))
+                feeds.append((title, list(self.find_articles(href))))
             return feeds
         except:
             raise NotImplementedError
-
-

From 87f281cf4dd5c8d4eb05b3e3440b756674a19827 Mon Sep 17 00:00:00 2001
From: Tom Scholl
Date: Tue, 10 May 2011 11:17:39 +0000
Subject: [PATCH 3/7] Updated Newsweek recipe

---
 recipes/newsweek.recipe | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/recipes/newsweek.recipe b/recipes/newsweek.recipe
index 73837c1872..740bf5299d 100644
--- a/recipes/newsweek.recipe
+++ b/recipes/newsweek.recipe
@@ -11,7 +11,20 @@ class Newsweek(BasicNewsRecipe):
     no_stylesheets = True

     BASE_URL = 'http://www.newsweek.com'
-    INDEX = BASE_URL+'/topics.html'
+
+    topics = {
+        'Culture' : '/tag/culture.html',
+        'Business' : '/tag/business.html',
+        'Society' : '/tag/society.html',
+        'Science' : '/tag/science.html',
+        'Education' : '/tag/education.html',
+        'Politics' : '/tag/politics.html',
+        'Health' : '/tag/health.html',
+        'World' : '/tag/world.html',
+        'Nation' : '/tag/nation.html',
+        'Technology' : '/tag/technology.html',
+        'Game Changers' : '/tag/game-changers.html',
+    }

     keep_only_tags = dict(name='article', attrs={'class':'article-text'})
     remove_tags = [dict(attrs={'data-dartad':True})]
@@ -23,10 +36,9 @@ class Newsweek(BasicNewsRecipe):
         return soup

     def newsweek_sections(self):
-        soup = self.index_to_soup(self.INDEX)
-        for a in soup.findAll('a', title='Primary tag', href=True):
-            yield (string.capitalize(self.tag_to_string(a)),
-                   self.BASE_URL+a['href'])
+        for topic_name, topic_url in self.topics.iteritems():
+            yield (topic_name,
+                   self.BASE_URL+topic_url)


     def newsweek_parse_section_page(self, soup):

From 8b3c13cedaa7f3ca6bed2939179a7240ffaf4405 Mon Sep 17 00:00:00 2001
From: Tom Scholl
Date: Wed, 18 May 2011 17:22:25 +0000
Subject: [PATCH 4/7] Added Finance section to Telegraph recipe

---
 recipes/telegraph_uk.recipe | 1 +
 1 file changed, 1 insertion(+)

diff --git a/recipes/telegraph_uk.recipe b/recipes/telegraph_uk.recipe
index f79f0fa50c..5fe5b168b8 100644
--- a/recipes/telegraph_uk.recipe
+++ b/recipes/telegraph_uk.recipe
@@ -49,6 +49,7 @@ class TelegraphUK(BasicNewsRecipe):
          (u'UK News'         , u'http://www.telegraph.co.uk/news/uknews/rss'                                    )
         ,(u'World News'      , u'http://www.telegraph.co.uk/news/worldnews/rss'                                 )
         ,(u'Politics'        , u'http://www.telegraph.co.uk/news/newstopics/politics/rss'                       )
+        ,(u'Finance'         , u'http://www.telegraph.co.uk/finance/rss'                                        )
         ,(u'Technology News' , u'http://www.telegraph.co.uk/scienceandtechnology/technology/technologynews/rss' )
         ,(u'UK News'         , u'http://www.telegraph.co.uk/scienceandtechnology/technology/technologyreviews/rss')
         ,(u'Science News'    , u'http://www.telegraph.co.uk/scienceandtechnology/science/sciencenews/rss'       )
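The Newsweek change above swaps a scrape of /topics.html for a hard-coded topic map, so section discovery no longer depends on the markup of an index page. The shape of the resulting generator is easy to exercise in isolation; a sketch with a trimmed-down stand-in for the full topics dict (sorted() is added here only to make the iteration order deterministic, a small deviation from the patch's iteritems()):

    BASE_URL = 'http://www.newsweek.com'
    # Trimmed-down stand-in for the topics dict in the patch
    topics = {
        'World': '/tag/world.html',
        'Science': '/tag/science.html',
    }

    def newsweek_sections():
        # Static mapping: no network round trip and no fragile scrape,
        # at the cost of a recipe update whenever sections change.
        for name, path in sorted(topics.items()):
            yield (name, BASE_URL + path)

    for title, url in newsweek_sections():
        print('%s -> %s' % (title, url))
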
From afde9acfdf1e88c92867e25bc608edfd0637c3bd Mon Sep 17 00:00:00 2001
From: Tom Scholl
Date: Fri, 20 May 2011 06:49:24 +0000
Subject: [PATCH 5/7] Re-enabled Time magazine

---
 recipes/time_magazine.recipe | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipes/time_magazine.recipe b/recipes/time_magazine.recipe
index 380bf71f8c..ac7821b65a 100644
--- a/recipes/time_magazine.recipe
+++ b/recipes/time_magazine.recipe
@@ -10,8 +10,8 @@ import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class Time(BasicNewsRecipe):
-    recipe_disabled = ('This recipe has been disabled as TIME no longer'
-                       ' publish complete articles on the web.')
+    #recipe_disabled = ('This recipe has been disabled as TIME no longer'
+    #                   ' publish complete articles on the web.')
     title = u'Time'
     __author__ = 'Kovid Goyal and Sujata Raman'
     description = 'Weekly magazine'

From 0d4c451e6befbf59860efedd224fc85c83420702 Mon Sep 17 00:00:00 2001
From: Tom Scholl
Date: Tue, 24 May 2011 14:23:42 +0000
Subject: [PATCH 6/7] Resize large images to reduce size of Daily Mail news recipe

---
 recipes/daily_mail.recipe | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/recipes/daily_mail.recipe b/recipes/daily_mail.recipe
index ac2dfd1777..40d43864c7 100644
--- a/recipes/daily_mail.recipe
+++ b/recipes/daily_mail.recipe
@@ -1,4 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.utils.magick import Image

 class TheDailyMail(BasicNewsRecipe):
     title = u'The Daily Mail'
@@ -46,5 +47,21 @@ class TheDailyMail(BasicNewsRecipe):
     #def print_version(self, url):
     #    main = url.partition('?')[0]
     #    return main + '?printingPage=true'
-
-
+
+
+    def postprocess_html(self, soup, first):
+        # Process every image in the article; assumes the rewritten HTML
+        # already points at the locally downloaded image files.
+        for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.has_key('src')):
+            iurl = tag['src']
+            img = Image()
+            img.open(iurl)
+            width, height = img.size
+            self.log('img:', iurl, 'width:', width, 'height:', height)
+            # Halve oversized images: scaling both dimensions by 50%
+            # quarters the pixel data and keeps the e-book small.
+            if width > 520 or height > 640:
+                self.log('Resizing image to 50%')
+                img.size = (width / 2, height / 2)
+                img.save(iurl)
+        return soup
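The resize rule in the Daily Mail patch is worth stating precisely: any image wider than 520px or taller than 640px is scaled to half size in both dimensions, cutting its pixel count to a quarter. A self-contained sketch of just that decision logic; the thresholds come from the patch, while the function name and integer division (//, safe under both Python 2 and 3) are illustrative choices:

    def halved_size(width, height, max_w=520, max_h=640):
        # Halve both dimensions when either threshold is exceeded;
        # 520x640 are the limits used in the patch above.
        if width > max_w or height > max_h:
            return (width // 2, height // 2)
        return (width, height)

    assert halved_size(1000, 400) == (500, 200)   # too wide -> halved
    assert halved_size(400, 800) == (200, 400)    # too tall -> halved
    assert halved_size(500, 600) == (500, 600)    # within limits -> unchanged
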
From 63fb99692956f1e6f0a98fe6157856dc38d260fc Mon Sep 17 00:00:00 2001
From: Tom Scholl
Date: Thu, 26 May 2011 22:27:15 +0000
Subject: [PATCH 7/7] Fixed erroneous log_debug calls in Zaobao news recipe

---
 recipes/zaobao.recipe | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/recipes/zaobao.recipe b/recipes/zaobao.recipe
index 91a5459e18..a6d7d2e08e 100644
--- a/recipes/zaobao.recipe
+++ b/recipes/zaobao.recipe
@@ -82,7 +82,7 @@ class ZAOBAO(BasicNewsRecipe):
         return soup

     def parse_feeds(self):
-        self.log_debug(_('ZAOBAO overrided parse_feeds()'))
+        self.log(_('ZAOBAO overrode parse_feeds()'))
         parsed_feeds = BasicNewsRecipe.parse_feeds(self)

         for id, obj in enumerate(self.INDEXES):
@@ -99,7 +99,7 @@ class ZAOBAO(BasicNewsRecipe):
                 a_title = self.tag_to_string(a)
                 date = ''
                 description = ''
-                self.log_debug(_('adding %s at %s')%(a_title,a_url))
+                self.log(_('adding %s at %s')%(a_title,a_url))
                 articles.append({
                     'title':a_title,
                     'date':date,
@@ -110,23 +110,23 @@ class ZAOBAO(BasicNewsRecipe):
             pfeeds = feeds_from_index([(title, articles)], oldest_article=self.oldest_article,
                 max_articles_per_feed=self.max_articles_per_feed)

-            self.log_debug(_('adding %s to feed')%(title))
+            self.log(_('adding %s to feed')%(title))
             for feed in pfeeds:
-                self.log_debug(_('adding feed: %s')%(feed.title))
+                self.log(_('adding feed: %s')%(feed.title))
                 feed.description = self.DESC_SENSE
                 parsed_feeds.append(feed)
                 for a, article in enumerate(feed):
-                    self.log_debug(_('added article %s from %s')%(article.title, article.url))
-                self.log_debug(_('added feed %s')%(feed.title))
+                    self.log(_('added article %s from %s')%(article.title, article.url))
+                self.log(_('added feed %s')%(feed.title))

         for i, feed in enumerate(parsed_feeds):
            # workaround for a strange problem: sometimes the XML encoding is not applied correctly by parse()
            weired_encoding_detected = False
            if not isinstance(feed.description, unicode) and self.encoding and feed.description:
-               self.log_debug(_('Feed %s is not encoded correctly, manually replace it')%(feed.title))
+               self.log(_('Feed %s is not encoded correctly, manually replace it')%(feed.title))
               feed.description = feed.description.decode(self.encoding, 'replace')
            elif feed.description.find(self.DESC_SENSE) == -1 and self.encoding and feed.description:
-               self.log_debug(_('Feed %s is weired encoded, manually redo all')%(feed.title))
+               self.log(_('Feed %s is encoded strangely, manually redo all')%(feed.title))
               feed.description = feed.description.encode('cp1252', 'replace').decode(self.encoding, 'replace')
               weired_encoding_detected = True

@@ -148,7 +148,7 @@ class ZAOBAO(BasicNewsRecipe):
                     article.text_summary = article.text_summary.encode('cp1252', 'replace').decode(self.encoding, 'replace')

             if article.title == "Untitled article":
-                self.log_debug(_('Removing empty article %s from %s')%(article.title, article.url))
+                self.log(_('Removing empty article %s from %s')%(article.title, article.url))
                 # remove the article
                 feed.articles[a:a+1] = []
         return parsed_feeds
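The encode('cp1252', 'replace').decode(self.encoding, 'replace') dance in the Zaobao recipe repairs text that was first decoded with the wrong codec: re-encoding with that same wrong codec recovers the original bytes, which can then be decoded correctly. A standalone sketch of the round trip, with 'gbk' assumed as the real encoding purely for illustration (the recipe uses whatever self.encoding is configured to):

    # Bytes in the site's real encoding ('gbk' assumed for illustration)
    raw = u'\u4e2d\u6587'.encode('gbk')
    # A wrong first decode (cp1252) produces mojibake...
    garbled = raw.decode('cp1252', 'replace')
    # ...re-encoding with the same wrong codec restores the raw bytes,
    # and a second decode with the right codec repairs the text. Any
    # byte that does not round-trip through cp1252 is lost to 'replace'.
    repaired = garbled.encode('cp1252', 'replace').decode('gbk', 'replace')
    assert repaired == u'\u4e2d\u6587'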