From 6af7b6a43e4946609ee4d131af567cb232283f1b Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 31 Jan 2010 14:25:54 -0700
Subject: [PATCH] NY Times Sunday Book review by Krittika Goyal

---
 Changelog.yaml                               | 95 ++++++++++++++++++++
 resources/recipes/nytimesbook.recipe         | 56 ++++++++++++
 src/calibre/ebooks/oeb/transforms/rescale.py |  5 +-
 3 files changed, 155 insertions(+), 1 deletion(-)
 create mode 100644 resources/recipes/nytimesbook.recipe
diff --git a/Changelog.yaml b/Changelog.yaml
index c2124aadd9..7c5644fd63 100644
--- a/Changelog.yaml
+++ b/Changelog.yaml
@@ -4,6 +4,101 @@
 # for important features/bug fixes.
 # Also, each release can have new and improved recipes.
 
+- version: 0.6.37
+  date: 2010-01-31
+
+  new features:
+    - title: "E-book viewer: Add support for viewing SVG images"
+      type: major
+
+    - title: "Add category of Recently added books when generating catalog in e-book format"
+
+    - title: "OS X: Allow adding of books to calibre via drag and drop on the calibre dock icon"
+
+    - title: "Add support for masthead images when downloading news for the Kindle"
+
+    - title: "MOBI metadata: Allow setting of metadata in old PRC files without EXTH headers as well"
+
+  bug fixes:
+    - title: Fix changing the date in Dutch
+      tickets: [4732]
+
+    - title: "Fix regression that broke sending files to unupdated PRS 500s"
+
+    - title: "MOBI Input: Ignore width and height percentage measures for <img> tags."
+      tickets: [4726]
+
+    - title: "EPUB Output: Remove <img> tags that point to the internet for their images as this causes the ever delicate ADE to crash."
+      tickets: [4692]
+
+    - title: "Comic Input: Handle UTF-8 BOM when converting a cbc file"
+      tickets: [4683]
+
+    - title: "Allow rating to be cleared via the Bulk metadata edit dialog"
+      tickets: [4693]
+
+    - title: "Add workaround for broken linux systems with multiply encoded file names"
+      tickets: [4721]
+
+    - title: Fix bug preventing the use of indices when setting save to disk templates
+      tickets: [4710]
+
+    - title: "Linux device mounting. Use filetype of auto to allow non vfat filesystems to be mounted"
+      tickets: [4707]
+
+    - title: "Catalog generation: Make sorting of numbers in title as text optional"
+
+    - title: "Fix error while sending book with non-ascii character in title/author to device on linux"
+      tickets: [4690]
+
+  new recipes:
+    - title: Kamera Bild
+      author: Darko Miletic
+
+    - title: The Online Photographer
+      author: Darko Miletic
+
+    - title: The Luminous Landscape
+      author: Darko Miletic
+
+    - title: Slovo
+      author: Abelturd
+
+    - title: Various Danish newspapers
+      author: Darko Miletic
+
+    - title: Heraldo de Aragon
+      author: Lorenzo Vigentini
+
+    - title: Orange County Register
+      author: Lorenzo Vigentini
+
+    - title: Open Left
+      author: Xanthan Gum
+
+    - title: Michelle Malkin
+      author: Walt Anthony
+
+    - title: The Metro Montreal
+      author: Jerry Clapperton
+
+    - title: The Gazette
+      author: Jerry Clapperton
+
+    - title: Macleans Magazine
+      author:  Nick Redding
+
+    - title: NY Times Sunday Book Review
+      author: Krittika Goyal
+
+
+  improved recipes:
+    - The Irish Times
+    - Washington Post
+    - NIN
+    - The Discover Magazine
+    - Pagina 12
+
 - version: 0.6.36
   date: 2010-01-25
 
diff --git a/resources/recipes/nytimesbook.recipe b/resources/recipes/nytimesbook.recipe
new file mode 100644
index 0000000000..686f30b69a
--- /dev/null
+++ b/resources/recipes/nytimesbook.recipe
@@ -0,0 +1,56 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+
+class NewYorkTimesBookReview(BasicNewsRecipe):
+    '''Download the New York Times Sunday Book Review from its RSS feed.'''
+
+    title          = u'New York Times Book Review'
+    language       = 'en'
+    __author__     = 'Krittika Goyal'
+    oldest_article = 8 #days
+    max_articles_per_feed = 1000
+    # Link-following depth; together with match_regexps this pulls in pages 2+.
+    recursions = 2
+
+    remove_stylesheets = True
+    remove_tags_after  = dict(name='div', attrs={'id':'authorId'})
+    remove_tags = [
+       dict(name='iframe'),
+       dict(name=['div', 'a'], attrs={'class':['enlargeThis', 'jumpLink']}),
+       dict(name='div', attrs={'id':['sidebarArticles', 'toolsRight']}),
+    ]
+    # Only nytimes.com links carrying a pagewanted=<2-9> parameter match.
+    match_regexps = [
+            r'http://www.nytimes.com/.+pagewanted=[2-9]+'
+            ]
+
+    feeds          = [
+        ('New York Times Sunday Book Review',
+         'http://feeds.nytimes.com/nyt/rss/SundayBookReview'),
+    ]
+
+    def preprocess_html(self, soup):
+        '''Reduce the page to its div#article element.
+
+        Returns a new minimal document wrapping that element, or soup
+        unchanged when the page contains no div#article.
+        '''
+        story = soup.find(name='div', attrs={'id':'article'})
+        if story is None:
+            # Inserting None would raise; keep the page as downloaded instead.
+            return soup
+        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
+        body = soup.find(name='body')
+        body.insert(0, story)
+        return soup
+
+    def postprocess_html(self, soup, first):
+        '''Strip page navigation and, on non-first pages, h1 and timestamp.'''
+        for div in soup.findAll(id='pageLinks'):
+            div.extract()
+        if not first:
+            # Presumably these repeat the first page's header -- TODO confirm.
+            for tag in (soup.find('h1'), soup.find(attrs={'class':'timestamp'})):
+                if tag is not None:
+                    tag.extract()
+        return soup
diff --git a/src/calibre/ebooks/oeb/transforms/rescale.py b/src/calibre/ebooks/oeb/transforms/rescale.py
index 7ce3b5a588..fbf0e9bc4f 100644
--- a/src/calibre/ebooks/oeb/transforms/rescale.py
+++ b/src/calibre/ebooks/oeb/transforms/rescale.py
@@ -35,7 +35,10 @@ class RescaleImages(object):
                 if not raw: continue
                 if qt:
                     img = QImage(10, 10, QImage.Format_ARGB32_Premultiplied)
-                    if not img.loadFromData(raw): continue
+                    try:
+                        if not img.loadFromData(raw): continue
+                    except:
+                        continue
                     width, height = img.width(), img.height()
                 else:
                     f = cStringIO.StringIO(raw)