Updated recipe for The New Yorker (thanks to Darko Miletic)

2026-01-07 20:50:20 -05:00 · 2008-12-14 07:58:42 -08:00 · 2008-12-14 07:58:42 -08:00 · 18a59e5f3a
commit 18a59e5f3a
parent 0e6674820f
6 changed files with 39 additions and 84 deletions
--- a/.pydevproject
+++ b/.pydevproject
@@ -5,9 +5,5 @@
 <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.5</pydev_property>
 <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
 <path>/calibre/src</path>
-<path>/calibre/devices</path>
-<path>/calibre/libprs500.devices.prs500</path>
-<path>/calibre/prs500</path>
-<path>/calibre/gui2</path>
 </pydev_pathproperty>
 </pydev_project>
--- a/src/calibre/gui2/images/news/new_yorker.png
+++ b/src/calibre/gui2/images/news/new_yorker.png
--- a/src/calibre/gui2/images/news/newyorker.png
+++ b/src/calibre/gui2/images/news/newyorker.png
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@@ -258,7 +258,7 @@ class Main(MainWindow, Ui_MainWindow):
            db = LibraryDatabase2(self.library_path)
        except OSError, err:
            error_dialog(self, _('Bad database location'), unicode(err)).exec_()
-            dir = unicode(QFileDialog.getExistingDirectory(self, 
+            dir = unicode(QFileDialog.getExistingDirectory(self,
                            _('Choose a location for your ebook library.'), os.path.expanduser('~')))
            if not dir:
                QCoreApplication.exit(1)
--- a/src/calibre/gui2/status.py
+++ b/src/calibre/gui2/status.py
@@ -32,7 +32,10 @@ class BookInfoDisplay(QWidget):
            self.setMaximumWidth(width)
            QLabel.setPixmap(self, pixmap)
             
-            aspect_ratio = pixmap.width()/float(pixmap.height())
+            try:
+                aspect_ratio = pixmap.width()/float(pixmap.height())
+            except ZeroDivisionError:
+                aspect_ratio = 1
            self.setMaximumWidth(int(aspect_ratio*self.HEIGHT))
        
        def sizeHint(self):
--- a/src/calibre/web/feeds/recipes/new_yorker.py
+++ b/src/calibre/web/feeds/recipes/new_yorker.py
@@ -1,78 +1,34 @@
-#!/usr/bin/env  python
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-import re
-from calibre import strftime
-from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import NavigableString
-
-class NewYorker(BasicNewsRecipe):
-    
-    title       = 'The New Yorker'
-    __author__  = 'Kovid Goyal'
-    description = 'News and opinion'
-    
-    remove_tags = [
-                   dict(name='div', id=['printoptions', 'header', 'articleBottom']),
-                   dict(name='div', attrs={'class':['utils', 'icons']})
-                   ]
-    
-    
-    def parse_index(self):
-        toc_pat = re.compile(r'/magazine/toc/\d+/\d+/\d+/toc_\d+')
-        soup = self.soup(self.browser.open('http://www.newyorker.com/').read())
-        a = soup.find('a', href=toc_pat)
-        if a is None:
-            raise Exception('Could not find the current issue of The New Yorker')
-        href = a['href']
-        href = 'http://www.newyorker.com'+href[href.index('/magazine'):]
-        soup = self.soup(self.browser.open(href).read())
-        img = soup.find(id='inThisIssuePhoto')
-        if img is not None:
-            self.cover_url = 'http://www.newyorker.com'+img['src']
-            alt = img.get('alt', None)
-            if alt:
-                self.timefmt = ' [%s]'%alt
-        features = soup.findAll(attrs={'class':re.compile('feature')})
-        
-        category, sections, articles = None, [], []
-        for feature in features:
-            head = feature.find('img', alt=True, attrs={'class':'featurehed'})
-            if head is None:
-                continue
-            if articles:
-                sections.append((category, articles))
-            category, articles = head['alt'], []
-            if category in ('', 'AUDIO', 'VIDEO', 'BLOGS', 'GOINGS ON'):
-                continue
-            
-            for a in feature.findAll('a', href=True):
-                href = 'http://www.newyorker.com'+a['href']+'?printable=true'
-                title, in_title, desc = '', True, ''
-                for tag in a.contents:
-                    if getattr(tag, 'name', None) == 'br':
-                        in_title = False
-                        continue
-                    if isinstance(tag, NavigableString):
-                        text = unicode(tag)
-                        if in_title:
-                            title += text
-                        else:
-                            desc += text
-                if title and not 'Audio:' in title:
-                    art = {
-                           'title': title,
-                           'desc': desc, 'content':'',
-                           'url': href,
-                           'date': strftime('%a, %d %b'),
-                           }
-                    articles.append(art)
-                
-#        from IPython.Shell import IPShellEmbed
-#        ipshell = IPShellEmbed()
-#        ipshell()
-#        raise Exception()
-
-        return sections
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+'''
+newyorker.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class NewYorker(BasicNewsRecipe):
+    title                 = u'The New Yorker'
+    __author__            = 'Darko Miletic'
+    description           = 'Best of the US journalism'    
+    oldest_article        = 7
+    max_articles_per_feed = 100
+    no_stylesheets        = False
+    use_embedded_content  = False
+
+    keep_only_tags = [
+                        dict(name='div'  , attrs={'id':'printbody'   })
+                     ]
+    remove_tags = [
+                     dict(name='div'  , attrs={'class':'utils'       })
+                    ,dict(name='div'  , attrs={'id':'bottomFeatures' })
+                    ,dict(name='div'  , attrs={'id':'articleBottom'  })
+                  ]
+
+    feeds          = [
+                        (u'The New Yorker', u'http://feeds.newyorker.com/services/rss/feeds/everything.xml')
+                     ]
+
+    def print_version(self, url):
+        return url + '?printable=true'