Updated recipe for The New Yorker (thanks to Darko Miletic)

This commit is contained in:
Kovid Goyal 2008-12-14 07:58:42 -08:00
parent 0e6674820f
commit 18a59e5f3a
6 changed files with 39 additions and 84 deletions

View File

@ -5,9 +5,5 @@
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.5</pydev_property>
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
<path>/calibre/src</path>
<path>/calibre/devices</path>
<path>/calibre/libprs500.devices.prs500</path>
<path>/calibre/prs500</path>
<path>/calibre/gui2</path>
</pydev_pathproperty>
</pydev_project>

Binary file not shown.

After

Width:  |  Height:  |  Size: 670 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.2 KiB

View File

@ -258,7 +258,7 @@ class Main(MainWindow, Ui_MainWindow):
db = LibraryDatabase2(self.library_path)
except OSError, err:
error_dialog(self, _('Bad database location'), unicode(err)).exec_()
dir = unicode(QFileDialog.getExistingDirectory(self,
dir = unicode(QFileDialog.getExistingDirectory(self,
_('Choose a location for your ebook library.'), os.path.expanduser('~')))
if not dir:
QCoreApplication.exit(1)

View File

@ -32,7 +32,10 @@ class BookInfoDisplay(QWidget):
self.setMaximumWidth(width)
QLabel.setPixmap(self, pixmap)
aspect_ratio = pixmap.width()/float(pixmap.height())
try:
aspect_ratio = pixmap.width()/float(pixmap.height())
except ZeroDivisionError:
aspect_ratio = 1
self.setMaximumWidth(int(aspect_ratio*self.HEIGHT))
def sizeHint(self):

View File

@ -1,78 +1,34 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import NavigableString
class NewYorker(BasicNewsRecipe):
    """Recipe that builds The New Yorker from the current issue's online
    table of contents."""

    title = 'The New Yorker'
    __author__ = 'Kovid Goyal'
    description = 'News and opinion'
    # <div> elements selected for removal, matched by id and by class.
    remove_tags = [
        dict(name='div', id=['printoptions', 'header', 'articleBottom']),
        dict(name='div', attrs={'class':['utils', 'icons']})
    ]

    def parse_index(self):
        """Scrape the current issue's table of contents.

        Opens the newyorker.com front page, follows the first link that
        matches the issue TOC URL pattern, and walks every element whose
        class contains ``feature``. As a side effect, sets
        ``self.cover_url`` and ``self.timefmt`` from the
        ``inThisIssuePhoto`` image when that image is present.

        :return: list of ``(category, articles)`` tuples, where
                 ``articles`` is a list of dicts with the keys ``title``,
                 ``desc``, ``content``, ``url`` and ``date``.
        :raises Exception: if no link to the current issue's TOC is found
                           on the front page.
        """
        toc_pat = re.compile(r'/magazine/toc/\d+/\d+/\d+/toc_\d+')
        soup = self.soup(self.browser.open('http://www.newyorker.com/').read())
        a = soup.find('a', href=toc_pat)
        if a is None:
            raise Exception('Could not find the current issue of The New Yorker')
        href = a['href']
        # Keep only the part from '/magazine' onwards so the URL is rebuilt
        # on a known host regardless of how the link was written.
        href = 'http://www.newyorker.com'+href[href.index('/magazine'):]
        soup = self.soup(self.browser.open(href).read())

        img = soup.find(id='inThisIssuePhoto')
        if img is not None:
            self.cover_url = 'http://www.newyorker.com'+img['src']
            alt = img.get('alt', None)
            if alt:
                self.timefmt = ' [%s]'%alt

        features = soup.findAll(attrs={'class':re.compile('feature')})
        category, sections, articles = None, [], []
        for feature in features:
            head = feature.find('img', alt=True, attrs={'class':'featurehed'})
            if head is None:
                continue
            # A new heading closes the previous category.
            if articles:
                sections.append((category, articles))
            category, articles = head['alt'], []
            if category in ('', 'AUDIO', 'VIDEO', 'BLOGS', 'GOINGS ON'):
                continue

            for link in feature.findAll('a', href=True):
                href = 'http://www.newyorker.com'+link['href']+'?printable=true'
                # Text before the first <br> is the title, text after it is
                # the description; only bare text nodes are collected.
                title, in_title, desc = '', True, ''
                for tag in link.contents:
                    if getattr(tag, 'name', None) == 'br':
                        in_title = False
                        continue
                    if isinstance(tag, NavigableString):
                        text = unicode(tag)
                        if in_title:
                            title += text
                        else:
                            desc += text
                if title and 'Audio:' not in title:
                    # NOTE(review): calibre's parse_index documentation names
                    # the summary key 'description' -- confirm 'desc' is
                    # honoured by this version before relying on it.
                    art = {
                        'title': title,
                        'desc': desc, 'content':'',
                        'url': href,
                        'date': strftime('%a, %d %b'),
                    }
                    articles.append(art)

        # The loop only flushes a category when the next heading is seen, so
        # the final category has to be flushed here or it is silently lost.
        if articles:
            sections.append((category, articles))

        return sections
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
newyorker.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class NewYorker(BasicNewsRecipe):
    """Feed-based recipe for newyorker.com.

    Declares a single RSS feed and maps each article URL to its
    printable variant via :meth:`print_version`.
    """

    title = u'The New Yorker'
    __author__ = 'Darko Miletic'
    description = 'Best of the US journalism'

    # Feed settings.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = False
    use_embedded_content = False

    # Selector for the printable article body.
    keep_only_tags = [dict(name='div', attrs={'id': 'printbody'})]

    # <div> selectors listed for removal.
    remove_tags = [
        dict(name='div', attrs={'class': 'utils'}),
        dict(name='div', attrs={'id': 'bottomFeatures'}),
        dict(name='div', attrs={'id': 'articleBottom'}),
    ]

    feeds = [
        (
            u'The New Yorker',
            u'http://feeds.newyorker.com/services/rss/feeds/everything.xml',
        ),
    ]

    def print_version(self, url):
        """Return ``url`` with the printable-view query string appended."""
        printable_suffix = '?printable=true'
        return url + printable_suffix