From 80c85b7483647a13a0e3502e8568151e2851d4fc Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 17 Jun 2010 15:41:11 -0600 Subject: [PATCH 1/3] Fix WSJ recipe to get content from all sections of today's paper --- resources/recipes/wsj.recipe | 63 +++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/resources/recipes/wsj.recipe b/resources/recipes/wsj.recipe index e8e29505c4..2e99a690f4 100644 --- a/resources/recipes/wsj.recipe +++ b/resources/recipes/wsj.recipe @@ -3,7 +3,6 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' -import string from calibre.web.feeds.news import BasicNewsRecipe # http://online.wsj.com/page/us_in_todays_paper.html @@ -66,7 +65,7 @@ class WallStreetJournal(BasicNewsRecipe): return soup def wsj_get_index(self): - return self.index_to_soup('http://online.wsj.com/page/us_in_todays_paper.html') + return self.index_to_soup('http://online.wsj.com/itp') def parse_index(self): soup = self.wsj_get_index() @@ -75,24 +74,35 @@ class WallStreetJournal(BasicNewsRecipe): if date is not None: self.timefmt = ' [%s]'%self.tag_to_string(date) - sections = {} - sec_order = [] + cov = soup.find('a', attrs={'class':'icon pdf'}, href=True) + if cov is not None: + self.cover_url = cov['href'] + + feeds = [] + div = soup.find('div', attrs={'class':'itpHeader'}) + div = div.find('ul', attrs={'class':'tab'}) + for a in div.findAll('a', href=lambda x: x and '/itp/' in x): + title = self.tag_to_string(a) + url = 'http://online.wsj.com' + a['href'] + self.log('Found section:', title) + articles = self.wsj_find_articles(url) + if articles: + feeds.append((title, articles)) + + return feeds + + def wsj_find_articles(self, url): + soup = self.index_to_soup(url) + + whats_news = soup.find('div', attrs={'class':lambda x: x and + 'whatsNews-simple' in x}) + if whats_news is not None: + whats_news.extract() + + articles = [] + for a in soup.findAll('a', attrs={'class':'mjLinkItem'}, href=True): container = a.findParent(['li', 'div']) - if container.name == 'div': - section = 'Page One' - else: - section = '' - sec = container.find('a', href=lambda x: x and '/search?' in x) - if sec is not None: - section = self.tag_to_string(sec).strip() - if not section: - h = container.find(['h1','h2','h3','h4','h5','h6']) - section = self.tag_to_string(h) - section = string.capitalize(section).replace('U.s.', 'U.S.') - if section not in sections: - sections[section] = [] - sec_order.append(section) meta = a.find(attrs={'class':'meta_sectionName'}) if meta is not None: meta.extract() @@ -103,27 +113,28 @@ class WallStreetJournal(BasicNewsRecipe): if p is not None: desc = self.tag_to_string(p) - sections[section].append({'title':title, 'url':url, + articles.append({'title':title, 'url':url, 'description':desc, 'date':''}) - self.log('Found article:', title) + self.log('\tFound article:', title) + ''' + # Find related articles a.extract() for a in container.findAll('a', href=lambda x: x and '/article/' - in x): + in x and 'articleTabs' not in x): url = a['href'] if not url.startswith('http:'): url = 'http://online.wsj.com'+url title = self.tag_to_string(a).strip() if not title or title.startswith('['): continue if title: - sections[section].append({'title':self.tag_to_string(a), + articles.append({'title':self.tag_to_string(a), 'url':url, 'description':'', 'date':''}) - self.log('\tFound related:', title) + self.log('\t\tFound related:', title) + ''' - - feeds = [(sec, sections[sec]) for sec in sec_order] - return feeds + return articles def cleanup(self): From 57232edb2d6a8a5c53280ef0010c5d52ba2bf21c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 17 Jun 2010 16:44:13 -0600 Subject: [PATCH 2/3] Fix #5864 (Python string exceptions no more allowed in Python 2.6) --- src/calibre/utils/Zeroconf.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/calibre/utils/Zeroconf.py b/src/calibre/utils/Zeroconf.py index 8a1e13c23f..f4a7119d16 100755 --- a/src/calibre/utils/Zeroconf.py +++ b/src/calibre/utils/Zeroconf.py @@ -596,10 +596,11 @@ class DNSIncoming(object): next = off + 1 off = ((len & 0x3F) << 8) | ord(self.data[off]) if off >= first: - raise 'Bad domain name (circular) at ' + str(off) + raise ValueError('Bad domain name (circular) at ' + + str(off)) first = off else: - raise 'Bad domain name at ' + str(off) + raise ValueError('Bad domain name at ' + str(off)) if next >= 0: self.offset = next From 4bb355481a151b7a649c9a446619d35b776f2505 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 17 Jun 2010 23:27:54 -0600 Subject: [PATCH 3/3] Kobo Output profile: Adjust the screen dimensions when converting comics --- src/calibre/customize/profiles.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index 222f6128e0..c872c9df38 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -279,6 +279,7 @@ class KoboReaderOutput(OutputProfile): description = _('This profile is intended for the Kobo Reader.') screen_size = (590, 775) + comic_screen_size = (540, 718) dpi = 168.451 fbase = 12 fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]