Merge from trunk

Charles Haley 2010-06-18 09:35:14 +01:00
commit 871e34fb63
3 changed files with 41 additions and 28 deletions

@@ -3,7 +3,6 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import string
from calibre.web.feeds.news import BasicNewsRecipe
# http://online.wsj.com/page/us_in_todays_paper.html
@@ -66,7 +65,7 @@ class WallStreetJournal(BasicNewsRecipe):
        return soup

    def wsj_get_index(self):
        return self.index_to_soup('http://online.wsj.com/page/us_in_todays_paper.html')
        return self.index_to_soup('http://online.wsj.com/itp')

    def parse_index(self):
        soup = self.wsj_get_index()
@@ -75,24 +74,35 @@ class WallStreetJournal(BasicNewsRecipe):
        if date is not None:
            self.timefmt = ' [%s]'%self.tag_to_string(date)
        sections = {}
        sec_order = []
        cov = soup.find('a', attrs={'class':'icon pdf'}, href=True)
        if cov is not None:
            self.cover_url = cov['href']
        feeds = []
        div = soup.find('div', attrs={'class':'itpHeader'})
        div = div.find('ul', attrs={'class':'tab'})
        for a in div.findAll('a', href=lambda x: x and '/itp/' in x):
            title = self.tag_to_string(a)
            url = 'http://online.wsj.com' + a['href']
            self.log('Found section:', title)
            articles = self.wsj_find_articles(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def wsj_find_articles(self, url):
        soup = self.index_to_soup(url)
        whats_news = soup.find('div', attrs={'class':lambda x: x and
            'whatsNews-simple' in x})
        if whats_news is not None:
            whats_news.extract()
        articles = []
        for a in soup.findAll('a', attrs={'class':'mjLinkItem'}, href=True):
            container = a.findParent(['li', 'div'])
            if container.name == 'div':
                section = 'Page One'
            else:
                section = ''
                sec = container.find('a', href=lambda x: x and '/search?' in x)
                if sec is not None:
                    section = self.tag_to_string(sec).strip()
                if not section:
                    h = container.find(['h1','h2','h3','h4','h5','h6'])
                    section = self.tag_to_string(h)
            section = string.capitalize(section).replace('U.s.', 'U.S.')
            if section not in sections:
                sections[section] = []
                sec_order.append(section)
            meta = a.find(attrs={'class':'meta_sectionName'})
            if meta is not None:
                meta.extract()
@@ -103,27 +113,28 @@ class WallStreetJournal(BasicNewsRecipe):
            if p is not None:
                desc = self.tag_to_string(p)
            sections[section].append({'title':title, 'url':url,
            articles.append({'title':title, 'url':url,
                'description':desc, 'date':''})
            self.log('Found article:', title)
            self.log('\tFound article:', title)
            '''
            # Find related articles
            a.extract()
            for a in container.findAll('a', href=lambda x: x and '/article/'
                    in x):
                    in x and 'articleTabs' not in x):
                url = a['href']
                if not url.startswith('http:'):
                    url = 'http://online.wsj.com'+url
                title = self.tag_to_string(a).strip()
                if not title or title.startswith('['): continue
                if title:
                    sections[section].append({'title':self.tag_to_string(a),
                    articles.append({'title':self.tag_to_string(a),
                        'url':url, 'description':'', 'date':''})
                    self.log('\tFound related:', title)
                    self.log('\t\tFound related:', title)
            '''
        feeds = [(sec, sections[sec]) for sec in sec_order]
        return feeds
        return articles

    def cleanup(self):

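Two things change in the recipe above: wsj_get_index() now loads the /itp ("In Today's Paper") edition instead of the old us_in_todays_paper page, and parse_index() walks the section tabs inside the itpHeader div, handing each section URL to the new wsj_find_articles() helper and returning calibre's usual feed structure of (section title, article list) tuples. A minimal sketch of the href-lambda filtering that picks out the section links is below; the HTML fragment and the BeautifulSoup 3 import are illustrative assumptions, not code taken from the recipe.

# Illustrative sketch only: a tiny stand-in for the WSJ itpHeader markup.
from BeautifulSoup import BeautifulSoup  # BeautifulSoup 3, as used by recipes of this era

html = '''
<div class="itpHeader">
  <ul class="tab">
    <li><a href="/itp/20100618/pageone">Page One</a></li>
    <li><a href="/itp/20100618/marketplace">Marketplace</a></li>
    <li><a href="/public/page/archive.html">Archive</a></li>
  </ul>
</div>
'''

soup = BeautifulSoup(html)
div = soup.find('div', attrs={'class':'itpHeader'})
div = div.find('ul', attrs={'class':'tab'})
# The lambda receives the attribute value (or None) and keeps only /itp/ links,
# so the Archive link above is skipped.
for a in div.findAll('a', href=lambda x: x and '/itp/' in x):
    print 'http://online.wsj.com' + a['href']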
@@ -279,6 +279,7 @@ class KoboReaderOutput(OutputProfile):
    description = _('This profile is intended for the Kobo Reader.')

    screen_size = (590, 775)
    comic_screen_size = (540, 718)
    dpi = 168.451
    fbase = 12
    fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]

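The only change to the Kobo profile is an explicit dpi for the device. 168.451 is consistent with the Kobo Reader's 600x800 panel and a diagonal just under six inches; a quick sketch of that arithmetic follows (the panel resolution and diagonal are assumptions used for illustration, not values from the diff).

from math import hypot

def panel_dpi(width_px, height_px, diagonal_inches):
    # Pixels along the diagonal divided by the physical diagonal length.
    return hypot(width_px, height_px) / diagonal_inches

# Assumed 600x800 panel with a ~5.94in diagonal (the Kobo Reader's nominal "6 inch" screen):
print panel_dpi(600, 800, 5.937)  # ~168.4, close to the dpi added above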
@@ -596,10 +596,11 @@ class DNSIncoming(object):
                    next = off + 1
                off = ((len & 0x3F) << 8) | ord(self.data[off])
                if off >= first:
                    raise 'Bad domain name (circular) at ' + str(off)
                    raise ValueError('Bad domain name (circular) at ' +
                            str(off))
                first = off
            else:
                raise 'Bad domain name at ' + str(off)
                raise ValueError('Bad domain name at ' + str(off))

        if next >= 0:
            self.offset = next
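The Zeroconf change above replaces string exceptions with ValueError. Raising a bare string was deprecated in Python 2.5 and removed in 2.6, where the old lines would themselves fail with a TypeError instead of reporting the malformed packet. A minimal standalone sketch of the new behaviour (the helper name is made up for illustration and is not part of DNSIncoming):

def check_offset(off, first):
    # Old form (invalid on Python 2.6+):
    #     raise 'Bad domain name (circular) at ' + str(off)
    # would abort with "TypeError: exceptions must be old-style classes or
    # derived from BaseException, not str" before the message ever surfaced.
    if off >= first:
        raise ValueError('Bad domain name (circular) at ' + str(off))

try:
    check_offset(40, 12)
except ValueError, e:
    print e  # Bad domain name (circular) at 40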