mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
871e34fb63
@ -3,7 +3,6 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import string
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
# http://online.wsj.com/page/us_in_todays_paper.html
|
||||
@ -66,7 +65,7 @@ class WallStreetJournal(BasicNewsRecipe):
|
||||
return soup
|
||||
|
||||
def wsj_get_index(self):
|
||||
return self.index_to_soup('http://online.wsj.com/page/us_in_todays_paper.html')
|
||||
return self.index_to_soup('http://online.wsj.com/itp')
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.wsj_get_index()
|
||||
@ -75,24 +74,35 @@ class WallStreetJournal(BasicNewsRecipe):
|
||||
if date is not None:
|
||||
self.timefmt = ' [%s]'%self.tag_to_string(date)
|
||||
|
||||
sections = {}
|
||||
sec_order = []
|
||||
cov = soup.find('a', attrs={'class':'icon pdf'}, href=True)
|
||||
if cov is not None:
|
||||
self.cover_url = cov['href']
|
||||
|
||||
feeds = []
|
||||
div = soup.find('div', attrs={'class':'itpHeader'})
|
||||
div = div.find('ul', attrs={'class':'tab'})
|
||||
for a in div.findAll('a', href=lambda x: x and '/itp/' in x):
|
||||
title = self.tag_to_string(a)
|
||||
url = 'http://online.wsj.com' + a['href']
|
||||
self.log('Found section:', title)
|
||||
articles = self.wsj_find_articles(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
return feeds
|
||||
|
||||
def wsj_find_articles(self, url):
|
||||
soup = self.index_to_soup(url)
|
||||
|
||||
whats_news = soup.find('div', attrs={'class':lambda x: x and
|
||||
'whatsNews-simple' in x})
|
||||
if whats_news is not None:
|
||||
whats_news.extract()
|
||||
|
||||
articles = []
|
||||
|
||||
for a in soup.findAll('a', attrs={'class':'mjLinkItem'}, href=True):
|
||||
container = a.findParent(['li', 'div'])
|
||||
if container.name == 'div':
|
||||
section = 'Page One'
|
||||
else:
|
||||
section = ''
|
||||
sec = container.find('a', href=lambda x: x and '/search?' in x)
|
||||
if sec is not None:
|
||||
section = self.tag_to_string(sec).strip()
|
||||
if not section:
|
||||
h = container.find(['h1','h2','h3','h4','h5','h6'])
|
||||
section = self.tag_to_string(h)
|
||||
section = string.capitalize(section).replace('U.s.', 'U.S.')
|
||||
if section not in sections:
|
||||
sections[section] = []
|
||||
sec_order.append(section)
|
||||
meta = a.find(attrs={'class':'meta_sectionName'})
|
||||
if meta is not None:
|
||||
meta.extract()
|
||||
@ -103,27 +113,28 @@ class WallStreetJournal(BasicNewsRecipe):
|
||||
if p is not None:
|
||||
desc = self.tag_to_string(p)
|
||||
|
||||
sections[section].append({'title':title, 'url':url,
|
||||
articles.append({'title':title, 'url':url,
|
||||
'description':desc, 'date':''})
|
||||
|
||||
self.log('Found article:', title)
|
||||
self.log('\tFound article:', title)
|
||||
|
||||
'''
|
||||
# Find related articles
|
||||
a.extract()
|
||||
for a in container.findAll('a', href=lambda x: x and '/article/'
|
||||
in x):
|
||||
in x and 'articleTabs' not in x):
|
||||
url = a['href']
|
||||
if not url.startswith('http:'):
|
||||
url = 'http://online.wsj.com'+url
|
||||
title = self.tag_to_string(a).strip()
|
||||
if not title or title.startswith('['): continue
|
||||
if title:
|
||||
sections[section].append({'title':self.tag_to_string(a),
|
||||
articles.append({'title':self.tag_to_string(a),
|
||||
'url':url, 'description':'', 'date':''})
|
||||
self.log('\tFound related:', title)
|
||||
self.log('\t\tFound related:', title)
|
||||
'''
|
||||
|
||||
|
||||
feeds = [(sec, sections[sec]) for sec in sec_order]
|
||||
return feeds
|
||||
return articles
|
||||
|
||||
|
||||
def cleanup(self):
|
||||
|
@ -279,6 +279,7 @@ class KoboReaderOutput(OutputProfile):
|
||||
description = _('This profile is intended for the Kobo Reader.')
|
||||
|
||||
screen_size = (590, 775)
|
||||
comic_screen_size = (540, 718)
|
||||
dpi = 168.451
|
||||
fbase = 12
|
||||
fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
|
||||
|
@ -596,10 +596,11 @@ class DNSIncoming(object):
|
||||
next = off + 1
|
||||
off = ((len & 0x3F) << 8) | ord(self.data[off])
|
||||
if off >= first:
|
||||
raise 'Bad domain name (circular) at ' + str(off)
|
||||
raise ValueError('Bad domain name (circular) at ' +
|
||||
str(off))
|
||||
first = off
|
||||
else:
|
||||
raise 'Bad domain name at ' + str(off)
|
||||
raise ValueError('Bad domain name at ' + str(off))
|
||||
|
||||
if next >= 0:
|
||||
self.offset = next
|
||||
|
Loading…
x
Reference in New Issue
Block a user