Merge from trunk

This commit is contained in:
Charles Haley 2011-10-23 17:24:02 +02:00
commit 682a31d9ed
95 changed files with 32689 additions and 26719 deletions

View File

@ -19,6 +19,65 @@
# new recipes:
# - title:
+- version: 0.8.23
+date: 2011-10-21
+new features:
+- title: "Drivers for T-Mobile Move, new Pandigital Novel, new Onyx Boox and Freescale MX 515"
+- title: "SONY T1 driver: Support for periodicals and better timezone detection"
+- title: "Add a remove cover entry to the right-click menu of the cover display in the right panel"
+tickets: [874689]
+bug fixes:
+- title: "Amazon metadata download: Fix for change in Amazon website that broke downloading metadata."
+tickets: [878395]
+- title: "MOBI metadata: When reading titles from MOBI files only use the title in the PDB header if there is no long title in the EXTH header"
+tickets: [875243]
+- title: "Fix regression that broke use of complex custom columns in save to disk templates."
+tickets: [877366]
+- title: "Fix regression that broke reading metadata from CHM files"
+- title: "Fix a bug that broke conversion of some zipped up HTML files with non-ASCII filenames on certain Windows installs."
+tickets: [873288]
+- title: "RTF Input: Fix bug in handling of paragraph separators."
+tickets: [863735]
+- title: "Fix a regression that broke downloading certain periodicals for the Kindle."
+tickets: [875595]
+- title: "Fix regression that broke updating of covers inside ebook files when saving to disk"
+- title: "Fix regression that broke editing the 'show in tag browser' checkbox in custom column setup"
+- title: "Fix typo that broke stopping selected jobs in 0.8.22"
+improved recipes:
+- Columbus Dispatch
+- Ming Pao
+- La Republica
+- Korea Times
+- USA Today
+- CNN
+- Liberation
+- El Pais
+- Helsingin Sanomat
+new recipes:
+- title: Kyunghyang, Hankyoreh and Hankyoreh21
+author: Seongkyoun Yoo
+- title: English Kathimerini
+author: Thomas Scholl
+- title: Various French news sources
+author: Aurelien Chabot
- version: 0.8.22
date: 2011-10-14

View File

@ -4,7 +4,6 @@ __copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
'''
20minutes.fr
'''
-import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class Minutes(BasicNewsRecipe):

View File

@ -14,67 +14,43 @@ class ColumbusDispatchRecipe(BasicNewsRecipe):
use_embedded_content = False
remove_empty_feeds = True
oldest_article = 1.2
-max_articles_per_feed = 100
-no_stylesheets = True
-remove_javascript = True
-encoding = 'utf-8'
-# Seems to work best, but YMMV
-simultaneous_downloads = 2
+use_embedded_content = False
+no_stylesheets = True
+auto_cleanup = True
+#auto_cleanup_keep = '//div[@id="story-photos"]'
# Feeds from http://www.dispatch.com/live/content/rss/index.html
-feeds = []
-feeds.append((u'News: Local and state news', u'http://www.dispatch.com/live/static/crt/2_rss_localnews.xml'))
-feeds.append((u'News: National news', u'http://www.dispatch.com/live/static/crt/2_rss_nationalnews.xml'))
-feeds.append((u'News: Editorials', u'http://www.dispatch.com/live/static/crt/2_rss_editorials.xml'))
-feeds.append((u'News: Columnists', u'http://www.dispatch.com/live/static/crt/2_rss_columnists.xml'))
-feeds.append((u'News: Health news', u'http://www.dispatch.com/live/static/crt/2_rss_health.xml'))
-feeds.append((u'News: Science news', u'http://www.dispatch.com/live/static/crt/2_rss_science.xml'))
-feeds.append((u'Sports: OSU football', u'http://www.dispatch.com/live/static/crt/2_rss_osufootball.xml'))
-feeds.append((u'Sports: OSU men\'s basketball', u'http://www.dispatch.com/live/static/crt/2_rss_osumensbball.xml'))
-feeds.append((u'Sports: OSU women\'s basketball', u'http://www.dispatch.com/live/static/crt/2_rss_osuwomensbball.xml'))
-feeds.append((u'Sports: OSU sports', u'http://www.dispatch.com/live/static/crt/2_rss_osusports.xml'))
-feeds.append((u'Sports: Blue Jackets', u'http://www.dispatch.com/live/static/crt/2_rss_bluejackets.xml'))
-feeds.append((u'Sports: Crew', u'http://www.dispatch.com/live/static/crt/2_rss_crew.xml'))
-feeds.append((u'Sports: Clippers', u'http://www.dispatch.com/live/static/crt/2_rss_clippers.xml'))
-feeds.append((u'Sports: Indians', u'http://www.dispatch.com/live/static/crt/2_rss_indians.xml'))
-feeds.append((u'Sports: Reds', u'http://www.dispatch.com/live/static/crt/2_rss_reds.xml'))
-feeds.append((u'Sports: Golf', u'http://www.dispatch.com/live/static/crt/2_rss_golf.xml'))
-feeds.append((u'Sports: Outdoors', u'http://www.dispatch.com/live/static/crt/2_rss_outdoors.xml'))
-feeds.append((u'Sports: Cavs/NBA', u'http://www.dispatch.com/live/static/crt/2_rss_cavaliers.xml'))
-feeds.append((u'Sports: High Schools', u'http://www.dispatch.com/live/static/crt/2_rss_highschools.xml'))
-feeds.append((u'Sports: Browns', u'http://www.dispatch.com/live/static/crt/2_rss_browns.xml'))
-feeds.append((u'Sports: Bengals', u'http://www.dispatch.com/live/static/crt/2_rss_bengals.xml'))
-feeds.append((u'Sports: Auto Racing', u'http://www.dispatch.com/live/static/crt/2_rss_autoracing.xml'))
-feeds.append((u'Business News', u'http://www.dispatch.com/live/static/crt/2_rss_business.xml'))
-feeds.append((u'Features: Weekender', u'http://www.dispatch.com/live/static/crt/2_rss_weekender.xml'))
-feeds.append((u'Features: Life and Arts', u'http://www.dispatch.com/live/static/crt/2_rss_lifearts.xml'))
-feeds.append((u'Features: Food', u'http://www.dispatch.com/live/static/crt/2_rss_food.xml'))
-feeds.append((u'Features: NOW! for kids', u'http://www.dispatch.com/live/static/crt/2_rss_now.xml'))
-feeds.append((u'Features: Travel', u'http://www.dispatch.com/live/static/crt/2_rss_travel.xml'))
-feeds.append((u'Features: Home and Garden', u'http://www.dispatch.com/live/static/crt/2_rss_homegarden.xml'))
-feeds.append((u'Features: Faith and Values', u'http://www.dispatch.com/live/static/crt/2_rss_faithvalues.xml'))
-#feeds.append((u'', u''))
+feeds = [
+('Local',
+'http://www.dispatch.com/content/syndication/news_local-state.xml'),
+('National',
+'http://www.dispatch.com/content/syndication/news_national.xml'),
+('Business',
+'http://www.dispatch.com/content/syndication/news_business.xml'),
+('Editorials',
+'http://www.dispatch.com/content/syndication/opinion_editorials.xml'),
+('Columnists',
+'http://www.dispatch.com/content/syndication/opinion_columns.xml'),
+('Life and Arts',
+'http://www.dispatch.com/content/syndication/lae_life-and-arts.xml'),
+('OSU Sports',
+'http://www.dispatch.com/content/syndication/sports_osu.xml'),
+('Auto Racing',
+'http://www.dispatch.com/content/syndication/sports_auto-racing.xml'),
+('Outdoors',
+'http://www.dispatch.com/content/syndication/sports_outdoors.xml'),
+('Bengals',
+'http://www.dispatch.com/content/syndication/sports_bengals.xml'),
+('Indians',
+'http://www.dispatch.com/content/syndication/sports_indians.xml'),
+('Clippers',
+'http://www.dispatch.com/content/syndication/sports_clippers.xml'),
+('Crew',
+'http://www.dispatch.com/content/syndication/sports_crew.xml'),
+('Reds',
+'http://www.dispatch.com/content/syndication/sports_reds.xml'),
+('Blue Jackets',
+'http://www.dispatch.com/content/syndication/sports_bluejackets.xml'),
+]
-keep_only_tags = []
-keep_only_tags.append(dict(name = 'div', attrs = {'class': 'colhed'}))
-keep_only_tags.append(dict(name = 'div', attrs = {'class': 'hed'}))
-keep_only_tags.append(dict(name = 'div', attrs = {'class': 'subhed'}))
-keep_only_tags.append(dict(name = 'div', attrs = {'class': 'date'}))
-keep_only_tags.append(dict(name = 'div', attrs = {'class': 'byline'}))
-keep_only_tags.append(dict(name = 'div', attrs = {'class': 'srcline'}))
-keep_only_tags.append(dict(name = 'div', attrs = {'class': 'body'}))
-remove_tags = []
-remove_tags.append(dict(name = 'div', attrs = {'id': 'middle-story-ad-container'}))
-extra_css = '''
-body {font-family:verdana,arial,helvetica,geneva,sans-serif ;}
-a {text-decoration: none; color: blue;}
-div.colhed {font-weight: bold;}
-div.hed {font-size: xx-large; font-weight: bold; margin-bottom: 0.2em;}
-div.subhed {font-size: large;}
-div.date {font-size: x-small; font-style: italic; color: #666666; margin-top: 0.4em; margin-bottom: 0.4em;}
-div.byline, div.srcline {font-size: small; color: #696969;}
-'''

View File

@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
-class BasicUserRecipe1318572550(AutomaticNewsRecipe):
+from calibre.web.feeds.news import BasicNewsRecipe
+class BasicUserRecipe1318572550(BasicNewsRecipe):
title = u'FrAndroid'
oldest_article = 2
max_articles_per_feed = 100

View File

@ -1,5 +1,8 @@
# -*- coding: utf-8 -*-
-class BasicUserRecipe1318572445(AutomaticNewsRecipe):
+from calibre.web.feeds.news import BasicNewsRecipe
+class BasicUserRecipe1318572445(BasicNewsRecipe):
title = u'Google Mobile Blog'
oldest_article = 7
max_articles_per_feed = 100

View File

@ -3,34 +3,31 @@ __copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Hankyoreh
'''
-import re
from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
class Hankyoreh(BasicNewsRecipe):
title = u'Hankyoreh'
language = 'ko'
description = u'The Hankyoreh News articles'
__author__ = 'Seongkyoun Yoo'
oldest_article = 5
recursions = 1
max_articles_per_feed = 5
no_stylesheets = True
keep_only_tags = [
dict(name='tr', attrs={'height':['60px']}),
dict(id=['fontSzArea'])
]
remove_tags = [
dict(target='_blank'),
dict(name='td', attrs={'style':['padding: 10px 8px 5px 8px;']}),
dict(name='iframe', attrs={'width':['590']}),
]
remove_tags_after = [
dict(target='_top')
]
feeds = [
('All News','http://www.hani.co.kr/rss/'),
('Politics','http://www.hani.co.kr/rss/politics/'),
('Economy','http://www.hani.co.kr/rss/economy/'),
('Society','http://www.hani.co.kr/rss/society/'),

View File

@ -3,7 +3,6 @@ __copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Hankyoreh
'''
-import re
from calibre.web.feeds.news import BasicNewsRecipe
class Hankyoreh21(BasicNewsRecipe):

View File

@ -44,7 +44,11 @@ class JapanTimes(BasicNewsRecipe):
return rurl.partition('?')[0]
def print_version(self, url):
-return url.replace('/cgi-bin/','/print/')
+if '/rss/' in url:
+return url.replace('.jp/rss/','.jp/print/')
+if '/text/' in url:
+return url.replace('.jp/text/','.jp/print/')
+return url
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
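
For illustration only, a standalone sketch of the rewritten print_version() mapping above, using made-up article URLs (the real Japan Times paths may differ):

def print_version(url):
    # mirror of the new logic: point RSS/text article URLs at their print equivalents
    if '/rss/' in url:
        return url.replace('.jp/rss/', '.jp/print/')
    if '/text/' in url:
        return url.replace('.jp/text/', '.jp/print/')
    return url  # unrecognized form: leave the URL untouched

print(print_version('http://www.japantimes.co.jp/rss/nn20111023a1.html'))
# -> http://www.japantimes.co.jp/print/nn20111023a1.html
print(print_version('http://www.japantimes.co.jp/text/nn20111023a1.html'))
# -> http://www.japantimes.co.jp/print/nn20111023a1.html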

View File

@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
-class BasicUserRecipe1318619728(AutomaticNewsRecipe):
+from calibre.web.feeds.news import BasicNewsRecipe
+class BasicUserRecipe1318619728(BasicNewsRecipe):
title = u'Korben'
oldest_article = 7
max_articles_per_feed = 100

View File

@ -4,7 +4,6 @@ __copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
'''
LePoint.fr
'''
-import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class lepoint(BasicNewsRecipe):

View File

@ -4,7 +4,6 @@ __copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
'''
Lexpress.fr
'''
-import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class lepoint(BasicNewsRecipe):

View File

@ -18,10 +18,14 @@ __InclPremium__ = False
__ParsePFF__ = True
# (HK only) Turn below to True if you wish hi-res images (Default: False)
__HiResImg__ = False
+# Override the date returned by the program if specifying a YYYYMMDD below
+__Date__ = ''
'''
Change Log:
+2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
+2011/10/19: fix a bug in txt source parsing
2011/10/17: disable fetching of premium content, also improved txt source parsing
2011/10/04: option to get hi-res photos for the articles
2011/09/21: fetching "column" section is made optional.
@ -170,13 +174,22 @@ class MPRecipe(BasicNewsRecipe):
return dt_local
def get_fetchdate(self):
-return self.get_dtlocal().strftime("%Y%m%d")
+if __Date__ <> '':
+return __Date__
+else:
+return self.get_dtlocal().strftime("%Y%m%d")
def get_fetchformatteddate(self):
-return self.get_dtlocal().strftime("%Y-%m-%d")
+if __Date__ <> '':
+return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
+else:
+return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchday(self):
-return self.get_dtlocal().strftime("%d")
+if __Date__ <> '':
+return __Date__[6:8]
+else:
+return self.get_dtlocal().strftime("%d")
def get_cover_url(self):
if __Region__ == 'Hong Kong':
@ -477,53 +490,8 @@ class MPRecipe(BasicNewsRecipe):
# preprocess those .txt and javascript based files # preprocess those .txt and javascript based files
def preprocess_raw_html(self, raw_html, url): def preprocess_raw_html(self, raw_html, url):
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010') new_html = raw_html
if __HiResImg__ == True: if url.rfind('ftp') <> -1 or url.rfind('_print.htm') <> -1:
# TODO: add a _ in front of an image url
if url.rfind('news.mingpao.com') > -1:
imglist = re.findall('src="?.*?jpg"', raw_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
for img in imglist:
gifimg = img.replace('jpg"', 'gif"')
try:
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
raw_html = raw_html.replace(img, gifimg)
except:
# find the location of the first _
pos = img.find('_')
if pos > -1:
# if found, insert _ after the first _
newimg = img[0:pos] + '_' + img[pos:]
raw_html = raw_html.replace(img, newimg)
else:
# if not found, insert _ after "
raw_html = raw_html.replace(img[1:], '"_' + img[1:])
elif url.rfind('life.mingpao.com') > -1:
imglist = re.findall('src=\'?.*?jpg\'', raw_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
#print 'Img list: ', imglist, '\n'
for img in imglist:
gifimg = img.replace('jpg\'', 'gif\'')
try:
#print 'Original: ', url
#print 'To append: ', "/../" + gifimg[5:len(gifimg)-1]
gifurl = re.sub(r'dailynews.*txt', '', url)
#print 'newurl: ', gifurl + gifimg[5:len(gifimg)-1]
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
#print 'URL: ', url + "/../" + gifimg[5:len(gifimg)-1]
#br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
raw_html = raw_html.replace(img, gifimg)
except:
#print 'GIF not found'
pos = img.rfind('/')
newimg = img[0:pos+1] + '_' + img[pos+1:]
#print 'newimg: ', newimg
raw_html = raw_html.replace(img, newimg)
if url.rfind('ftp') == -1 and url.rfind('_print.htm') == -1:
return raw_html
else:
if url.rfind('_print.htm') <> -1: if url.rfind('_print.htm') <> -1:
# javascript based file # javascript based file
splitter = re.compile(r'\n') splitter = re.compile(r'\n')
@ -558,48 +526,113 @@ class MPRecipe(BasicNewsRecipe):
photo = photo.replace('</td>', '<br>') photo = photo.replace('</td>', '<br>')
photo = photo.replace('class="photo"', '') photo = photo.replace('class="photo"', '')
new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>' new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
return new_raw_html + '</body></html>' new_html = new_raw_html + '</body></html>'
else: else:
# .txt based file # .txt based file
splitter = re.compile(r'\n') # Match non-digits splitter = re.compile(r'\n') # Match non-digits
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">' new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
next_is_mov_link = False
next_is_img_txt = False next_is_img_txt = False
title_started = False title_started = False
met_article_start_char = False met_article_start_char = False
for item in splitter.split(raw_html): for item in splitter.split(raw_html):
item = item.strip()
if item.startswith(u'\u3010'): if item.startswith(u'\u3010'):
met_article_start_char = True met_article_start_char = True
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n' new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
else: else:
if next_is_img_txt == False and next_is_mov_link == False: if next_is_img_txt == False:
item = item.strip()
if item.startswith("=@"): if item.startswith("=@"):
next_is_mov_link = True print 'skip movie link'
elif item.startswith("=?"): elif item.startswith("=?"):
next_is_img_txt = True next_is_img_txt = True
new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n' new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
elif item.startswith('=='):
next_is_img_txt = True
if False:
# TODO: check existence of .gif first
newimg = '_' + item[2:].strip() + '.jpg'
new_raw_html += '<img src="' + newimg + '" /><p>\n'
else:
new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
elif item.startswith('='): elif item.startswith('='):
next_is_img_txt = True next_is_img_txt = True
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n' if False:
# TODO: check existence of .gif first
newimg = '_' + item[1:].strip() + '.jpg'
new_raw_html += '<img src="' + newimg + '" /><p>\n'
else:
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
else: else:
if item <> '': if next_is_img_txt == False and met_article_start_char == False:
if next_is_img_txt == False and met_article_start_char == False: if item <> '':
if title_started == False: if title_started == False:
#print 'Title started at ', item #print 'Title started at ', item
new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n' new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
title_started = True title_started = True
else: else:
new_raw_html = new_raw_html + item + '\n' new_raw_html = new_raw_html + item + '\n'
else: else:
new_raw_html = new_raw_html + item + '<p>\n' new_raw_html = new_raw_html + item + '<p>\n'
else: else:
if next_is_mov_link == True: next_is_img_txt = False
next_is_mov_link = False new_raw_html = new_raw_html + item + '\n'
else: new_html = new_raw_html + '</div></body></html>'
next_is_img_txt = False #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
new_raw_html = new_raw_html + item + '\n' if __HiResImg__ == True:
return new_raw_html + '</div></body></html>' # TODO: add a _ in front of an image url
if url.rfind('news.mingpao.com') > -1:
imglist = re.findall('src="?.*?jpg"', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
for img in imglist:
gifimg = img.replace('jpg"', 'gif"')
try:
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
# find the location of the first _
pos = img.find('_')
if pos > -1:
# if found, insert _ after the first _
newimg = img[0:pos] + '_' + img[pos:]
new_html = new_html.replace(img, newimg)
else:
# if not found, insert _ after "
new_html = new_html.replace(img[1:], '"_' + img[1:])
elif url.rfind('life.mingpao.com') > -1:
imglist = re.findall('src=\'?.*?jpg\'', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
#print 'Img list: ', imglist, '\n'
for img in imglist:
#print 'Found img: ', img
gifimg = img.replace('jpg\'', 'gif\'')
try:
gifurl = re.sub(r'dailynews.*txt', '', url)
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
pos = img.rfind('/')
newimg = img[0:pos+1] + '_' + img[pos+1:]
new_html = new_html.replace(img, newimg)
# repeat with src quoted by double quotes, for text parsed from src txt
imglist = re.findall('src="?.*?jpg"', new_html)
for img in imglist:
#print 'Found img: ', img
gifimg = img.replace('jpg"', 'gif"')
try:
#print 'url', url
pos = url.rfind('/')
gifurl = url[:pos+1]
#print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
pos = img.find('"')
newimg = img[0:pos+1] + '_' + img[pos+1:]
#print 'Use hi-res img', newimg
new_html = new_html.replace(img, newimg)
return new_html
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):

View File

@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
-class BasicUserRecipe1318619832(AutomaticNewsRecipe):
+from calibre.web.feeds.news import BasicNewsRecipe
+class BasicUserRecipe1318619832(BasicNewsRecipe):
title = u'OmgUbuntu'
oldest_article = 7
max_articles_per_feed = 100

View File

@ -9,49 +9,49 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-" "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n" "devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-09-27 14:31+0000\n" "POT-Creation-Date: 2011-09-27 14:31+0000\n"
"PO-Revision-Date: 2011-09-27 18:23+0000\n" "PO-Revision-Date: 2011-10-15 17:29+0000\n"
"Last-Translator: Kovid Goyal <Unknown>\n" "Last-Translator: Devilinside <Unknown>\n"
"Language-Team: Hungarian <debian-l10n-hungarian@lists.d.o>\n" "Language-Team: Hungarian <debian-l10n-hungarian@lists.d.o>\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n" "Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-09-28 04:50+0000\n" "X-Launchpad-Export-Date: 2011-10-16 05:14+0000\n"
"X-Generator: Launchpad (build 14049)\n" "X-Generator: Launchpad (build 14124)\n"
"X-Poedit-Country: HUNGARY\n" "X-Poedit-Country: HUNGARY\n"
"Language: hu\n" "Language: hu\n"
"X-Poedit-Language: Hungarian\n" "X-Poedit-Language: Hungarian\n"
#. name for aaa #. name for aaa
msgid "Ghotuo" msgid "Ghotuo"
msgstr "" msgstr "Ghotuo"
#. name for aab #. name for aab
msgid "Alumu-Tesu" msgid "Alumu-Tesu"
msgstr "" msgstr "Alumu-Tesu"
#. name for aac #. name for aac
msgid "Ari" msgid "Ari"
msgstr "" msgstr "Ari"
#. name for aad #. name for aad
msgid "Amal" msgid "Amal"
msgstr "" msgstr "Amal"
#. name for aae #. name for aae
msgid "Albanian; Arbëreshë" msgid "Albanian; Arbëreshë"
msgstr "" msgstr "Albán; Arbëreshë"
#. name for aaf #. name for aaf
msgid "Aranadan" msgid "Aranadan"
msgstr "" msgstr "Aranadan"
#. name for aag #. name for aag
msgid "Ambrak" msgid "Ambrak"
msgstr "" msgstr "Ambrak"
#. name for aah #. name for aah
msgid "Arapesh; Abu'" msgid "Arapesh; Abu'"
msgstr "" msgstr "Arapesh; Abu'"
#. name for aai #. name for aai
msgid "Arifama-Miniafia" msgid "Arifama-Miniafia"
@ -75,7 +75,7 @@ msgstr ""
#. name for aao #. name for aao
msgid "Arabic; Algerian Saharan" msgid "Arabic; Algerian Saharan"
msgstr "" msgstr "Arab; Algériai Szaharai"
#. name for aap #. name for aap
msgid "Arára; Pará" msgid "Arára; Pará"
@ -87,7 +87,7 @@ msgstr ""
#. name for aar #. name for aar
msgid "Afar" msgid "Afar"
msgstr "afar" msgstr "Afar"
#. name for aas #. name for aas
msgid "Aasáx" msgid "Aasáx"
@ -498,10 +498,9 @@ msgstr ""
msgid "Tapei" msgid "Tapei"
msgstr "" msgstr ""
# src/trans.h:281 src/trans.h:318
#. name for afr #. name for afr
msgid "Afrikaans" msgid "Afrikaans"
msgstr "afrikaans" msgstr "Afrikaans"
#. name for afs #. name for afs
msgid "Creole; Afro-Seminole" msgid "Creole; Afro-Seminole"
@ -801,7 +800,7 @@ msgstr ""
#. name for aka #. name for aka
msgid "Akan" msgid "Akan"
msgstr "akan" msgstr "Akan"
#. name for akb #. name for akb
msgid "Batak Angkola" msgid "Batak Angkola"
@ -1015,10 +1014,9 @@ msgstr ""
msgid "Amarag" msgid "Amarag"
msgstr "" msgstr ""
# src/trans.h:283
#. name for amh #. name for amh
msgid "Amharic" msgid "Amharic"
msgstr "amhara" msgstr "Amhara"
#. name for ami #. name for ami
msgid "Amis" msgid "Amis"
@ -1425,10 +1423,9 @@ msgstr ""
msgid "Arrarnta; Western" msgid "Arrarnta; Western"
msgstr "" msgstr ""
# src/trans.h:294
#. name for arg #. name for arg
msgid "Aragonese" msgid "Aragonese"
msgstr "aragóniai" msgstr "Aragóniai"
#. name for arh #. name for arh
msgid "Arhuaco" msgid "Arhuaco"
@ -1548,7 +1545,7 @@ msgstr ""
#. name for asm #. name for asm
msgid "Assamese" msgid "Assamese"
msgstr "asszámi" msgstr "Asszámi"
#. name for asn #. name for asn
msgid "Asuriní; Xingú" msgid "Asuriní; Xingú"
@ -1790,10 +1787,9 @@ msgstr ""
msgid "Arabic; Uzbeki" msgid "Arabic; Uzbeki"
msgstr "" msgstr ""
# src/trans.h:283
#. name for ava #. name for ava
msgid "Avaric" msgid "Avaric"
msgstr "avar" msgstr "Avar"
#. name for avb #. name for avb
msgid "Avau" msgid "Avau"
@ -1805,7 +1801,7 @@ msgstr ""
#. name for ave #. name for ave
msgid "Avestan" msgid "Avestan"
msgstr "aveszti" msgstr "Avesztai"
#. name for avi #. name for avi
msgid "Avikam" msgid "Avikam"
@ -1945,7 +1941,7 @@ msgstr ""
#. name for ayc #. name for ayc
msgid "Aymara; Southern" msgid "Aymara; Southern"
msgstr "" msgstr "Ajmara; Déli"
#. name for ayd #. name for ayd
msgid "Ayabadhu" msgid "Ayabadhu"
@ -1977,7 +1973,7 @@ msgstr ""
#. name for aym #. name for aym
msgid "Aymara" msgid "Aymara"
msgstr "aymara" msgstr "Ajmara"
#. name for ayn #. name for ayn
msgid "Arabic; Sanaani" msgid "Arabic; Sanaani"
@ -1997,7 +1993,7 @@ msgstr ""
#. name for ayr #. name for ayr
msgid "Aymara; Central" msgid "Aymara; Central"
msgstr "" msgstr "Ajmara; Közép"
#. name for ays #. name for ays
msgid "Ayta; Sorsogon" msgid "Ayta; Sorsogon"
@ -2025,12 +2021,11 @@ msgstr ""
#. name for azb #. name for azb
msgid "Azerbaijani; South" msgid "Azerbaijani; South"
msgstr "" msgstr "Azeri; Déli"
# src/trans.h:311
#. name for aze #. name for aze
msgid "Azerbaijani" msgid "Azerbaijani"
msgstr "azeri" msgstr "Azeri"
#. name for azg #. name for azg
msgid "Amuzgo; San Pedro Amuzgos" msgid "Amuzgo; San Pedro Amuzgos"
@ -2038,7 +2033,7 @@ msgstr ""
#. name for azj #. name for azj
msgid "Azerbaijani; North" msgid "Azerbaijani; North"
msgstr "" msgstr "Azeri; Északi"
#. name for azm #. name for azm
msgid "Amuzgo; Ipalapa" msgid "Amuzgo; Ipalapa"
@ -2090,7 +2085,7 @@ msgstr ""
#. name for bak #. name for bak
msgid "Bashkir" msgid "Bashkir"
msgstr "baskír" msgstr "Baskír"
#. name for bal #. name for bal
msgid "Baluchi" msgid "Baluchi"
@ -2115,7 +2110,7 @@ msgstr ""
#. name for bar #. name for bar
msgid "Bavarian" msgid "Bavarian"
msgstr "" msgstr "Bajor"
#. name for bas #. name for bas
msgid "Basa (Cameroon)" msgid "Basa (Cameroon)"
@ -2497,10 +2492,9 @@ msgstr "beja"
msgid "Bebeli" msgid "Bebeli"
msgstr "" msgstr ""
# src/trans.h:286
#. name for bel #. name for bel
msgid "Belarusian" msgid "Belarusian"
msgstr "belorusz" msgstr "Belarusz"
#. name for bem #. name for bem
msgid "Bemba (Zambia)" msgid "Bemba (Zambia)"
@ -2508,7 +2502,7 @@ msgstr ""
#. name for ben #. name for ben
msgid "Bengali" msgid "Bengali"
msgstr "bengáli" msgstr "Bengáli"
#. name for beo #. name for beo
msgid "Beami" msgid "Beami"
@ -3510,10 +3504,9 @@ msgstr ""
msgid "Borôro" msgid "Borôro"
msgstr "" msgstr ""
# src/trans.h:309
#. name for bos #. name for bos
msgid "Bosnian" msgid "Bosnian"
msgstr "bosnyák" msgstr "Bosnyák"
#. name for bot #. name for bot
msgid "Bongo" msgid "Bongo"
@ -3685,7 +3678,7 @@ msgstr ""
#. name for bqn #. name for bqn
msgid "Bulgarian Sign Language" msgid "Bulgarian Sign Language"
msgstr "" msgstr "Bolgár jelnyelv"
#. name for bqo #. name for bqo
msgid "Balo" msgid "Balo"
@ -4078,10 +4071,9 @@ msgstr ""
msgid "Bugawac" msgid "Bugawac"
msgstr "" msgstr ""
# src/trans.h:285
#. name for bul #. name for bul
msgid "Bulgarian" msgid "Bulgarian"
msgstr "bolgár" msgstr "Bolgár"
#. name for bum #. name for bum
msgid "Bulu (Cameroon)" msgid "Bulu (Cameroon)"
@ -7445,10 +7437,9 @@ msgstr ""
msgid "Semimi" msgid "Semimi"
msgstr "" msgstr ""
# src/trans.h:284
#. name for eus #. name for eus
msgid "Basque" msgid "Basque"
msgstr "baszk" msgstr "Baszk"
#. name for eve #. name for eve
msgid "Even" msgid "Even"
@ -7534,10 +7525,9 @@ msgstr ""
msgid "Fang (Equatorial Guinea)" msgid "Fang (Equatorial Guinea)"
msgstr "" msgstr ""
# src/trans.h:294
#. name for fao #. name for fao
msgid "Faroese" msgid "Faroese"
msgstr "feröi" msgstr "Feröeri"
#. name for fap #. name for fap
msgid "Palor" msgid "Palor"
@ -29414,7 +29404,7 @@ msgstr ""
#. name for xzp #. name for xzp
msgid "Zapotec; Ancient" msgid "Zapotec; Ancient"
msgstr "" msgstr "Zapoték; Ősi"
#. name for yaa #. name for yaa
msgid "Yaminahua" msgid "Yaminahua"
@ -30326,27 +30316,27 @@ msgstr ""
#. name for zaa #. name for zaa
msgid "Zapotec; Sierra de Juárez" msgid "Zapotec; Sierra de Juárez"
msgstr "" msgstr "Zapoték; Sierra de Juárezi"
#. name for zab #. name for zab
msgid "Zapotec; San Juan Guelavía" msgid "Zapotec; San Juan Guelavía"
msgstr "" msgstr "Zapoték; San Juan Guelavíai"
#. name for zac #. name for zac
msgid "Zapotec; Ocotlán" msgid "Zapotec; Ocotlán"
msgstr "" msgstr "Zapoték; Ocotláni"
#. name for zad #. name for zad
msgid "Zapotec; Cajonos" msgid "Zapotec; Cajonos"
msgstr "zapoték; Cajonos" msgstr "Zapoték; Cajonesi"
#. name for zae #. name for zae
msgid "Zapotec; Yareni" msgid "Zapotec; Yareni"
msgstr "zapoték; Yareni" msgstr "Zapoték; Yareni"
#. name for zaf #. name for zaf
msgid "Zapotec; Ayoquesco" msgid "Zapotec; Ayoquesco"
msgstr "" msgstr "Zapoték; Ayoquescoi"
#. name for zag #. name for zag
msgid "Zaghawa" msgid "Zaghawa"
@ -30358,7 +30348,7 @@ msgstr "zangval"
#. name for zai #. name for zai
msgid "Zapotec; Isthmus" msgid "Zapotec; Isthmus"
msgstr "zapoték; Isthmus" msgstr "Zapoték; Isthmusi"
#. name for zaj #. name for zaj
msgid "Zaramo" msgid "Zaramo"
@ -30374,31 +30364,31 @@ msgstr "zozu"
#. name for zam #. name for zam
msgid "Zapotec; Miahuatlán" msgid "Zapotec; Miahuatlán"
msgstr "" msgstr "Zapoték; Miahuatláni"
#. name for zao #. name for zao
msgid "Zapotec; Ozolotepec" msgid "Zapotec; Ozolotepec"
msgstr "" msgstr "Zapoték; Ozolotepeci"
#. name for zap #. name for zap
msgid "Zapotec" msgid "Zapotec"
msgstr "zapoték" msgstr "Zapoték"
#. name for zaq #. name for zaq
msgid "Zapotec; Aloápam" msgid "Zapotec; Aloápam"
msgstr "" msgstr "Zapoték; Aloápami"
#. name for zar #. name for zar
msgid "Zapotec; Rincón" msgid "Zapotec; Rincón"
msgstr "zapoték; Rincón" msgstr "Zapoték; Rincóni"
#. name for zas #. name for zas
msgid "Zapotec; Santo Domingo Albarradas" msgid "Zapotec; Santo Domingo Albarradas"
msgstr "" msgstr "Zapoték; Santo Domingo Albarradasi"
#. name for zat #. name for zat
msgid "Zapotec; Tabaa" msgid "Zapotec; Tabaa"
msgstr "zapoték; Tabaa" msgstr "Zapoték; Tabaa-i"
# src/trans.h:193 # src/trans.h:193
#. name for zau #. name for zau
@ -30407,15 +30397,15 @@ msgstr "zangskari"
#. name for zav #. name for zav
msgid "Zapotec; Yatzachi" msgid "Zapotec; Yatzachi"
msgstr "" msgstr "Zapoték; Yatzachi-i"
#. name for zaw #. name for zaw
msgid "Zapotec; Mitla" msgid "Zapotec; Mitla"
msgstr "zapoték; Mitla" msgstr "Zapoték; Mitlai"
#. name for zax #. name for zax
msgid "Zapotec; Xadani" msgid "Zapotec; Xadani"
msgstr "zapoték; Xadani" msgstr "Zapoték; Xadani-i"
#. name for zay #. name for zay
msgid "Zayse-Zergulla" msgid "Zayse-Zergulla"
@ -30991,7 +30981,7 @@ msgstr "tokano"
#. name for zul #. name for zul
msgid "Zulu" msgid "Zulu"
msgstr "zulu" msgstr "Zulu"
# src/trans.h:316 # src/trans.h:316
#. name for zum #. name for zum

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
-numeric_version = (0, 8, 22)
+numeric_version = (0, 8, 23)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -77,7 +77,7 @@ class ANDROID(USBMS):
# Samsung
0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400],
-0x681c : [0x0222, 0x0224, 0x0400],
+0x681c : [0x0222, 0x0223, 0x0224, 0x0400],
0x6640 : [0x0100],
0x685b : [0x0400],
0x685e : [0x0400],

View File

@ -376,7 +376,7 @@ class KOBO(USBMS):
path_prefix = '.kobo/images/'
path = self._main_prefix + path_prefix + ImageID
-file_endings = (' - iPhoneThumbnail.parsed', ' - bbMediumGridList.parsed', ' - NickelBookCover.parsed', ' - N3_LIBRARY_FULL.parsed', ' - N3_LIBRARY_GRID.parsed', ' - N3_LIBRARY_LIST.parsed', ' - N3_SOCIAL_CURRENTREAD.parsed',)
+file_endings = (' - iPhoneThumbnail.parsed', ' - bbMediumGridList.parsed', ' - NickelBookCover.parsed', ' - N3_LIBRARY_FULL.parsed', ' - N3_LIBRARY_GRID.parsed', ' - N3_LIBRARY_LIST.parsed', ' - N3_SOCIAL_CURRENTREAD.parsed', ' - N3_FULL.parsed',)
for ending in file_endings:
fpath = path + ending
@ -852,6 +852,7 @@ class KOBO(USBMS):
' - N3_LIBRARY_FULL.parsed':(355,530),
' - N3_LIBRARY_GRID.parsed':(149,233),
' - N3_LIBRARY_LIST.parsed':(60,90),
+' - N3_FULL.parsed':(600,800),
' - N3_SOCIAL_CURRENTREAD.parsed':(120,186)}
for ending, resize in file_endings.items():

View File

@ -20,9 +20,8 @@ from calibre.devices.usbms.driver import USBMS, debug_print
from calibre.devices.usbms.device import USBDevice
from calibre.devices.usbms.books import CollectionsBookList
from calibre.devices.usbms.books import BookList
-from calibre.ebooks.metadata import authors_to_sort_string
+from calibre.ebooks.metadata import authors_to_sort_string, authors_to_string
from calibre.constants import islinux
-from calibre.ebooks.metadata import authors_to_string, authors_to_sort_string
DBPATH = 'Sony_Reader/database/books.db'
THUMBPATH = 'Sony_Reader/database/cache/books/%s/thumbnail/main_thumbnail.jpg'
@ -40,7 +39,8 @@ class PRST1(USBMS):
path_sep = '/'
booklist_class = CollectionsBookList
-FORMATS = ['epub', 'pdf', 'txt']
+FORMATS = ['epub', 'pdf', 'txt', 'book', 'zbf'] # The last two are
+# used in japan
CAN_SET_METADATA = ['collections']
CAN_DO_DEVICE_DB_PLUGBOARD = True

View File

@ -30,7 +30,7 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'ht
'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
-'xps', 'oxps', 'azw4']
+'xps', 'oxps', 'azw4', 'book', 'zbf']
class HTMLRenderer(object):

View File

@ -30,9 +30,11 @@ class Worker(Thread): # Get details {{{
Get book details from amazons book page in a separate thread
'''
-def __init__(self, url, result_queue, browser, log, relevance, domain, plugin, timeout=20):
+def __init__(self, url, result_queue, browser, log, relevance, domain,
+plugin, timeout=20, testing=False):
Thread.__init__(self)
self.daemon = True
+self.testing = testing
self.url, self.result_queue = url, result_queue
self.log, self.timeout = log, timeout
self.relevance, self.plugin = relevance, plugin
@ -189,10 +191,9 @@ class Worker(Thread): # Get details {{{
self.log.exception(msg)
return
+oraw = raw
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
-#open('/t/t.html', 'wb').write(raw)
if '<title>404 - ' in raw:
self.log.error('URL malformed: %r'%self.url)
return
@ -211,14 +212,20 @@ class Worker(Thread): # Get details {{{
self.log.error(msg)
return
-self.parse_details(root)
+self.parse_details(oraw, root)
-def parse_details(self, root):
+def parse_details(self, raw, root):
try:
asin = self.parse_asin(root)
except:
self.log.exception('Error parsing asin for url: %r'%self.url)
asin = None
+if self.testing:
+import tempfile
+with tempfile.NamedTemporaryFile(prefix=asin + '_',
+suffix='.html', delete=False) as f:
+f.write(raw)
+print ('Downloaded html for', asin, 'saved in', f.name)
try:
title = self.parse_title(root)
@ -310,7 +317,7 @@ class Worker(Thread): # Get details {{{
return l.get('href').rpartition('/')[-1]
def parse_title(self, root):
-tdiv = root.xpath('//h1[@class="parseasinTitle"]')[0]
+tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')[0]
actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]')
if actual_title:
title = tostring(actual_title[0], encoding=unicode,
@ -320,11 +327,11 @@ class Worker(Thread): # Get details {{{
return re.sub(r'[(\[].*[)\]]', '', title).strip()
def parse_authors(self, root):
-x = '//h1[@class="parseasinTitle"]/following-sibling::span/*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]'
+x = '//h1[contains(@class, "parseasinTitle")]/following-sibling::span/*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]'
aname = root.xpath(x)
if not aname:
aname = root.xpath('''
-//h1[@class="parseasinTitle"]/following-sibling::*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]
+//h1[contains(@class, "parseasinTitle")]/following-sibling::*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]
''')
for x in aname:
x.tail = ''
@ -666,7 +673,8 @@ class Amazon(Source):
log.error('No matches found with query: %r'%query)
return
-workers = [Worker(url, result_queue, br, log, i, domain, self) for i, url in
+workers = [Worker(url, result_queue, br, log, i, domain, self,
+testing=getattr(self, 'running_a_test', False)) for i, url in
enumerate(matches)]
for w in workers:
@ -740,16 +748,6 @@ if __name__ == '__main__': # tests {{{
),
-( # An e-book ISBN not on Amazon, the title/author search matches
-# the Kindle edition, which has different markup for ratings and
-# isbn
-{'identifiers':{'isbn': '9780307459671'},
-'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
-[title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
-exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])]
-),
( # This isbn not on amazon
{'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
'authors':['Lutz']},
@ -783,7 +781,7 @@ if __name__ == '__main__': # tests {{{
de_tests = [ # {{{
(
{'identifiers':{'isbn': '3548283519'}},
-[title_test('Wer Wind sät',
+[title_test('Wer Wind Sät: Der Fünfte Fall Für Bodenstein Und Kirchhoff',
exact=True), authors_test(['Nele Neuhaus'])
]
@ -835,6 +833,6 @@ if __name__ == '__main__': # tests {{{
] # }}}
test_identify_plugin(Amazon.name, com_tests)
-#test_identify_plugin(Amazon.name, es_tests)
+#test_identify_plugin(Amazon.name, de_tests)
# }}}

View File

@ -196,6 +196,7 @@ class Source(Plugin):
def __init__(self, *args, **kwargs):
Plugin.__init__(self, *args, **kwargs)
+self.running_a_test = False # Set to True when using identify_test()
self._isbn_to_identifier_cache = {}
self._identifier_to_cover_url_cache = {}
self.cache_lock = threading.RLock()
@ -284,14 +285,15 @@
if authors:
# Leave ' in there for Irish names
-remove_pat = re.compile(r'[,!@#$%^&*(){}`~"\s\[\]/]')
-replace_pat = re.compile(r'[-+.:;]')
+remove_pat = re.compile(r'[!@#$%^&*(){}`~"\s\[\]/]')
+replace_pat = re.compile(r'[-+.:;,]')
if only_first_author:
authors = authors[:1]
for au in authors:
+has_comma = ',' in au
au = replace_pat.sub(' ', au)
parts = au.split()
-if ',' in au:
+if has_comma:
# au probably in ln, fn form
parts = parts[1:] + parts[:1]
for tok in parts:
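
A minimal standalone sketch of the revised tokenization above (simplified from Source.get_author_tokens; the helper name and the final lowercasing are illustrative, not calibre's exact code). The point of the change is that the comma has to be detected before replace_pat strips it, otherwise "last, first" names are never reordered:

import re

def author_tokens(au):
    remove_pat = re.compile(r'[!@#$%^&*(){}`~"\s\[\]/]')
    replace_pat = re.compile(r'[-+.:;,]')
    has_comma = ',' in au          # remember the comma before it is replaced
    au = replace_pat.sub(' ', au)
    parts = au.split()
    if has_comma:
        # probably "ln, fn": rotate to "fn ln"
        parts = parts[1:] + parts[:1]
    return [remove_pat.sub('', tok).lower() for tok in parts]

print(author_tokens('King, Stephen'))   # -> ['stephen', 'king']
print(author_tokens('Stephen King'))    # -> ['stephen', 'king']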

View File

@ -183,7 +183,11 @@ def test_identify_plugin(name, tests): # {{{
rq = Queue()
args = (log, rq, abort)
start_time = time.time()
-err = plugin.identify(*args, **kwargs)
+plugin.running_a_test = True
+try:
+err = plugin.identify(*args, **kwargs)
+finally:
+plugin.running_a_test = False
total_time = time.time() - start_time
times.append(total_time)
if err is not None:

View File

@ -66,12 +66,15 @@ class EXTHHeader(object):
# last update time
pass
elif id == 503: # Long title
-if not title or title == _('Unknown') or \
-'USER_CONTENT' in title or title.startswith('dtp_'):
-try:
-title = content.decode(codec)
-except:
-pass
+# Amazon seems to regard this as the definitive book title
+# rather than the title from the PDB header. In fact when
+# sending MOBI files through Amazon's email service if the
+# title contains non ASCII chars or non filename safe chars
+# they are messed up in the PDB header
+try:
+title = content.decode(codec)
+except:
+pass
#else:
# print 'unknown record', id, repr(content)
if title:

View File

@ -20,6 +20,7 @@ from calibre.utils.config import DynamicConfig
from calibre.utils.logging import Log
from calibre import guess_type, prints, prepare_string_for_xml
from calibre.ebooks.oeb.transforms.cover import CoverManager
+from calibre.constants import filesystem_encoding
TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
'__ar__', 'none').replace('__viewbox__', '0 0 600 800'
@ -180,6 +181,8 @@ class EbookIterator(object):
self.delete_on_exit = []
self._tdir = TemporaryDirectory('_ebook_iter')
self.base = self._tdir.__enter__()
+if not isinstance(self.base, unicode):
+self.base = self.base.decode(filesystem_encoding)
from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
plumber = Plumber(self.pathtoebook, self.base, self.log)
plumber.setup_options()

The remaining file diffs are suppressed because they are too large.