Merge from trunk

This commit is contained in:
Charles Haley 2011-04-13 10:55:19 +01:00
commit 45fe4d69b6
17 changed files with 390 additions and 45 deletions

View File

@ -625,8 +625,9 @@ if test_eight_code:
from calibre.ebooks.metadata.sources.google import GoogleBooks
from calibre.ebooks.metadata.sources.amazon import Amazon
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
plugins += [GoogleBooks, Amazon, OpenLibrary]
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB]
# }}}
else:

View File

@ -26,7 +26,7 @@ class ParserError(ValueError):
pass
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm',
'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb']

View File

@ -12,7 +12,7 @@ from lxml import etree
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.oeb.base import OEB_IMAGES
from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
@ -71,9 +71,13 @@ class HTMLZOutput(OutputFormatPlugin):
os.makedirs(os.path.join(tdir, 'images'))
for item in oeb_book.manifest:
if item.media_type in OEB_IMAGES and item.href in images:
if item.media_type == SVG_MIME:
data = unicode(etree.tostring(item.data, encoding=unicode))
else:
data = item.data
fname = os.path.join(tdir, 'images', images[item.href])
with open(fname, 'wb') as img:
img.write(item.data)
img.write(data)
# Metadata
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:

View File

@ -8,12 +8,13 @@ Read meta information from extZ (TXTZ, HTMLZ...) files.
'''
import os
import posixpath
from cStringIO import StringIO
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.zipfile import ZipFile, safe_replace
def get_metadata(stream, extract_cover=True):
@ -23,16 +24,75 @@ def get_metadata(stream, extract_cover=True):
mi = MetaInformation(_('Unknown'), [_('Unknown')])
stream.seek(0)
with TemporaryDirectory('_untxtz_mdata') as tdir:
try:
zf = ZipFile(stream)
zf.extract('metadata.opf', tdir)
with open(os.path.join(tdir, 'metadata.opf'), 'rb') as opff:
mi = OPF(opff).to_book_metadata()
with ZipFile(stream) as zf:
opf_name = get_first_opf_name(zf)
opf_stream = StringIO(zf.read(opf_name))
opf = OPF(opf_stream)
mi = opf.to_book_metadata()
if extract_cover:
cover_name = opf.raster_cover
if cover_name:
mi.cover_data = ('jpg', zf.read(cover_name))
except:
return mi
return mi
def set_metadata(stream, mi):
    '''
    Update the metadata stored in the extZ archive ``stream`` from ``mi``.

    The OPF selected by get_first_opf_name() is updated in place with
    ``opf.smart_update(mi, replace_metadata=True)`` and written back to the
    archive with safe_replace(). If ``mi`` carries a cover, either as
    ``mi.cover_data[1]`` or as a readable file path in ``mi.cover``, it is
    written next to the OPF under the OPF's raster cover name, falling back
    to ``cover.jpg`` when the OPF declares none.

    :param stream: the archive to update, opened for reading and writing
    :param mi: the metadata object to apply
    :raises Exception: if the archive contains no top-level OPF
    '''
    replacements = {}

    # Get the OPF in the archive.
    with ZipFile(stream) as zf:
        opf_path = get_first_opf_name(zf)
        opf_stream = StringIO(zf.read(opf_path))
    opf = OPF(opf_stream)

    # Cover: prefer in-memory data, fall back to the file named by mi.cover.
    # Both lookups are best effort, a missing cover is not an error.
    try:
        new_cdata = mi.cover_data[1]
    except Exception:
        new_cdata = None
    if not new_cdata:
        try:
            with open(mi.cover, 'rb') as cover_src:
                new_cdata = cover_src.read()
        except Exception:
            new_cdata = None

    cover_tmp = None    # path of the temporary cover file, if one was made
    cover_file = None   # open handle on that file, handed to safe_replace
    try:
        if new_cdata:
            raster_cover = opf.raster_cover or 'cover.jpg'
            cpath = posixpath.join(posixpath.dirname(opf_path), raster_cover)
            cover_tmp = _write_new_cover(new_cdata, cpath).name
            cover_file = open(cover_tmp, 'rb')
            replacements[cpath] = cover_file

        # Update the metadata.
        opf.smart_update(mi, replace_metadata=True)
        newopf = StringIO(opf.render())
        safe_replace(stream, opf_path, newopf,
                extra_replacements=replacements)
    finally:
        # Cleanup temporary files, also when the update itself failed.
        if cover_file is not None:
            cover_file.close()
        if cover_tmp is not None:
            try:
                os.remove(cover_tmp)
            except OSError:
                pass
def get_first_opf_name(zf):
    '''
    Return the name of the first OPF file, in sorted order, that is stored
    at the top level of the archive ``zf`` (no ``/`` in its name).

    :raises Exception: if the archive has no top-level ``.opf`` entry
    '''
    candidates = sorted(
        name for name in zf.namelist()
        if name.endswith('.opf') and '/' not in name)
    if candidates:
        return candidates[0]
    raise Exception('No OPF found')
def _write_new_cover(new_cdata, cpath):
    '''
    Save the cover data ``new_cdata`` into a new persistent temporary file
    whose suffix is the file extension of ``cpath``.

    Returns the (already closed) temporary file object; its ``name``
    attribute is the path that save_cover_data_to() wrote to.
    '''
    from calibre.utils.magick.draw import save_cover_data_to
    extension = os.path.splitext(cpath)[1]
    tmp_cover = PersistentTemporaryFile(suffix=extension)
    # Only the path is needed, the data is written by save_cover_data_to
    tmp_cover.close()
    save_cover_data_to(new_cdata, tmp_cover.name)
    return tmp_cover

View File

@ -181,6 +181,10 @@ class Source(Plugin):
#: construct the configuration widget for this plugin
options = ()
#: A string that is displayed at the top of the config widget for this
#: plugin
config_help_message = None
def __init__(self, *args, **kwargs):
Plugin.__init__(self, *args, **kwargs)

View File

@ -76,6 +76,11 @@ def run_download(log, results, abort,
(plugin, width, height, fmt, bytes)
'''
if title == _('Unknown'):
title = None
if authors == [_('Unknown')]:
authors = None
plugins = [p for p in metadata_plugins(['cover']) if p.is_configured()]
rq = Queue()
@ -145,7 +150,7 @@ def download_cover(log,
Synchronous cover download. Returns the "best" cover as per user
prefs/cover resolution.
Return cover is a tuple: (plugin, width, height, fmt, data)
Returned cover is a tuple: (plugin, width, height, fmt, data)
Returns None if no cover is found.
'''

View File

@ -253,6 +253,10 @@ def merge_identify_results(result_map, log):
def identify(log, abort, # {{{
title=None, authors=None, identifiers={}, timeout=30):
if title == _('Unknown'):
title = None
if authors == [_('Unknown')]:
authors = None
start_time = time.time()
plugins = [p for p in metadata_plugins(['identify']) if p.is_configured()]

View File

@ -7,7 +7,19 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.metadata.sources.base import Source
from urllib import quote
from lxml import etree
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source, Option
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.icu import lower
from calibre.ebooks.metadata.book.base import Metadata
BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%s&page_number=1&results=subjects,authors,texts&'
class ISBNDB(Source):
@ -18,6 +30,20 @@ class ISBNDB(Source):
touched_fields = frozenset(['title', 'authors',
'identifier:isbn', 'comments', 'publisher'])
supports_gzip_transfer_encoding = True
# Shortcut, since we have no cached cover URLS
cached_cover_url_is_reliable = False
# Options used to construct the configuration widget for this plugin.
# The two halves of the help text are adjacent string literals, so the
# separating space has to be part of one of them.
options = (
    Option('isbndb_key', 'string', None, _('IsbnDB key:'),
        _('To use isbndb.com you have to sign up for a free account '
          'at isbndb.com and get an access key.')),
)

# Displayed at the top of the config widget for this plugin
config_help_message = '<p>'+_('To use metadata from isbndb.com you must sign'
    ' up for a free account and get an isbndb key and enter it below.'
    ' Instructions to get the key are '
    '<a href="http://isbndb.com/docs/api/30-keys.html">here</a>.')
def __init__(self, *args, **kwargs):
Source.__init__(self, *args, **kwargs)
@ -35,9 +61,186 @@ class ISBNDB(Source):
except:
pass
self.isbndb_key = prefs['isbndb_key']
@property
def isbndb_key(self):
    '''The value stored under ``isbndb_key`` in this plugin's prefs.'''
    stored_prefs = self.prefs
    return stored_prefs['isbndb_key']
def is_configured(self):
    '''Return True unless the stored isbndb key is None.'''
    key = self.isbndb_key
    return key is not None
def create_query(self, title=None, authors=None, identifiers={}): # {{{
    '''
    Build the isbndb.com query URL for the given metadata.

    A valid ISBN in ``identifiers`` takes precedence and produces an
    ``index1=isbn`` query. Otherwise the title tokens and the tokens of the
    first author are combined into an ``index1=combined`` query.

    Returns the URL as a string, or None when there is nothing to search
    for (no valid ISBN and no usable title/author tokens).
    '''
    base_url = BASE_URL%self.isbndb_key
    isbn = check_isbn(identifiers.get('isbn', None))
    if isbn is not None:
        q = 'index1=isbn&value1='+isbn
    elif title or authors:
        tokens = list(self.get_title_tokens(title))
        tokens += self.get_author_tokens(authors, only_first_author=True)
        if not tokens:
            # Nothing searchable was left, do not send an empty query
            return None
        # quote() raises KeyError for non-ASCII unicode input, so encode
        # every unicode token to UTF-8 bytes before quoting it
        tokens = [quote(t.encode('utf-8') if isinstance(t, unicode) else t)
                for t in tokens]
        q = 'index1=combined&value1=' + '+'.join(tokens)
    else:
        return None

    if isinstance(q, unicode):
        q = q.encode('utf-8')
    return base_url + q
# }}}
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
        identifiers={}, timeout=30):
    '''
    Query isbndb.com and put every result on ``result_queue``.

    Does nothing when the plugin is not configured. Returns an error
    message when no query could be built or the query failed, otherwise
    returns None. If an ISBN search finds nothing and both title and
    authors are available, the search is repeated without identifiers.
    '''
    if not self.is_configured():
        return

    query = self.create_query(title=title, authors=authors,
            identifiers=identifiers)
    if not query:
        err = 'Insufficient metadata to construct query'
        log.error(err)
        return err

    try:
        found = self.make_query(query, abort, title=title, authors=authors,
                identifiers=identifiers, timeout=timeout)
    except:
        err = 'Failed to make query to ISBNDb, aborting.'
        log.exception(err)
        return err

    # The ISBN lookup came back empty: fall back to a title/author search
    retry_without_isbn = (not found and identifiers.get('isbn', False)
            and title and authors and not abort.is_set())
    if retry_without_isbn:
        return self.identify(log, result_queue, abort, title=title,
                authors=authors, timeout=timeout)

    for mi in found:
        self.clean_downloaded_metadata(mi)
        result_queue.put(mi)
def parse_feed(self, feed, seen, orig_title, orig_authors, identifiers):
    '''
    Extract the usable results from one page of an isbndb.com response.

    :param feed: the parsed XML root of the response
    :param seen: set of ``(title, tuple(authors))`` keys of results that
        were already processed; it is updated in place
    :param orig_title: the title that was searched for, may be None
    :param orig_authors: the authors that were searched for, may be None
    :param identifiers: the identifiers that were searched for; a result
        whose ISBN differs from ``identifiers['isbn']`` is dropped
    :return: ``(total_results, shown_results, results)`` where the two
        counts are read from the ``BookList`` element and ``results`` is a
        list of Metadata objects
    :raises ValueError: if the response contains no ``BookList`` element
    '''

    def tostring(x):
        if x is None:
            return ''
        return etree.tostring(x, method='text', encoding=unicode).strip()

    orig_isbn = identifiers.get('isbn', None)
    title_tokens = list(self.get_title_tokens(orig_title))
    author_tokens = list(self.get_author_tokens(orig_authors))
    results = []

    def ismatch(title, authors):
        # With no tokens to compare against, everything matches. Otherwise
        # a single matching token is enough, for title and authors alike.
        authors = lower(' '.join(authors))
        title = lower(title)
        match = not title_tokens or any(
                lower(t) in title for t in title_tokens)
        amatch = not author_tokens or any(
                lower(a) in authors for a in author_tokens)
        return match and amatch

    bl = feed.find('BookList')
    if bl is None:
        # The error text has to be looked up in the response document,
        # the lxml etree module itself has no find() function
        err = tostring(feed.find('errormessage'))
        raise ValueError('ISBNDb query failed:' + err)
    total_results = int(bl.get('total_results'))
    shown_results = int(bl.get('shown_results'))

    for bd in bl.xpath('.//BookData'):
        isbn = check_isbn(bd.get('isbn13', bd.get('isbn', None)))
        if not isbn:
            continue
        if orig_isbn and isbn != orig_isbn:
            continue

        title = tostring(bd.find('Title'))
        if not title:
            continue

        authors = []
        for au in bd.xpath('.//Authors/Person'):
            au = tostring(au)
            if au:
                if ',' in au:
                    # "Last, First" -> "First Last". The separator is not
                    # bound to _ so that the translation function is not
                    # shadowed inside this method.
                    ln, sep, fn = au.partition(',')
                    au = fn.strip() + ' ' + ln.strip()
                authors.append(au)
        if not authors:
            continue

        comments = tostring(bd.find('Summary'))
        if not comments:
            # Require comments, since without them the result is useless
            # anyway
            continue

        id_ = (title, tuple(authors))
        if id_ in seen:
            continue
        seen.add(id_)
        if not ismatch(title, authors):
            continue

        publisher = tostring(bd.find('PublisherText'))
        if not publisher:
            publisher = None
        if publisher and 'audio' in publisher.lower():
            continue

        mi = Metadata(title, authors)
        mi.isbn = isbn
        mi.publisher = publisher
        mi.comments = comments
        results.append(mi)

    return total_results, shown_results, results
def make_query(self, q, abort, title=None, authors=None, identifiers={},
        max_pages=10, timeout=30):
    '''
    Fetch result pages for the query URL ``q`` and return the list of
    results collected from them by parse_feed().

    Paging stops after ``max_pages`` pages, when ``abort`` is set, when
    the number of results shown so far reaches the reported total, or once
    more than nine candidates have been collected.
    '''
    xml_parser = etree.XMLParser(recover=True, no_network=True)
    browser = self.browser
    seen = set()
    candidates = []
    shown_so_far = 0

    page = 1
    while page <= max_pages and not abort.is_set():
        # The base URL always asks for page 1, patch in the wanted page
        url = q.replace('&page_number=1&', '&page_number=%d&'%page)
        page += 1
        raw = browser.open_novisit(url, timeout=timeout).read()
        text = xml_to_unicode(clean_ascii_chars(raw),
                strip_encoding_pats=True)[0]
        feed = etree.fromstring(text, parser=xml_parser)
        total, shown, results = self.parse_feed(
                feed, seen, title, authors, identifiers)
        shown_so_far += shown
        candidates += results
        if shown_so_far >= total or len(candidates) > 9:
            break

    return candidates
# }}}
if __name__ == '__main__':
    # To run these test use:
    # calibre-debug -e src/calibre/ebooks/metadata/sources/isbndb.py
    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
        title_test, authors_test)

    # Each case pairs the identify() keyword arguments with the checks
    # that are applied to the results.
    gatsby_case = (
        {'title':'Great Gatsby', 'authors':['Fitzgerald']},
        [title_test('The great gatsby', exact=True),
            authors_test(['F. Scott Fitzgerald'])]
    )
    flatland_case = (
        {'title': 'Flatland', 'authors':['Abbott']},
        [title_test('Flatland', exact=False)]
    )

    test_identify_plugin(ISBNDB.name, [gatsby_case, flatland_case])

View File

@ -310,6 +310,7 @@ class Serializer(object):
if href not in id_offsets:
self.logger.warn('Hyperlink target %r not found' % href)
href, _ = urldefrag(href)
if href in self.id_offsets:
ioff = self.id_offsets[href]
for hoff in hoffs:
buffer.seek(hoff)

View File

@ -357,6 +357,7 @@ class FileIconProvider(QFileIconProvider):
'bmp' : 'bmp',
'svg' : 'svg',
'html' : 'html',
'htmlz' : 'html',
'htm' : 'html',
'xhtml' : 'html',
'xhtm' : 'html',

View File

@ -94,7 +94,7 @@ class EditMetadataAction(InterfaceAction):
def bulk_metadata_downloaded(self, job):
if job.failed:
self.job_exception(job, dialog_title=_('Failed to download metadata'))
self.gui.job_exception(job, dialog_title=_('Failed to download metadata'))
return
from calibre.gui2.metadata.bulk_download2 import proceed
proceed(self.gui, job)

View File

@ -54,6 +54,8 @@ def start_download(gui, ids, callback, identify, covers):
_('Download metadata for %d books')%len(ids),
download, (ids, gui.current_db, identify, covers), {}, callback)
gui.job_manager.run_threaded_job(job)
gui.status_bar.show_message(_('Metadata download started'), 3000)
class ViewLog(QDialog): # {{{
@ -75,7 +77,7 @@ class ViewLog(QDialog): # {{{
self.copy_button.clicked.connect(self.copy_to_clipboard)
l.addWidget(self.bb)
self.setModal(False)
self.resize(QSize(500, 400))
self.resize(QSize(700, 500))
self.setWindowTitle(_('Download log'))
self.setWindowIcon(QIcon(I('debug.png')))
self.show()
@ -110,25 +112,27 @@ class ApplyDialog(QDialog):
self.bb.accepted.connect(self.accept)
l.addWidget(self.bb)
self.db = gui.current_db
self.gui = gui
self.id_map = list(id_map.iteritems())
self.current_idx = 0
self.failures = []
self.ids = []
self.canceled = False
QTimer.singleShot(20, self.do_one)
self.exec_()
def do_one(self):
if self.canceled:
return
i, mi = self.id_map[self.current_idx]
db = self.gui.current_db
try:
set_title = not mi.is_null('title')
set_authors = not mi.is_null('authors')
self.db.set_metadata(i, mi, commit=False, set_title=set_title,
db.set_metadata(i, mi, commit=False, set_title=set_title,
set_authors=set_authors)
self.ids.append(i)
except:
import traceback
self.failures.append((i, traceback.format_exc()))
@ -156,9 +160,10 @@ class ApplyDialog(QDialog):
return
if self.failures:
msg = []
db = self.gui.current_db
for i, tb in self.failures:
title = self.db.title(i, index_is_id=True)
authors = self.db.authors(i, index_is_id=True)
title = db.title(i, index_is_id=True)
authors = db.authors(i, index_is_id=True)
if authors:
authors = [x.replace('|', ',') for x in authors.split(',')]
title += ' - ' + authors_to_string(authors)
@ -169,6 +174,12 @@ class ApplyDialog(QDialog):
' in your library. Click "Show Details" to see '
'details.'), det_msg='\n\n'.join(msg), show=True)
self.accept()
if self.ids:
cr = self.gui.library_view.currentIndex().row()
self.gui.library_view.model().refresh_ids(
self.ids, cr)
if self.gui.cover_flow:
self.gui.cover_flow.dataChanged()
_amd = None
def apply_metadata(job, gui, q, result):
@ -177,7 +188,7 @@ def apply_metadata(job, gui, q, result):
q.finished.disconnect()
if result != q.Accepted:
return
id_map, failed_ids = job.result
id_map, failed_ids, failed_covers, title_map = job.result
id_map = dict([(k, v) for k, v in id_map.iteritems() if k not in
failed_ids])
if not id_map:
@ -207,23 +218,32 @@ def apply_metadata(job, gui, q, result):
return
_amd = ApplyDialog(id_map, gui)
_amd.exec_()
def proceed(gui, job):
id_map, failed_ids = job.result
gui.status_bar.show_message(_('Metadata download completed'), 3000)
id_map, failed_ids, failed_covers, title_map = job.result
fmsg = det_msg = ''
if failed_ids:
fmsg = _('Could not download metadata for %d of the books. Click'
if failed_ids or failed_covers:
fmsg = '<p>'+_('Could not download metadata and/or covers for %d of the books. Click'
' "Show details" to see which books.')%len(failed_ids)
det_msg = '\n'.join([id_map[i].title for i in failed_ids])
det_msg = []
for i in failed_ids | failed_covers:
title = title_map[i]
if i in failed_ids:
title += (' ' + _('(Failed metadata)'))
if i in failed_covers:
title += (' ' + _('(Failed cover)'))
det_msg.append(title)
msg = '<p>' + _('Finished downloading metadata for <b>%d book(s)</b>. '
'Proceed with updating the metadata in your library?')%len(id_map)
q = MessageBox(MessageBox.QUESTION, _('Download complete'),
msg + fmsg, det_msg=det_msg, show_copy_button=bool(failed_ids),
msg + fmsg, det_msg='\n'.join(det_msg), show_copy_button=bool(failed_ids),
parent=gui)
q.vlb = q.bb.addButton(_('View log'), q.bb.ActionRole)
q.vlb.setIcon(QIcon(I('debug.png')))
q.vlb.clicked.connect(partial(view_log, job, q))
q.det_msg_toggle.setVisible(bool(failed_ids))
q.det_msg_toggle.setVisible(bool(failed_ids | failed_covers))
q.setModal(False)
q.show()
q.finished.connect(partial(apply_metadata, job, gui, q))
@ -242,12 +262,18 @@ def merge_result(oldmi, newmi):
if (not newmi.is_null(f) and getattr(newmi, f) == getattr(oldmi, f)):
setattr(newmi, f, getattr(dummy, f))
newmi.last_modified = oldmi.last_modified
return newmi
def download(ids, db, do_identify, covers,
log=None, abort=None, notifications=None):
ids = list(ids)
metadata = [db.get_metadata(i, index_is_id=True, get_user_categories=False)
for i in ids]
failed_ids = set()
failed_covers = set()
title_map = {}
ans = {}
count = 0
for i, mi in izip(ids, metadata):
@ -255,6 +281,7 @@ def download(ids, db, do_identify, covers,
log.error('Aborting...')
break
title, authors, identifiers = mi.title, mi.authors, mi.identifiers
title_map[i] = title
if do_identify:
results = []
try:
@ -265,22 +292,29 @@ def download(ids, db, do_identify, covers,
if results:
mi = merge_result(mi, results[0])
identifiers = mi.identifiers
if not mi.is_null('rating'):
# set_metadata expects a rating out of 10
mi.rating *= 2
else:
log.error('Failed to download metadata for', title)
failed_ids.add(mi)
failed_ids.add(i)
# We don't want set_metadata operating on anything but covers
mi = merge_result(mi, mi)
if covers:
cdata = download_cover(log, title=title, authors=authors,
identifiers=identifiers)
if cdata:
if cdata is not None:
with PersistentTemporaryFile('.jpg', 'downloaded-cover-') as f:
f.write(cdata)
f.write(cdata[-1])
mi.cover = f.name
else:
failed_covers.add(i)
ans[i] = mi
count += 1
notifications.put((count/len(ids),
_('Downloaded %d of %d')%(count, len(ids))))
log('Download complete, with %d failures'%len(failed_ids))
return (ans, failed_ids)
return (ans, failed_ids, failed_covers, title_map)

View File

@ -56,7 +56,12 @@ class ConfigWidget(QWidget):
self.setLayout(l)
self.gb = QGroupBox(_('Downloaded metadata fields'), self)
l.addWidget(self.gb, 0, 0, 1, 2)
if plugin.config_help_message:
self.pchm = QLabel(plugin.config_help_message)
self.pchm.setWordWrap(True)
self.pchm.setOpenExternalLinks(True)
l.addWidget(self.pchm, 0, 0, 1, 2)
l.addWidget(self.gb, l.rowCount(), 0, 1, 2)
self.gb.l = QGridLayout()
self.gb.setLayout(self.gb.l)
self.fields_view = v = QListView(self)
@ -81,7 +86,7 @@ class ConfigWidget(QWidget):
widget.setValue(val)
elif opt.type == 'string':
widget = QLineEdit(self)
widget.setText(val)
widget.setText(val if val else '')
elif opt.type == 'bool':
widget = QCheckBox(opt.label, self)
widget.setChecked(bool(val))

View File

@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
from operator import attrgetter
from PyQt4.Qt import (QAbstractTableModel, Qt, QAbstractListModel, QWidget,
pyqtSignal, QVBoxLayout, QDialogButtonBox, QFrame, QLabel)
pyqtSignal, QVBoxLayout, QDialogButtonBox, QFrame, QLabel, QIcon)
from calibre.gui2.preferences import ConfigWidgetBase, test_widget
from calibre.gui2.preferences.metadata_sources_ui import Ui_Form
@ -67,6 +67,13 @@ class SourcesModel(QAbstractTableModel): # {{{
return self.enabled_overrides.get(plugin, orig)
elif role == Qt.UserRole:
return plugin
elif (role == Qt.DecorationRole and col == 0 and not
plugin.is_configured()):
return QIcon(I('list_remove.png'))
elif role == Qt.ToolTipRole:
if plugin.is_configured():
return _('This source is configured and ready to go')
return _('This source needs configuration')
return NONE
def setData(self, index, val, role):

View File

@ -48,6 +48,16 @@
</property>
</widget>
</item>
<item>
<widget class="QLabel" name="label_5">
<property name="text">
<string>Sources with a red X next to their names must be configured before they will be used. </string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="configure_plugin_button">
<property name="text">

View File

@ -189,7 +189,11 @@ class ThreadedJobServer(Thread):
def run(self):
    '''
    Call run_once() about every 0.1 seconds for as long as
    ``self.keep_going`` is true. An exception raised by run_once() is
    printed as a traceback and does not end the loop.
    '''
    import traceback
    while self.keep_going:
        try:
            self.run_once()
        except:
            traceback.print_exc()
        time.sleep(0.1)
def run_once(self):

View File

@ -22,6 +22,8 @@ First start the |app| content server as shown below::
calibre-server --url-prefix /calibre --port 8080
The key parameter here is ``--url-prefix /calibre``. This causes the content server to serve all URLs prefixed by calibre. To see this in action, visit ``http://localhost:8080/calibre`` in your browser. You should see the normal content server website, but now it will run under /calibre.
Now suppose you are using Apache as your main server. First enable the proxy modules in Apache by adding the following to :file:`httpd.conf`::
LoadModule proxy_module modules/mod_proxy.so