EPUB metadata: Speed up reading metadata from very large EPUB files. Fixes #4908 (Importing large book hangs Calibre)

This commit is contained in:
Kovid Goyal 2010-02-15 08:25:35 -07:00
parent c484d9dcc1
commit 4a74faa6e5
3 changed files with 16 additions and 7 deletions

View File

@ -25,7 +25,7 @@ class SueddeutcheZeitung(BasicNewsRecipe):
LOGIN = PREFIX + '/app/lbox/index.html' LOGIN = PREFIX + '/app/lbox/index.html'
use_embedded_content = False use_embedded_content = False
masthead_url = 'http://pix.sueddeutsche.de/img/g_.gif' masthead_url = 'http://pix.sueddeutsche.de/img/g_.gif'
language = 'de_DE' language = 'de'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif} ' extra_css = ' body{font-family: Arial,Helvetica,sans-serif} '
conversion_options = { conversion_options = {

View File

@ -69,7 +69,7 @@ class OCFReader(OCF):
self.opf_path = self.container[OPF.MIMETYPE] self.opf_path = self.container[OPF.MIMETYPE]
try: try:
with closing(self.open(self.opf_path)) as f: with closing(self.open(self.opf_path)) as f:
self.opf = OPF(f, self.root) self.opf = OPF(f, self.root, populate_spine=False)
except KeyError: except KeyError:
raise EPubException("missing OPF package file") raise EPubException("missing OPF package file")
@ -101,10 +101,9 @@ class OCFDirReader(OCFReader):
def get_cover(opf, opf_path, stream): def get_cover(opf, opf_path, stream):
from calibre.ebooks import render_html_svg_workaround from calibre.ebooks import render_html_svg_workaround
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
spine = list(opf.spine_items()) cpage = opf.first_spine_item()
if not spine: if not cpage:
return return
cpage = spine[0]
with TemporaryDirectory('_epub_meta') as tdir: with TemporaryDirectory('_epub_meta') as tdir:
with CurrentDir(tdir): with CurrentDir(tdir):
stream.seek(0) stream.seek(0)

View File

@ -455,7 +455,8 @@ class OPF(object):
formatter=parse_date) formatter=parse_date)
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True): def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True,
populate_spine=True):
if not hasattr(stream, 'read'): if not hasattr(stream, 'read'):
stream = open(stream, 'rb') stream = open(stream, 'rb')
raw = stream.read() raw = stream.read()
@ -478,7 +479,7 @@ class OPF(object):
self.manifest = Manifest.from_opf_manifest_element(m, basedir) self.manifest = Manifest.from_opf_manifest_element(m, basedir)
self.spine = None self.spine = None
s = self.spine_path(self.root) s = self.spine_path(self.root)
if s: if populate_spine and s:
self.spine = Spine.from_opf_spine_element(s, self.manifest) self.spine = Spine.from_opf_spine_element(s, self.manifest)
self.guide = None self.guide = None
guide = self.guide_path(self.root) guide = self.guide_path(self.root)
@ -585,6 +586,15 @@ class OPF(object):
if x.get('id', None) == idref: if x.get('id', None) == idref:
yield x.get('href', '') yield x.get('href', '')
def first_spine_item(self):
    """Return the manifest href for the first entry of the spine.

    Looks at the first element returned by ``iterspine()``, reads its
    ``idref`` attribute and returns the ``href`` of the first manifest
    item whose ``id`` equals it.  Returns ``None`` when the spine has no
    entries, when no manifest item matches, or when the matching item
    has no ``href``.
    """
    # NOTE(review): assumes iterspine() yields an indexable sequence
    # (it is tested for emptiness and subscripted) -- confirm.
    spine_refs = self.iterspine()
    if spine_refs:
        wanted = spine_refs[0].get('idref', '')
        # Lazy scan: stops at the first manifest item with a matching id.
        return next(
            (entry.get('href', None)
             for entry in self.itermanifest()
             if entry.get('id', None) == wanted),
            None)
    return None
def create_spine_item(self, idref): def create_spine_item(self, idref):
ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref) ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref)
ans.tail = '\n\t\t' ans.tail = '\n\t\t'