EPUB metadata: Speed up reading metadata from very large EPUB files. Fixes #4908 (Importing large book hangs Calibre)

This commit is contained in:
Kovid Goyal 2010-02-15 08:25:35 -07:00
parent c484d9dcc1
commit 4a74faa6e5
3 changed files with 16 additions and 7 deletions

View File

@ -25,7 +25,7 @@ class SueddeutcheZeitung(BasicNewsRecipe):
LOGIN = PREFIX + '/app/lbox/index.html'
use_embedded_content = False
masthead_url = 'http://pix.sueddeutsche.de/img/g_.gif'
language = 'de_DE'
language = 'de'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif} '
conversion_options = {

View File

@ -69,7 +69,7 @@ class OCFReader(OCF):
self.opf_path = self.container[OPF.MIMETYPE]
try:
with closing(self.open(self.opf_path)) as f:
self.opf = OPF(f, self.root)
self.opf = OPF(f, self.root, populate_spine=False)
except KeyError:
raise EPubException("missing OPF package file")
@ -101,10 +101,9 @@ class OCFDirReader(OCFReader):
def get_cover(opf, opf_path, stream):
from calibre.ebooks import render_html_svg_workaround
from calibre.utils.logging import default_log
spine = list(opf.spine_items())
if not spine:
cpage = opf.first_spine_item()
if not cpage:
return
cpage = spine[0]
with TemporaryDirectory('_epub_meta') as tdir:
with CurrentDir(tdir):
stream.seek(0)

View File

@ -455,7 +455,8 @@ class OPF(object):
formatter=parse_date)
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True):
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True,
populate_spine=True):
if not hasattr(stream, 'read'):
stream = open(stream, 'rb')
raw = stream.read()
@ -478,7 +479,7 @@ class OPF(object):
self.manifest = Manifest.from_opf_manifest_element(m, basedir)
self.spine = None
s = self.spine_path(self.root)
if s:
if populate_spine and s:
self.spine = Spine.from_opf_spine_element(s, self.manifest)
self.guide = None
guide = self.guide_path(self.root)
@ -585,6 +586,15 @@ class OPF(object):
if x.get('id', None) == idref:
yield x.get('href', '')
def first_spine_item(self):
    """Return the manifest href of the first spine entry, or None.

    Only the first element produced by ``self.iterspine()`` is examined;
    ``self.itermanifest()`` is then scanned for the item whose ``id``
    equals that element's ``idref``. No other spine entries are resolved.

    Returns None when the spine is empty or when no manifest item matches
    the first ``idref``.
    """
    # next() with a default handles an empty spine, and works whether
    # iterspine() returns an indexable sequence or a one-shot iterator.
    first = next(iter(self.iterspine() or ()), None)
    if first is None:
        return None
    idref = first.get('idref', '')
    for item in self.itermanifest():
        if item.get('id', None) == idref:
            return item.get('href', None)
    # The first spine entry points at an id that is not in the manifest.
    return None
def create_spine_item(self, idref):
ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref)
ans.tail = '\n\t\t'