Fix adding HTML books from the top of a deep folder hierarchy being very slow

This commit is contained in:
Kovid Goyal 2011-04-27 11:05:46 -06:00
parent 007ad0e7c4
commit a36e9c4243
2 changed files with 16 additions and 14 deletions

View File

@@ -309,9 +309,9 @@ class HTMLInput(InputFormatPlugin):
def create_oebbook(self, htmlpath, basedir, opts, log, mi): def create_oebbook(self, htmlpath, basedir, opts, log, mi):
from calibre.ebooks.conversion.plumber import create_oebbook from calibre.ebooks.conversion.plumber import create_oebbook
from calibre.ebooks.oeb.base import DirContainer, \ from calibre.ebooks.oeb.base import (DirContainer,
rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES, \ rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
xpath xpath)
from calibre import guess_type from calibre import guess_type
from calibre.ebooks.oeb.transforms.metadata import \ from calibre.ebooks.oeb.transforms.metadata import \
meta_info_to_oeb_metadata meta_info_to_oeb_metadata
@@ -345,7 +345,8 @@ class HTMLInput(InputFormatPlugin):
htmlfile_map = {} htmlfile_map = {}
for f in filelist: for f in filelist:
path = f.path path = f.path
oeb.container = DirContainer(os.path.dirname(path), log) oeb.container = DirContainer(os.path.dirname(path), log,
ignore_opf=True)
bname = os.path.basename(path) bname = os.path.basename(path)
id, href = oeb.manifest.generate(id='html', id, href = oeb.manifest.generate(id='html',
href=ascii_filename(bname)) href=ascii_filename(bname))
@@ -369,7 +370,7 @@ class HTMLInput(InputFormatPlugin):
for f in filelist: for f in filelist:
path = f.path path = f.path
dpath = os.path.dirname(path) dpath = os.path.dirname(path)
oeb.container = DirContainer(dpath, log) oeb.container = DirContainer(dpath, log, ignore_opf=True)
item = oeb.manifest.hrefs[htmlfile_map[path]] item = oeb.manifest.hrefs[htmlfile_map[path]]
rewrite_links(item.data, partial(self.resource_adder, base=dpath)) rewrite_links(item.data, partial(self.resource_adder, base=dpath))
@@ -409,7 +410,7 @@ class HTMLInput(InputFormatPlugin):
if not item.linear: continue if not item.linear: continue
toc.add(title, item.href) toc.add(title, item.href)
oeb.container = DirContainer(os.getcwdu(), oeb.log) oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
return oeb return oeb
def link_to_local_path(self, link_, base=None): def link_to_local_path(self, link_, base=None):
@@ -456,7 +457,7 @@ class HTMLInput(InputFormatPlugin):
href=bhref) href=bhref)
self.oeb.log.debug('Added', link) self.oeb.log.debug('Added', link)
self.oeb.container = self.DirContainer(os.path.dirname(link), self.oeb.container = self.DirContainer(os.path.dirname(link),
self.oeb.log) self.oeb.log, ignore_opf=True)
# Load into memory # Load into memory
guessed = self.guess_type(href)[0] guessed = self.guess_type(href)[0]
media_type = guessed or self.BINARY_MIME media_type = guessed or self.BINARY_MIME

View File

@@ -446,22 +446,23 @@ class NullContainer(object):
class DirContainer(object): class DirContainer(object):
"""Filesystem directory container.""" """Filesystem directory container."""
def __init__(self, path, log): def __init__(self, path, log, ignore_opf=False):
self.log = log self.log = log
if isbytestring(path): if isbytestring(path):
path = path.decode(filesystem_encoding) path = path.decode(filesystem_encoding)
self.opfname = None
ext = os.path.splitext(path)[1].lower() ext = os.path.splitext(path)[1].lower()
if ext == '.opf': if ext == '.opf':
self.opfname = os.path.basename(path) self.opfname = os.path.basename(path)
self.rootdir = os.path.dirname(path) self.rootdir = os.path.dirname(path)
return return
self.rootdir = path self.rootdir = path
for path in self.namelist(): if not ignore_opf:
ext = os.path.splitext(path)[1].lower() for path in self.namelist():
if ext == '.opf': ext = os.path.splitext(path)[1].lower()
self.opfname = path if ext == '.opf':
return self.opfname = path
self.opfname = None return
def read(self, path): def read(self, path):
if path is None: if path is None: