Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)
HTML Input: Fix conversion when HTML files in different directories have the same name. Also ignore binary files.
This commit is contained in:
Parent: 05f02201fb
Commit: 3f60a90d94
@@ -328,6 +328,7 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
|
|
||||||
|
|
||||||
filelist = get_filelist(htmlpath, basedir, opts, log)
|
filelist = get_filelist(htmlpath, basedir, opts, log)
|
||||||
|
filelist = [f for f in filelist if not f.is_binary]
|
||||||
htmlfile_map = {}
|
htmlfile_map = {}
|
||||||
for f in filelist:
|
for f in filelist:
|
||||||
path = f.path
|
path = f.path
|
||||||
@@ -336,6 +337,7 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
id, href = oeb.manifest.generate(id='html', href=bname)
|
id, href = oeb.manifest.generate(id='html', href=bname)
|
||||||
htmlfile_map[path] = href
|
htmlfile_map[path] = href
|
||||||
item = oeb.manifest.add(id, href, 'text/html')
|
item = oeb.manifest.add(id, href, 'text/html')
|
||||||
|
item.html_input_href = bname
|
||||||
oeb.spine.add(item, True)
|
oeb.spine.add(item, True)
|
||||||
|
|
||||||
self.added_resources = {}
|
self.added_resources = {}
|
||||||
@@ -409,8 +411,9 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
if not islinux:
|
if not islinux:
|
||||||
link = link.lower()
|
link = link.lower()
|
||||||
if link not in self.added_resources:
|
if link not in self.added_resources:
|
||||||
|
bhref = os.path.basename(link)
|
||||||
id, href = self.oeb.manifest.generate(id='added',
|
id, href = self.oeb.manifest.generate(id='added',
|
||||||
href=os.path.basename(link))
|
href=bhref)
|
||||||
self.oeb.log.debug('Added', link)
|
self.oeb.log.debug('Added', link)
|
||||||
self.oeb.container = self.DirContainer(os.path.dirname(link),
|
self.oeb.container = self.DirContainer(os.path.dirname(link),
|
||||||
self.oeb.log)
|
self.oeb.log)
|
||||||
@@ -418,7 +421,9 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
guessed = self.guess_type(href)[0]
|
guessed = self.guess_type(href)[0]
|
||||||
media_type = guessed or self.BINARY_MIME
|
media_type = guessed or self.BINARY_MIME
|
||||||
|
|
||||||
self.oeb.manifest.add(id, href, media_type).data
|
item = self.oeb.manifest.add(id, href, media_type)
|
||||||
|
item.html_input_href = bhref
|
||||||
|
item.data
|
||||||
self.added_resources[link] = href
|
self.added_resources[link] = href
|
||||||
|
|
||||||
nlink = self.added_resources[link]
|
nlink = self.added_resources[link]
|
||||||
|
@@ -793,7 +793,7 @@ class Manifest(object):
|
|||||||
self.oeb.log.warn('File %r does not appear to be (X)HTML'%self.href)
|
self.oeb.log.warn('File %r does not appear to be (X)HTML'%self.href)
|
||||||
nroot = etree.fromstring('<html></html>')
|
nroot = etree.fromstring('<html></html>')
|
||||||
for child in list(data):
|
for child in list(data):
|
||||||
child.getparent.remove(child)
|
child.getparent().remove(child)
|
||||||
nroot.append(child)
|
nroot.append(child)
|
||||||
data = nroot
|
data = nroot
|
||||||
elif not namespace(data.tag):
|
elif not namespace(data.tag):
|
||||||
@@ -927,7 +927,8 @@ class Manifest(object):
|
|||||||
if data is None:
|
if data is None:
|
||||||
if self._loader is None:
|
if self._loader is None:
|
||||||
return None
|
return None
|
||||||
data = self._loader(self.href)
|
data = self._loader(getattr(self, 'html_input_href',
|
||||||
|
self.href))
|
||||||
if not isinstance(data, basestring):
|
if not isinstance(data, basestring):
|
||||||
pass # already parsed
|
pass # already parsed
|
||||||
elif self.media_type.lower() in OEB_DOCS:
|
elif self.media_type.lower() in OEB_DOCS:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user