HTML Input: Fix conversion when HTML files in different directories have the same name. Also ignore binary files.

This commit is contained in:
Kovid Goyal 2009-07-10 14:05:28 -06:00
parent 05f02201fb
commit 3f60a90d94
2 changed files with 10 additions and 4 deletions

View File

@@ -328,6 +328,7 @@ class HTMLInput(InputFormatPlugin):
filelist = get_filelist(htmlpath, basedir, opts, log) filelist = get_filelist(htmlpath, basedir, opts, log)
filelist = [f for f in filelist if not f.is_binary]
htmlfile_map = {} htmlfile_map = {}
for f in filelist: for f in filelist:
path = f.path path = f.path
@@ -336,6 +337,7 @@ class HTMLInput(InputFormatPlugin):
id, href = oeb.manifest.generate(id='html', href=bname) id, href = oeb.manifest.generate(id='html', href=bname)
htmlfile_map[path] = href htmlfile_map[path] = href
item = oeb.manifest.add(id, href, 'text/html') item = oeb.manifest.add(id, href, 'text/html')
item.html_input_href = bname
oeb.spine.add(item, True) oeb.spine.add(item, True)
self.added_resources = {} self.added_resources = {}
@@ -409,8 +411,9 @@ class HTMLInput(InputFormatPlugin):
if not islinux: if not islinux:
link = link.lower() link = link.lower()
if link not in self.added_resources: if link not in self.added_resources:
bhref = os.path.basename(link)
id, href = self.oeb.manifest.generate(id='added', id, href = self.oeb.manifest.generate(id='added',
href=os.path.basename(link)) href=bhref)
self.oeb.log.debug('Added', link) self.oeb.log.debug('Added', link)
self.oeb.container = self.DirContainer(os.path.dirname(link), self.oeb.container = self.DirContainer(os.path.dirname(link),
self.oeb.log) self.oeb.log)
@@ -418,7 +421,9 @@ class HTMLInput(InputFormatPlugin):
guessed = self.guess_type(href)[0] guessed = self.guess_type(href)[0]
media_type = guessed or self.BINARY_MIME media_type = guessed or self.BINARY_MIME
self.oeb.manifest.add(id, href, media_type).data item = self.oeb.manifest.add(id, href, media_type)
item.html_input_href = bhref
item.data
self.added_resources[link] = href self.added_resources[link] = href
nlink = self.added_resources[link] nlink = self.added_resources[link]

View File

@@ -793,7 +793,7 @@ class Manifest(object):
self.oeb.log.warn('File %r does not appear to be (X)HTML'%self.href) self.oeb.log.warn('File %r does not appear to be (X)HTML'%self.href)
nroot = etree.fromstring('<html></html>') nroot = etree.fromstring('<html></html>')
for child in list(data): for child in list(data):
child.getparent.remove(child) child.getparent().remove(child)
nroot.append(child) nroot.append(child)
data = nroot data = nroot
elif not namespace(data.tag): elif not namespace(data.tag):
@@ -927,7 +927,8 @@ class Manifest(object):
if data is None: if data is None:
if self._loader is None: if self._loader is None:
return None return None
data = self._loader(self.href) data = self._loader(getattr(self, 'html_input_href',
self.href))
if not isinstance(data, basestring): if not isinstance(data, basestring):
pass # already parsed pass # already parsed
elif self.media_type.lower() in OEB_DOCS: elif self.media_type.lower() in OEB_DOCS: