MOBI Input: Improve detection of Table of Contents. Fixes #1936 (Since Calibre 0.4.134 metadata in reader doesn't work)

This commit is contained in:
Kovid Goyal 2009-02-26 14:13:43 -08:00
parent b5e9f19502
commit 4d44f2d8bd

View File

@ -235,7 +235,7 @@ class MobiReader(object):
if self.verbose: if self.verbose:
print 'Creating OPF...' print 'Creating OPF...'
ncx = cStringIO.StringIO() ncx = cStringIO.StringIO()
opf = self.create_opf(htmlfile, guide) opf = self.create_opf(htmlfile, guide, root)
opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx) opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx)
ncx = ncx.getvalue() ncx = ncx.getvalue()
if ncx: if ncx:
@ -328,7 +328,7 @@ class MobiReader(object):
except ValueError: except ValueError:
pass pass
def create_opf(self, htmlfile, guide=None): def create_opf(self, htmlfile, guide=None, root=None):
mi = self.book_header.exth.mi mi = self.book_header.exth.mi
opf = OPFCreator(os.path.dirname(htmlfile), mi) opf = OPFCreator(os.path.dirname(htmlfile), mi)
if hasattr(self.book_header.exth, 'cover_offset'): if hasattr(self.book_header.exth, 'cover_offset'):
@ -347,21 +347,27 @@ class MobiReader(object):
if ref.type.lower() == 'toc': if ref.type.lower() == 'toc':
toc = ref.href() toc = ref.href()
if toc: if toc:
index = self.processed_html.find('<a id="%s" name="%s"'%(toc.partition('#')[-1], toc.partition('#')[-1])) elems = root.xpath('//*[@id="%s"]'%toc.partition('#')[-1])
tocobj = None tocobj = None
ent_pat = re.compile(r'&(\S+?);') ent_pat = re.compile(r'&(\S+?);')
if index > -1: if elems:
raw = '<html><body>'+self.processed_html[index:]
root = html.fromstring(raw)
tocobj = TOC() tocobj = TOC()
for a in root.xpath('//a[@href]'): reached = False
try: for x in root.iter():
text = u' '.join([t.strip() for t in a.xpath('descendant::text()')]) if x == elems[-1]:
except: reached = True
text = '' continue
text = ent_pat.sub(entity_to_unicode, text) if reached and x.tag == 'a':
if a.get('href', '').startswith('#'): href = x.get('href', '')
tocobj.add_item(toc.partition('#')[0], a.attrib['href'][1:], text) if href:
try:
text = u' '.join([t.strip() for t in \
x.xpath('descendant::text()')])
except:
text = ''
text = ent_pat.sub(entity_to_unicode, text)
tocobj.add_item(toc.partition('#')[0], href[1:],
text)
if tocobj is not None: if tocobj is not None:
opf.set_toc(tocobj) opf.set_toc(tocobj)