mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
MOBI Input: Use the visual formatting of the Table of Contents to try to automatically create a multi-level TOC when converting/viewing MOBI files. Fixes #763681 (Private bug)
This commit is contained in:
parent
549b89c82f
commit
bacd84c21d
@ -7,7 +7,7 @@ Code for the conversion of ebook formats and the reading of metadata
|
|||||||
from various formats.
|
from various formats.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import traceback, os
|
import traceback, os, re
|
||||||
from calibre import CurrentDir
|
from calibre import CurrentDir
|
||||||
|
|
||||||
class ConversionError(Exception):
|
class ConversionError(Exception):
|
||||||
@ -169,3 +169,42 @@ def calibre_cover(title, author_string, series_string=None,
|
|||||||
lines.append(TextLine(series_string, author_size))
|
lines.append(TextLine(series_string, author_size))
|
||||||
return create_cover_page(lines, I('library.png'), output_format='jpg')
|
return create_cover_page(lines, I('library.png'), output_format='jpg')
|
||||||
|
|
||||||
|
# Matches a CSS length: an optional sign/number followed by a known unit.
# Group 1 is the numeric part (may be empty), group 2 is the unit.
UNIT_RE = re.compile(r'^(-*[0-9]*[.]?[0-9]*)\s*(%|em|ex|en|px|mm|cm|in|pt|pc)$')


def unit_convert(value, base, font, dpi):
    '''
    Return value in pts

    :param value: A number (returned unchanged), a bare numeric string
        (treated as a pixel count) or a CSS length string such as
        ``'1.5em'``, ``'12px'`` or ``'50%'``.
    :param base: The length that percentage values are relative to.
    :param font: The font size that ``em``, ``ex`` and ``en`` values are
        relative to.
    :param dpi: The resolution used to convert pixels to pts.
    :return: The converted length, or ``value`` unchanged when it cannot be
        interpreted as a length.
    '''
    try:
        numeric_types = (int, long, float)
    except NameError:
        # No separate ``long`` type on this interpreter
        numeric_types = (int, float)
    if isinstance(value, numeric_types):
        return value

    # A string holding only a number is a pixel count
    try:
        return float(value) * 72.0 / dpi
    except (TypeError, ValueError, ZeroDivisionError):
        pass

    m = UNIT_RE.match(value)
    if m is None or not m.group(1):
        return value
    try:
        number = float(m.group(1))
    except ValueError:
        # The pattern also accepts non-numbers such as '-', '.' or '--1';
        # treat them like any other unparseable value.
        return value

    unit = m.group(2)
    if unit == '%':
        return (number / 100.0) * base
    if unit == 'px':
        return number * 72.0 / dpi
    if unit == 'in':
        return number * 72.0
    if unit == 'pt':
        return number
    if unit == 'em':
        return number * font
    if unit in ('ex', 'en'):
        # This is a hack for ex since we have no way to know
        # the x-height of the font
        return number * font * 0.5
    if unit == 'pc':
        return number * 12.0
    if unit == 'mm':
        # 1in = 25.4mm = 72pt
        return number * 72.0 / 25.4
    if unit == 'cm':
        # 1in = 2.54cm = 72pt
        return number * 72.0 / 2.54
    return value
|
||||||
|
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@ from calibre.utils.filenames import ascii_filename
|
|||||||
from calibre.utils.date import parse_date
|
from calibre.utils.date import parse_date
|
||||||
from calibre.utils.cleantext import clean_ascii_chars
|
from calibre.utils.cleantext import clean_ascii_chars
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.ebooks import DRMError
|
from calibre.ebooks import DRMError, unit_convert
|
||||||
from calibre.ebooks.chardet import ENCODING_PATS
|
from calibre.ebooks.chardet import ENCODING_PATS
|
||||||
from calibre.ebooks.mobi import MobiError
|
from calibre.ebooks.mobi import MobiError
|
||||||
from calibre.ebooks.mobi.huffcdic import HuffReader
|
from calibre.ebooks.mobi.huffcdic import HuffReader
|
||||||
@ -258,6 +258,8 @@ class MobiReader(object):
|
|||||||
}
|
}
|
||||||
''')
|
''')
|
||||||
self.tag_css_rules = {}
|
self.tag_css_rules = {}
|
||||||
|
self.left_margins = {}
|
||||||
|
self.text_indents = {}
|
||||||
|
|
||||||
if hasattr(filename_or_stream, 'read'):
|
if hasattr(filename_or_stream, 'read'):
|
||||||
stream = filename_or_stream
|
stream = filename_or_stream
|
||||||
@ -567,9 +569,21 @@ class MobiReader(object):
|
|||||||
elif tag.tag == 'img':
|
elif tag.tag == 'img':
|
||||||
tag.set('width', width)
|
tag.set('width', width)
|
||||||
else:
|
else:
|
||||||
styles.append('text-indent: %s' % self.ensure_unit(width))
|
ewidth = self.ensure_unit(width)
|
||||||
|
styles.append('text-indent: %s' % ewidth)
|
||||||
|
try:
|
||||||
|
ewidth_val = unit_convert(ewidth, 12, 500, 166)
|
||||||
|
self.text_indents[tag] = ewidth_val
|
||||||
|
except:
|
||||||
|
pass
|
||||||
if width.startswith('-'):
|
if width.startswith('-'):
|
||||||
styles.append('margin-left: %s' % self.ensure_unit(width[1:]))
|
styles.append('margin-left: %s' % self.ensure_unit(width[1:]))
|
||||||
|
try:
|
||||||
|
ewidth_val = unit_convert(ewidth[1:], 12, 500, 166)
|
||||||
|
self.left_margins[tag] = ewidth_val
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
if attrib.has_key('align'):
|
if attrib.has_key('align'):
|
||||||
align = attrib.pop('align').strip()
|
align = attrib.pop('align').strip()
|
||||||
if align:
|
if align:
|
||||||
@ -661,6 +675,26 @@ class MobiReader(object):
|
|||||||
if hasattr(parent, 'remove'):
|
if hasattr(parent, 'remove'):
|
||||||
parent.remove(tag)
|
parent.remove(tag)
|
||||||
|
|
||||||
|
def get_left_whitespace(self, tag):
    '''
    Return the combined left margin and text indent of ``tag`` and all of
    its ancestors, as computed by unit_convert().

    Values recorded in ``self.left_margins`` and ``self.text_indents`` take
    precedence. Otherwise a ``<p>`` is given a text indent of 1.5em, a
    ``<blockquote>`` a left margin of 2em, and everything else zero.
    '''
    # NOTE(review): unit_convert() is declared as (value, base, font, dpi),
    # so the calls below pass base=12 and font=500 -- confirm this ordering
    # is intended.
    total = 0.0
    node = tag
    while node is not None:
        if node in self.left_margins:
            margin = self.left_margins[node]
        elif node.tag == 'blockquote':
            margin = unit_convert('2em', 12, 500, 166)
        else:
            margin = 0.0

        if node in self.text_indents:
            indent = self.text_indents[node]
        elif node.tag == 'p':
            indent = unit_convert('1.5em', 12, 500, 166)
        else:
            indent = 0.0

        total += margin + indent
        node = node.getparent()

    return total
|
||||||
|
|
||||||
def create_opf(self, htmlfile, guide=None, root=None):
|
def create_opf(self, htmlfile, guide=None, root=None):
|
||||||
mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
|
mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
|
||||||
if mi is None:
|
if mi is None:
|
||||||
@ -731,16 +765,45 @@ class MobiReader(object):
|
|||||||
except:
|
except:
|
||||||
text = ''
|
text = ''
|
||||||
text = ent_pat.sub(entity_to_unicode, text)
|
text = ent_pat.sub(entity_to_unicode, text)
|
||||||
tocobj.add_item(toc.partition('#')[0], href[1:],
|
item = tocobj.add_item(toc.partition('#')[0], href[1:],
|
||||||
text)
|
text)
|
||||||
|
item.left_space = int(self.get_left_whitespace(x))
|
||||||
found = True
|
found = True
|
||||||
if reached and found and x.get('class', None) == 'mbp_pagebreak':
|
if reached and found and x.get('class', None) == 'mbp_pagebreak':
|
||||||
break
|
break
|
||||||
if tocobj is not None:
|
if tocobj is not None:
|
||||||
|
tocobj = self.structure_toc(tocobj)
|
||||||
opf.set_toc(tocobj)
|
opf.set_toc(tocobj)
|
||||||
|
|
||||||
return opf, ncx_manifest_entry
|
return opf, ncx_manifest_entry
|
||||||
|
|
||||||
|
def structure_toc(self, toc):
    '''
    Build a nested TOC from the flat ``toc`` using the ``left_space`` value
    of every item as its indentation level.

    Each distinct ``left_space`` value becomes one level, ordered from
    smallest to largest. An item is attached to the most recently added item
    of any shallower level, or to the root when there is none.

    ``toc`` itself is returned untouched when there are fewer than two or
    more than six distinct indentation values.
    '''
    levels = sorted(set(entry.left_space for entry in toc))
    if not 2 <= len(levels) <= 6:
        # Too many or too few levels, give up
        return toc

    depth_of = dict((space, depth) for depth, space in enumerate(levels))
    # most_recent[d] is the last item added at depth d, if any
    most_recent = [None] * len(levels)
    nested = TOC()

    for entry in toc:
        depth = depth_of[entry.left_space]
        owner = nested
        for candidate in reversed(most_recent[:depth]):
            if candidate is not None:
                owner = candidate
                break
        most_recent[depth] = owner.add_item(entry.href, entry.fragment,
                entry.text)

    return nested
|
||||||
|
|
||||||
def sizeof_trailing_entries(self, data):
|
def sizeof_trailing_entries(self, data):
|
||||||
def sizeof_trailing_entry(ptr, psize):
|
def sizeof_trailing_entry(ptr, psize):
|
||||||
|
@ -18,6 +18,7 @@ from cssutils import profile as cssprofiles
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
from lxml.cssselect import css_to_xpath, ExpressionError, SelectorSyntaxError
|
from lxml.cssselect import css_to_xpath, ExpressionError, SelectorSyntaxError
|
||||||
from calibre import force_unicode
|
from calibre import force_unicode
|
||||||
|
from calibre.ebooks import unit_convert
|
||||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
|
||||||
from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize
|
from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize
|
||||||
from calibre.ebooks.oeb.profile import PROFILES
|
from calibre.ebooks.oeb.profile import PROFILES
|
||||||
@ -444,7 +445,6 @@ class Stylizer(object):
|
|||||||
|
|
||||||
|
|
||||||
class Style(object):
|
class Style(object):
|
||||||
UNIT_RE = re.compile(r'^(-*[0-9]*[.]?[0-9]*)\s*(%|em|ex|en|px|mm|cm|in|pt|pc)$')
|
|
||||||
MS_PAT = re.compile(r'^\s*(mso-|panose-|text-underline|tab-interval)')
|
MS_PAT = re.compile(r'^\s*(mso-|panose-|text-underline|tab-interval)')
|
||||||
|
|
||||||
def __init__(self, element, stylizer):
|
def __init__(self, element, stylizer):
|
||||||
@ -507,43 +507,11 @@ class Style(object):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
def _unit_convert(self, value, base=None, font=None):
    '''
    Return value in pts, by delegating to unit_convert() with this style's
    own defaults.

    ``base`` falls back to ``self.width`` when it is None, ``font`` falls
    back to ``self.fontSize`` when it is None or otherwise false (including
    0), and the dpi is taken from ``self._profile``.
    '''
    effective_base = self.width if base is None else base
    effective_font = font or self.fontSize
    return unit_convert(value, effective_base, effective_font,
            self._profile.dpi)
|
|
||||||
|
|
||||||
def pt_to_px(self, value):
|
def pt_to_px(self, value):
|
||||||
return (self._profile.dpi / 72.0) * value
|
return (self._profile.dpi / 72.0) * value
|
||||||
|
Loading…
x
Reference in New Issue
Block a user