mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
MOBI Input: Use the visual formatting of the Table of Contents to try to automatically create a multi-level TOC when converting/viewing MOBI files. Fixes #763681 (Private bug)
This commit is contained in:
parent
549b89c82f
commit
bacd84c21d
@ -7,7 +7,7 @@ Code for the conversion of ebook formats and the reading of metadata
|
||||
from various formats.
|
||||
'''
|
||||
|
||||
import traceback, os
|
||||
import traceback, os, re
|
||||
from calibre import CurrentDir
|
||||
|
||||
class ConversionError(Exception):
|
||||
@ -169,3 +169,42 @@ def calibre_cover(title, author_string, series_string=None,
|
||||
lines.append(TextLine(series_string, author_size))
|
||||
return create_cover_page(lines, I('library.png'), output_format='jpg')
|
||||
|
||||
# A (possibly signed) decimal number followed by one of the supported units
UNIT_RE = re.compile(r'^(-*[0-9]*[.]?[0-9]*)\s*(%|em|ex|en|px|mm|cm|in|pt|pc)$')


def unit_convert(value, base, font, dpi):
    '''
    Return value in pts (1pt = 1/72 inch).

    :param value: A number, which is returned unchanged, or a string. A
                  string that is a bare number is treated as a pixel count.
                  Otherwise it must be a number followed by one of the units
                  matched by UNIT_RE.
    :param base:  The length, in pts, that percentages are relative to.
    :param font:  The font size, in pts, used for em, ex and en values.
    :param dpi:   Dots per inch, used to convert pixel values.
    :return:      The length in pts, or ``value`` unchanged if it cannot be
                  interpreted as a length.
    '''
    # Imported here so that this function does not depend on the module's
    # import block. numbers.Real covers int, float (and long on python 2).
    import numbers
    if isinstance(value, numbers.Real):
        return value
    try:
        # A bare number is a size in pixels
        return float(value) * 72.0 / dpi
    except (TypeError, ValueError):
        pass
    m = UNIT_RE.match(value)
    if m is None or not m.group(1):
        return value
    try:
        number = float(m.group(1))
    except ValueError:
        # UNIT_RE also accepts things like '-' or '.' as the numeric part
        return value
    unit = m.group(2)
    result = value
    if unit == '%':
        result = (number / 100.0) * base
    elif unit == 'px':
        result = number * 72.0 / dpi
    elif unit == 'in':
        result = number * 72.0
    elif unit == 'pt':
        result = number
    elif unit == 'em':
        result = number * font
    elif unit in ('ex', 'en'):
        # This is a hack for ex since we have no way to know
        # the x-height of the font
        result = number * font * 0.5
    elif unit == 'pc':
        result = number * 12.0
    elif unit == 'mm':
        # 25.4mm to the inch, 72pt to the inch
        result = number * 72.0 / 25.4
    elif unit == 'cm':
        result = number * 72.0 / 2.54
    return result
|
||||
|
||||
|
||||
|
@ -20,7 +20,7 @@ from calibre.utils.filenames import ascii_filename
|
||||
from calibre.utils.date import parse_date
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.ebooks import DRMError
|
||||
from calibre.ebooks import DRMError, unit_convert
|
||||
from calibre.ebooks.chardet import ENCODING_PATS
|
||||
from calibre.ebooks.mobi import MobiError
|
||||
from calibre.ebooks.mobi.huffcdic import HuffReader
|
||||
@ -258,6 +258,8 @@ class MobiReader(object):
|
||||
}
|
||||
''')
|
||||
self.tag_css_rules = {}
|
||||
self.left_margins = {}
|
||||
self.text_indents = {}
|
||||
|
||||
if hasattr(filename_or_stream, 'read'):
|
||||
stream = filename_or_stream
|
||||
@ -567,9 +569,21 @@ class MobiReader(object):
|
||||
elif tag.tag == 'img':
|
||||
tag.set('width', width)
|
||||
else:
|
||||
styles.append('text-indent: %s' % self.ensure_unit(width))
|
||||
ewidth = self.ensure_unit(width)
|
||||
styles.append('text-indent: %s' % ewidth)
|
||||
try:
|
||||
ewidth_val = unit_convert(ewidth, 12, 500, 166)
|
||||
self.text_indents[tag] = ewidth_val
|
||||
except:
|
||||
pass
|
||||
if width.startswith('-'):
|
||||
styles.append('margin-left: %s' % self.ensure_unit(width[1:]))
|
||||
try:
|
||||
ewidth_val = unit_convert(ewidth[1:], 12, 500, 166)
|
||||
self.left_margins[tag] = ewidth_val
|
||||
except:
|
||||
pass
|
||||
|
||||
if attrib.has_key('align'):
|
||||
align = attrib.pop('align').strip()
|
||||
if align:
|
||||
@ -661,6 +675,26 @@ class MobiReader(object):
|
||||
if hasattr(parent, 'remove'):
|
||||
parent.remove(tag)
|
||||
|
||||
def get_left_whitespace(self, tag):
    '''
    Return the total left whitespace, in pts, in front of ``tag``.

    The left margin and text indent of ``tag`` and of each of its ancestors
    (found via ``getparent()``) are summed. Values recorded in
    ``self.left_margins`` and ``self.text_indents`` take precedence over
    the built-in defaults for <p> and <blockquote>.
    '''

    def as_pts(val):
        # unit_convert() returns its input unchanged when it cannot
        # interpret it, so a recorded value may be a string such as
        # 'auto'. Count those as no whitespace instead of failing.
        try:
            return float(val)
        except (TypeError, ValueError):
            return 0.0

    # NOTE(review): unit_convert takes (value, base, font, dpi), so the
    # calls below pass base=12 and font=500, which looks swapped - confirm.
    def whitespace(tag):
        if tag in self.left_margins:
            lm = self.left_margins[tag]
        elif tag.tag == 'blockquote':
            lm = unit_convert('2em', 12, 500, 166)
        else:
            lm = 0.0
        if tag in self.text_indents:
            ti = self.text_indents[tag]
        elif tag.tag == 'p':
            ti = unit_convert('1.5em', 12, 500, 166)
        else:
            ti = 0.0
        return as_pts(lm) + as_pts(ti)

    ans = 0.0
    parent = tag
    while parent is not None:
        ans += whitespace(parent)
        parent = parent.getparent()

    return ans
|
||||
|
||||
def create_opf(self, htmlfile, guide=None, root=None):
|
||||
mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
|
||||
if mi is None:
|
||||
@ -731,16 +765,45 @@ class MobiReader(object):
|
||||
except:
|
||||
text = ''
|
||||
text = ent_pat.sub(entity_to_unicode, text)
|
||||
tocobj.add_item(toc.partition('#')[0], href[1:],
|
||||
item = tocobj.add_item(toc.partition('#')[0], href[1:],
|
||||
text)
|
||||
item.left_space = int(self.get_left_whitespace(x))
|
||||
found = True
|
||||
if reached and found and x.get('class', None) == 'mbp_pagebreak':
|
||||
break
|
||||
if tocobj is not None:
|
||||
tocobj = self.structure_toc(tocobj)
|
||||
opf.set_toc(tocobj)
|
||||
|
||||
return opf, ncx_manifest_entry
|
||||
|
||||
def structure_toc(self, toc):
    '''
    Build a multi-level TOC from the flat ``toc``, using the ``left_space``
    of each item as its nesting level: the smallest value is the top level,
    the next smallest the second level and so on.

    :param toc: The flat TOC. Every item in it must have a ``left_space``
                attribute.
    :return:    A new TOC with the items nested, or ``toc`` itself if it
                has fewer than 2 or more than 6 distinct indentations.
    '''
    indent_vals = sorted(set(item.left_space for item in toc))
    if len(indent_vals) > 6 or len(indent_vals) < 2:
        # Too many or too few levels, give up
        return toc

    level_map = dict((val, i) for i, val in enumerate(indent_vals))

    # last_found[i] is the most recently added item at level i that can
    # still act as a parent
    last_found = [None for i in indent_vals]

    newtoc = TOC()

    def find_parent(level):
        # Nearest shallower level that has an item, else the root
        for x in reversed(last_found[:level]):
            if x is not None:
                return x
        return newtoc

    for item in toc:
        level = level_map[item.left_space]
        parent = find_parent(level)
        last_found[level] = parent.add_item(item.href, item.fragment,
                item.text)
        # Items at deeper levels belong to an earlier branch. Forget them
        # so that a later item cannot be attached to a parent that comes
        # before this item.
        for i in range(level + 1, len(last_found)):
            last_found[i] = None

    return newtoc
|
||||
|
||||
def sizeof_trailing_entries(self, data):
|
||||
def sizeof_trailing_entry(ptr, psize):
|
||||
|
@ -18,6 +18,7 @@ from cssutils import profile as cssprofiles
|
||||
from lxml import etree
|
||||
from lxml.cssselect import css_to_xpath, ExpressionError, SelectorSyntaxError
|
||||
from calibre import force_unicode
|
||||
from calibre.ebooks import unit_convert
|
||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
|
||||
from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize
|
||||
from calibre.ebooks.oeb.profile import PROFILES
|
||||
@ -444,7 +445,6 @@ class Stylizer(object):
|
||||
|
||||
|
||||
class Style(object):
|
||||
UNIT_RE = re.compile(r'^(-*[0-9]*[.]?[0-9]*)\s*(%|em|ex|en|px|mm|cm|in|pt|pc)$')
|
||||
MS_PAT = re.compile(r'^\s*(mso-|panose-|text-underline|tab-interval)')
|
||||
|
||||
def __init__(self, element, stylizer):
|
||||
@ -507,43 +507,11 @@ class Style(object):
|
||||
return result
|
||||
|
||||
def _unit_convert(self, value, base=None, font=None):
|
||||
' Return value in pts'
|
||||
if isinstance(value, (int, long, float)):
|
||||
return value
|
||||
try:
|
||||
return float(value) * 72.0 / self._profile.dpi
|
||||
except:
|
||||
pass
|
||||
result = value
|
||||
m = self.UNIT_RE.match(value)
|
||||
if m is not None and m.group(1):
|
||||
value = float(m.group(1))
|
||||
unit = m.group(2)
|
||||
if unit == '%':
|
||||
if base is None:
|
||||
base = self.width
|
||||
result = (value / 100.0) * base
|
||||
elif unit == 'px':
|
||||
result = value * 72.0 / self._profile.dpi
|
||||
elif unit == 'in':
|
||||
result = value * 72.0
|
||||
elif unit == 'pt':
|
||||
result = value
|
||||
elif unit == 'em':
|
||||
font = font or self.fontSize
|
||||
result = value * font
|
||||
elif unit in ('ex', 'en'):
|
||||
# This is a hack for ex since we have no way to know
|
||||
# the x-height of the font
|
||||
font = font or self.fontSize
|
||||
result = value * font * 0.5
|
||||
elif unit == 'pc':
|
||||
result = value * 12.0
|
||||
elif unit == 'mm':
|
||||
result = value * 0.04
|
||||
elif unit == 'cm':
|
||||
result = value * 0.40
|
||||
return result
|
||||
'Return value in pts'
|
||||
if base is None:
|
||||
base = self.width
|
||||
font = font or self.fontSize
|
||||
return unit_convert(value, base, font, self._profile.dpi)
|
||||
|
||||
def pt_to_px(self, value):
|
||||
return (self._profile.dpi / 72.0) * value
|
||||
|
Loading…
x
Reference in New Issue
Block a user