UNIT_RE = re.compile(r'^(-*[0-9]*[.]?[0-9]*)\s*(%|em|ex|en|px|mm|cm|in|pt|pc)$')

try:
    _NUMBER_TYPES = (int, long, float)
except NameError:
    # Interpreters without a separate ``long`` type
    _NUMBER_TYPES = (int, float)

# Points per unit for the absolute length units (1in == 72pt == 2.54cm)
_PTS_PER_ABSOLUTE_UNIT = {
    'pt': 1.0,
    'pc': 12.0,
    'in': 72.0,
    'mm': 72.0 / 25.4,
    'cm': 72.0 / 2.54,
}


def unit_convert(value, base, font, dpi):
    '''
    Return value in pts.

    :param value: A number (returned unchanged), a bare numeric string
                  (interpreted as pixels) or a string made of a number
                  followed by one of the units accepted by ``UNIT_RE``.
    :param base:  The length that a percentage is taken of.
    :param font:  The font size that ``em``, ``ex`` and ``en`` are
                  multiples of.
    :param dpi:   Pixels per inch, used to convert pixel values.
    :return:      The converted number, or ``value`` itself when it is a
                  string that cannot be interpreted as a length.
    '''
    if isinstance(value, _NUMBER_TYPES):
        return value
    try:
        # A number without a unit is a pixel count
        return float(value) * 72.0 / dpi
    except (TypeError, ValueError):
        pass

    m = UNIT_RE.match(value)
    if m is None or not m.group(1):
        return value
    try:
        number = float(m.group(1))
    except ValueError:
        # UNIT_RE also lets through things like '-' or '.', which are not
        # numbers. Treat them like any other unparsable string.
        return value

    unit = m.group(2)
    if unit in _PTS_PER_ABSOLUTE_UNIT:
        return number * _PTS_PER_ABSOLUTE_UNIT[unit]
    if unit == '%':
        return (number / 100.0) * base
    if unit == 'px':
        return number * 72.0 / dpi
    if unit == 'em':
        return number * font
    if unit in ('ex', 'en'):
        # This is a hack for ex since we have no way to know
        # the x-height of the font
        return number * font * 0.5
    return value
def get_left_whitespace(self, tag):
    '''
    Return the accumulated left whitespace, in pts, of ``tag``.

    The left margin and the text indent of ``tag`` and of every one of
    its ancestors (reached via ``getparent()``) are added up. Values
    recorded in ``self.left_margins`` and ``self.text_indents`` win over
    the built-in defaults: a text indent of 1.5em for ``p`` and a left
    margin of 2em for ``blockquote``.
    '''
    # NOTE(review): unit_convert takes (value, base, font, dpi), so the
    # calls below pass base=12 and font=500 -- confirm that this is the
    # intended order.
    total = 0.0
    node = tag
    while node is not None:
        default_margin = default_indent = 0.0
        if node.tag == 'p':
            default_indent = unit_convert('1.5em', 12, 500, 166)
        if node.tag == 'blockquote':
            default_margin = unit_convert('2em', 12, 500, 166)
        margin = self.left_margins.get(node, default_margin)
        indent = self.text_indents.get(node, default_indent)
        total += margin + indent
        node = node.getparent()
    return total
def structure_toc(self, toc):
    '''
    Turn the flat ``toc`` into a nested one based on entry indentation.

    Every distinct ``left_space`` value found on the entries of ``toc``
    is treated as one nesting level, the smallest value being the
    outermost level. Each entry is re-added (``href``, ``fragment``,
    ``text``) under the most recently added entry of a shallower level,
    or at the top level when there is none.

    :param toc: Iterable of entries carrying ``left_space``, ``href``,
                ``fragment`` and ``text`` attributes.
    :return:    A new ``TOC`` holding the nested entries, or ``toc``
                itself when it has fewer than two or more than six
                distinct indentation values.
    '''
    indent_vals = sorted(set(item.left_space for item in toc))
    if len(indent_vals) > 6 or len(indent_vals) < 2:
        # Too many or too few levels, give up
        return toc

    level_of = dict((indent, level)
                    for level, indent in enumerate(indent_vals))
    # last_found[n] is the most recent entry added at level n inside the
    # branch that is currently being built
    last_found = [None] * len(indent_vals)
    newtoc = TOC()

    for item in toc:
        level = level_of[item.left_space]
        parent = newtoc
        for candidate in reversed(last_found[:level]):
            if candidate is not None:
                parent = candidate
                break
        last_found[level] = parent.add_item(item.href, item.fragment,
                item.text)
        # Anything remembered at a deeper level belongs to the branch
        # that just ended. Forget it, otherwise a later, deeper entry
        # would be attached to that stale node instead of to this one.
        for deeper in range(level + 1, len(last_found)):
            last_found[deeper] = None

    return newtoc
def _unit_convert(self, value, base=None, font=None):
    '''
    Return value in pts, using this style for the missing context.

    :param value: The number or length string to convert.
    :param base:  The length percentages refer to. Defaults to
                  ``self.width`` when it is None.
    :param font:  The font size font-relative units refer to. Defaults
                  to ``self.fontSize`` when it is not given.
    :return:      Whatever ``unit_convert`` returns for ``value`` with
                  the dpi of ``self._profile``.
    '''
    if base is None:
        base = self.width
    # An explicit font size of 0 is a real value and must not be replaced
    # by the font size of this style
    if not font and font != 0:
        font = self.fontSize
    return unit_convert(value, base, font, self._profile.dpi)