diff --git a/setup.py b/setup.py index a0bfbcaa0e..d31ea9d523 100644 --- a/setup.py +++ b/setup.py @@ -166,7 +166,7 @@ if __name__ == '__main__': metadata_sqlite = 'library/metadata_sqlite.sql', jquery = 'gui2/viewer/jquery.js', jquery_scrollTo = 'gui2/viewer/jquery_scrollTo.js', - html_css = 'ebooks/lit/html.css', + html_css = 'ebooks/oeb/html.css', ) DEST = os.path.join('src', APPNAME, 'resources.py') diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 90df14e2c0..461c067382 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -15,7 +15,7 @@ from lxml import etree from calibre.ebooks.lit import LitError from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP import calibre.ebooks.lit.mssha1 as mssha1 -from calibre.ebooks.lit.oeb import urlnormalize +from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks import DRMError from calibre import plugins lzx, lxzerror = plugins['lzx'] diff --git a/src/calibre/ebooks/lit/writer.py b/src/calibre/ebooks/lit/writer.py index af11f04eb1..1d8f9020bd 100644 --- a/src/calibre/ebooks/lit/writer.py +++ b/src/calibre/ebooks/lit/writer.py @@ -23,11 +23,12 @@ from urllib import unquote as urlunquote from lxml import etree from calibre.ebooks.lit.reader import DirectoryEntry import calibre.ebooks.lit.maps as maps -from calibre.ebooks.lit.oeb import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \ +from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \ CSS_MIME, OPF_MIME, XML_NS, XML -from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize, xpath -from calibre.ebooks.lit.oeb import prefixname, FauxLogger, OEBBook -from calibre.ebooks.lit.stylizer import Stylizer +from calibre.ebooks.oeb.base import namespace, barename, prefixname, \ + urlnormalize, xpath +from calibre.ebooks.oeb.base import FauxLogger, OEBBook +from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.lit.lzx import Compressor import calibre from calibre import LoggingInterface diff --git a/src/calibre/ebooks/oeb/__init__.py b/src/calibre/ebooks/oeb/__init__.py new file mode 100644 index 0000000000..4f8588535f --- /dev/null +++ b/src/calibre/ebooks/oeb/__init__.py @@ -0,0 +1,2 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. Vandegrift ' diff --git a/src/calibre/ebooks/lit/oeb.py b/src/calibre/ebooks/oeb/base.py similarity index 90% rename from src/calibre/ebooks/lit/oeb.py rename to src/calibre/ebooks/oeb/base.py index bc705f9c64..137448d155 100644 --- a/src/calibre/ebooks/lit/oeb.py +++ b/src/calibre/ebooks/oeb/base.py @@ -38,12 +38,14 @@ def OPF(name): return '{%s}%s' % (OPF2_NS, name) def DC(name): return '{%s}%s' % (DC11_NS, name) def NCX(name): return '{%s}%s' % (NCX_NS, name) +EPUB_MIME = 'application/epub+zip' XHTML_MIME = 'application/xhtml+xml' CSS_MIME = 'text/css' NCX_MIME = 'application/x-dtbncx+xml' OPF_MIME = 'application/oebps-package+xml' OEB_DOC_MIME = 'text/x-oeb1-document' OEB_CSS_MIME = 'text/x-oeb1-css' +OPENTYPE_MIME = 'font/opentype' OEB_STYLES = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css']) OEB_DOCS = set([XHTML_MIME, 'text/html', OEB_DOC_MIME, 'text/x-oeb-document']) @@ -75,7 +77,14 @@ def prefixname(name, nsrmap): def xpath(elem, expr): return elem.xpath(expr, namespaces=XPNSMAP) -URL_UNSAFE = r"""`!@#$%^&*[](){}?+=;:'",<>\| """ +def xml2str(root): + return etree.tostring(root, encoding='utf-8', xml_declaration=True) + +ASCII_CHARS = set(chr(x) for x in xrange(128)) +URL_SAFE = set(u'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + u'abcdefghijklmnopqrstuvwxyz' + u'0123456789' u'_.-/~') +URL_UNSAFE = ASCII_CHARS - URL_SAFE def urlquote(href): result = [] for char in href: @@ -116,6 +125,9 @@ class DirContainer(AbstractContainer): def write(self, path, data): path = os.path.join(self.rootdir, path) + dir = os.path.dirname(path) + if not os.path.isdir(dir): + os.makedirs(dir) with open(urlunquote(path), 'wb') as f: return f.write(data) @@ -123,6 +135,21 @@ class DirContainer(AbstractContainer): path = os.path.join(self.rootdir, path) return os.path.isfile(urlunquote(path)) +class DirWriter(object): + def __init__(self, version=2.0): + self.version = version + + def dump(self, oeb, path): + if not os.path.isdir(path): + os.mkdir(path) + output = DirContainer(path) + for item in oeb.manifest.values(): + output.write(item.href, str(item)) + metadata = oeb.to_opf2() if self.version == 2 else oeb.to_opf1() + for href, data in metadata.values(): + output.write(href, xml2str(data)) + return + class Metadata(object): TERMS = set(['contributor', 'coverage', 'creator', 'date', 'description', @@ -277,11 +304,33 @@ class Manifest(object): return property(fget, fset, fdel) data = data() + def __str__(self): + data = self.data + if isinstance(data, etree._Element): + return xml2str(data) + return str(data) + def __cmp__(self, other): result = cmp(self.spine_position, other.spine_position) if result != 0: return result return cmp(self.id, other.id) + + def relhref(self, href): + if '/' not in self.href: + return href + base = os.path.dirname(self.href).split('/') + target, frag = urldefrag(href) + target = target.split('/') + for index in xrange(min(len(base), len(target))): + if base[index] != target[index]: break + else: + index += 1 + relhref = (['..'] * (len(base) - index)) + target[index:] + relhref = '/'.join(relhref) + if frag: + relhref = '#'.join((relhref, frag)) + return relhref def abshref(self, href): if '/' not in self.href: @@ -361,7 +410,7 @@ class Manifest(object): def to_opf2(self, parent=None): elem = element(parent, OPF('manifest')) - for item in self.items.values(): + for item in self.ids.values(): attrib = {'id': item.id, 'href': item.href, 'media-type': item.media_type} if item.fallback: @@ -375,18 +424,35 @@ class Spine(object): self.oeb = oeb self.items = [] - def add(self, item, linear): + def _linear(self, linear): if isinstance(linear, StringTypes): linear = linear.lower() if linear is None or linear in ('yes', 'true'): linear = True elif linear in ('no', 'false'): linear = False - item.linear = linear + return linear + + def add(self, item, linear=None): + item.linear = self._linear(linear) item.spine_position = len(self.items) self.items.append(item) return item + def insert(self, index, item, linear): + item.linear = self._linear(linear) + item.spine_position = index + self.items.insert(index, item) + for i in xrange(index, len(self.items)): + self.items[i].spine_position = i + return item + + def remove(self, item): + index = item.spine_position + self.items.pop(index) + for i in xrange(index, len(self.items)): + self.items[i].spine_position = i + def __iter__(self): for item in self.items: yield item @@ -493,6 +559,12 @@ class TOC(object): node = TOC(title, href, klass, id) self.nodes.append(node) return node + + def iterdescendants(self): + for node in self.nodes: + yield node + for child in node.iterdescendants(): + yield child def __iter__(self): for node in self.nodes: @@ -500,6 +572,15 @@ class TOC(object): def __getitem__(self, index): return self.nodes[index] + + def autolayer(self): + prev = None + for node in list(self.nodes): + if prev and urldefrag(prev.href)[0] == urldefrag(node.href)[0]: + self.nodes.remove(node) + prev.nodes.append(node) + else: + prev = node def depth(self, level=0): if self.nodes: @@ -533,14 +614,15 @@ class TOC(object): class OEBBook(object): - def __init__(self, opfpath, container=None, logger=FauxLogger()): - if not container: + def __init__(self, opfpath=None, container=None, logger=FauxLogger()): + if opfpath and not container: container = DirContainer(os.path.dirname(opfpath)) opfpath = os.path.basename(opfpath) self.container = container self.logger = logger - opf = self._read_opf(opfpath) - self._all_from_opf(opf) + if opfpath or container: + opf = self._read_opf(opfpath) + self._all_from_opf(opf) def _convert_opf1(self, opf): nroot = etree.Element(OPF('package'), diff --git a/src/calibre/ebooks/lit/html.css b/src/calibre/ebooks/oeb/html.css similarity index 87% rename from src/calibre/ebooks/lit/html.css rename to src/calibre/ebooks/oeb/html.css index 9401b19cf2..a454b9b716 100644 --- a/src/calibre/ebooks/lit/html.css +++ b/src/calibre/ebooks/oeb/html.css @@ -45,7 +45,6 @@ html, div, map, dt, isindex, form { body { display: block; - margin: 8px; } p, dl, multicol { @@ -59,7 +58,7 @@ dd { blockquote { display: block; - margin: 1em 40px; + margin: 1em; } address { @@ -74,7 +73,7 @@ center { blockquote[type=cite] { display: block; - margin: 1em 0px; + margin: 1em 0em; border-color: blue; border-width: thin; } @@ -234,14 +233,6 @@ th { /* inlines */ -q:before { - content: open-quote; -} - -q:after { - content: close-quote; -} - b, strong { font-weight: bolder; } @@ -392,22 +383,6 @@ spacer { float: none ! important; } -/* focusable content: anything w/ tabindex >=0 is focusable */ -abbr:focus, acronym:focus, address:focus, applet:focus, b:focus, -base:focus, big:focus, blockquote:focus, br:focus, canvas:focus, caption:focus, -center:focus, cite:focus, code:focus, col:focus, colgroup:focus, dd:focus, -del:focus, dfn:focus, dir:focus, div:focus, dl:focus, dt:focus, em:focus, -fieldset:focus, font:focus, form:focus, h1:focus, h2:focus, h3:focus, h4:focus, -h5:focus, h6:focus, hr:focus, i:focus, img:focus, ins:focus, -kbd:focus, label:focus, legend:focus, li:focus, link:focus, menu:focus, -object:focus, ol:focus, p:focus, pre:focus, q:focus, s:focus, samp:focus, -small:focus, span:focus, strike:focus, strong:focus, sub:focus, sup:focus, -table:focus, tbody:focus, td:focus, tfoot:focus, th:focus, thead:focus, -tr:focus, tt:focus, u:focus, ul:focus, var:focus { - /* Don't specify the outline-color, we should always use initial value. */ - outline: 1px dotted; -} - /* hidden elements */ area, base, basefont, head, meta, script, style, title, noembed, param, link { diff --git a/src/calibre/ebooks/lit/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py similarity index 78% rename from src/calibre/ebooks/lit/stylizer.py rename to src/calibre/ebooks/oeb/stylizer.py index 7a89474d89..28f3218fb6 100644 --- a/src/calibre/ebooks/lit/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -16,16 +16,19 @@ import itertools import types import re import copy +from itertools import izip import cssutils from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \ CSSValueList, cssproperties from lxml import etree -from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES -from calibre.ebooks.lit.oeb import barename, urlnormalize +from lxml.cssselect import css_to_xpath, ExpressionError +from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES +from calibre.ebooks.oeb.base import barename, urlnormalize from calibre.resources import html_css +XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS HTML_CSS_STYLESHEET = cssutils.parseString(html_css) -XHTML_CSS_NAMESPACE = "@namespace url(http://www.w3.org/1999/xhtml);\n" +HTML_CSS_STYLESHEET.namespaces['h'] = XHTML_NS INHERITED = set(['azimuth', 'border-collapse', 'border-spacing', 'caption-side', 'color', 'cursor', 'direction', 'elevation', @@ -82,35 +85,48 @@ DEFAULTS = {'azimuth': 'center', 'background-attachment': 'scroll', FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large']) -FONT_SIZE_LIST = [('xx-small', 1, 6.), - ('x-small', None, 7.), - ('small', 2, 8.), - ('medium', 3, 9.), - ('large', 4, 11.), - ('x-large', 5, 13.), - ('xx-large', 6, 15.), - (None, 7, 17.)] +FONT_SIZES = [('xx-small', 1), + ('x-small', None), + ('small', 2), + ('medium', 3), + ('large', 4), + ('x-large', 5), + ('xx-large', 6), + (None, 7)] -FONT_SIZE_BY_NAME = {} -FONT_SIZE_BY_NUM = {} -for name, num, size in FONT_SIZE_LIST: - FONT_SIZE_BY_NAME[name] = size - FONT_SIZE_BY_NUM[num] = size XPNSMAP = {'h': XHTML_NS,} def xpath(elem, expr): return elem.xpath(expr, namespaces=XPNSMAP) +class CSSSelector(etree.XPath): + def __init__(self, css, namespaces=XPNSMAP): + path = css_to_xpath(css) + etree.XPath.__init__(self, path, namespaces=namespaces) + self.css = css + + def __repr__(self): + return '<%s %s for %r>' % ( + self.__class__.__name__, + hex(abs(id(self)))[2:], + self.css) + class Page(object): - def __init__(self, width, height, dpi): - self.width = float(width) - self.height = float(height) + def __init__(self, width, height, dpi, fbase, fsizes): + self.width = (float(width) / dpi) * 72. + self.height = (float(height) / dpi) * 72. self.dpi = float(dpi) + self.fbase = float(fbase) + self.fsizes = [] + for (name, num), size in izip(FONT_SIZES, fsizes): + self.fsizes.append((name, num, float(size))) + self.fnames = dict((name, sz) for name, _, sz in self.fsizes if name) + self.fnums = dict((num, sz) for _, num, sz in self.fsizes if num) class Profiles(object): - PRS500 = Page(584, 754, 168.451) - PRS505 = PRS500 + PRS505 = Page(584, 754, 168.451, 12, [7.5, 9, 10, 12, 15.5, 20, 22, 24]) + MSLIT = Page(652, 480, 100.0, 13, [10, 11, 13, 16, 18, 20, 22, 26]) class Stylizer(object): @@ -126,12 +142,13 @@ class Stylizer(object): parser = cssutils.CSSParser() parser.setFetcher(lambda path: ('utf-8', oeb.container.read(path))) for elem in head: - tag = barename(elem.tag) - if tag == 'style': - text = ''.join(elem.text) + if elem.tag == XHTML('style') and elem.text \ + and elem.get('type', CSS_MIME) in OEB_STYLES: + text = XHTML_CSS_NAMESPACE + elem.text stylesheet = parser.parseString(text, href=cssname) + stylesheet.namespaces['h'] = XHTML_NS stylesheets.append(stylesheet) - elif tag == 'link' \ + elif elem.tag == XHTML('link') and elem.get('href') \ and elem.get('rel', 'stylesheet') == 'stylesheet' \ and elem.get('type', CSS_MIME) in OEB_STYLES: href = urlnormalize(elem.attrib['href']) @@ -143,11 +160,13 @@ class Stylizer(object): data = XHTML_CSS_NAMESPACE data += oeb.manifest.hrefs[path].data stylesheet = parser.parseString(data, href=path) + stylesheet.namespaces['h'] = XHTML_NS self.STYLESHEETS[path] = stylesheet stylesheets.append(stylesheet) rules = [] index = 0 self.stylesheets = set() + self.page_rule = {} for stylesheet in stylesheets: href = stylesheet.href self.stylesheets.add(href) @@ -157,6 +176,16 @@ class Stylizer(object): rules.sort() self.rules = rules self._styles = {} + for _, _, cssdict, text, _ in rules: + try: + selector = CSSSelector(text) + except ExpressionError, e: + continue + for elem in selector(tree): + self.style(elem)._update_cssdict(cssdict) + for elem in tree.xpath('//*[@style]'): + self.style(elem)._apply_style_tag() + def flatten_rule(self, rule, href, index): results = [] @@ -169,7 +198,7 @@ class Stylizer(object): results.append((specificity, selector, style, text, href)) elif isinstance(rule, CSSPageRule): style = self.flatten_style(rule.style) - results.append(((0, 0, 0, 0), [], style, '@page', href)) + self.page_rule.update(style) return results def flatten_style(self, cssstyle): @@ -186,7 +215,7 @@ class Stylizer(object): size = style['font-size'] if size == 'normal': size = 'medium' if size in FONT_SIZE_NAMES: - style['font-size'] = "%dpt" % FONT_SIZE_BY_NAME[size] + style['font-size'] = "%dpt" % self.page.fnames[size] return style def _normalize_edge(self, cssvalue, name): @@ -233,9 +262,10 @@ class Stylizer(object): return style def style(self, element): - try: return self._styles[element] - except: pass - return Style(element, self) + try: + return self._styles[element] + except KeyError: + return Style(element, self) def stylesheet(self, name, font_scale=None): rules = [] @@ -250,74 +280,23 @@ class Stylizer(object): rules.append('%s {\n %s;\n}' % (selector, style)) return '\n'.join(rules) + class Style(object): def __init__(self, element, stylizer): self._element = element self._page = stylizer.page self._stylizer = stylizer - self._style = self._assemble_style(element, stylizer) + self._style = {} stylizer._styles[element] = self + + def _update_cssdict(self, cssdict): + self._style.update(cssdict) - def _assemble_style(self, element, stylizer): - result = {} - rules = stylizer.rules - for _, selector, style, _, _ in rules: - if self._selects_element(element, selector): - result.update(style) - try: - style = CSSStyleDeclaration(element.attrib['style']) - result.update(stylizer.flatten_style(style)) - except KeyError: - pass - return result - - def _selects_element(self, element, selector): - def _selects_element(element, items, index): - if index == -1: - return True - item = items[index] - if item.type == 'universal': - pass - elif item.type == 'type-selector': - name1 = ("{%s}%s" % item.value).lower() - name2 = element.tag.lower() - if name1 != name2: - return False - elif item.type == 'id': - name1 = item.value[1:] - name2 = element.get('id', '') - if name1 != name2: - return False - elif item.type == 'class': - name = item.value[1:].lower() - classes = element.get('class', '').lower().split() - if name not in classes: - return False - elif item.type == 'child': - parent = element.getparent() - if parent is None: - return False - element = parent - elif item.type == 'descendant': - element = element.getparent() - while element is not None: - if _selects_element(element, items, index - 1): - return True - element = element.getparent() - return False - elif item.type == 'pseudo-class': - if item.value == ':first-child': - e = element.getprevious() - if e is not None: - return False - else: - return False - elif item.type == 'pseudo-element': - return False - else: - return False - return _selects_element(element, items, index - 1) - return _selects_element(element, selector, len(selector) - 1) + def _apply_style_tag(self): + attrib = self._element.attrib + if 'style' in attrib: + style = CSSStyleDeclaration(attrib['style']) + self._style.update(self._stylizer.flatten_style(style)) def _has_parent(self): parent = self._element.getparent() @@ -383,18 +362,19 @@ class Style(object): result = None factor = None if value == 'inherit': - value = 'medium' + # We should only see this if the root element + value = self._page.fbase if value in FONT_SIZE_NAMES: - result = FONT_SIZE_BY_NAME[value] + result = self._page.fnames[value] elif value == 'smaller': factor = 1.0/1.2 - for _, _, size in FONT_SIZE_LIST: + for _, _, size in self._page.fsizes: if base <= size: break factor = None result = size elif value == 'larger': factor = 1.2 - for _, _, size in reversed(FONT_SIZE_LIST): + for _, _, size in reversed(self._page.fsizes): if base >= size: break factor = None result = size @@ -410,7 +390,7 @@ class Style(object): styles = self._stylizer._styles base = styles[self._element.getparent()].fontSize else: - base = normalize_fontsize(DEFAULTS['font-size']) + base = self._page.fbase if 'font-size' in self._style: size = self._style['font-size'] result = normalize_fontsize(size, base) @@ -441,4 +421,8 @@ class Style(object): def __str__(self): items = self._style.items() + items.sort() return '; '.join("%s: %s" % (key, val) for key, val in items) + + def cssdict(self): + return dict(self._style)