Implement "ugly-printing" for LIT markup.

This commit is contained in:
Marshall T. Vandegrift 2008-12-10 00:56:10 -05:00
parent 946b91f767
commit 210ad8d20a
5 changed files with 56 additions and 26 deletions

View File

@ -410,7 +410,7 @@ tr:focus, tt:focus, u:focus, ul:focus, var:focus {
/* hidden elements */ /* hidden elements */
area, base, basefont, head, meta, script, style, title, area, base, basefont, head, meta, script, style, title,
noembed, param { noembed, param, link {
display: none; display: none;
} }
@ -418,3 +418,9 @@ noembed, param {
body { body {
page-break-before: always; page-break-before: always;
} }
/* Explicit line-breaks are blocks, sure... */
br {
display: block;
}

View File

@ -8,8 +8,8 @@ from urlparse import urldefrag, urlparse, urlunparse
from urllib import unquote as urlunquote from urllib import unquote as urlunquote
from lxml import etree from lxml import etree
XML_PARSER = etree.XMLParser( XML_PARSER = etree.XMLParser(recover=True, resolve_entities=False)
remove_blank_text=True, recover=True, resolve_entities=False) XML_NS = 'http://www.w3.org/XML/1998/namespace'
XHTML_NS = 'http://www.w3.org/1999/xhtml' XHTML_NS = 'http://www.w3.org/1999/xhtml'
OPF1_NS = 'http://openebook.org/namespaces/oeb-package/1.0/' OPF1_NS = 'http://openebook.org/namespaces/oeb-package/1.0/'
OPF2_NS = 'http://www.idpf.org/2007/opf' OPF2_NS = 'http://www.idpf.org/2007/opf'
@ -23,6 +23,7 @@ XPNSMAP = {'h': XHTML_NS, 'o1': OPF1_NS, 'o2': OPF2_NS,
'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS, 'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
'xsi': XSI_NS, 'dt': DCTERMS_NS, 'ncx': NCX_NS} 'xsi': XSI_NS, 'dt': DCTERMS_NS, 'ncx': NCX_NS}
def XML(name): return '{%s}%s' % (XML_NS, name)
def XHTML(name): return '{%s}%s' % (XHTML_NS, name) def XHTML(name): return '{%s}%s' % (XHTML_NS, name)
def OPF(name): return '{%s}%s' % (OPF2_NS, name) def OPF(name): return '{%s}%s' % (OPF2_NS, name)
def DC(name): return '{%s}%s' % (DC11_NS, name) def DC(name): return '{%s}%s' % (DC11_NS, name)

View File

@ -387,7 +387,7 @@ def preserve(function):
class LitReader(object): class LitReader(object):
PIECE_SIZE = 16 PIECE_SIZE = 16
XML_PARSER = etree.XMLParser( XML_PARSER = etree.XMLParser(
remove_blank_text=True, resolve_entities=False) recover=True, resolve_entities=False)
def magic(): def magic():
@preserve @preserve

View File

@ -14,7 +14,8 @@ import cssutils
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \ from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
CSSValueList, cssproperties CSSValueList, cssproperties
from lxml import etree from lxml import etree
from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES, barename from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES
from calibre.ebooks.lit.oeb import barename, urlnormalize
from calibre.resources import html_css from calibre.resources import html_css
HTML_CSS_STYLESHEET = cssutils.parseString(html_css) HTML_CSS_STYLESHEET = cssutils.parseString(html_css)
@ -125,7 +126,7 @@ class Stylizer(object):
elif tag == 'link' \ elif tag == 'link' \
and elem.get('rel', 'stylesheet') == 'stylesheet' \ and elem.get('rel', 'stylesheet') == 'stylesheet' \
and elem.get('type', CSS_MIME) in OEB_STYLES: and elem.get('type', CSS_MIME) in OEB_STYLES:
href = elem.attrib['href'] href = urlnormalize(elem.attrib['href'])
path = os.path.join(base, href) path = os.path.join(base, href)
path = os.path.normpath(path).replace('\\', '/') path = os.path.normpath(path).replace('\\', '/')
if path in self.STYLESHEETS: if path in self.STYLESHEETS:
@ -275,13 +276,13 @@ class Style(object):
if name1 != name2: if name1 != name2:
return False return False
elif item.type == 'id': elif item.type == 'id':
name1 = item.value[1:].lower() name1 = item.value[1:]
name2 = element.attrib.get('id', '').lower().split() name2 = element.get('id', '')
if name1 != name2: if name1 != name2:
return False return False
elif item.type == 'class': elif item.type == 'class':
name = item.value[1:].lower() name = item.value[1:].lower()
classes = element.attrib.get('class', '').lower().split() classes = element.get('class', '').lower().split()
if name not in classes: if name not in classes:
return False return False
elif item.type == 'child': elif item.type == 'child':

View File

@ -3,7 +3,7 @@ import sys
import os import os
from cStringIO import StringIO from cStringIO import StringIO
from struct import pack, unpack from struct import pack, unpack
from itertools import izip, count from itertools import izip, count, chain
import time import time
import random import random
import re import re
@ -15,7 +15,7 @@ from urllib import unquote as urlunquote
from lxml import etree from lxml import etree
from calibre.ebooks.lit.reader import msguid, DirectoryEntry from calibre.ebooks.lit.reader import msguid, DirectoryEntry
import calibre.ebooks.lit.maps as maps import calibre.ebooks.lit.maps as maps
from calibre.ebooks.lit.oeb import CSS_MIME, OPF_MIME from calibre.ebooks.lit.oeb import CSS_MIME, OPF_MIME, XML_NS, XML
from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize
from calibre.ebooks.lit.oeb import Oeb from calibre.ebooks.lit.oeb import Oeb
from calibre.ebooks.lit.stylizer import Stylizer from calibre.ebooks.lit.stylizer import Stylizer
@ -116,6 +116,8 @@ def randbytes(n):
return ''.join(chr(random.randint(0, 255)) for x in xrange(n)) return ''.join(chr(random.randint(0, 255)) for x in xrange(n))
class ReBinary(object): class ReBinary(object):
NSRMAP = {'': None, XML_NS: 'xml'}
def __init__(self, root, path, oeb, map=HTML_MAP): def __init__(self, root, path, oeb, map=HTML_MAP):
self.dir = os.path.dirname(path) self.dir = os.path.dirname(path)
self.manifest = oeb.manifest self.manifest = oeb.manifest
@ -135,8 +137,11 @@ class ReBinary(object):
if isinstance(value, (int, long)): if isinstance(value, (int, long)):
value = unichr(value) value = unichr(value)
self.buf.write(value.encode('utf-8')) self.buf.write(value.encode('utf-8'))
def tree_to_binary(self, elem, nsrmap={'': None}, parents=[], def is_block(self, style):
return style['display'] not in ('inline', 'inline-block')
def tree_to_binary(self, elem, nsrmap=NSRMAP, parents=[],
inhead=False, preserve=False): inhead=False, preserve=False):
if not isinstance(elem.tag, basestring): if not isinstance(elem.tag, basestring):
self.write(etree.tostring(elem)) self.write(etree.tostring(elem))
@ -158,7 +163,7 @@ class ReBinary(object):
flags |= FLAG_CLOSING flags |= FLAG_CLOSING
if inhead: if inhead:
flags |= FLAG_HEAD flags |= FLAG_HEAD
if style and style['display'] in ('block', 'table'): if style and self.is_block(style):
flags |= FLAG_BLOCK flags |= FLAG_BLOCK
self.write(0, flags) self.write(0, flags)
tattrs = self.tattrs[0] tattrs = self.tattrs[0]
@ -198,24 +203,41 @@ class ReBinary(object):
except ValueError: except ValueError:
self.write(len(value)+1, value) self.write(len(value)+1, value)
self.write(0) self.write(0)
old_preserve = preserve
if style:
preserve = (style['white-space'] in ('pre', 'pre-wrap'))
xml_space = elem.get(XML('space'))
if xml_space == 'preserve':
preserve = True
elif xml_space == 'normal':
preserve = False
if elem.text: if elem.text:
text = elem.text if preserve:
if style and style['white-space'] == 'pre': self.write(elem.text)
preserve = True elif len(elem) > 0 or not elem.text.isspace():
if elem.get('xml:space') == 'preserve': self.write(COLLAPSE.sub(' ', elem.text))
preserve = True
if not preserve:
text = COLLAPSE.sub(' ', text)
self.write(text)
parents.append(tag_offset) parents.append(tag_offset)
for child in elem: child = cstyle = nstyle = None
self.tree_to_binary(child, nsrmap, parents, inhead, preserve) for next in chain(elem, [None]):
if self.stylizer:
nstyle = self.stylizer.style(next) \
if (next is not None) else None
if child is not None:
if not preserve \
and (inhead or not nstyle
or self.is_block(cstyle)
or self.is_block(nstyle)) \
and child.tail and child.tail.isspace():
child.tail = None
self.tree_to_binary(child, nsrmap, parents, inhead, preserve)
child, cstyle = next, nstyle
parents.pop() parents.pop()
preserve = old_preserve
if not flags & FLAG_CLOSING: if not flags & FLAG_CLOSING:
self.write(0, (flags & ~FLAG_OPENING) | FLAG_CLOSING, 0) self.write(0, (flags & ~FLAG_OPENING) | FLAG_CLOSING, 0)
if elem.tail: if elem.tail and tag != 'html':
tail = elem.tail tail = elem.tail
if tag != 'pre': if not preserve:
tail = COLLAPSE.sub(' ', tail) tail = COLLAPSE.sub(' ', tail)
self.write(tail) self.write(tail)
if style and style['page-break-after'] not in ('avoid', 'auto'): if style and style['page-break-after'] not in ('avoid', 'auto'):