Implement "ugly-printing" for LIT markup.

This commit is contained in:
Marshall T. Vandegrift 2008-12-10 00:56:10 -05:00
parent 946b91f767
commit 210ad8d20a
5 changed files with 56 additions and 26 deletions

View File

@ -410,7 +410,7 @@ tr:focus, tt:focus, u:focus, ul:focus, var:focus {
/* hidden elements */
area, base, basefont, head, meta, script, style, title,
noembed, param {
noembed, param, link {
display: none;
}
@ -418,3 +418,9 @@ noembed, param {
body {
page-break-before: always;
}
/* Explicit line breaks (br) are treated as block-level elements. */
br {
display: block;
}

View File

@ -8,8 +8,8 @@ from urlparse import urldefrag, urlparse, urlunparse
from urllib import unquote as urlunquote
from lxml import etree
XML_PARSER = etree.XMLParser(
remove_blank_text=True, recover=True, resolve_entities=False)
XML_PARSER = etree.XMLParser(recover=True, resolve_entities=False)
XML_NS = 'http://www.w3.org/XML/1998/namespace'
XHTML_NS = 'http://www.w3.org/1999/xhtml'
OPF1_NS = 'http://openebook.org/namespaces/oeb-package/1.0/'
OPF2_NS = 'http://www.idpf.org/2007/opf'
@ -23,6 +23,7 @@ XPNSMAP = {'h': XHTML_NS, 'o1': OPF1_NS, 'o2': OPF2_NS,
'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
'xsi': XSI_NS, 'dt': DCTERMS_NS, 'ncx': NCX_NS}
def XML(name):
    """Return ``name`` qualified with XML_NS in ``{namespace}name`` form."""
    qualified = '{%s}%s' % (XML_NS, name)
    return qualified
def XHTML(name):
    """Return ``name`` qualified with XHTML_NS in ``{namespace}name`` form."""
    qualified = '{%s}%s' % (XHTML_NS, name)
    return qualified
def OPF(name):
    """Return ``name`` qualified with OPF2_NS in ``{namespace}name`` form."""
    qualified = '{%s}%s' % (OPF2_NS, name)
    return qualified
def DC(name):
    """Return ``name`` qualified with DC11_NS in ``{namespace}name`` form."""
    qualified = '{%s}%s' % (DC11_NS, name)
    return qualified

View File

@ -387,7 +387,7 @@ def preserve(function):
class LitReader(object):
PIECE_SIZE = 16
XML_PARSER = etree.XMLParser(
remove_blank_text=True, resolve_entities=False)
recover=True, resolve_entities=False)
def magic():
@preserve

View File

@ -14,7 +14,8 @@ import cssutils
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
CSSValueList, cssproperties
from lxml import etree
from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES, barename
from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES
from calibre.ebooks.lit.oeb import barename, urlnormalize
from calibre.resources import html_css
HTML_CSS_STYLESHEET = cssutils.parseString(html_css)
@ -125,7 +126,7 @@ class Stylizer(object):
elif tag == 'link' \
and elem.get('rel', 'stylesheet') == 'stylesheet' \
and elem.get('type', CSS_MIME) in OEB_STYLES:
href = elem.attrib['href']
href = urlnormalize(elem.attrib['href'])
path = os.path.join(base, href)
path = os.path.normpath(path).replace('\\', '/')
if path in self.STYLESHEETS:
@ -275,13 +276,13 @@ class Style(object):
if name1 != name2:
return False
elif item.type == 'id':
name1 = item.value[1:].lower()
name2 = element.attrib.get('id', '').lower().split()
name1 = item.value[1:]
name2 = element.get('id', '')
if name1 != name2:
return False
elif item.type == 'class':
name = item.value[1:].lower()
classes = element.attrib.get('class', '').lower().split()
classes = element.get('class', '').lower().split()
if name not in classes:
return False
elif item.type == 'child':

View File

@ -3,7 +3,7 @@ import sys
import os
from cStringIO import StringIO
from struct import pack, unpack
from itertools import izip, count
from itertools import izip, count, chain
import time
import random
import re
@ -15,7 +15,7 @@ from urllib import unquote as urlunquote
from lxml import etree
from calibre.ebooks.lit.reader import msguid, DirectoryEntry
import calibre.ebooks.lit.maps as maps
from calibre.ebooks.lit.oeb import CSS_MIME, OPF_MIME
from calibre.ebooks.lit.oeb import CSS_MIME, OPF_MIME, XML_NS, XML
from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize
from calibre.ebooks.lit.oeb import Oeb
from calibre.ebooks.lit.stylizer import Stylizer
@ -116,6 +116,8 @@ def randbytes(n):
return ''.join(chr(random.randint(0, 255)) for x in xrange(n))
class ReBinary(object):
NSRMAP = {'': None, XML_NS: 'xml'}
def __init__(self, root, path, oeb, map=HTML_MAP):
self.dir = os.path.dirname(path)
self.manifest = oeb.manifest
@ -136,7 +138,10 @@ class ReBinary(object):
value = unichr(value)
self.buf.write(value.encode('utf-8'))
def tree_to_binary(self, elem, nsrmap={'': None}, parents=[],
def is_block(self, style):
    """Tell whether ``style`` describes a non-inline element.

    ``style`` is indexed with the key ``'display'``; the result is True
    for every display value other than 'inline' and 'inline-block'.
    """
    inline_displays = ('inline', 'inline-block')
    display = style['display']
    return display not in inline_displays
def tree_to_binary(self, elem, nsrmap=NSRMAP, parents=[],
inhead=False, preserve=False):
if not isinstance(elem.tag, basestring):
self.write(etree.tostring(elem))
@ -158,7 +163,7 @@ class ReBinary(object):
flags |= FLAG_CLOSING
if inhead:
flags |= FLAG_HEAD
if style and style['display'] in ('block', 'table'):
if style and self.is_block(style):
flags |= FLAG_BLOCK
self.write(0, flags)
tattrs = self.tattrs[0]
@ -198,24 +203,41 @@ class ReBinary(object):
except ValueError:
self.write(len(value)+1, value)
self.write(0)
old_preserve = preserve
if style:
preserve = (style['white-space'] in ('pre', 'pre-wrap'))
xml_space = elem.get(XML('space'))
if xml_space == 'preserve':
preserve = True
elif xml_space == 'normal':
preserve = False
if elem.text:
text = elem.text
if style and style['white-space'] == 'pre':
preserve = True
if elem.get('xml:space') == 'preserve':
preserve = True
if not preserve:
text = COLLAPSE.sub(' ', text)
self.write(text)
if preserve:
self.write(elem.text)
elif len(elem) > 0 or not elem.text.isspace():
self.write(COLLAPSE.sub(' ', elem.text))
parents.append(tag_offset)
for child in elem:
self.tree_to_binary(child, nsrmap, parents, inhead, preserve)
child = cstyle = nstyle = None
for next in chain(elem, [None]):
if self.stylizer:
nstyle = self.stylizer.style(next) \
if (next is not None) else None
if child is not None:
if not preserve \
and (inhead or not nstyle
or self.is_block(cstyle)
or self.is_block(nstyle)) \
and child.tail and child.tail.isspace():
child.tail = None
self.tree_to_binary(child, nsrmap, parents, inhead, preserve)
child, cstyle = next, nstyle
parents.pop()
preserve = old_preserve
if not flags & FLAG_CLOSING:
self.write(0, (flags & ~FLAG_OPENING) | FLAG_CLOSING, 0)
if elem.tail:
if elem.tail and tag != 'html':
tail = elem.tail
if tag != 'pre':
if not preserve:
tail = COLLAPSE.sub(' ', tail)
self.write(tail)
if style and style['page-break-after'] not in ('avoid', 'auto'):