mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Implement "ugly-printing" for LIT markup.
This commit is contained in:
parent
946b91f767
commit
210ad8d20a
@ -410,7 +410,7 @@ tr:focus, tt:focus, u:focus, ul:focus, var:focus {
|
||||
|
||||
/* hidden elements */
|
||||
area, base, basefont, head, meta, script, style, title,
|
||||
noembed, param {
|
||||
noembed, param, link {
|
||||
display: none;
|
||||
}
|
||||
|
||||
@ -418,3 +418,9 @@ noembed, param {
|
||||
body {
|
||||
page-break-before: always;
|
||||
}
|
||||
|
||||
/* Explicit line-breaks are blocks, sure... */
|
||||
br {
|
||||
display: block;
|
||||
}
|
||||
|
||||
|
@ -8,8 +8,8 @@ from urlparse import urldefrag, urlparse, urlunparse
|
||||
from urllib import unquote as urlunquote
|
||||
from lxml import etree
|
||||
|
||||
XML_PARSER = etree.XMLParser(
|
||||
remove_blank_text=True, recover=True, resolve_entities=False)
|
||||
XML_PARSER = etree.XMLParser(recover=True, resolve_entities=False)
|
||||
XML_NS = 'http://www.w3.org/XML/1998/namespace'
|
||||
XHTML_NS = 'http://www.w3.org/1999/xhtml'
|
||||
OPF1_NS = 'http://openebook.org/namespaces/oeb-package/1.0/'
|
||||
OPF2_NS = 'http://www.idpf.org/2007/opf'
|
||||
@ -23,6 +23,7 @@ XPNSMAP = {'h': XHTML_NS, 'o1': OPF1_NS, 'o2': OPF2_NS,
|
||||
'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
|
||||
'xsi': XSI_NS, 'dt': DCTERMS_NS, 'ncx': NCX_NS}
|
||||
|
||||
def XML(name):
    """Return *name* qualified with ``XML_NS`` in ``{namespace}name`` form."""
    qualified = '{%s}%s' % (XML_NS, name)
    return qualified
|
||||
def XHTML(name):
    """Return *name* qualified with ``XHTML_NS`` in ``{namespace}name`` form."""
    qualified = '{%s}%s' % (XHTML_NS, name)
    return qualified
|
||||
def OPF(name):
    """Return *name* qualified with ``OPF2_NS`` in ``{namespace}name`` form.

    Only the OPF 2 namespace is used here; ``OPF1_NS`` is not consulted.
    """
    qualified = '{%s}%s' % (OPF2_NS, name)
    return qualified
|
||||
def DC(name):
    """Return *name* qualified with ``DC11_NS`` in ``{namespace}name`` form."""
    qualified = '{%s}%s' % (DC11_NS, name)
    return qualified
|
||||
|
@ -387,7 +387,7 @@ def preserve(function):
|
||||
class LitReader(object):
|
||||
PIECE_SIZE = 16
|
||||
XML_PARSER = etree.XMLParser(
|
||||
remove_blank_text=True, resolve_entities=False)
|
||||
recover=True, resolve_entities=False)
|
||||
|
||||
def magic():
|
||||
@preserve
|
||||
|
@ -14,7 +14,8 @@ import cssutils
|
||||
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
|
||||
CSSValueList, cssproperties
|
||||
from lxml import etree
|
||||
from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES, barename
|
||||
from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES
|
||||
from calibre.ebooks.lit.oeb import barename, urlnormalize
|
||||
from calibre.resources import html_css
|
||||
|
||||
HTML_CSS_STYLESHEET = cssutils.parseString(html_css)
|
||||
@ -125,7 +126,7 @@ class Stylizer(object):
|
||||
elif tag == 'link' \
|
||||
and elem.get('rel', 'stylesheet') == 'stylesheet' \
|
||||
and elem.get('type', CSS_MIME) in OEB_STYLES:
|
||||
href = elem.attrib['href']
|
||||
href = urlnormalize(elem.attrib['href'])
|
||||
path = os.path.join(base, href)
|
||||
path = os.path.normpath(path).replace('\\', '/')
|
||||
if path in self.STYLESHEETS:
|
||||
@ -275,13 +276,13 @@ class Style(object):
|
||||
if name1 != name2:
|
||||
return False
|
||||
elif item.type == 'id':
|
||||
name1 = item.value[1:].lower()
|
||||
name2 = element.attrib.get('id', '').lower().split()
|
||||
name1 = item.value[1:]
|
||||
name2 = element.get('id', '')
|
||||
if name1 != name2:
|
||||
return False
|
||||
elif item.type == 'class':
|
||||
name = item.value[1:].lower()
|
||||
classes = element.attrib.get('class', '').lower().split()
|
||||
classes = element.get('class', '').lower().split()
|
||||
if name not in classes:
|
||||
return False
|
||||
elif item.type == 'child':
|
||||
|
@ -3,7 +3,7 @@ import sys
|
||||
import os
|
||||
from cStringIO import StringIO
|
||||
from struct import pack, unpack
|
||||
from itertools import izip, count
|
||||
from itertools import izip, count, chain
|
||||
import time
|
||||
import random
|
||||
import re
|
||||
@ -15,7 +15,7 @@ from urllib import unquote as urlunquote
|
||||
from lxml import etree
|
||||
from calibre.ebooks.lit.reader import msguid, DirectoryEntry
|
||||
import calibre.ebooks.lit.maps as maps
|
||||
from calibre.ebooks.lit.oeb import CSS_MIME, OPF_MIME
|
||||
from calibre.ebooks.lit.oeb import CSS_MIME, OPF_MIME, XML_NS, XML
|
||||
from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize
|
||||
from calibre.ebooks.lit.oeb import Oeb
|
||||
from calibre.ebooks.lit.stylizer import Stylizer
|
||||
@ -116,6 +116,8 @@ def randbytes(n):
|
||||
return ''.join(chr(random.randint(0, 255)) for x in xrange(n))
|
||||
|
||||
class ReBinary(object):
|
||||
NSRMAP = {'': None, XML_NS: 'xml'}
|
||||
|
||||
def __init__(self, root, path, oeb, map=HTML_MAP):
|
||||
self.dir = os.path.dirname(path)
|
||||
self.manifest = oeb.manifest
|
||||
@ -135,8 +137,11 @@ class ReBinary(object):
|
||||
if isinstance(value, (int, long)):
|
||||
value = unichr(value)
|
||||
self.buf.write(value.encode('utf-8'))
|
||||
|
||||
def tree_to_binary(self, elem, nsrmap={'': None}, parents=[],
|
||||
|
||||
# Return True when style['display'] is anything other than 'inline' or
# 'inline-block'; every other display value is treated as block-level.
# `style` only needs to support lookup by CSS property name.
def is_block(self, style):
|
||||
return style['display'] not in ('inline', 'inline-block')
|
||||
|
||||
def tree_to_binary(self, elem, nsrmap=NSRMAP, parents=[],
|
||||
inhead=False, preserve=False):
|
||||
if not isinstance(elem.tag, basestring):
|
||||
self.write(etree.tostring(elem))
|
||||
@ -158,7 +163,7 @@ class ReBinary(object):
|
||||
flags |= FLAG_CLOSING
|
||||
if inhead:
|
||||
flags |= FLAG_HEAD
|
||||
if style and style['display'] in ('block', 'table'):
|
||||
if style and self.is_block(style):
|
||||
flags |= FLAG_BLOCK
|
||||
self.write(0, flags)
|
||||
tattrs = self.tattrs[0]
|
||||
@ -198,24 +203,41 @@ class ReBinary(object):
|
||||
except ValueError:
|
||||
self.write(len(value)+1, value)
|
||||
self.write(0)
|
||||
old_preserve = preserve
|
||||
if style:
|
||||
preserve = (style['white-space'] in ('pre', 'pre-wrap'))
|
||||
xml_space = elem.get(XML('space'))
|
||||
if xml_space == 'preserve':
|
||||
preserve = True
|
||||
elif xml_space == 'normal':
|
||||
preserve = False
|
||||
if elem.text:
|
||||
text = elem.text
|
||||
if style and style['white-space'] == 'pre':
|
||||
preserve = True
|
||||
if elem.get('xml:space') == 'preserve':
|
||||
preserve = True
|
||||
if not preserve:
|
||||
text = COLLAPSE.sub(' ', text)
|
||||
self.write(text)
|
||||
if preserve:
|
||||
self.write(elem.text)
|
||||
elif len(elem) > 0 or not elem.text.isspace():
|
||||
self.write(COLLAPSE.sub(' ', elem.text))
|
||||
parents.append(tag_offset)
|
||||
for child in elem:
|
||||
self.tree_to_binary(child, nsrmap, parents, inhead, preserve)
|
||||
child = cstyle = nstyle = None
|
||||
for next in chain(elem, [None]):
|
||||
if self.stylizer:
|
||||
nstyle = self.stylizer.style(next) \
|
||||
if (next is not None) else None
|
||||
if child is not None:
|
||||
if not preserve \
|
||||
and (inhead or not nstyle
|
||||
or self.is_block(cstyle)
|
||||
or self.is_block(nstyle)) \
|
||||
and child.tail and child.tail.isspace():
|
||||
child.tail = None
|
||||
self.tree_to_binary(child, nsrmap, parents, inhead, preserve)
|
||||
child, cstyle = next, nstyle
|
||||
parents.pop()
|
||||
preserve = old_preserve
|
||||
if not flags & FLAG_CLOSING:
|
||||
self.write(0, (flags & ~FLAG_OPENING) | FLAG_CLOSING, 0)
|
||||
if elem.tail:
|
||||
if elem.tail and tag != 'html':
|
||||
tail = elem.tail
|
||||
if tag != 'pre':
|
||||
if not preserve:
|
||||
tail = COLLAPSE.sub(' ', tail)
|
||||
self.write(tail)
|
||||
if style and style['page-break-after'] not in ('avoid', 'auto'):
|
||||
|
Loading…
x
Reference in New Issue
Block a user