Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)
Implement "ugly-printing" for LIT markup.
This commit is contained in:
parent: 946b91f767
commit: 210ad8d20a
@ -410,7 +410,7 @@ tr:focus, tt:focus, u:focus, ul:focus, var:focus {
|
|||||||
|
|
||||||
/* hidden elements */
|
/* hidden elements */
|
||||||
area, base, basefont, head, meta, script, style, title,
|
area, base, basefont, head, meta, script, style, title,
|
||||||
noembed, param {
|
noembed, param, link {
|
||||||
display: none;
|
display: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -418,3 +418,9 @@ noembed, param {
|
|||||||
body {
|
body {
|
||||||
page-break-before: always;
|
page-break-before: always;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Explicit line-breaks are blocks, sure... */
|
||||||
|
br {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -8,8 +8,8 @@ from urlparse import urldefrag, urlparse, urlunparse
|
|||||||
from urllib import unquote as urlunquote
|
from urllib import unquote as urlunquote
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
XML_PARSER = etree.XMLParser(
|
XML_PARSER = etree.XMLParser(recover=True, resolve_entities=False)
|
||||||
remove_blank_text=True, recover=True, resolve_entities=False)
|
XML_NS = 'http://www.w3.org/XML/1998/namespace'
|
||||||
XHTML_NS = 'http://www.w3.org/1999/xhtml'
|
XHTML_NS = 'http://www.w3.org/1999/xhtml'
|
||||||
OPF1_NS = 'http://openebook.org/namespaces/oeb-package/1.0/'
|
OPF1_NS = 'http://openebook.org/namespaces/oeb-package/1.0/'
|
||||||
OPF2_NS = 'http://www.idpf.org/2007/opf'
|
OPF2_NS = 'http://www.idpf.org/2007/opf'
|
||||||
@ -23,6 +23,7 @@ XPNSMAP = {'h': XHTML_NS, 'o1': OPF1_NS, 'o2': OPF2_NS,
|
|||||||
'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
|
'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
|
||||||
'xsi': XSI_NS, 'dt': DCTERMS_NS, 'ncx': NCX_NS}
|
'xsi': XSI_NS, 'dt': DCTERMS_NS, 'ncx': NCX_NS}
|
||||||
|
|
||||||
|
def XML(name):
    """Return ``name`` qualified with the XML namespace.

    The result has the ``{namespace-uri}local-name`` form, built from the
    module-level ``XML_NS`` constant.
    """
    qualified = '{%s}%s' % (XML_NS, name)
    return qualified
|
||||||
def XHTML(name): return '{%s}%s' % (XHTML_NS, name)
|
def XHTML(name): return '{%s}%s' % (XHTML_NS, name)
|
||||||
def OPF(name): return '{%s}%s' % (OPF2_NS, name)
|
def OPF(name): return '{%s}%s' % (OPF2_NS, name)
|
||||||
def DC(name): return '{%s}%s' % (DC11_NS, name)
|
def DC(name): return '{%s}%s' % (DC11_NS, name)
|
||||||
|
@ -387,7 +387,7 @@ def preserve(function):
|
|||||||
class LitReader(object):
|
class LitReader(object):
|
||||||
PIECE_SIZE = 16
|
PIECE_SIZE = 16
|
||||||
XML_PARSER = etree.XMLParser(
|
XML_PARSER = etree.XMLParser(
|
||||||
remove_blank_text=True, resolve_entities=False)
|
recover=True, resolve_entities=False)
|
||||||
|
|
||||||
def magic():
|
def magic():
|
||||||
@preserve
|
@preserve
|
||||||
|
@ -14,7 +14,8 @@ import cssutils
|
|||||||
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
|
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
|
||||||
CSSValueList, cssproperties
|
CSSValueList, cssproperties
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES, barename
|
from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES
|
||||||
|
from calibre.ebooks.lit.oeb import barename, urlnormalize
|
||||||
from calibre.resources import html_css
|
from calibre.resources import html_css
|
||||||
|
|
||||||
HTML_CSS_STYLESHEET = cssutils.parseString(html_css)
|
HTML_CSS_STYLESHEET = cssutils.parseString(html_css)
|
||||||
@ -125,7 +126,7 @@ class Stylizer(object):
|
|||||||
elif tag == 'link' \
|
elif tag == 'link' \
|
||||||
and elem.get('rel', 'stylesheet') == 'stylesheet' \
|
and elem.get('rel', 'stylesheet') == 'stylesheet' \
|
||||||
and elem.get('type', CSS_MIME) in OEB_STYLES:
|
and elem.get('type', CSS_MIME) in OEB_STYLES:
|
||||||
href = elem.attrib['href']
|
href = urlnormalize(elem.attrib['href'])
|
||||||
path = os.path.join(base, href)
|
path = os.path.join(base, href)
|
||||||
path = os.path.normpath(path).replace('\\', '/')
|
path = os.path.normpath(path).replace('\\', '/')
|
||||||
if path in self.STYLESHEETS:
|
if path in self.STYLESHEETS:
|
||||||
@ -275,13 +276,13 @@ class Style(object):
|
|||||||
if name1 != name2:
|
if name1 != name2:
|
||||||
return False
|
return False
|
||||||
elif item.type == 'id':
|
elif item.type == 'id':
|
||||||
name1 = item.value[1:].lower()
|
name1 = item.value[1:]
|
||||||
name2 = element.attrib.get('id', '').lower().split()
|
name2 = element.get('id', '')
|
||||||
if name1 != name2:
|
if name1 != name2:
|
||||||
return False
|
return False
|
||||||
elif item.type == 'class':
|
elif item.type == 'class':
|
||||||
name = item.value[1:].lower()
|
name = item.value[1:].lower()
|
||||||
classes = element.attrib.get('class', '').lower().split()
|
classes = element.get('class', '').lower().split()
|
||||||
if name not in classes:
|
if name not in classes:
|
||||||
return False
|
return False
|
||||||
elif item.type == 'child':
|
elif item.type == 'child':
|
||||||
|
@ -3,7 +3,7 @@ import sys
|
|||||||
import os
|
import os
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
from struct import pack, unpack
|
from struct import pack, unpack
|
||||||
from itertools import izip, count
|
from itertools import izip, count, chain
|
||||||
import time
|
import time
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
@ -15,7 +15,7 @@ from urllib import unquote as urlunquote
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
from calibre.ebooks.lit.reader import msguid, DirectoryEntry
|
from calibre.ebooks.lit.reader import msguid, DirectoryEntry
|
||||||
import calibre.ebooks.lit.maps as maps
|
import calibre.ebooks.lit.maps as maps
|
||||||
from calibre.ebooks.lit.oeb import CSS_MIME, OPF_MIME
|
from calibre.ebooks.lit.oeb import CSS_MIME, OPF_MIME, XML_NS, XML
|
||||||
from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize
|
from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize
|
||||||
from calibre.ebooks.lit.oeb import Oeb
|
from calibre.ebooks.lit.oeb import Oeb
|
||||||
from calibre.ebooks.lit.stylizer import Stylizer
|
from calibre.ebooks.lit.stylizer import Stylizer
|
||||||
@ -116,6 +116,8 @@ def randbytes(n):
|
|||||||
return ''.join(chr(random.randint(0, 255)) for x in xrange(n))
|
return ''.join(chr(random.randint(0, 255)) for x in xrange(n))
|
||||||
|
|
||||||
class ReBinary(object):
|
class ReBinary(object):
|
||||||
|
NSRMAP = {'': None, XML_NS: 'xml'}
|
||||||
|
|
||||||
def __init__(self, root, path, oeb, map=HTML_MAP):
|
def __init__(self, root, path, oeb, map=HTML_MAP):
|
||||||
self.dir = os.path.dirname(path)
|
self.dir = os.path.dirname(path)
|
||||||
self.manifest = oeb.manifest
|
self.manifest = oeb.manifest
|
||||||
@ -135,8 +137,11 @@ class ReBinary(object):
|
|||||||
if isinstance(value, (int, long)):
|
if isinstance(value, (int, long)):
|
||||||
value = unichr(value)
|
value = unichr(value)
|
||||||
self.buf.write(value.encode('utf-8'))
|
self.buf.write(value.encode('utf-8'))
|
||||||
|
|
||||||
def tree_to_binary(self, elem, nsrmap={'': None}, parents=[],
|
def is_block(self, style):
    """Tell whether ``style`` describes a block-level element.

    ``style`` is indexed by CSS property name; only ``style['display']``
    is read.  Any value other than ``'inline'`` or ``'inline-block'``
    counts as block-level.
    """
    display = style['display']
    if display in ('inline', 'inline-block'):
        return False
    return True
|
||||||
|
|
||||||
|
def tree_to_binary(self, elem, nsrmap=NSRMAP, parents=[],
|
||||||
inhead=False, preserve=False):
|
inhead=False, preserve=False):
|
||||||
if not isinstance(elem.tag, basestring):
|
if not isinstance(elem.tag, basestring):
|
||||||
self.write(etree.tostring(elem))
|
self.write(etree.tostring(elem))
|
||||||
@ -158,7 +163,7 @@ class ReBinary(object):
|
|||||||
flags |= FLAG_CLOSING
|
flags |= FLAG_CLOSING
|
||||||
if inhead:
|
if inhead:
|
||||||
flags |= FLAG_HEAD
|
flags |= FLAG_HEAD
|
||||||
if style and style['display'] in ('block', 'table'):
|
if style and self.is_block(style):
|
||||||
flags |= FLAG_BLOCK
|
flags |= FLAG_BLOCK
|
||||||
self.write(0, flags)
|
self.write(0, flags)
|
||||||
tattrs = self.tattrs[0]
|
tattrs = self.tattrs[0]
|
||||||
@ -198,24 +203,41 @@ class ReBinary(object):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
self.write(len(value)+1, value)
|
self.write(len(value)+1, value)
|
||||||
self.write(0)
|
self.write(0)
|
||||||
|
old_preserve = preserve
|
||||||
|
if style:
|
||||||
|
preserve = (style['white-space'] in ('pre', 'pre-wrap'))
|
||||||
|
xml_space = elem.get(XML('space'))
|
||||||
|
if xml_space == 'preserve':
|
||||||
|
preserve = True
|
||||||
|
elif xml_space == 'normal':
|
||||||
|
preserve = False
|
||||||
if elem.text:
|
if elem.text:
|
||||||
text = elem.text
|
if preserve:
|
||||||
if style and style['white-space'] == 'pre':
|
self.write(elem.text)
|
||||||
preserve = True
|
elif len(elem) > 0 or not elem.text.isspace():
|
||||||
if elem.get('xml:space') == 'preserve':
|
self.write(COLLAPSE.sub(' ', elem.text))
|
||||||
preserve = True
|
|
||||||
if not preserve:
|
|
||||||
text = COLLAPSE.sub(' ', text)
|
|
||||||
self.write(text)
|
|
||||||
parents.append(tag_offset)
|
parents.append(tag_offset)
|
||||||
for child in elem:
|
child = cstyle = nstyle = None
|
||||||
self.tree_to_binary(child, nsrmap, parents, inhead, preserve)
|
for next in chain(elem, [None]):
|
||||||
|
if self.stylizer:
|
||||||
|
nstyle = self.stylizer.style(next) \
|
||||||
|
if (next is not None) else None
|
||||||
|
if child is not None:
|
||||||
|
if not preserve \
|
||||||
|
and (inhead or not nstyle
|
||||||
|
or self.is_block(cstyle)
|
||||||
|
or self.is_block(nstyle)) \
|
||||||
|
and child.tail and child.tail.isspace():
|
||||||
|
child.tail = None
|
||||||
|
self.tree_to_binary(child, nsrmap, parents, inhead, preserve)
|
||||||
|
child, cstyle = next, nstyle
|
||||||
parents.pop()
|
parents.pop()
|
||||||
|
preserve = old_preserve
|
||||||
if not flags & FLAG_CLOSING:
|
if not flags & FLAG_CLOSING:
|
||||||
self.write(0, (flags & ~FLAG_OPENING) | FLAG_CLOSING, 0)
|
self.write(0, (flags & ~FLAG_OPENING) | FLAG_CLOSING, 0)
|
||||||
if elem.tail:
|
if elem.tail and tag != 'html':
|
||||||
tail = elem.tail
|
tail = elem.tail
|
||||||
if tag != 'pre':
|
if not preserve:
|
||||||
tail = COLLAPSE.sub(' ', tail)
|
tail = COLLAPSE.sub(' ', tail)
|
||||||
self.write(tail)
|
self.write(tail)
|
||||||
if style and style['page-break-after'] not in ('avoid', 'auto'):
|
if style and style['page-break-after'] not in ('avoid', 'auto'):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user