Implement --pretty-print

This commit is contained in:
Kovid Goyal 2009-04-16 18:15:49 -07:00
parent c822d25916
commit b9f80aa229
6 changed files with 39 additions and 25 deletions

View File

@ -74,6 +74,9 @@ def option_recommendation_to_cli_option(add_option, rec):
switches.append('--'+opt.long_switch)
attrs = dict(dest=opt.name, help=opt.help,
choices=opt.choices, default=rec.recommended_value)
if isinstance(rec.recommended_value, type(True)):
attrs['action'] = 'store_false' if rec.recommended_value else \
'store_true'
add_option(Option(*switches, **attrs))
def add_input_output_options(parser, plumber):

View File

@ -324,7 +324,7 @@ OptionRecommendation(name='language',
self.input_fmt, self.log,
accelerators, tdir)
if not hasattr(self.oeb, 'manifest'):
self.oeb = create_oebbook(self.log, self.oeb)
self.oeb = create_oebbook(self.log, self.oeb, self.opts)
self.opts.source = self.opts.input_profile
self.opts.dest = self.opts.output_profile
@ -365,7 +365,7 @@ OptionRecommendation(name='language',
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
self.opts, self.log)
def create_oebbook(log, opfpath):
def create_oebbook(log, opfpath, opts):
'''
Create an OEBBook from an OPF file.
'''
@ -373,7 +373,8 @@ def create_oebbook(log, opfpath):
from calibre.ebooks.oeb.base import OEBBook
html_preprocessor = HTMLPreProcessor()
reader = OEBReader()
oeb = OEBBook(log, html_preprocessor=html_preprocessor)
oeb = OEBBook(log, html_preprocessor=html_preprocessor,
pretty_print=opts.pretty_print)
# Read OEB Book into OEBBook
log.info('Parsing all content...')
reader(oeb, opfpath)

View File

@ -277,7 +277,7 @@ class HTMLInput(InputFormatPlugin):
opfpath = os.path.abspath('metadata.opf')
from calibre.ebooks.conversion.plumber import create_oebbook
oeb = create_oebbook(log, opfpath)
oeb = create_oebbook(log, opfpath, opts)
from calibre.ebooks.oeb.transforms.package import Package
Package(os.getcwdu())(oeb, opts)

View File

@ -273,11 +273,7 @@ def xpath(elem, expr):
return elem.xpath(expr, namespaces=XPNSMAP)
def _prepare_xml_for_serialization(root):
root.set('xmlns', XHTML_NS)
root.set('{%s}xlink'%XHTML_NS, XLINK_NS)
for x in root.iter():
if hasattr(x.tag, 'rpartition') and x.tag.rpartition('}')[-1].lower() == 'svg':
x.set('xmlns', SVG_NS)
pass
def xml2str(root, pretty_print=False, strip_comments=False):
_prepare_xml_for_serialization(root)

View File

@ -128,6 +128,7 @@ class Package(object):
self.log = oeb.log
self.oeb = oeb
self.old_base_path = os.path.abspath(oeb.container.rootdir)
self.log.info('Packaging HTML files...')
hrefs = set([])
for item in self.oeb.manifest:

View File

@ -12,7 +12,7 @@ assumes a prior call to the flatcss transform.
import os, math, functools, collections, re, copy
from lxml.etree import XPath as _XPath
from lxml import etree, html
from lxml import etree
from lxml.cssselect import CSSSelector
from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP, urldefrag, \
@ -96,24 +96,32 @@ class Split(object):
page_breaks = set([])
for selector, before in page_break_selectors:
for elem in selector(item.data):
elem.pb_before = before
if before:
elem.set('pb_before', '1')
page_breaks.add(elem)
for i, elem in enumerate(item.data.iter()):
elem.pb_order = i
elem.set('pb_order', str(i))
page_breaks = list(page_breaks)
page_breaks.sort(cmp=lambda x,y : cmp(x.pb_order, y.pb_order))
page_breaks.sort(cmp=
lambda x,y : cmp(int(x.get('pb_order')), int(y.get('pb_order'))))
page_break_ids, page_breaks_ = [], []
for i, x in enumerate(page_breaks):
x.set('id', x.get('id', 'calibre_pb_%d'%i))
id = x.get('id')
page_breaks_.append((XPath('//*[@id="%s"]'%id), x.pb_before))
page_breaks_.append((XPath('//*[@id="%s"]'%id),
x.get('pb_before', False)))
page_break_ids.append(id)
for elem in item.data.iter():
elem.attrib.pop('pb_order')
if elem.get('pb_before', False):
elem.attrib.pop('pb_before')
return page_breaks_, page_break_ids
def fix_links(self, opf):
def fix_links(self):
'''
Fix references to the split files in other content files.
'''
@ -129,13 +137,14 @@ class Split(object):
anchor_map = self.map[href]
nhref = anchor_map[frag if frag else None]
if frag:
nhref = '#'.joinn(href, frag)
nhref = '#'.join(href, frag)
return nhref
return url
class FlowSplitter(object):
'The actual splitting logic'
def __init__(self, item, page_breaks, page_break_ids, max_flow_size, oeb):
self.item = item
@ -149,10 +158,10 @@ class FlowSplitter(object):
base, ext = os.path.splitext(self.base)
self.base = base.replace('%', '%%')+'_split_%d'+ext
self.trees = [self.item.data]
self.trees = [self.item.data.getroottree()]
self.splitting_on_page_breaks = True
if self.page_breaks:
self.split_on_page_breaks(self.item.data)
self.split_on_page_breaks(self.trees[0])
self.splitting_on_page_breaks = False
if self.max_flow_size > 0:
@ -192,6 +201,12 @@ class FlowSplitter(object):
self.trees.append(tree)
self.trees = [t for t in self.trees if not self.is_page_empty(t.getroot())]
def get_body(self, root):
    '''
    Evaluate the ``//h:body`` XPath query against ``root`` and return the
    first match, or ``None`` when the query matches nothing.
    '''
    matches = root.xpath('//h:body', namespaces=NAMESPACES)
    return matches[0] if matches else None
def do_split(self, tree, split_point, before):
'''
Split ``tree`` into a *before* and *after* tree at ``split_point``,
@ -206,7 +221,7 @@ class FlowSplitter(object):
tree, tree2 = copy.deepcopy(tree), copy.deepcopy(tree)
root = tree.getroot()
root2 = tree2.getroot()
body, body2 = root.body, root2.body
body, body2 = map(self.get_body, (root, root2))
split_point = root.xpath(path)[0]
split_point2 = root2.xpath(path)[0]
@ -262,13 +277,14 @@ class FlowSplitter(object):
return tree, tree2
def is_page_empty(self, root):
body = root.find('body')
body = self.get_body(root)
if body is None:
return False
txt = re.sub(r'\s+', '', html.tostring(body, method='text', encoding=unicode))
txt = re.sub(r'\s+', '',
etree.tostring(body, method='text', encoding=unicode))
if len(txt) > 4:
return False
for img in root.xpath('//img'):
for img in root.xpath('//h:img', namespaces=NAMESPACES):
if img.get('style', '') != 'display:none':
return False
return True
@ -438,6 +454,3 @@ class FlowSplitter(object):
fix_toc_entry(self.oeb.toc)
self.oeb.manifest.remove(self.item)