mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement --pretty-print
This commit is contained in:
parent
c822d25916
commit
b9f80aa229
@ -74,6 +74,9 @@ def option_recommendation_to_cli_option(add_option, rec):
|
|||||||
switches.append('--'+opt.long_switch)
|
switches.append('--'+opt.long_switch)
|
||||||
attrs = dict(dest=opt.name, help=opt.help,
|
attrs = dict(dest=opt.name, help=opt.help,
|
||||||
choices=opt.choices, default=rec.recommended_value)
|
choices=opt.choices, default=rec.recommended_value)
|
||||||
|
if isinstance(rec.recommended_value, type(True)):
|
||||||
|
attrs['action'] = 'store_false' if rec.recommended_value else \
|
||||||
|
'store_true'
|
||||||
add_option(Option(*switches, **attrs))
|
add_option(Option(*switches, **attrs))
|
||||||
|
|
||||||
def add_input_output_options(parser, plumber):
|
def add_input_output_options(parser, plumber):
|
||||||
|
@ -324,7 +324,7 @@ OptionRecommendation(name='language',
|
|||||||
self.input_fmt, self.log,
|
self.input_fmt, self.log,
|
||||||
accelerators, tdir)
|
accelerators, tdir)
|
||||||
if not hasattr(self.oeb, 'manifest'):
|
if not hasattr(self.oeb, 'manifest'):
|
||||||
self.oeb = create_oebbook(self.log, self.oeb)
|
self.oeb = create_oebbook(self.log, self.oeb, self.opts)
|
||||||
|
|
||||||
self.opts.source = self.opts.input_profile
|
self.opts.source = self.opts.input_profile
|
||||||
self.opts.dest = self.opts.output_profile
|
self.opts.dest = self.opts.output_profile
|
||||||
@ -365,7 +365,7 @@ OptionRecommendation(name='language',
|
|||||||
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
|
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
|
||||||
self.opts, self.log)
|
self.opts, self.log)
|
||||||
|
|
||||||
def create_oebbook(log, opfpath):
|
def create_oebbook(log, opfpath, opts):
|
||||||
'''
|
'''
|
||||||
Create an OEBBook from an OPF file.
|
Create an OEBBook from an OPF file.
|
||||||
'''
|
'''
|
||||||
@ -373,7 +373,8 @@ def create_oebbook(log, opfpath):
|
|||||||
from calibre.ebooks.oeb.base import OEBBook
|
from calibre.ebooks.oeb.base import OEBBook
|
||||||
html_preprocessor = HTMLPreProcessor()
|
html_preprocessor = HTMLPreProcessor()
|
||||||
reader = OEBReader()
|
reader = OEBReader()
|
||||||
oeb = OEBBook(log, html_preprocessor=html_preprocessor)
|
oeb = OEBBook(log, html_preprocessor=html_preprocessor,
|
||||||
|
pretty_print=opts.pretty_print)
|
||||||
# Read OEB Book into OEBBook
|
# Read OEB Book into OEBBook
|
||||||
log.info('Parsing all content...')
|
log.info('Parsing all content...')
|
||||||
reader(oeb, opfpath)
|
reader(oeb, opfpath)
|
||||||
|
@ -277,7 +277,7 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
opfpath = os.path.abspath('metadata.opf')
|
opfpath = os.path.abspath('metadata.opf')
|
||||||
|
|
||||||
from calibre.ebooks.conversion.plumber import create_oebbook
|
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||||
oeb = create_oebbook(log, opfpath)
|
oeb = create_oebbook(log, opfpath, opts)
|
||||||
|
|
||||||
from calibre.ebooks.oeb.transforms.package import Package
|
from calibre.ebooks.oeb.transforms.package import Package
|
||||||
Package(os.getcwdu())(oeb, opts)
|
Package(os.getcwdu())(oeb, opts)
|
||||||
|
@ -273,11 +273,7 @@ def xpath(elem, expr):
|
|||||||
return elem.xpath(expr, namespaces=XPNSMAP)
|
return elem.xpath(expr, namespaces=XPNSMAP)
|
||||||
|
|
||||||
def _prepare_xml_for_serialization(root):
|
def _prepare_xml_for_serialization(root):
|
||||||
root.set('xmlns', XHTML_NS)
|
pass
|
||||||
root.set('{%s}xlink'%XHTML_NS, XLINK_NS)
|
|
||||||
for x in root.iter():
|
|
||||||
if hasattr(x.tag, 'rpartition') and x.tag.rpartition('}')[-1].lower() == 'svg':
|
|
||||||
x.set('xmlns', SVG_NS)
|
|
||||||
|
|
||||||
def xml2str(root, pretty_print=False, strip_comments=False):
|
def xml2str(root, pretty_print=False, strip_comments=False):
|
||||||
_prepare_xml_for_serialization(root)
|
_prepare_xml_for_serialization(root)
|
||||||
|
@ -128,6 +128,7 @@ class Package(object):
|
|||||||
self.log = oeb.log
|
self.log = oeb.log
|
||||||
self.oeb = oeb
|
self.oeb = oeb
|
||||||
self.old_base_path = os.path.abspath(oeb.container.rootdir)
|
self.old_base_path = os.path.abspath(oeb.container.rootdir)
|
||||||
|
self.log.info('Packaging HTML files...')
|
||||||
|
|
||||||
hrefs = set([])
|
hrefs = set([])
|
||||||
for item in self.oeb.manifest:
|
for item in self.oeb.manifest:
|
||||||
|
@ -12,7 +12,7 @@ assumes a prior call to the flatcss transform.
|
|||||||
import os, math, functools, collections, re, copy
|
import os, math, functools, collections, re, copy
|
||||||
|
|
||||||
from lxml.etree import XPath as _XPath
|
from lxml.etree import XPath as _XPath
|
||||||
from lxml import etree, html
|
from lxml import etree
|
||||||
from lxml.cssselect import CSSSelector
|
from lxml.cssselect import CSSSelector
|
||||||
|
|
||||||
from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP, urldefrag, \
|
from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP, urldefrag, \
|
||||||
@ -96,24 +96,32 @@ class Split(object):
|
|||||||
page_breaks = set([])
|
page_breaks = set([])
|
||||||
for selector, before in page_break_selectors:
|
for selector, before in page_break_selectors:
|
||||||
for elem in selector(item.data):
|
for elem in selector(item.data):
|
||||||
elem.pb_before = before
|
if before:
|
||||||
|
elem.set('pb_before', '1')
|
||||||
page_breaks.add(elem)
|
page_breaks.add(elem)
|
||||||
|
|
||||||
for i, elem in enumerate(item.data.iter()):
|
for i, elem in enumerate(item.data.iter()):
|
||||||
elem.pb_order = i
|
elem.set('pb_order', str(i))
|
||||||
|
|
||||||
page_breaks = list(page_breaks)
|
page_breaks = list(page_breaks)
|
||||||
page_breaks.sort(cmp=lambda x,y : cmp(x.pb_order, y.pb_order))
|
page_breaks.sort(cmp=
|
||||||
|
lambda x,y : cmp(int(x.get('pb_order')), int(y.get('pb_order'))))
|
||||||
page_break_ids, page_breaks_ = [], []
|
page_break_ids, page_breaks_ = [], []
|
||||||
for i, x in enumerate(page_breaks):
|
for i, x in enumerate(page_breaks):
|
||||||
x.set('id', x.get('id', 'calibre_pb_%d'%i))
|
x.set('id', x.get('id', 'calibre_pb_%d'%i))
|
||||||
id = x.get('id')
|
id = x.get('id')
|
||||||
page_breaks_.append((XPath('//*[@id="%s"]'%id), x.pb_before))
|
page_breaks_.append((XPath('//*[@id="%s"]'%id),
|
||||||
|
x.get('pb_before', False)))
|
||||||
page_break_ids.append(id)
|
page_break_ids.append(id)
|
||||||
|
|
||||||
|
for elem in item.data.iter():
|
||||||
|
elem.attrib.pop('pb_order')
|
||||||
|
if elem.get('pb_before', False):
|
||||||
|
elem.attrib.pop('pb_before')
|
||||||
|
|
||||||
return page_breaks_, page_break_ids
|
return page_breaks_, page_break_ids
|
||||||
|
|
||||||
def fix_links(self, opf):
|
def fix_links(self):
|
||||||
'''
|
'''
|
||||||
Fix references to the split files in other content files.
|
Fix references to the split files in other content files.
|
||||||
'''
|
'''
|
||||||
@ -129,13 +137,14 @@ class Split(object):
|
|||||||
anchor_map = self.map[href]
|
anchor_map = self.map[href]
|
||||||
nhref = anchor_map[frag if frag else None]
|
nhref = anchor_map[frag if frag else None]
|
||||||
if frag:
|
if frag:
|
||||||
nhref = '#'.joinn(href, frag)
|
nhref = '#'.join(href, frag)
|
||||||
return nhref
|
return nhref
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class FlowSplitter(object):
|
class FlowSplitter(object):
|
||||||
|
'The actual splitting logic'
|
||||||
|
|
||||||
def __init__(self, item, page_breaks, page_break_ids, max_flow_size, oeb):
|
def __init__(self, item, page_breaks, page_break_ids, max_flow_size, oeb):
|
||||||
self.item = item
|
self.item = item
|
||||||
@ -149,10 +158,10 @@ class FlowSplitter(object):
|
|||||||
base, ext = os.path.splitext(self.base)
|
base, ext = os.path.splitext(self.base)
|
||||||
self.base = base.replace('%', '%%')+'_split_%d'+ext
|
self.base = base.replace('%', '%%')+'_split_%d'+ext
|
||||||
|
|
||||||
self.trees = [self.item.data]
|
self.trees = [self.item.data.getroottree()]
|
||||||
self.splitting_on_page_breaks = True
|
self.splitting_on_page_breaks = True
|
||||||
if self.page_breaks:
|
if self.page_breaks:
|
||||||
self.split_on_page_breaks(self.item.data)
|
self.split_on_page_breaks(self.trees[0])
|
||||||
self.splitting_on_page_breaks = False
|
self.splitting_on_page_breaks = False
|
||||||
|
|
||||||
if self.max_flow_size > 0:
|
if self.max_flow_size > 0:
|
||||||
@ -192,6 +201,12 @@ class FlowSplitter(object):
|
|||||||
self.trees.append(tree)
|
self.trees.append(tree)
|
||||||
self.trees = [t for t in self.trees if not self.is_page_empty(t.getroot())]
|
self.trees = [t for t in self.trees if not self.is_page_empty(t.getroot())]
|
||||||
|
|
||||||
|
def get_body(self, root):
    '''
    Return the first element matched by ``//h:body`` in the document
    that contains ``root``, or ``None`` when nothing matches.

    The ``h`` prefix is resolved through the module-level ``NAMESPACES``
    mapping (presumably bound to the XHTML namespace; the mapping is
    defined outside this view -- confirm).
    '''
    matches = root.xpath('//h:body', namespaces=NAMESPACES)
    return matches[0] if matches else None
|
||||||
|
|
||||||
def do_split(self, tree, split_point, before):
|
def do_split(self, tree, split_point, before):
|
||||||
'''
|
'''
|
||||||
Split ``tree`` into a *before* and *after* tree at ``split_point``,
|
Split ``tree`` into a *before* and *after* tree at ``split_point``,
|
||||||
@ -206,7 +221,7 @@ class FlowSplitter(object):
|
|||||||
tree, tree2 = copy.deepcopy(tree), copy.deepcopy(tree)
|
tree, tree2 = copy.deepcopy(tree), copy.deepcopy(tree)
|
||||||
root = tree.getroot()
|
root = tree.getroot()
|
||||||
root2 = tree2.getroot()
|
root2 = tree2.getroot()
|
||||||
body, body2 = root.body, root2.body
|
body, body2 = map(self.get_body, (root, root2))
|
||||||
split_point = root.xpath(path)[0]
|
split_point = root.xpath(path)[0]
|
||||||
split_point2 = root2.xpath(path)[0]
|
split_point2 = root2.xpath(path)[0]
|
||||||
|
|
||||||
@ -262,13 +277,14 @@ class FlowSplitter(object):
|
|||||||
return tree, tree2
|
return tree, tree2
|
||||||
|
|
||||||
def is_page_empty(self, root):
|
def is_page_empty(self, root):
|
||||||
body = root.find('body')
|
body = self.get_body(root)
|
||||||
if body is None:
|
if body is None:
|
||||||
return False
|
return False
|
||||||
txt = re.sub(r'\s+', '', html.tostring(body, method='text', encoding=unicode))
|
txt = re.sub(r'\s+', '',
|
||||||
|
etree.tostring(body, method='text', encoding=unicode))
|
||||||
if len(txt) > 4:
|
if len(txt) > 4:
|
||||||
return False
|
return False
|
||||||
for img in root.xpath('//img'):
|
for img in root.xpath('//h:img', namespaces=NAMESPACES):
|
||||||
if img.get('style', '') != 'display:none':
|
if img.get('style', '') != 'display:none':
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
@ -438,6 +454,3 @@ class FlowSplitter(object):
|
|||||||
fix_toc_entry(self.oeb.toc)
|
fix_toc_entry(self.oeb.toc)
|
||||||
|
|
||||||
self.oeb.manifest.remove(self.item)
|
self.oeb.manifest.remove(self.item)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user