diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index e8f4aa68e2..a3d57be191 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -74,6 +74,9 @@ def option_recommendation_to_cli_option(add_option, rec): switches.append('--'+opt.long_switch) attrs = dict(dest=opt.name, help=opt.help, choices=opt.choices, default=rec.recommended_value) + if isinstance(rec.recommended_value, type(True)): + attrs['action'] = 'store_false' if rec.recommended_value else \ + 'store_true' add_option(Option(*switches, **attrs)) def add_input_output_options(parser, plumber): diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 119ae4d63e..93fc376bea 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -324,7 +324,7 @@ OptionRecommendation(name='language', self.input_fmt, self.log, accelerators, tdir) if not hasattr(self.oeb, 'manifest'): - self.oeb = create_oebbook(self.log, self.oeb) + self.oeb = create_oebbook(self.log, self.oeb, self.opts) self.opts.source = self.opts.input_profile self.opts.dest = self.opts.output_profile @@ -365,7 +365,7 @@ OptionRecommendation(name='language', self.output_plugin.convert(self.oeb, self.output, self.input_plugin, self.opts, self.log) -def create_oebbook(log, opfpath): +def create_oebbook(log, opfpath, opts): ''' Create an OEBBook from an OPF file. ''' @@ -373,7 +373,8 @@ def create_oebbook(log, opfpath): from calibre.ebooks.oeb.base import OEBBook html_preprocessor = HTMLPreProcessor() reader = OEBReader() - oeb = OEBBook(log, html_preprocessor=html_preprocessor) + oeb = OEBBook(log, html_preprocessor=html_preprocessor, + pretty_print=opts.pretty_print) # Read OEB Book into OEBBook log.info('Parsing all content...') reader(oeb, opfpath) diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index 951b0824a5..5b9a085b1d 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -277,7 +277,7 @@ class HTMLInput(InputFormatPlugin): opfpath = os.path.abspath('metadata.opf') from calibre.ebooks.conversion.plumber import create_oebbook - oeb = create_oebbook(log, opfpath) + oeb = create_oebbook(log, opfpath, opts) from calibre.ebooks.oeb.transforms.package import Package Package(os.getcwdu())(oeb, opts) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index ed7981df4f..5d2c51c4ba 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -273,11 +273,7 @@ def xpath(elem, expr): return elem.xpath(expr, namespaces=XPNSMAP) def _prepare_xml_for_serialization(root): - root.set('xmlns', XHTML_NS) - root.set('{%s}xlink'%XHTML_NS, XLINK_NS) - for x in root.iter(): - if hasattr(x.tag, 'rpartition') and x.tag.rpartition('}')[-1].lower() == 'svg': - x.set('xmlns', SVG_NS) + pass def xml2str(root, pretty_print=False, strip_comments=False): _prepare_xml_for_serialization(root) diff --git a/src/calibre/ebooks/oeb/transforms/package.py b/src/calibre/ebooks/oeb/transforms/package.py index faf5486475..20fe6e2650 100644 --- a/src/calibre/ebooks/oeb/transforms/package.py +++ b/src/calibre/ebooks/oeb/transforms/package.py @@ -128,6 +128,7 @@ class Package(object): self.log = oeb.log self.oeb = oeb self.old_base_path = os.path.abspath(oeb.container.rootdir) + self.log.info('Packaging HTML files...') hrefs = set([]) for item in self.oeb.manifest: diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py index 20205e9c6d..1bb5b50d06 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -12,7 +12,7 @@ assumes a prior call to the flatcss transform. import os, math, functools, collections, re, copy from lxml.etree import XPath as _XPath -from lxml import etree, html +from lxml import etree from lxml.cssselect import CSSSelector from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP, urldefrag, \ @@ -96,24 +96,32 @@ class Split(object): page_breaks = set([]) for selector, before in page_break_selectors: for elem in selector(item.data): - elem.pb_before = before + if before: + elem.set('pb_before', '1') page_breaks.add(elem) for i, elem in enumerate(item.data.iter()): - elem.pb_order = i + elem.set('pb_order', str(i)) page_breaks = list(page_breaks) - page_breaks.sort(cmp=lambda x,y : cmp(x.pb_order, y.pb_order)) + page_breaks.sort(cmp= + lambda x,y : cmp(int(x.get('pb_order')), int(y.get('pb_order')))) page_break_ids, page_breaks_ = [], [] for i, x in enumerate(page_breaks): x.set('id', x.get('id', 'calibre_pb_%d'%i)) id = x.get('id') - page_breaks_.append((XPath('//*[@id="%s"]'%id), x.pb_before)) + page_breaks_.append((XPath('//*[@id="%s"]'%id), + x.get('pb_before', False))) page_break_ids.append(id) + for elem in item.data.iter(): + elem.attrib.pop('pb_order') + if elem.get('pb_before', False): + elem.attrib.pop('pb_before') + return page_breaks_, page_break_ids - def fix_links(self, opf): + def fix_links(self): ''' Fix references to the split files in other content files. ''' @@ -129,13 +137,14 @@ class Split(object): anchor_map = self.map[href] nhref = anchor_map[frag if frag else None] if frag: - nhref = '#'.joinn(href, frag) + nhref = '#'.join(href, frag) return nhref return url class FlowSplitter(object): + 'The actual splitting logic' def __init__(self, item, page_breaks, page_break_ids, max_flow_size, oeb): self.item = item @@ -149,10 +158,10 @@ class FlowSplitter(object): base, ext = os.path.splitext(self.base) self.base = base.replace('%', '%%')+'_split_%d'+ext - self.trees = [self.item.data] + self.trees = [self.item.data.getroottree()] self.splitting_on_page_breaks = True if self.page_breaks: - self.split_on_page_breaks(self.item.data) + self.split_on_page_breaks(self.trees[0]) self.splitting_on_page_breaks = False if self.max_flow_size > 0: @@ -192,6 +201,12 @@ class FlowSplitter(object): self.trees.append(tree) self.trees = [t for t in self.trees if not self.is_page_empty(t.getroot())] + def get_body(self, root): + body = root.xpath('//h:body', namespaces=NAMESPACES) + if not body: + return None + return body[0] + def do_split(self, tree, split_point, before): ''' Split ``tree`` into a *before* and *after* tree at ``split_point``, @@ -206,7 +221,7 @@ class FlowSplitter(object): tree, tree2 = copy.deepcopy(tree), copy.deepcopy(tree) root = tree.getroot() root2 = tree2.getroot() - body, body2 = root.body, root2.body + body, body2 = map(self.get_body, (root, root2)) split_point = root.xpath(path)[0] split_point2 = root2.xpath(path)[0] @@ -262,13 +277,14 @@ class FlowSplitter(object): return tree, tree2 def is_page_empty(self, root): - body = root.find('body') + body = self.get_body(root) if body is None: return False - txt = re.sub(r'\s+', '', html.tostring(body, method='text', encoding=unicode)) + txt = re.sub(r'\s+', '', + etree.tostring(body, method='text', encoding=unicode)) if len(txt) > 4: return False - for img in root.xpath('//img'): + for img in root.xpath('//h:img', namespaces=NAMESPACES): if img.get('style', '') != 'display:none': return False return True @@ -438,6 +454,3 @@ class FlowSplitter(object): fix_toc_entry(self.oeb.toc) self.oeb.manifest.remove(self.item) - - -