diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py index 27cbb36ba8..0961cc5d9d 100644 --- a/src/calibre/ebooks/oeb/polish/container.py +++ b/src/calibre/ebooks/oeb/polish/container.py @@ -539,7 +539,7 @@ class Container(object): # {{{ spine[-1].tail = last_tail self.dirty(self.opf_name) - def remove_item(self, name): + def remove_item(self, name, remove_from_guide=True): ''' Remove the item identified by name from this container. This removes all references to the item in the OPF manifest, guide and spine as well as from @@ -571,10 +571,11 @@ class Container(object): # {{{ self.remove_from_xml(meta) self.dirty(self.opf_name) - for item in self.opf_xpath('//opf:guide/opf:reference[@href]'): - if self.href_to_name(item.get('href'), self.opf_name) == name: - self.remove_from_xml(item) - self.dirty(self.opf_name) + if remove_from_guide: + for item in self.opf_xpath('//opf:guide/opf:reference[@href]'): + if self.href_to_name(item.get('href'), self.opf_name) == name: + self.remove_from_xml(item) + self.dirty(self.opf_name) path = self.name_path_map.pop(name, None) if path and os.path.exists(path): @@ -872,7 +873,7 @@ class EpubContainer(Container): def names_that_must_not_be_changed(self): return super(EpubContainer, self).names_that_must_not_be_changed | {'META-INF/' + x for x in self.META_INF} - def remove_item(self, name): + def remove_item(self, name, remove_from_guide=True): # Handle removal of obfuscated fonts if name == 'META-INF/encryption.xml': self.obfuscated_fonts.clear() @@ -890,7 +891,7 @@ class EpubContainer(Container): if name == self.href_to_name(cr.get('URI')): self.remove_from_xml(em.getparent()) self.dirty('META-INF/encryption.xml') - super(EpubContainer, self).remove_item(name) + super(EpubContainer, self).remove_item(name, remove_from_guide=remove_from_guide) def process_encryption(self): fonts = {} diff --git a/src/calibre/ebooks/oeb/polish/split.py b/src/calibre/ebooks/oeb/polish/split.py index 
class AbortError(ValueError):
    ''' Raised when a requested operation is refused before it is carried
    out, for example by the argument checks in :func:`merge`. '''
    pass


class MergeLinkReplacer(object):
    ''' Callable for use with ``container.replace_links()``: rewrites URLs
    that point into a merged file so that they point into the master file.

    :param base: Name of the file whose links are being rewritten
    :param anchor_map: Maps the name of each merged file to a dict of
        ``{old fragment: new fragment}``. The empty fragment maps to the
        anchor that marks the start of that file's content in the master.
    :param master: Name of the file everything was merged into
    :param container: The container, used to convert between hrefs and names

    After use, ``replaced`` is True if at least one URL was rewritten.
    '''

    def __init__(self, base, anchor_map, master, container):
        self.container, self.anchor_map = container, anchor_map
        self.master = master
        self.base = base
        self.replaced = False

    def __call__(self, url):
        # Purely local links are left untouched here
        if url and url.startswith('#'):
            return url
        name = self.container.href_to_name(url, self.base)
        amap = self.anchor_map.get(name, None)
        if amap is None:
            # Does not point at one of the merged files
            return url
        frag = urlparse(url).fragment or ''
        # Anchors that were not renamed keep their original fragment
        frag = amap.get(frag, frag)
        url = self.container.name_to_href(self.master, self.base) + '#' + frag
        self.replaced = True
        return url


def add_text(body, text):
    ''' Append text to the end of body: to the tail of its last child if it
    has children, otherwise to its own text. '''
    if len(body) > 0:
        body[-1].tail = (body[-1].tail or '') + text
    else:
        body.text = (body.text or '') + text


def all_anchors(root):
    ''' Return the set of all id and name attribute values under root. '''
    return set(root.xpath('//*/@id')) | set(root.xpath('//*/@name'))


def all_stylesheets(container, name):
    ''' Yield the names of the CSS files linked from the <head> of the file
    called name. <link> tags with no type attribute are treated as CSS. '''
    for link in XPath('//h:head/h:link[@href]')(container.parsed(name)):
        # Resolve every href against the HTML file itself. Do not rebind
        # ``name`` here, otherwise the second and later links would be
        # resolved relative to the previously found stylesheet.
        sheet = container.href_to_name(link.get('href'), name)
        typ = link.get('type', 'text/css')
        if typ == 'text/css':
            yield sheet


def unique_anchor(seen_anchors, current):
    ''' Return current if it is not in seen_anchors, otherwise the first of
    current_1, current_2, ... that is not in seen_anchors. Does not modify
    seen_anchors. '''
    c = 0
    ans = current
    while ans in seen_anchors:
        c += 1
        ans = '%s_%d' % (current, c)
    return ans


def remove_name_attributes(root):
    ''' Remove all name attributes, replacing them with id attributes. An
    element that already has an id simply loses its name. '''
    for elem in root.xpath('//*[@id and @name]'):
        del elem.attrib['name']
    for elem in root.xpath('//*[@name]'):
        elem.set('id', elem.attrib.pop('name'))


def rename_duplicate_anchors(elems, seen_anchors, amap):
    ''' Make the id of every element in elems unique with respect to
    seen_anchors.

    An id that is already in seen_anchors is replaced by a fresh one and the
    renaming is recorded as ``amap[old] = new``. Every id that ends up on an
    element, renamed or not, is added to seen_anchors so that later
    collisions cannot be assigned the same replacement. Elements with an
    empty id are skipped.
    '''
    for elem in elems:
        val = elem.get('id')
        if not val:
            continue
        if val in seen_anchors:
            nval = unique_anchor(seen_anchors, val)
            elem.set('id', nval)
            amap[val] = nval
            val = nval
        seen_anchors.add(val)


def merge_html(container, names, master):
    ''' Append the body content of every file in names (other than master)
    to the last <body> of master, then remove the merged files from the
    container and re-point links to them at master.

    :param container: The container holding the files
    :param names: Names of the files to merge, in the order their content
        should be appended
    :param master: Name of the file that receives the merged content
    '''
    p = container.parsed
    root = p(master)

    # Ensure master has a <head>
    head = root.find('h:head', namespaces=XPNSMAP)
    if head is None:
        head = root.makeelement(XHTML('head'))
        container.insert_into_xml(root, head, 0)

    seen_anchors = all_anchors(root)
    seen_stylesheets = set(all_stylesheets(container, master))
    master_body = p(master).findall('h:body', namespaces=XPNSMAP)[-1]
    master_base = os.path.dirname(master)
    anchor_map = {n:{} for n in names if n != master}

    for name in names:
        if name == master:
            continue
        # Insert new stylesheets into master
        for sheet in all_stylesheets(container, name):
            if sheet not in seen_stylesheets:
                seen_stylesheets.add(sheet)
                link = head.makeelement(
                    XHTML('link'), rel='stylesheet', type='text/css',
                    href=container.name_to_href(sheet, master))
                container.insert_into_xml(head, link)

        # Rebase links if master is in a different directory
        if os.path.dirname(name) != master_base:
            container.replace_links(name, LinkRebaser(container, name, master))

        root = p(name)
        # Flatten every <body> into a list of leading text and child nodes
        children = []
        for body in p(name).findall('h:body', namespaces=XPNSMAP):
            children.append(body.text if body.text and body.text.strip() else '\n\n')
            children.extend(body)

        first_child = next((c for c in children if not isinstance(c, basestring)), None)
        if first_child is None:
            # Empty document, ignore (it is also not removed from the container)
            continue

        amap = anchor_map[name]
        remove_name_attributes(root)
        rename_duplicate_anchors(root.xpath('//*[@id]'), seen_anchors, amap)

        # The first element of the merged content doubles as the target for
        # links that pointed at the merged file without a fragment
        if 'id' not in first_child.attrib:
            first_child.set('id', unique_anchor(seen_anchors, 'top'))
            seen_anchors.add(first_child.get('id'))

        amap[''] = first_child.get('id')

        # Fix links that point to local changed anchors
        for a in XPath('//h:a[starts-with(@href, "#")]')(root):
            q = a.get('href')[1:]
            if q in amap:
                a.set('href', '#' + amap[q])

        for child in children:
            if isinstance(child, basestring):
                add_text(master_body, child)
            else:
                master_body.append(copy.deepcopy(child))

        # Guide entries are kept so the link fixing below can re-point them
        # NOTE(review): assumes replace_links() also covers the OPF -- confirm
        container.remove_item(name, remove_from_guide=False)

    # Fix all links in the container that point to merged files
    for fname in container.mime_map:
        repl = MergeLinkReplacer(fname, anchor_map, master, container)
        container.replace_links(fname, repl)


def merge(container, category, names, master):
    ''' Merge the files in names into master.

    :param container: The container holding the files
    :param category: Either ``'text'`` or ``'styles'``
    :param names: Names of the files to merge, at least two
    :param master: The file to merge into, must be one of names
    :raises AbortError: If any of the above constraints is violated. These
        checks run before the container is touched.
    '''
    if category not in {'text', 'styles'}:
        raise AbortError('Cannot merge files of type: %s' % category)
    if len(names) < 2:
        raise AbortError('Must specify at least two files to be merged')
    if master not in names:
        raise AbortError('The master file must be one of the files being merged')

    if category == 'text':
        merge_html(container, names, master)
    elif category == 'styles':
        # NOTE(review): merge_css is not defined in the visible code; unless
        # it is provided elsewhere this branch raises NameError -- confirm
        merge_css(container, names, master)  # noqa

    container.dirty(master)

12') # noqa + c.add_file(two, b'

123') # noqa + merge(c, 'text', (one, two), one) + self.check_links(c) + root = c.parsed(one) + self.assertEqual(1, len(root.xpath('//*[@href="../page_styles.css"]'))) diff --git a/src/calibre/gui2/tweak_book/boss.py b/src/calibre/gui2/tweak_book/boss.py index ed2758a3c9..e73de41c03 100644 --- a/src/calibre/gui2/tweak_book/boss.py +++ b/src/calibre/gui2/tweak_book/boss.py @@ -20,7 +20,7 @@ from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.polish.main import SUPPORTED, tweak_polish from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, guess_type from calibre.ebooks.oeb.polish.replace import rename_files -from calibre.ebooks.oeb.polish.split import split +from calibre.ebooks.oeb.polish.split import split, merge, AbortError from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog from calibre.gui2.dialogs.confirm_delete import confirm from calibre.gui2.tweak_book import set_current_container, current_container, tprefs, actions, editors @@ -54,6 +54,7 @@ class Boss(QObject): fl.reorder_spine.connect(self.reorder_spine) fl.rename_requested.connect(self.rename_requested) fl.edit_file.connect(self.edit_file_requested) + fl.merge_requested.connect(self.merge_requested) self.gui.central.current_editor_changed.connect(self.apply_current_editor_state) self.gui.central.close_requested.connect(self.editor_close_requested) self.gui.central.search_panel.search_triggered.connect(self.search) @@ -515,15 +516,28 @@ class Boss(QObject): def split_requested(self, name, loc): if not self.check_dirtied(): return - self.add_savepoint(self.gui.elided_text(_('Split %s') % name)) + self.add_savepoint(_('Split %s') % self.gui.elided_text(name)) try: bottom_name = split(current_container(), name, loc) - except: + except AbortError: self.rewind_savepoint() raise self.apply_container_update_to_gui() self.edit_file(bottom_name, 'html') + def merge_requested(self, category, names, master): + if not 
self.check_dirtied(): + return + self.add_savepoint(_('Merge files into %s') % self.gui.elided_text(master)) + try: + merge(current_container(), category, names, master) + except AbortError: + self.rewind_savepoint() + raise + self.apply_container_update_to_gui() + if master in editors: + self.show_editor(master) + def sync_editor_to_preview(self, name, lnum): editor = self.edit_file(name, 'html') self.ignore_preview_to_editor_sync = True