diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py
index 045f95ffea..27cbb36ba8 100644
--- a/src/calibre/ebooks/oeb/polish/container.py
+++ b/src/calibre/ebooks/oeb/polish/container.py
@@ -158,6 +158,17 @@ class Container(object): # {{{
                 for name, path in self.name_path_map.iteritems()}
         }
 
+    def guess_type(self, name):
+        ''' Return the media type for the file ``name``, as reported by
+        the module level guess_type(), except that text/html is replaced
+        by application/xhtml+xml. '''
+        # epubcheck complains if the mimetype for text documents is set to
+        # text/html in EPUB 2 books. Sigh.
+        ans = guess_type(name)
+        if ans == 'text/html':
+            ans = 'application/xhtml+xml'
+        return ans
+
     def add_file(self, name, data, media_type=None):
         ''' Add a file to this container. Entries for the file are
         automatically created in the OPF manifest and spine
@@ -176,7 +187,7 @@ class Container(object): # {{{
             os.makedirs(base)
         with open(path, 'wb') as f:
             f.write(data)
-        mt = media_type or guess_type(name)
+        mt = media_type or self.guess_type(name)
         self.name_path_map[name] = path
         self.mime_map[name] = mt
         if name in self.names_that_need_not_be_manifested:
@@ -660,8 +671,15 @@ class Container(object): # {{{
         self.insert_into_xml(manifest, item)
         self.dirty(self.opf_name)
         name = self.href_to_name(href, self.opf_name)
-        self.name_path_map[name] = self.name_to_abspath(name)
+        self.name_path_map[name] = path = self.name_to_abspath(name)
         self.mime_map[name] = media_type
+        # Ensure that the file corresponding to the newly created item exists
+        # otherwise cloned containers will fail when they try to get the number
+        # of links to the file
+        base = os.path.dirname(path)
+        if not os.path.exists(base):
+            os.makedirs(base)
+        open(path, 'wb').close()
         return item
 
     def format_opf(self):
diff --git a/src/calibre/ebooks/oeb/polish/split.py b/src/calibre/ebooks/oeb/polish/split.py
index 0775f64489..d4cdbd7fa5 100644
--- a/src/calibre/ebooks/oeb/polish/split.py
+++ b/src/calibre/ebooks/oeb/polish/split.py
@@ -8,8 +8,9 @@ __copyright__ = '2013, Kovid Goyal '
 
 import copy
 from future_builtins import map
+from urlparse import urlparse
 
-from calibre.ebooks.oeb.base import barename, XPNSMAP, XPath
+from calibre.ebooks.oeb.base import barename, XPNSMAP, XPath, OPF
 from calibre.ebooks.oeb.polish.toc import node_from_loc
 
 def in_table(node):
@@ -137,6 +138,29 @@ def do_split(split_point, log, before=True):
 
     return tree, tree2
 
+class SplitLinkReplacer(object):
+
+    ''' Callable that rewrites links to anchors that have moved from the
+    file ``top_name`` into the file ``bottom_name``. ``replaced`` is set to
+    True once at least one link has been rewritten. '''
+
+    def __init__(self, base, bottom_anchors, top_name, bottom_name, container):
+        self.bottom_anchors, self.bottom_name = bottom_anchors, bottom_name
+        self.container, self.top_name = container, top_name
+        self.base = base
+        self.replaced = False
+
+    def __call__(self, url):
+        if url and url.startswith('#'):
+            return url
+        name = self.container.href_to_name(url, self.base)
+        if name != self.top_name:
+            return url
+        purl = urlparse(url)
+        if purl.fragment and purl.fragment in self.bottom_anchors:
+            url = self.container.name_to_href(self.bottom_name, self.base) + '#' + purl.fragment
+            self.replaced = True
+        return url
 
 def split(container, name, loc):
     root = container.parsed(name)
@@ -145,5 +169,49 @@ def split(container, name, loc):
         raise ValueError('Cannot split inside tables')
     if split_point.tag.endswith('}body'):
         raise ValueError('Cannot split on the <body> tag')
+    # The top tree stays in name, the bottom tree goes into a newly
+    # generated file, whose name is returned
+    tree1, tree2 = do_split(split_point, container.log)
+    root1, root2 = tree1.getroot(), tree2.getroot()
+    # The empty anchor stands for links to the file as a whole, these must
+    # continue to point at the top file
+    anchors_in_top = frozenset(root1.xpath('//*/@id')) | frozenset(root1.xpath('//*/@name')) | {''}
+    anchors_in_bottom = frozenset(root2.xpath('//*/@id')) | frozenset(root2.xpath('//*/@name'))
+    manifest_item = container.generate_item(name, media_type=container.mime_map[name])
+    bottom_name = container.href_to_name(manifest_item.get('href'), container.opf_name)
+    # Fix links in the split trees
+    for r, rname, anchors in [(root1, bottom_name, anchors_in_bottom), (root2, name, anchors_in_top)]:
+        for a in r.xpath('//*[@href]'):
+            url = a.get('href')
+            if url.startswith('#'):
+                fname = name
+            else:
+                fname = container.href_to_name(url, name)
+            if fname == name:
+                purl = urlparse(url)
+                if purl.fragment in anchors:
+                    a.set('href', '%s#%s' % (container.name_to_href(rname, name), purl.fragment))
+    # Fix all links in the container that point to anchors in the bottom tree
+    for fname, media_type in container.mime_map.iteritems():
+        if fname not in {name, bottom_name}:
+            repl = SplitLinkReplacer(fname, anchors_in_bottom, name, bottom_name, container)
+            container.replace_links(fname, repl)
+
+    container.replace(name, root1)
+    container.replace(bottom_name, root2)
+
+    # Insert the new file into the spine immediately after name
+    spine = container.opf_xpath('//opf:spine')[0]
+    for spine_item, spine_name, linear in container.spine_iter:
+        if spine_name == name:
+            break
+    index = spine.index(spine_item) + 1
+
+    si = spine.makeelement(OPF('itemref'), idref=manifest_item.get('id'))
+    if not linear:
+        si.set('linear', 'no')
+    container.insert_into_xml(spine, si, index=index)
+    container.dirty(container.opf_name)
+    return bottom_name
 
 
diff --git a/src/calibre/gui2/tweak_book/boss.py b/src/calibre/gui2/tweak_book/boss.py
index 68078066e6..d846bfba77 100644
--- a/src/calibre/gui2/tweak_book/boss.py
+++ b/src/calibre/gui2/tweak_book/boss.py
@@ -20,6 +20,7 @@ from calibre.ebooks.oeb.base import urlnormalize
 from calibre.ebooks.oeb.polish.main import SUPPORTED, tweak_polish
 from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, guess_type
 from calibre.ebooks.oeb.polish.replace import rename_files
+from calibre.ebooks.oeb.polish.split import split
 from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog
 from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.gui2.tweak_book import set_current_container, current_container, tprefs, actions, editors
@@ -58,6 +59,7 @@ class Boss(QObject):
         self.gui.central.search_panel.search_triggered.connect(self.search)
         self.gui.preview.sync_requested.connect(self.sync_editor_to_preview)
         self.gui.preview.split_start_requested.connect(self.split_start_requested)
+        self.gui.preview.split_requested.connect(self.split_requested)
 
     def mkdtemp(self, prefix=''):
         self.container_count += 1
@@ -508,6 +510,20 @@ class Boss(QObject):
             return self.gui.preview.stop_split()
         self.gui.preview.do_start_split()
 
+    def split_requested(self, name, loc):
+        ''' Split the file ``name`` at ``loc`` and open the newly created
+        file for editing. The savepoint is rewound if the split fails. '''
+        if not self.check_dirtied():
+            return
+        self.add_savepoint(self.gui.elided_text(_('Split %s') % name))
+        try:
+            bottom_name = split(current_container(), name, loc)
+        except:
+            self.rewind_savepoint()
+            raise
+        self.apply_container_update_to_gui()
+        self.edit_file(bottom_name, 'html')
+
     def sync_editor_to_preview(self, name, lnum):
         editor = self.edit_file(name, 'html')
         self.ignore_preview_to_editor_sync = True