mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merging of HTML files
This commit is contained in:
parent
5326452976
commit
099632502f
@ -539,7 +539,7 @@ class Container(object): # {{{
|
|||||||
spine[-1].tail = last_tail
|
spine[-1].tail = last_tail
|
||||||
self.dirty(self.opf_name)
|
self.dirty(self.opf_name)
|
||||||
|
|
||||||
def remove_item(self, name):
|
def remove_item(self, name, remove_from_guide=True):
|
||||||
'''
|
'''
|
||||||
Remove the item identified by name from this container. This removes all
|
Remove the item identified by name from this container. This removes all
|
||||||
references to the item in the OPF manifest, guide and spine as well as from
|
references to the item in the OPF manifest, guide and spine as well as from
|
||||||
@ -571,10 +571,11 @@ class Container(object): # {{{
|
|||||||
self.remove_from_xml(meta)
|
self.remove_from_xml(meta)
|
||||||
self.dirty(self.opf_name)
|
self.dirty(self.opf_name)
|
||||||
|
|
||||||
for item in self.opf_xpath('//opf:guide/opf:reference[@href]'):
|
if remove_from_guide:
|
||||||
if self.href_to_name(item.get('href'), self.opf_name) == name:
|
for item in self.opf_xpath('//opf:guide/opf:reference[@href]'):
|
||||||
self.remove_from_xml(item)
|
if self.href_to_name(item.get('href'), self.opf_name) == name:
|
||||||
self.dirty(self.opf_name)
|
self.remove_from_xml(item)
|
||||||
|
self.dirty(self.opf_name)
|
||||||
|
|
||||||
path = self.name_path_map.pop(name, None)
|
path = self.name_path_map.pop(name, None)
|
||||||
if path and os.path.exists(path):
|
if path and os.path.exists(path):
|
||||||
@ -872,7 +873,7 @@ class EpubContainer(Container):
|
|||||||
def names_that_must_not_be_changed(self):
|
def names_that_must_not_be_changed(self):
|
||||||
return super(EpubContainer, self).names_that_must_not_be_changed | {'META-INF/' + x for x in self.META_INF}
|
return super(EpubContainer, self).names_that_must_not_be_changed | {'META-INF/' + x for x in self.META_INF}
|
||||||
|
|
||||||
def remove_item(self, name):
|
def remove_item(self, name, remove_from_guide=True):
|
||||||
# Handle removal of obfuscated fonts
|
# Handle removal of obfuscated fonts
|
||||||
if name == 'META-INF/encryption.xml':
|
if name == 'META-INF/encryption.xml':
|
||||||
self.obfuscated_fonts.clear()
|
self.obfuscated_fonts.clear()
|
||||||
@ -890,7 +891,7 @@ class EpubContainer(Container):
|
|||||||
if name == self.href_to_name(cr.get('URI')):
|
if name == self.href_to_name(cr.get('URI')):
|
||||||
self.remove_from_xml(em.getparent())
|
self.remove_from_xml(em.getparent())
|
||||||
self.dirty('META-INF/encryption.xml')
|
self.dirty('META-INF/encryption.xml')
|
||||||
super(EpubContainer, self).remove_item(name)
|
super(EpubContainer, self).remove_item(name, remove_from_guide=remove_from_guide)
|
||||||
|
|
||||||
def process_encryption(self):
|
def process_encryption(self):
|
||||||
fonts = {}
|
fonts = {}
|
||||||
|
@ -6,12 +6,16 @@ from __future__ import (unicode_literals, division, absolute_import,
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import copy
|
import copy, os
|
||||||
from future_builtins import map
|
from future_builtins import map
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
|
|
||||||
from calibre.ebooks.oeb.base import barename, XPNSMAP, XPath, OPF
|
from calibre.ebooks.oeb.base import barename, XPNSMAP, XPath, OPF, XHTML
|
||||||
from calibre.ebooks.oeb.polish.toc import node_from_loc
|
from calibre.ebooks.oeb.polish.toc import node_from_loc
|
||||||
|
from calibre.ebooks.oeb.polish.replace import LinkRebaser
|
||||||
|
|
||||||
|
class AbortError(ValueError):

    '''
    Raised when a split or merge request cannot be carried out, for example
    because of an invalid split location or invalid merge arguments. Being a
    :class:`ValueError` subclass, it is still caught by code that catches
    ValueError.
    '''
|
||||||
|
|
||||||
def in_table(node):
|
def in_table(node):
|
||||||
while node is not None:
|
while node is not None:
|
||||||
@ -167,9 +171,9 @@ def split(container, name, loc_or_xpath, before=True):
|
|||||||
else:
|
else:
|
||||||
split_point = node_from_loc(root, loc_or_xpath)
|
split_point = node_from_loc(root, loc_or_xpath)
|
||||||
if in_table(split_point):
|
if in_table(split_point):
|
||||||
raise ValueError('Cannot split inside tables')
|
raise AbortError('Cannot split inside tables')
|
||||||
if split_point.tag.endswith('}body'):
|
if split_point.tag.endswith('}body'):
|
||||||
raise ValueError('Cannot split on the <body> tag')
|
raise AbortError('Cannot split on the <body> tag')
|
||||||
tree1, tree2 = do_split(split_point, container.log, before=before)
|
tree1, tree2 = do_split(split_point, container.log, before=before)
|
||||||
root1, root2 = tree1.getroot(), tree2.getroot()
|
root1, root2 = tree1.getroot(), tree2.getroot()
|
||||||
anchors_in_top = frozenset(root1.xpath('//*/@id')) | frozenset(root1.xpath('//*/@name')) | {''}
|
anchors_in_top = frozenset(root1.xpath('//*/@id')) | frozenset(root1.xpath('//*/@name')) | {''}
|
||||||
@ -211,3 +215,157 @@ def split(container, name, loc_or_xpath, before=True):
|
|||||||
container.insert_into_xml(spine, si, index=index)
|
container.insert_into_xml(spine, si, index=index)
|
||||||
container.dirty(container.opf_name)
|
container.dirty(container.opf_name)
|
||||||
return bottom_name
|
return bottom_name
|
||||||
|
|
||||||
|
class MergeLinkReplacer(object):

    '''
    Callable that rewrites URLs which point to a merged file so that they
    point to the corresponding location inside the master file instead.

    :param base: Name of the file whose links are being processed, URLs are
                 resolved relative to it.
    :param anchor_map: Mapping of merged file name to a mapping of
                       old anchor -> new anchor for that file.
    :param master: Name of the file everything was merged into.
    :param container: The container object, used to convert between hrefs
                      and names.

    After use, ``replaced`` is True if at least one URL was rewritten.
    '''

    def __init__(self, base, anchor_map, master, container):
        self.container, self.anchor_map = container, anchor_map
        self.master = master
        self.base = base
        self.replaced = False

    def __call__(self, url):
        # Purely local links never point into another (merged) file
        if url and url.startswith('#'):
            return url
        name = self.container.href_to_name(url, self.base)
        amap = self.anchor_map.get(name, None)
        if amap is None:
            # Not a link to one of the merged files, leave untouched
            return url
        frag = urlparse(url).fragment or ''
        # Anchors that were renamed during the merge are looked up in the map,
        # all other anchors keep their name
        frag = amap.get(frag, frag)
        url = self.container.name_to_href(self.master, self.base)
        if frag:
            # Only add a fragment when there is one, to avoid generating
            # links with a dangling '#'
            url += '#' + frag
        self.replaced = True
        return url
|
||||||
|
|
||||||
|
|
||||||
|
def add_text(body, text):
    ''' Append text to the end of body: after its last child if it has
    children, otherwise to the text of body itself. '''
    if len(body) == 0:
        body.text = (body.text or '') + text
        return
    last_child = body[-1]
    last_child.tail = (last_child.tail or '') + text
|
||||||
|
|
||||||
|
def all_anchors(root):
    ''' Return the set of all values of id and name attributes in the tree
    root. '''
    anchors = set(root.xpath('//*/@id'))
    anchors.update(root.xpath('//*/@name'))
    return anchors
|
||||||
|
|
||||||
|
def all_stylesheets(container, name):
    '''
    Yield the names of all files linked to from the <head> of the HTML file
    name via <link href> elements whose type is text/css (a missing type
    attribute is treated as text/css).
    '''
    for link in XPath('//h:head/h:link[@href]')(container.parsed(name)):
        # Always resolve the href relative to the HTML file itself. Re-using
        # the name variable for the result would cause every link after the
        # first to be resolved relative to the previous stylesheet.
        sheet = container.href_to_name(link.get('href'), name)
        typ = link.get('type', 'text/css')
        if typ == 'text/css':
            yield sheet
|
||||||
|
|
||||||
|
def unique_anchor(seen_anchors, current):
    ''' Return an anchor based on current that is not in seen_anchors. If
    current itself is unused it is returned unchanged, otherwise the first
    unused name of the form current_1, current_2, ... is returned. '''
    if current not in seen_anchors:
        return current
    suffix = 1
    while True:
        candidate = '%s_%d' % (current, suffix)
        if candidate not in seen_anchors:
            return candidate
        suffix += 1
|
||||||
|
|
||||||
|
def remove_name_attributes(root):
    ''' Get rid of every name attribute in the tree root. Where an element
    has both id and name, the name is simply dropped, otherwise the name is
    converted into an id. '''
    # Elements that already have an id keep it, only the name is discarded
    for both in root.xpath('//*[@id and @name]'):
        both.attrib.pop('name')
    # Whatever still has a name at this point has no id, so the name
    # becomes the id
    for named in root.xpath('//*[@name]'):
        anchor = named.attrib.pop('name')
        named.set('id', anchor)
|
||||||
|
|
||||||
|
def merge_html(container, names, master):
    '''
    Merge the HTML files in names into the HTML file master.

    The children of every <body> of every file in names (other than master)
    are appended to the last <body> of master. Stylesheets linked from the
    merged files are added to the <head> of master, anchors that clash with
    anchors already present are renamed and finally all links in the
    container that pointed to the merged files are changed to point into
    master. The merged files are removed from the container, however, guide
    references to them are left in place, so that they can be redirected
    to master along with all other links.

    :param container: The container object holding the files
    :param names: The names of the files to merge, may include master
    :param master: The name of the file to merge into
    '''
    p = container.parsed
    master_root = p(master)

    # Ensure master has a <head>
    head = master_root.find('h:head', namespaces=XPNSMAP)
    if head is None:
        head = master_root.makeelement(XHTML('head'))
        container.insert_into_xml(master_root, head, 0)

    seen_anchors = all_anchors(master_root)
    seen_stylesheets = set(all_stylesheets(container, master))
    master_body = p(master).findall('h:body', namespaces=XPNSMAP)[-1]
    master_base = os.path.dirname(master)
    # Maps merged file name -> {old anchor: new anchor}
    anchor_map = {n:{} for n in names if n != master}

    for name in names:
        if name == master:
            continue
        # Insert new stylesheets into master
        for sheet in all_stylesheets(container, name):
            if sheet not in seen_stylesheets:
                seen_stylesheets.add(sheet)
                link = head.makeelement(XHTML('link'), rel='stylesheet', type='text/css', href=container.name_to_href(sheet, master))
                container.insert_into_xml(head, link)

        # Rebase links if master is in a different directory
        if os.path.dirname(name) != master_base:
            container.replace_links(name, LinkRebaser(container, name, master))

        root = p(name)
        # Flat list of everything to be moved into master: strings are text
        # nodes, everything else is an element
        children = []
        for body in p(name).findall('h:body', namespaces=XPNSMAP):
            children.append(body.text if body.text and body.text.strip() else '\n\n')
            children.extend(body)

        first_child = next((c for c in children if not isinstance(c, basestring)), None)
        if first_child is None:
            # Empty document, ignore
            # NOTE(review): such a file is not removed from the container,
            # but it stays in anchor_map, so links to it are still
            # redirected to master below -- confirm this is intended
            continue

        amap = anchor_map[name]
        remove_name_attributes(root)

        for elem in root.xpath('//*[@id]'):
            val = elem.get('id')
            if not val:
                continue
            if val in seen_anchors:
                nval = unique_anchor(seen_anchors, val)
                elem.set('id', nval)
                amap[val] = nval
                # The new anchor is now in use as well, without this a
                # later clash on val would generate the same nval again,
                # leading to duplicate ids in master
                seen_anchors.add(nval)
            else:
                seen_anchors.add(val)

        # Links to the merged file that have no fragment must point to the
        # start of its content in master, so the first element needs an id
        if 'id' not in first_child.attrib:
            first_child.set('id', unique_anchor(seen_anchors, 'top'))
            seen_anchors.add(first_child.get('id'))

        amap[''] = first_child.get('id')

        # Fix links that point to local changed anchors
        for a in XPath('//h:a[starts-with(@href, "#")]')(root):
            q = a.get('href')[1:]
            if q in amap:
                a.set('href', '#' + amap[q])

        for child in children:
            if isinstance(child, basestring):
                add_text(master_body, child)
            else:
                master_body.append(copy.deepcopy(child))

        container.remove_item(name, remove_from_guide=False)

    # Fix all links in the container that point to merged files
    for fname, media_type in container.mime_map.iteritems():
        repl = MergeLinkReplacer(fname, anchor_map, master, container)
        container.replace_links(fname, repl)
|
||||||
|
|
||||||
|
|
||||||
|
def merge(container, category, names, master):
    '''
    Merge the files named in names into the file master and mark master as
    dirty in the container.

    :param container: The container object holding the files
    :param category: The type of the files, must be one of text or styles
    :param names: The names of the files to merge, at least two, one of
                  which must be master
    :param master: The name of the file to merge into

    Raises :class:`AbortError` if the arguments are invalid.
    '''
    supported_categories = {'text', 'styles'}
    if category not in supported_categories:
        raise AbortError('Cannot merge files of type: %s' % category)
    if len(names) < 2:
        raise AbortError('Must specify at least two files to be merged')
    if master not in names:
        raise AbortError('The master file must be one of the files being merged')

    # category has been validated above, so anything that is not styles is text
    if category == 'styles':
        # NOTE(review): merge_css is not defined in the visible source --
        # confirm it exists before relying on merging of stylesheets
        merge_css(container, names, master)  # noqa
    else:
        merge_html(container, names, master)

    container.dirty(master)
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@ from calibre.ebooks.oeb.polish.tests.base import BaseTest, get_simple_book, get_
|
|||||||
|
|
||||||
from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, OCF_NS
|
from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, OCF_NS
|
||||||
from calibre.ebooks.oeb.polish.replace import rename_files
|
from calibre.ebooks.oeb.polish.replace import rename_files
|
||||||
from calibre.ebooks.oeb.polish.split import split
|
from calibre.ebooks.oeb.polish.split import split, merge
|
||||||
from calibre.utils.filenames import nlinks_file
|
from calibre.utils.filenames import nlinks_file
|
||||||
from calibre.ptempfile import TemporaryFile
|
from calibre.ptempfile import TemporaryFile
|
||||||
|
|
||||||
@ -188,3 +188,20 @@ class ContainerTests(BaseTest):
|
|||||||
self.assertEqual(1, len(root.xpath('//*[@id="container"]')), 'Split point was not adjusted')
|
self.assertEqual(1, len(root.xpath('//*[@id="container"]')), 'Split point was not adjusted')
|
||||||
self.assertEqual(0, len(troot.xpath('//*[@id="container"]')), 'Split point was not adjusted')
|
self.assertEqual(0, len(troot.xpath('//*[@id="container"]')), 'Split point was not adjusted')
|
||||||
self.check_links(c)
|
self.check_links(c)
|
||||||
|
|
||||||
|
def test_merge_file(self):
    ' Test merging of files '
    # Merging two files from the simple test book must not break any links
    container = get_container(get_simple_book())
    merge(container, 'text', ('index_split_000.html', 'index_split_001.html'), 'index_split_000.html')
    self.check_links(container)

    # Merging files that are in different directories, have clashing
    # anchors, link to each other and use different stylesheets
    container = get_container(get_simple_book())
    one, two = 'one/one.html', 'two/two.html'
    container.add_file(one, b'<head><link href="../stylesheet.css"><p><a name="one" href="../two/two.html">1</a><a name="two" href="../two/two.html#one">2</a>')  # noqa
    container.add_file(two, b'<head><link href="../page_styles.css"><p><a name="one" href="two.html#two">1</a><a name="two" href="../one/one.html#one">2</a><a href="#one">3</a>')  # noqa
    merge(container, 'text', (one, two), one)
    self.check_links(container)
    # The stylesheet used only by the merged file must have been added to
    # the master file exactly once
    merged_root = container.parsed(one)
    self.assertEqual(1, len(merged_root.xpath('//*[@href="../page_styles.css"]')))
|
||||||
|
@ -20,7 +20,7 @@ from calibre.ebooks.oeb.base import urlnormalize
|
|||||||
from calibre.ebooks.oeb.polish.main import SUPPORTED, tweak_polish
|
from calibre.ebooks.oeb.polish.main import SUPPORTED, tweak_polish
|
||||||
from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, guess_type
|
from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, guess_type
|
||||||
from calibre.ebooks.oeb.polish.replace import rename_files
|
from calibre.ebooks.oeb.polish.replace import rename_files
|
||||||
from calibre.ebooks.oeb.polish.split import split
|
from calibre.ebooks.oeb.polish.split import split, merge, AbortError
|
||||||
from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog
|
from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog
|
||||||
from calibre.gui2.dialogs.confirm_delete import confirm
|
from calibre.gui2.dialogs.confirm_delete import confirm
|
||||||
from calibre.gui2.tweak_book import set_current_container, current_container, tprefs, actions, editors
|
from calibre.gui2.tweak_book import set_current_container, current_container, tprefs, actions, editors
|
||||||
@ -54,6 +54,7 @@ class Boss(QObject):
|
|||||||
fl.reorder_spine.connect(self.reorder_spine)
|
fl.reorder_spine.connect(self.reorder_spine)
|
||||||
fl.rename_requested.connect(self.rename_requested)
|
fl.rename_requested.connect(self.rename_requested)
|
||||||
fl.edit_file.connect(self.edit_file_requested)
|
fl.edit_file.connect(self.edit_file_requested)
|
||||||
|
fl.merge_requested.connect(self.merge_requested)
|
||||||
self.gui.central.current_editor_changed.connect(self.apply_current_editor_state)
|
self.gui.central.current_editor_changed.connect(self.apply_current_editor_state)
|
||||||
self.gui.central.close_requested.connect(self.editor_close_requested)
|
self.gui.central.close_requested.connect(self.editor_close_requested)
|
||||||
self.gui.central.search_panel.search_triggered.connect(self.search)
|
self.gui.central.search_panel.search_triggered.connect(self.search)
|
||||||
@ -515,15 +516,28 @@ class Boss(QObject):
|
|||||||
def split_requested(self, name, loc):
|
def split_requested(self, name, loc):
|
||||||
if not self.check_dirtied():
|
if not self.check_dirtied():
|
||||||
return
|
return
|
||||||
self.add_savepoint(self.gui.elided_text(_('Split %s') % name))
|
self.add_savepoint(_('Split %s') % self.gui.elided_text(name))
|
||||||
try:
|
try:
|
||||||
bottom_name = split(current_container(), name, loc)
|
bottom_name = split(current_container(), name, loc)
|
||||||
except:
|
except AbortError:
|
||||||
self.rewind_savepoint()
|
self.rewind_savepoint()
|
||||||
raise
|
raise
|
||||||
self.apply_container_update_to_gui()
|
self.apply_container_update_to_gui()
|
||||||
self.edit_file(bottom_name, 'html')
|
self.edit_file(bottom_name, 'html')
|
||||||
|
|
||||||
|
def merge_requested(self, category, names, master):
    ''' Merge the files in names into master, after creating a savepoint.
    If the merge is aborted with an AbortError, the savepoint is rewound
    and the error is re-raised. On success the GUI is updated and, if
    master is already open in an editor, that editor is shown. '''
    if not self.check_dirtied():
        return
    savepoint_name = _('Merge files into %s') % self.gui.elided_text(master)
    self.add_savepoint(savepoint_name)
    try:
        merge(current_container(), category, names, master)
    except AbortError:
        # Nothing was merged, so discard the savepoint created above
        self.rewind_savepoint()
        raise
    else:
        self.apply_container_update_to_gui()
        if master in editors:
            self.show_editor(master)
|
||||||
|
|
||||||
def sync_editor_to_preview(self, name, lnum):
|
def sync_editor_to_preview(self, name, lnum):
|
||||||
editor = self.edit_file(name, 'html')
|
editor = self.edit_file(name, 'html')
|
||||||
self.ignore_preview_to_editor_sync = True
|
self.ignore_preview_to_editor_sync = True
|
||||||
|
Loading…
x
Reference in New Issue
Block a user