mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merging of HTML files
This commit is contained in:
parent
5326452976
commit
099632502f
@ -539,7 +539,7 @@ class Container(object): # {{{
|
||||
spine[-1].tail = last_tail
|
||||
self.dirty(self.opf_name)
|
||||
|
||||
def remove_item(self, name):
|
||||
def remove_item(self, name, remove_from_guide=True):
|
||||
'''
|
||||
Remove the item identified by name from this container. This removes all
|
||||
references to the item in the OPF manifest, guide and spine as well as from
|
||||
@ -571,6 +571,7 @@ class Container(object): # {{{
|
||||
self.remove_from_xml(meta)
|
||||
self.dirty(self.opf_name)
|
||||
|
||||
if remove_from_guide:
|
||||
for item in self.opf_xpath('//opf:guide/opf:reference[@href]'):
|
||||
if self.href_to_name(item.get('href'), self.opf_name) == name:
|
||||
self.remove_from_xml(item)
|
||||
@ -872,7 +873,7 @@ class EpubContainer(Container):
|
||||
def names_that_must_not_be_changed(self):
|
||||
return super(EpubContainer, self).names_that_must_not_be_changed | {'META-INF/' + x for x in self.META_INF}
|
||||
|
||||
def remove_item(self, name):
|
||||
def remove_item(self, name, remove_from_guide=True):
|
||||
# Handle removal of obfuscated fonts
|
||||
if name == 'META-INF/encryption.xml':
|
||||
self.obfuscated_fonts.clear()
|
||||
@ -890,7 +891,7 @@ class EpubContainer(Container):
|
||||
if name == self.href_to_name(cr.get('URI')):
|
||||
self.remove_from_xml(em.getparent())
|
||||
self.dirty('META-INF/encryption.xml')
|
||||
super(EpubContainer, self).remove_item(name)
|
||||
super(EpubContainer, self).remove_item(name, remove_from_guide=remove_from_guide)
|
||||
|
||||
def process_encryption(self):
|
||||
fonts = {}
|
||||
|
@ -6,12 +6,16 @@ from __future__ import (unicode_literals, division, absolute_import,
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import copy
|
||||
import copy, os
|
||||
from future_builtins import map
|
||||
from urlparse import urlparse
|
||||
|
||||
from calibre.ebooks.oeb.base import barename, XPNSMAP, XPath, OPF
|
||||
from calibre.ebooks.oeb.base import barename, XPNSMAP, XPath, OPF, XHTML
|
||||
from calibre.ebooks.oeb.polish.toc import node_from_loc
|
||||
from calibre.ebooks.oeb.polish.replace import LinkRebaser
|
||||
|
||||
class AbortError(ValueError):
    '''
    Raised when a requested split or merge cannot be carried out (for
    example an unsupported split point or invalid merge arguments). It is a
    ValueError subclass, so callers catching ValueError still see it.
    '''
|
||||
|
||||
def in_table(node):
|
||||
while node is not None:
|
||||
@ -167,9 +171,9 @@ def split(container, name, loc_or_xpath, before=True):
|
||||
else:
|
||||
split_point = node_from_loc(root, loc_or_xpath)
|
||||
if in_table(split_point):
|
||||
raise ValueError('Cannot split inside tables')
|
||||
raise AbortError('Cannot split inside tables')
|
||||
if split_point.tag.endswith('}body'):
|
||||
raise ValueError('Cannot split on the <body> tag')
|
||||
raise AbortError('Cannot split on the <body> tag')
|
||||
tree1, tree2 = do_split(split_point, container.log, before=before)
|
||||
root1, root2 = tree1.getroot(), tree2.getroot()
|
||||
anchors_in_top = frozenset(root1.xpath('//*/@id')) | frozenset(root1.xpath('//*/@name')) | {''}
|
||||
@ -211,3 +215,157 @@ def split(container, name, loc_or_xpath, before=True):
|
||||
container.insert_into_xml(spine, si, index=index)
|
||||
container.dirty(container.opf_name)
|
||||
return bottom_name
|
||||
|
||||
class MergeLinkReplacer(object):
    '''
    Callable URL rewriter, suitable for passing to container.replace_links().

    Links found in the file ``base`` that point at one of the files listed
    in ``anchor_map`` are re-pointed at ``master``. ``anchor_map`` maps a
    file name to a dict of {old fragment: new fragment}; the '' key, when
    present, gives the fragment to use for links that had no fragment.

    After use, ``self.replaced`` is True if at least one URL was rewritten.
    '''

    def __init__(self, base, anchor_map, master, container):
        self.container, self.anchor_map = container, anchor_map
        self.master = master
        self.base = base
        self.replaced = False

    def __call__(self, url):
        '''
        Return the replacement for ``url``, or ``url`` itself when it does
        not point at a merged file.
        '''
        # Purely local links (#frag) are handled while merging the bodies
        if url and url.startswith('#'):
            return url
        name = self.container.href_to_name(url, self.base)
        amap = self.anchor_map.get(name, None)
        if amap is None:
            # Not a link to one of the merged files
            return url
        frag = urlparse(url).fragment or ''
        frag = amap.get(frag, frag)
        new_url = self.container.name_to_href(self.master, self.base)
        if frag:
            # Only add a fragment when there is one, to avoid producing
            # links with a dangling '#' (e.g. "master.html#")
            new_url += '#' + frag
        self.replaced = True
        return new_url
|
||||
|
||||
|
||||
def add_text(body, text):
    '''
    Append ``text`` after the current content of ``body``: onto the tail of
    its last child if it has children, otherwise onto its own text.
    '''
    if len(body) == 0:
        body.text = (body.text or '') + text
        return
    last = body[-1]
    last.tail = (last.tail or '') + text
|
||||
|
||||
def all_anchors(root):
    ''' Return the set of all id and name attribute values found in root. '''
    anchors = set(root.xpath('//*/@id'))
    anchors.update(root.xpath('//*/@name'))
    return anchors
|
||||
|
||||
def all_stylesheets(container, name):
    '''
    Yield the container name of every stylesheet linked from the <head> of
    the HTML file ``name``. A <link> counts as a stylesheet when its type
    attribute is text/css or is missing.
    '''
    for link in XPath('//h:head/h:link[@href]')(container.parsed(name)):
        # Always resolve the href relative to the HTML file itself. Using a
        # separate variable (rather than rebinding ``name``) matters: otherwise
        # the second and later links would be resolved relative to the
        # previously found stylesheet.
        sheet = container.href_to_name(link.get('href'), name)
        typ = link.get('type', 'text/css')
        if typ == 'text/css':
            # NOTE(review): presumably href_to_name() can return None for
            # hrefs that do not map to a file in the book - confirm callers
            # cope with that.
            yield sheet
|
||||
|
||||
def unique_anchor(seen_anchors, current):
    '''
    Return an anchor based on ``current`` that is not in ``seen_anchors``.
    ``current`` itself is returned if it is unused, otherwise the first free
    name of the form current_1, current_2, ... ``seen_anchors`` is not
    modified.
    '''
    if current not in seen_anchors:
        return current
    suffix = 1
    while True:
        candidate = '%s_%d' % (current, suffix)
        if candidate not in seen_anchors:
            return candidate
        suffix += 1
|
||||
|
||||
def remove_name_attributes(root):
    '''
    Get rid of every name attribute in root, so that anchors are expressed
    only through id attributes.
    '''
    # Elements that already carry an id simply lose their name
    for node in root.xpath('//*[@id and @name]'):
        node.attrib.pop('name')
    # Anything still carrying a name has no id, so the name becomes the id
    for node in root.xpath('//*[@name]'):
        anchor = node.attrib.pop('name')
        node.set('id', anchor)
|
||||
|
||||
def merge_html(container, names, master):
    '''
    Merge the HTML files ``names`` into the HTML file ``master``.

    For every file other than master: its stylesheets are linked from
    master's <head>, its links are rebased if it lives in a different
    directory, clashing anchors are renamed, the contents of its <body>
    tag(s) are appended to the last <body> of master and the file is removed
    from the container (references in the OPF guide are kept). Finally all
    links in the container that pointed at the merged files are redirected
    to master.

    Files whose <body> contains no elements are left untouched.
    '''
    p = container.parsed
    master_root = p(master)

    # Ensure master has a <head>
    head = master_root.find('h:head', namespaces=XPNSMAP)
    if head is None:
        head = master_root.makeelement(XHTML('head'))
        container.insert_into_xml(master_root, head, 0)

    seen_anchors = all_anchors(master_root)
    seen_stylesheets = set(all_stylesheets(container, master))
    master_body = master_root.findall('h:body', namespaces=XPNSMAP)[-1]
    master_base = os.path.dirname(master)
    # Maps merged file name -> {old anchor: new anchor}
    anchor_map = {n:{} for n in names if n != master}

    for name in names:
        if name == master:
            continue

        # Insert new stylesheets into master
        for sheet in all_stylesheets(container, name):
            if sheet not in seen_stylesheets:
                seen_stylesheets.add(sheet)
                link = head.makeelement(XHTML('link'), rel='stylesheet', type='text/css', href=container.name_to_href(sheet, master))
                container.insert_into_xml(head, link)

        # Rebase links if master is in a different directory
        if os.path.dirname(name) != master_base:
            container.replace_links(name, LinkRebaser(container, name, master))

        root = p(name)
        # Flat list of everything to be moved: strings are text nodes, the
        # rest are the child elements of the <body> tag(s)
        children = []
        for body in root.findall('h:body', namespaces=XPNSMAP):
            children.append(body.text if body.text and body.text.strip() else '\n\n')
            children.extend(body)

        # NOTE(review): if the first child can be a comment or processing
        # instruction, setting an id on it below may fail - confirm.
        first_child = next((c for c in children if not isinstance(c, basestring)), None)
        if first_child is None:
            # Empty document, ignore. Since the file is neither merged nor
            # removed, links to it must not be redirected to master either.
            anchor_map.pop(name, None)
            continue

        amap = anchor_map[name]
        remove_name_attributes(root)

        for elem in root.xpath('//*[@id]'):
            val = elem.get('id')
            if not val:
                continue
            if val in seen_anchors:
                nval = unique_anchor(seen_anchors, val)
                elem.set('id', nval)
                amap[val] = nval
                # Record the new anchor as well, otherwise the same
                # replacement could be handed out again, creating duplicates
                seen_anchors.add(nval)
            else:
                seen_anchors.add(val)

        # Links to the file as a whole should go to the start of its content
        if 'id' not in first_child.attrib:
            first_child.set('id', unique_anchor(seen_anchors, 'top'))
            seen_anchors.add(first_child.get('id'))

        amap[''] = first_child.get('id')

        # Fix links that point to local changed anchors
        for a in XPath('//h:a[starts-with(@href, "#")]')(root):
            q = a.get('href')[1:]
            if q in amap:
                a.set('href', '#' + amap[q])

        for child in children:
            if isinstance(child, basestring):
                add_text(master_body, child)
            else:
                master_body.append(copy.deepcopy(child))

        container.remove_item(name, remove_from_guide=False)

    # Fix all links in the container that point to merged files
    for fname in container.mime_map:
        repl = MergeLinkReplacer(fname, anchor_map, master, container)
        container.replace_links(fname, repl)
|
||||
|
||||
|
||||
def merge(container, category, names, master):
    '''
    Merge the files ``names`` into the file ``master`` and mark master as
    dirty.

    :param category: Either 'text' (HTML files) or 'styles'
    :param names: The files to merge, must contain at least two entries
    :param master: The file to merge into, must be one of ``names``

    Raises AbortError if any of the above conditions is not met.
    '''
    supported = {'text', 'styles'}
    if category not in supported:
        raise AbortError('Cannot merge files of type: %s' % category)
    if not len(names) >= 2:
        raise AbortError('Must specify at least two files to be merged')
    if master not in names:
        raise AbortError('The master file must be one of the files being merged')

    if category == 'styles':
        # NOTE(review): merge_css is not defined in the visible part of this
        # module - confirm it exists before relying on 'styles'.
        merge_css(container, names, master)  # noqa
    else:
        merge_html(container, names, master)

    container.dirty(master)
|
||||
|
||||
|
@ -12,7 +12,7 @@ from calibre.ebooks.oeb.polish.tests.base import BaseTest, get_simple_book, get_
|
||||
|
||||
from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, OCF_NS
|
||||
from calibre.ebooks.oeb.polish.replace import rename_files
|
||||
from calibre.ebooks.oeb.polish.split import split
|
||||
from calibre.ebooks.oeb.polish.split import split, merge
|
||||
from calibre.utils.filenames import nlinks_file
|
||||
from calibre.ptempfile import TemporaryFile
|
||||
|
||||
@ -188,3 +188,20 @@ class ContainerTests(BaseTest):
|
||||
self.assertEqual(1, len(root.xpath('//*[@id="container"]')), 'Split point was not adjusted')
|
||||
self.assertEqual(0, len(troot.xpath('//*[@id="container"]')), 'Split point was not adjusted')
|
||||
self.check_links(c)
|
||||
|
||||
def test_merge_file(self):
    ' Merging HTML files must leave all links in the book valid '
    # Merge two files that live in the same directory
    container = get_container(get_simple_book())
    merge(container, 'text', ('index_split_000.html', 'index_split_001.html'), 'index_split_000.html')
    self.check_links(container)

    # Merge two files from different directories that link to each other
    # and that use different stylesheets
    container = get_container(get_simple_book())
    one = 'one/one.html'
    two = 'two/two.html'
    container.add_file(one, b'<head><link href="../stylesheet.css"><p><a name="one" href="../two/two.html">1</a><a name="two" href="../two/two.html#one">2</a>') # noqa
    container.add_file(two, b'<head><link href="../page_styles.css"><p><a name="one" href="two.html#two">1</a><a name="two" href="../one/one.html#one">2</a><a href="#one">3</a>') # noqa
    merge(container, 'text', (one, two), one)
    self.check_links(container)
    # The stylesheet of the merged file must be linked exactly once
    sheet_links = container.parsed(one).xpath('//*[@href="../page_styles.css"]')
    self.assertEqual(1, len(sheet_links))
|
||||
|
@ -20,7 +20,7 @@ from calibre.ebooks.oeb.base import urlnormalize
|
||||
from calibre.ebooks.oeb.polish.main import SUPPORTED, tweak_polish
|
||||
from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, guess_type
|
||||
from calibre.ebooks.oeb.polish.replace import rename_files
|
||||
from calibre.ebooks.oeb.polish.split import split
|
||||
from calibre.ebooks.oeb.polish.split import split, merge, AbortError
|
||||
from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog
|
||||
from calibre.gui2.dialogs.confirm_delete import confirm
|
||||
from calibre.gui2.tweak_book import set_current_container, current_container, tprefs, actions, editors
|
||||
@ -54,6 +54,7 @@ class Boss(QObject):
|
||||
fl.reorder_spine.connect(self.reorder_spine)
|
||||
fl.rename_requested.connect(self.rename_requested)
|
||||
fl.edit_file.connect(self.edit_file_requested)
|
||||
fl.merge_requested.connect(self.merge_requested)
|
||||
self.gui.central.current_editor_changed.connect(self.apply_current_editor_state)
|
||||
self.gui.central.close_requested.connect(self.editor_close_requested)
|
||||
self.gui.central.search_panel.search_triggered.connect(self.search)
|
||||
@ -515,15 +516,28 @@ class Boss(QObject):
|
||||
def split_requested(self, name, loc):
|
||||
if not self.check_dirtied():
|
||||
return
|
||||
self.add_savepoint(self.gui.elided_text(_('Split %s') % name))
|
||||
self.add_savepoint(_('Split %s') % self.gui.elided_text(name))
|
||||
try:
|
||||
bottom_name = split(current_container(), name, loc)
|
||||
except:
|
||||
except AbortError:
|
||||
self.rewind_savepoint()
|
||||
raise
|
||||
self.apply_container_update_to_gui()
|
||||
self.edit_file(bottom_name, 'html')
|
||||
|
||||
def merge_requested(self, category, names, master):
    '''
    Merge the files ``names`` into ``master`` in the current container. A
    savepoint is added first and rewound again if the merge is aborted.
    Does nothing if check_dirtied() returns a false value.
    '''
    if self.check_dirtied():
        label = _('Merge files into %s') % self.gui.elided_text(master)
        self.add_savepoint(label)
        try:
            merge(current_container(), category, names, master)
        except AbortError:
            # Undo the savepoint added above, then let the error propagate
            self.rewind_savepoint()
            raise
        else:
            self.apply_container_update_to_gui()
            if master in editors:
                self.show_editor(master)
|
||||
|
||||
def sync_editor_to_preview(self, name, lnum):
|
||||
editor = self.edit_file(name, 'html')
|
||||
self.ignore_preview_to_editor_sync = True
|
||||
|
Loading…
x
Reference in New Issue
Block a user