mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement renaming in book containers
This commit is contained in:
parent
f48d9a47ca
commit
f83313f593
@ -100,6 +100,12 @@ def close_self_closing_tags(raw):
|
||||
def uuid_id():
    ''' Return a newly generated random (version 4) UUID as a unicode string
    with the letter u prepended. '''
    token = unicode(uuid.uuid4())
    return 'u' + token
|
||||
|
||||
def itercsslinks(raw):
    ''' Yield a (link, offset) pair for every match in raw of the module
    level patterns _css_url_re and _css_import_re. link is the first group of
    the match and offset is the index in raw at which that group starts. All
    matches of _css_url_re are yielded before any match of _css_import_re. '''
    for pattern in (_css_url_re, _css_import_re):
        for found in pattern.finditer(raw):
            yield found.group(1), found.start(1)
|
||||
|
||||
def iterlinks(root, find_links_in_css=True):
|
||||
'''
|
||||
Iterate over all links in a OEB Document.
|
||||
|
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, logging, sys, hashlib, uuid, re, shutil, copy
|
||||
import os, logging, sys, hashlib, uuid, re, shutil
|
||||
from collections import defaultdict
|
||||
from io import BytesIO
|
||||
from urllib import unquote as urlunquote, quote as urlquote
|
||||
@ -15,6 +15,7 @@ from urlparse import urlparse
|
||||
from future_builtins import zip
|
||||
|
||||
from lxml import etree
|
||||
from cssutils import replaceUrls, getUrls
|
||||
|
||||
from calibre import guess_type as _guess_type, CurrentDir
|
||||
from calibre.customize.ui import (plugin_for_input_format,
|
||||
@ -27,7 +28,8 @@ from calibre.ebooks.mobi import MobiError
|
||||
from calibre.ebooks.mobi.reader.headers import MetadataHeader
|
||||
from calibre.ebooks.mobi.tweak import set_cover
|
||||
from calibre.ebooks.oeb.base import (
|
||||
serialize, OEB_DOCS, _css_logger, OEB_STYLES, OPF2_NS, DC11_NS, OPF)
|
||||
serialize, OEB_DOCS, _css_logger, OEB_STYLES, OPF2_NS, DC11_NS, OPF,
|
||||
rewrite_links, iterlinks, itercsslinks)
|
||||
from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError
|
||||
from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
|
||||
@ -105,6 +107,7 @@ class Container(object): # {{{
|
||||
self.encoding_map = {}
|
||||
self.pretty_print = set()
|
||||
self.cloned = False
|
||||
self.cache_names = ('parsed_cache', 'mime_map', 'name_path_map', 'encoding_map', 'dirtied', 'pretty_print')
|
||||
|
||||
if clone_data is not None:
|
||||
self.cloned = True
|
||||
@ -153,6 +156,100 @@ class Container(object): # {{{
|
||||
for name, path in self.name_path_map.iteritems()}
|
||||
}
|
||||
|
||||
def rename(self, current_name, new_name):
    ''' Renames a file from current_name to new_name. It automatically
    rebases all links inside the file if the directory the file is in
    changes. Note however, that links are not updated in the other files
    that could reference this file. This is for performance, such updates
    should be done once, in bulk.

    Raises ValueError if current_name is one of the names that must not be
    changed, if new_name already exists in the container, or if the parent
    of the destination path is a regular file. '''
    if current_name in self.names_that_must_not_be_changed:
        raise ValueError('Renaming of %s is not allowed' % current_name)
    if self.exists(new_name):
        # Name the file being renamed in the message, not the OPF
        raise ValueError('Cannot rename %s to %s as %s already exists' % (current_name, new_name, new_name))
    new_path = self.name_to_abspath(new_name)
    base = os.path.dirname(new_path)
    if os.path.isfile(base):
        raise ValueError('Cannot rename %s to %s as %s is a file' % (current_name, new_name, base))
    if not os.path.exists(base):
        os.makedirs(base)
    old_path = parent_dir = self.name_to_abspath(current_name)
    # Commit the item before moving it, so the file on disk is up to date
    self.commit_item(current_name)
    os.rename(old_path, new_path)
    # Remove empty directories, walking upwards until one cannot be removed
    while parent_dir:
        parent_dir = os.path.dirname(parent_dir)
        try:
            os.rmdir(parent_dir)
        except EnvironmentError:
            break

    # Carry the per-name metadata over to the new name
    for attr in ('mime_map', 'encoding_map'):
        mapping = getattr(self, attr)
        if current_name in mapping:
            mapping[new_name] = mapping[current_name]
    self.name_path_map[new_name] = new_path
    # Forget the old name in every cache
    for attr in self.cache_names:
        cache = getattr(self, attr)
        try:
            cache.pop(current_name, None)
        except TypeError:
            # Some of the caches are sets, whose pop() accepts no arguments
            cache.discard(current_name)
    if current_name == self.opf_name:
        self.opf_name = new_name
    if os.path.dirname(old_path) != os.path.dirname(new_path):
        # The file moved to another directory, so the links inside it have
        # to be rebased. Imported here rather than at module level.
        from calibre.ebooks.oeb.polish.replace import LinkRebaser
        repl = LinkRebaser(self, current_name, new_name)
        self.replace_links(new_name, repl)
        self.dirty(new_name)
|
||||
|
||||
def replace_links(self, name, replace_func):
    ''' Pass every link in the file name through replace_func and store the
    result back in the file. replace_func must be a callable that takes a
    URL and returns its replacement, and must expose a 'replaced' attribute
    that is True when an actual replacement was made. The
    :class:`LinkReplacer` and :class:`LinkRebaser` classes are convenient
    ways of creating such callables.

    The OPF has its href attributes processed, OEB documents and stylesheets
    are handled by rewrite_links and replaceUrls respectively and NCX files
    have their src attributes processed. Files of any other type are left
    alone. The file is marked dirty if replace_func.replaced is true, and
    that value is returned. '''
    media_type = self.mime_map.get(name, guess_type(name))
    if name == self.opf_name:
        for node in self.opf_xpath('//*[@href]'):
            old_href = node.get('href')
            node.set('href', replace_func(old_href))
    else:
        kind = media_type.lower()
        if kind in OEB_DOCS:
            rewrite_links(self.parsed(name), replace_func)
        elif kind in OEB_STYLES:
            replaceUrls(self.parsed(name), replace_func)
        elif kind == guess_type('toc.ncx'):
            for node in self.parsed(name).xpath('//*[@src]'):
                old_src = node.get('src')
                node.set('src', replace_func(old_src))

    if replace_func.replaced:
        self.dirty(name)
    return replace_func.replaced
|
||||
|
||||
def iterlinks(self, name, get_line_numbers=True):
    ''' Generator over all the links in the file name. If get_line_numbers
    is True, each result is a tuple of the form (link, line_number, offset),
    where line_number is the line at which the link occurs and offset is the
    number of characters from the start of that line. Note that a non-zero
    offset could actually encompass several lines. If get_line_numbers is
    False, only the link itself is yielded.

    For the OPF and for NCX files the offset is always zero. For stylesheets
    the line_number is always zero and the offset is counted from the start
    of the decoded file contents. '''
    media_type = self.mime_map.get(name, guess_type(name))
    if name == self.opf_name:
        for node in self.opf_xpath('//*[@href]'):
            if get_line_numbers:
                yield node.get('href'), node.sourceline, 0
            else:
                yield node.get('href')
        return

    kind = media_type.lower()
    if kind in OEB_DOCS:
        for el, attr, link, pos in iterlinks(self.parsed(name)):
            if get_line_numbers:
                yield link, el.sourceline, pos
            else:
                yield link
    elif kind in OEB_STYLES:
        if get_line_numbers:
            # Scan the raw text, as positions are needed
            with self.open(name) as f:
                raw = self.decode(f.read())
            for link, offset in itercsslinks(raw):
                yield link, 0, offset
        else:
            for link in getUrls(self.parsed(name)):
                yield link
    elif kind == guess_type('toc.ncx'):
        for node in self.parsed(name).xpath('//*[@src]'):
            if get_line_numbers:
                yield node.get('src'), node.sourceline, 0
            else:
                yield node.get('src')
|
||||
|
||||
def abspath_to_name(self, fullpath):
    ''' Return the result of self.relpath() for the absolute form of
    fullpath, with the OS path separator replaced by a forward slash. '''
    relative = self.relpath(os.path.abspath(fullpath))
    return relative.replace(os.sep, '/')
|
||||
|
||||
@ -648,15 +745,15 @@ class EpubContainer(Container):
|
||||
container_path = join(self.root, 'META-INF', 'container.xml')
|
||||
if not exists(container_path):
|
||||
raise InvalidEpub('No META-INF/container.xml in epub')
|
||||
self.container = etree.fromstring(open(container_path, 'rb').read())
|
||||
opf_files = self.container.xpath((
|
||||
container = etree.fromstring(open(container_path, 'rb').read())
|
||||
opf_files = container.xpath((
|
||||
r'child::ocf:rootfiles/ocf:rootfile'
|
||||
'[@media-type="%s" and @full-path]'%guess_type('a.opf')
|
||||
), namespaces={'ocf':OCF_NS}
|
||||
)
|
||||
if not opf_files:
|
||||
raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
|
||||
opf_path = os.path.join(self.root, *opf_files[0].get('full-path').split('/'))
|
||||
opf_path = os.path.join(self.root, *(urlunquote(opf_files[0].get('full-path')).split('/')))
|
||||
if not exists(opf_path):
|
||||
raise InvalidEpub('OPF file does not exist at location pointed to'
|
||||
' by META-INF/container.xml')
|
||||
@ -666,14 +763,35 @@ class EpubContainer(Container):
|
||||
self.obfuscated_fonts = {}
|
||||
if 'META-INF/encryption.xml' in self.name_path_map:
|
||||
self.process_encryption()
|
||||
self.parsed_cache['META-INF/container.xml'] = container
|
||||
|
||||
def clone_data(self, dest_dir):
    ''' Extend the clone data produced by the base class with the EPUB
    specific state: the path to the EPUB file and a copy of the obfuscated
    fonts map.

    The parsed META-INF/container.xml is no longer stored on the instance
    (it is kept in parsed_cache instead), so there is no separate container
    tree to copy here. '''
    ans = super(EpubContainer, self).clone_data(dest_dir)
    ans['pathtoepub'] = self.pathtoepub
    # Copy, so that the clone cannot modify the map of this container
    ans['obfuscated_fonts'] = self.obfuscated_fonts.copy()
    return ans
|
||||
|
||||
def rename(self, old_name, new_name):
    ''' Rename old_name to new_name using the base class implementation and
    then fix up the EPUB specific files: if the OPF was renamed, the
    full-path of the OPF rootfile entries in META-INF/container.xml is
    updated, and if an obfuscated font was renamed, its entry in
    obfuscated_fonts is moved to the new name and the matching
    CipherReference URIs in META-INF/encryption.xml are updated. '''
    # Must be checked before the rename, which changes self.opf_name
    renaming_opf = old_name == self.opf_name
    super(EpubContainer, self).rename(old_name, new_name)
    if renaming_opf:
        container_xml = self.parsed('META-INF/container.xml')
        rootfile_query = (
            r'child::ocf:rootfiles/ocf:rootfile'
            '[@media-type="%s" and @full-path]'%guess_type('a.opf')
        )
        for rootfile in container_xml.xpath(rootfile_query, namespaces={'ocf':OCF_NS}):
            # The asinine epubcheck cannot handle quoted filenames in
            # container.xml
            rootfile.set('full-path', self.opf_name)
        self.dirty('META-INF/container.xml')
    if old_name not in self.obfuscated_fonts:
        return
    self.obfuscated_fonts[new_name] = self.obfuscated_fonts.pop(old_name)
    enc = self.parsed('META-INF/encryption.xml')
    for ref in enc.xpath('//*[local-name()="CipherReference" and @URI]'):
        if self.href_to_name(ref.get('URI')) == old_name:
            ref.set('URI', self.name_to_href(new_name))
            self.dirty('META-INF/encryption.xml')
|
||||
|
||||
@property
def names_that_need_not_be_manifested(self):
    ''' The names reported by the base class together with every entry of
    self.META_INF, prefixed with META-INF/. '''
    inherited = super(EpubContainer, self).names_that_need_not_be_manifested
    meta_inf_names = {'META-INF/' + entry for entry in self.META_INF}
    return inherited | meta_inf_names
|
||||
|
@ -10,11 +10,7 @@ __docformat__ = 'restructuredtext en'
|
||||
import codecs
|
||||
from urlparse import urlparse
|
||||
|
||||
from cssutils import replaceUrls
|
||||
|
||||
from calibre.ebooks.chardet import strip_encoding_declarations
|
||||
from calibre.ebooks.oeb.polish.container import guess_type
|
||||
from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, rewrite_links)
|
||||
|
||||
class LinkReplacer(object):
|
||||
|
||||
@ -26,6 +22,12 @@ class LinkReplacer(object):
|
||||
self.replaced = False
|
||||
|
||||
def __call__(self, url):
|
||||
if url and url.startswith('#'):
|
||||
repl = self.frag_map(self.base, url[1:])
|
||||
if not repl or repl == url[1:]:
|
||||
return url
|
||||
self.replaced = True
|
||||
return '#' + repl
|
||||
name = self.container.href_to_name(url, self.base)
|
||||
if not name:
|
||||
return url
|
||||
@ -42,23 +44,37 @@ class LinkReplacer(object):
|
||||
self.replaced = True
|
||||
return href
|
||||
|
||||
def replace_links(container, link_map, frag_map=lambda name, frag:frag):
|
||||
ncx_type = guess_type('toc.ncx')
|
||||
for name, media_type in container.mime_map.iteritems():
|
||||
repl = LinkReplacer(name, container, link_map, frag_map)
|
||||
if media_type.lower() in OEB_DOCS:
|
||||
rewrite_links(container.parsed(name), repl)
|
||||
elif media_type.lower() in OEB_STYLES:
|
||||
replaceUrls(container.parsed(name), repl)
|
||||
elif media_type.lower() == ncx_type:
|
||||
for elem in container.parsed(name).xpath('//*[@src]'):
|
||||
src = elem.get('src')
|
||||
nsrc = repl(src)
|
||||
if src != nsrc:
|
||||
elem.set('src', nsrc)
|
||||
class LinkRebaser(object):

    ''' Callable that rewrites the links found inside a file that has been
    renamed from old_name to new_name, so that they are expressed relative
    to new_name. The replaced attribute is set to True once any link has
    actually been changed. '''

    def __init__(self, container, old_name, new_name):
        self.old_name, self.new_name = old_name, new_name
        self.container = container
        self.replaced = False

    def __call__(self, url):
        ''' Return url rebased from old_name onto new_name. URLs that
        consist only of a fragment, and URLs that the container does not
        resolve to a name, are returned unchanged. '''
        if url and url.startswith('#'):
            # A link within the file itself does not depend on its location
            return url
        purl = urlparse(url)
        frag = purl.fragment
        name = self.container.href_to_name(url, self.old_name)
        if not name:
            return url
        if name == self.old_name:
            # A link from the file to itself must follow the rename
            name = self.new_name
        href = self.container.name_to_href(name, self.new_name)
        if frag:
            href += '#' + frag
        if href != url:
            self.replaced = True
        return href
|
||||
|
||||
|
||||
def replace_links(container, link_map, frag_map=lambda name, frag:frag, replace_in_opf=False):
    ''' Run a LinkReplacer built from link_map and frag_map over every file
    in container.mime_map, via container.replace_links(). The OPF is left
    alone unless replace_in_opf is True. '''
    for name, media_type in container.mime_map.iteritems():
        if name != container.opf_name or replace_in_opf:
            replacer = LinkReplacer(name, container, link_map, frag_map)
            container.replace_links(name, replacer)
|
||||
|
||||
def smarten_punctuation(container, report):
|
||||
from calibre.ebooks.conversion.preprocess import smarten_punctuation
|
||||
@ -83,3 +99,19 @@ def smarten_punctuation(container, report):
|
||||
m.getparent().remove(m)
|
||||
container.dirty(name)
|
||||
|
||||
def rename_files(container, file_map):
    ''' Rename the files in container according to file_map, a mapping of
    current name to new name, and then update the links to the renamed
    files in all files of the container, including the OPF.

    Raises ValueError, before anything is renamed, if a name is both a
    source and a destination, if a destination already exists in the
    container or if two files have the same destination. '''
    sources, targets = set(file_map), set(file_map.itervalues())
    overlap = sources.intersection(targets)
    if overlap:
        raise ValueError('Circular rename detected. The files %s are both rename targets and destinations' % ', '.join(overlap))
    for name, dest in file_map.iteritems():
        if container.exists(dest):
            raise ValueError('Cannot rename {0} to {1} as {1} already exists'.format(name, dest))
    # Fewer distinct destinations than entries means a repeated destination
    if len(file_map) != len(targets):
        raise ValueError('Cannot rename, the set of destination files contains duplicates')

    link_map = {}
    for current_name, new_name in file_map.iteritems():
        container.rename(current_name, new_name)
        if new_name == container.opf_name:
            continue  # OPF is handled by the container
        link_map[current_name] = new_name
    replace_links(container, link_map, replace_in_opf=True)
|
||||
|
||||
|
@ -45,6 +45,7 @@ def get_simple_book(fmt='epub'):
|
||||
raw = raw.replace('LMONOI', P('fonts/liberation/LiberationMono-Italic.ttf'))
|
||||
raw = raw.replace('LMONO', P('fonts/liberation/LiberationMono-Regular.ttf'))
|
||||
raw = raw.replace('IMAGE1', I('marked.png'))
|
||||
raw = raw.replace('IMAGE2', I('textures/light_wood.png'))
|
||||
try:
|
||||
with open(x, 'wb') as f:
|
||||
f.write(raw.encode('utf-8'))
|
||||
@ -69,3 +70,10 @@ class BaseTest(unittest.TestCase):
|
||||
shutil.rmtree(self.tdir, ignore_errors=True)
|
||||
del self.tdir
|
||||
|
||||
def check_links(self, container):
    ''' Assert that every link in every file of container that resolves to
    a name refers to something that exists in the container. Links that do
    not resolve to a name are ignored. '''
    for name in container.name_path_map:
        for link in container.iterlinks(name, get_line_numbers=False):
            dest = container.href_to_name(link, name)
            if not dest:
                continue
            self.assertTrue(container.exists(dest), 'The link %s in %s does not exist' % (link, name))
|
||||
|
||||
|
@ -6,12 +6,14 @@ from __future__ import (unicode_literals, division, absolute_import,
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import os
|
||||
import os, subprocess
|
||||
|
||||
from calibre.ebooks.oeb.polish.tests.base import BaseTest, get_simple_book
|
||||
|
||||
from calibre.ebooks.oeb.polish.container import get_container, clone_container
|
||||
from calibre.ebooks.oeb.polish.container import get_container, clone_container, OCF_NS
|
||||
from calibre.ebooks.oeb.polish.replace import rename_files
|
||||
from calibre.utils.filenames import nlinks_file
|
||||
from calibre.ptempfile import TemporaryFile
|
||||
|
||||
class ContainerTests(BaseTest):
|
||||
|
||||
@ -73,3 +75,82 @@ class ContainerTests(BaseTest):
|
||||
for x in files:
|
||||
self.assertNotIn(x, raw)
|
||||
|
||||
def run_external_tools(self, container, gvim=False, epubcheck=True):
    ''' Commit container to a temporary .epub file inside self.tdir and run
    the requested external programs on it, one after the other, waiting for
    each to finish: gvim if gvim is True and epubcheck if epubcheck is
    True. '''
    with TemporaryFile(suffix='.epub', dir=self.tdir) as f:
        container.commit(outpath=f)
        commands = []
        if gvim:
            commands.append(['gvim', '-f', f])
        if epubcheck:
            commands.append(['epubcheck', f])
        for cmd in commands:
            subprocess.Popen(cmd).wait()
|
||||
|
||||
def test_file_rename(self):
    ' Test renaming of files '
    book = get_simple_book()
    count = [0]

    def new_container():
        # Every scenario gets a fresh container in its own numbered
        # sub-directory of self.tdir
        count[0] += 1
        # os.mkdir() returns None, so the path has to be built first and
        # then passed on, otherwise get_container() would receive tdir=None
        tdir = os.path.join(self.tdir, str(count[0]))
        os.mkdir(tdir)
        return get_container(book, tdir=tdir)

    # Test simple opf rename
    c = new_container()
    orig_name = c.opf_name
    name = 'renamed opf.opf'
    rename_files(c, {c.opf_name: name})
    self.assertEqual(c.opf_name, name)
    for x in ('name_path_map', 'mime_map'):
        self.assertNotIn(orig_name, getattr(c, x))
        self.assertIn(name, getattr(c, x))
    self.assertNotIn(name, c.dirtied)
    root = c.parsed('META-INF/container.xml')
    vals = set(root.xpath(
        r'child::ocf:rootfiles/ocf:rootfile/@full-path',
        namespaces={'ocf':OCF_NS}))
    self.assertSetEqual(vals, {name})
    self.check_links(c)

    # Test a rename that moves the OPF into different directory
    c = new_container()
    orig_name = c.opf_name
    name = 'renamed/again/metadata.opf'
    rename_files(c, {c.opf_name: name})
    self.check_links(c)

    # Test that renaming commits dirtied items
    c = new_container()
    name = next(c.spine_names)[0]
    root = c.parsed(name)
    root.xpath('//*[local-name()="body"]')[0].set('id', 'rename-dirty-test')
    rename_files(c, {name:'other/' + name})
    with c.open('other/' + name) as f:
        raw = f.read()
    self.assertIn(b'id="rename-dirty-test"', raw)
    self.check_links(c)

    # Test renaming of stylesheets
    c = new_container()
    rename_files(c, {'stylesheet.css':'styles/s 1.css', 'page_styles.css':'styles/p 1.css'})
    self.check_links(c)

    # Test renaming of images
    c = new_container()
    rename_files(c, {'cover.png':'images/cover img.png', 'light_wood.png':'images/light wood.png', 'marked.png':'images/marked img.png'})
    self.check_links(c)

    # Test renaming of ToC
    c = new_container()
    rename_files(c, {'toc.ncx': 'toc/toc file.ncx'})
    self.check_links(c)

    # Test renaming of font files
    c = new_container()
    rename_files(c, {'LiberationMono-Regular.ttf': 'fonts/LiberationMono Regular.ttf'})
    self.check_links(c)

    # Test renaming of text files
    c = new_container()
    rename_files(c, {'index_split_000.html':'text/page one.html', 'index_split_001.html':'text/page two.html'})
    self.check_links(c)

    # self.run_external_tools(c, gvim=True)
|
||||
|
||||
|
@ -15,16 +15,17 @@
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
h1 {
|
||||
h2 {
|
||||
color: DarkCyan;
|
||||
text-align: center;
|
||||
background-image: url("IMAGE2");
|
||||
}
|
||||
|
||||
p { font-family: "Liberation Mono"; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h2>A simple test page</h2>
|
||||
<h2 id="page1">A simple test page</h2>
|
||||
<!--lorem-->
|
||||
<p>To pursue pleasure rationally encounter consequences that are extremely
|
||||
painful.</p>
|
||||
@ -39,11 +40,11 @@ avoids a pain that produces no resultant pleasure?</p>
|
||||
|
||||
<div style="text-align:center"><img alt="test" src="IMAGE1"></div>
|
||||
|
||||
<p>On the other hand.</p>
|
||||
<p>A <a href="#page2">link to the next page</a>.</p>
|
||||
|
||||
<!--/lorem-->
|
||||
|
||||
<h2>Another test page</h2>
|
||||
<h2 id="page2">Another test page</h2>
|
||||
|
||||
<!--lorem-->
|
||||
<p>The great explorer of the truth, the master-builder of human happiness. No
|
||||
|
Loading…
x
Reference in New Issue
Block a user