Edit book: Check book, detect case mismatches between links and the filenames of the files they point to and offer to auto-correct the links

Works on both case-insensitive and case-sensitive file systems
This commit is contained in:
Kovid Goyal 2014-02-02 21:15:20 +05:30
parent 6ae332ff87
commit 91e12f10ca
3 changed files with 107 additions and 4 deletions

View File

@ -6,12 +6,13 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os
from collections import defaultdict
from urlparse import urlparse
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
from calibre.ebooks.oeb.polish.container import OEB_FONTS
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ebooks.oeb.polish.utils import guess_type, actual_case_for_name, corrected_case_for_name
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, INFO
class BadLink(BaseError):
@ -20,6 +21,34 @@ class BadLink(BaseError):
' either fix, or remove the link.')
level = WARN
class CaseMismatch(BadLink):
    '''
    A link whose case does not agree with the case of the file it points to.

    Instances are callable: calling one with a container rewrites the
    offending link in the file ``self.name`` to use the corrected name.
    '''

    def __init__(self, href, corrected_name, name, lnum, col):
        '''
        :param href: The link exactly as it appears in the file
        :param corrected_name: The name of the target with the right case
        :param name: The name of the file that contains the link
        :param lnum: Line number of the link, passed to BadLink as ``line``
        :param col: Column of the link, passed to BadLink as ``col``
        '''
        msg = _('The linked to resource {0} does not exist').format(href)
        BadLink.__init__(self, msg, name, line=lnum, col=col)
        help_template = _(
            'The case of the link {0} and the case of the actual file it points to {1}'
            ' do not agree. You should change either the case of the link or rename the file.')
        self.HELP = help_template.format(href, corrected_name)
        self.INDIVIDUAL_FIX = _('Change the case of the link to match the actual file')
        self.corrected_name = corrected_name
        self.href = href

    def __call__(self, container):
        '''
        Replace every link in ``self.name`` that is equal to ``self.href``
        with a link to ``self.corrected_name``, keeping any URL fragment.

        :return: The ``replaced`` flag of the replacer, True if at least one
                 link was rewritten
        '''
        fragment = urlparse(self.href).fragment
        fixed_href = container.name_to_href(self.corrected_name, self.name)
        if fragment:
            fixed_href = fixed_href + '#' + fragment
        wrong_href = self.href

        # An object rather than a plain function, so that the result can be
        # read back from its ``replaced`` attribute afterwards
        class LinkReplacer(object):
            replaced = False

            def __call__(self, url):
                if url == wrong_href:
                    self.replaced = True
                    return fixed_href
                return url

        replacer = LinkReplacer()
        container.replace_links(self.name, replacer)
        return replacer.replaced
class FileLink(BadLink):
HELP = _('This link uses the file:// URL scheme. This does not work with many ebook readers.'
@ -128,9 +157,20 @@ def check_links(container):
if tname in container.mime_map:
links_map[name].add(tname)
else:
a(BadLink(_('The linked resource %s is a directory') % fl(href), name, lnum, col))
# Filesystem says the file exists, but it is not in
# the mime_map, so either there is a case mismatch
# or the link is a directory
apath = container.name_to_abspath(tname)
if os.path.isdir(apath):
a(BadLink(_('The linked resource %s is a directory') % fl(href), name, lnum, col))
else:
a(CaseMismatch(href, actual_case_for_name(container, tname), name, lnum, col))
else:
a(BadLink(_('The linked resource %s does not exist') % fl(href), name, lnum, col))
cname = corrected_case_for_name(container, tname)
if cname is not None:
a(CaseMismatch(href, cname, name, lnum, col))
else:
a(BadLink(_('The linked resource %s does not exist') % fl(href), name, lnum, col))
else:
purl = urlparse(href)
if purl.scheme == 'file':

View File

@ -185,6 +185,23 @@ class ContainerTests(BaseTest):
self.check_links(c)
def test_actual_case(self):
    ' Check that differently cased names are mapped back to the case of the file that was added '
    from calibre.ebooks.oeb.polish.utils import actual_case_for_name, corrected_case_for_name
    c = get_container(get_simple_book())
    name = 'f1/f2/added file.html'
    c.add_file(name, b'xxx')
    self.assertTrue(c.exists(name))
    variations = (name, name.upper(), name.replace('f1', 'F1'), name.replace('f2', 'F2'))
    # If the upper-cased name also exists, lookups are case insensitive and
    # actual_case_for_name() applies, otherwise the name has to be corrected
    case_insensitive = c.exists(name.upper())
    resolve = actual_case_for_name if case_insensitive else corrected_case_for_name
    for variant in variations:
        self.assertEqual(name, resolve(c, variant))
    if not case_insensitive:
        # A file used as if it were a directory cannot be corrected
        self.assertIsNone(corrected_case_for_name(c, name+'/xx'))
def test_split_file(self):
' Test splitting of files '
book = get_split_book()

View File

@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import re
import re, os
from bisect import bisect
from calibre import guess_type as _guess_type
@ -21,6 +21,52 @@ def setup_cssutils_serialization(tab_width=2):
prefs.indentClosingBrace = False
prefs.omitLastSemicolon = False
def actual_case_for_name(container, name):
    '''
    Return ``name`` with every path component spelled the way it is listed
    in its parent directory.

    :param container: Object providing ``exists(name)`` and ``name_to_abspath(name)``
    :param name: A '/' separated name for which ``container.exists()`` is true
    :return: The '/' separated name built from the directory entries
    :raises ValueError: If ``container.exists(name)`` is false
    :raises RuntimeError: If no entry of a parent directory refers to the
                          same file as one of the components
    '''
    if not container.exists(name):
        raise ValueError('Cannot get actual case for %s as it does not exist' % name)
    ans = []
    for x in name.split('/'):
        # Components already processed are used with their corrected spelling
        base = '/'.join(ans + [x])
        path = container.name_to_abspath(base)
        pdir = os.path.dirname(path)
        entries = os.listdir(pdir)
        # Compare against the entry names, not the joined absolute paths, so
        # that a component that is already correct is accepted without
        # having to compare files
        if x in entries:
            correctx = x
        else:
            # Only needed on this slow path, so import it here
            from calibre.utils.filenames import samefile
            for q in entries:
                if samefile(os.path.join(pdir, q), path):
                    correctx = q
                    break
            else:
                raise RuntimeError('Something bad happened')
        ans.append(correctx)
    return '/'.join(ans)
def corrected_case_for_name(container, name):
    '''
    Try to find a spelling of ``name`` that exists, by fixing the case of
    its components one at a time.

    A component is kept unchanged when the name built so far exists according
    to ``container.exists()``. Otherwise it is replaced by an entry of the
    parent directory that is equal to it when both are lower-cased.

    :param container: Object providing ``exists(name)`` and ``name_to_abspath(name)``
    :param name: A '/' separated name
    :return: The corrected '/' separated name, or None if some component has
             no matching directory entry or its parent cannot be listed
    '''
    fixed = []
    for component in name.split('/'):
        partial = '/'.join(fixed + [component])
        if container.exists(partial):
            fixed.append(component)
            continue
        try:
            siblings = set(os.listdir(os.path.dirname(container.name_to_abspath(partial))))
        except EnvironmentError:
            return None  # one of the non-terminal components of name is a file instead of a directory
        wanted = component.lower()
        match = next((s for s in siblings if s.lower() == wanted), None)
        if match is None:
            return None
        fixed.append(match)
    return '/'.join(fixed)
class PositionFinder(object):
def __init__(self, raw):