class CaseMismatch(BadLink):

    '''
    Error reported for a link whose spelling differs from the linked file
    only in case. Calling the error object with a container rewrites the
    offending link in the file that contains it.
    '''

    def __init__(self, href, corrected_name, name, lnum, col):
        # href: the link text as written in the file called `name`
        # corrected_name: the container name with the on-disk case
        # lnum, col: position of the link, forwarded to BadLink
        message = _('The linked to resource {0} does not exist').format(href)
        BadLink.__init__(self, message, name, line=lnum, col=col)
        help_template = _('The case of the link {0} and the case of the actual file it points to {1}'
                          ' do not agree. You should change either the case of the link or rename the file.')
        self.HELP = help_template.format(href, corrected_name)
        self.INDIVIDUAL_FIX = _('Change the case of the link to match the actual file')
        self.corrected_name = corrected_name
        self.href = href

    def __call__(self, container):
        ''' Replace every occurrence of the bad link in the file self.name
        (presumably set by BadLink.__init__) and return True if at least
        one link was actually changed. '''
        fragment = urlparse(self.href).fragment
        replacement = container.name_to_href(self.corrected_name, self.name)
        if fragment:
            # Keep the #anchor of the original link on the corrected one
            replacement = replacement + '#' + fragment
        target = self.href

        class LinkReplacer(object):
            # Flipped to True on the instance the first time a link is rewritten
            replaced = False

            def __call__(self, url):
                if url == target:
                    self.replaced = True
                    return replacement
                return url

        replacer = LinkReplacer()
        container.replace_links(self.name, replacer)
        return replacer.replaced
def actual_case_for_name(container, name):
    '''
    Return `name` with every component spelled the way it is listed in its
    parent directory on disk.

    :param container: object providing ``exists(name)`` and
        ``name_to_abspath(name)``
    :param name: a '/' separated container name that must exist
    :raises ValueError: if ``container.exists(name)`` is false
    :raises RuntimeError: if a component cannot be matched to any entry of
        its parent directory
    '''
    if not container.exists(name):
        raise ValueError('Cannot get actual case for %s as it does not exist' % name)
    ans = []
    for x in name.split('/'):
        # Resolve against the already corrected leading components
        path = container.name_to_abspath('/'.join(ans + [x]))
        pdir = os.path.dirname(path)
        entries = os.listdir(pdir)
        if x in entries:
            # Fast path: the component is already spelled exactly as on disk.
            # Compare against the bare entry names; comparing the component
            # with full paths can never match.
            correctx = x
        else:
            # Only needed on the slow path, so import it lazily
            from calibre.utils.filenames import samefile
            for q in entries:
                if samefile(os.path.join(pdir, q), path):
                    correctx = q
                    break
            else:
                raise RuntimeError('Something bad happened')
        ans.append(correctx)
    return '/'.join(ans)


def corrected_case_for_name(container, name):
    '''
    Find an existing name that differs from `name` only in the case of its
    components.

    :param container: object providing ``exists(name)`` and
        ``name_to_abspath(name)``
    :param name: a '/' separated container name
    :return: the corrected name, or None if some component has no
        case-insensitive match or its parent cannot be listed
    '''
    ans = []
    for x in name.split('/'):
        base = '/'.join(ans + [x])
        if container.exists(base):
            correctx = x
        else:
            try:
                entries = os.listdir(os.path.dirname(container.name_to_abspath(base)))
            except EnvironmentError:
                return None  # one of the non-terminal components of name is a file instead of a directory
            lx = x.lower()
            matches = [q for q in entries if q.lower() == lx]
            if not matches:
                return None
            # Several entries may differ only by case; pick one
            # deterministically instead of relying on iteration order
            correctx = min(matches)
        ans.append(correctx)
    return '/'.join(ans)