# Extracted from a patch against
# src/calibre/ebooks/oeb/polish/check/parsing.py. The same patch also:
#   * extends the parsing.py import from calibre.ebooks.oeb.polish.utils to
#     ``PositionFinder, guess_type``;
#   * adds ``check_ids`` to the names that check/main.py imports from
#     calibre.ebooks.oeb.polish.check.parsing;
#   * adds ``errors += check_ids(container)`` to run_checks() in
#     check/main.py, right after the check_filenames() call.


class DuplicateId(BaseError):
    """Error describing an id that is used by more than one element of a file.

    Calling an instance with the container applies the fix described by
    ``INDIVIDUAL_FIX``: the id attribute is removed from every element that
    carries it except the first one found.
    """

    # NOTE(review): presumably tells consumers of the error to read
    # ``all_locations`` rather than a single location -- confirm in BaseError.
    has_multiple_locations = True

    INDIVIDUAL_FIX = _(
        'Remove the duplicate ids from all but the first element')

    def __init__(self, name, eid, locs):
        """Create the error for the id *eid* duplicated inside file *name*.

        :param name: name of the file (as known to the container) in which
            the duplicates were found.
        :param eid: the duplicated id value.
        :param locs: iterable of the source line numbers of the elements
            carrying *eid*; it is stored sorted in ``all_locations``.
        """
        BaseError.__init__(self, _('Duplicate id: %s') % eid, name)
        self.HELP = _(
            'The id {0} is present on more than one element in {1}. This is'
            ' not allowed. Remove the id from all but one of the elements').format(eid, name)
        # One (file name, line number, None) entry per offending element.
        self.all_locations = [(name, lnum, None) for lnum in sorted(locs)]
        self.duplicate_id = eid

    def __call__(self, container):
        """Strip the duplicated id from all but the first element using it.

        The file is re-read through ``container.parsed`` so the fix acts on
        the current tree. The file is always marked dirty, and True is always
        returned, exactly as before.
        """
        root = container.parsed(self.name)
        matches = [e for e in root.xpath('//*[@id]')
                   if e.get('id') == self.duplicate_id]
        for extra in matches[1:]:
            # Every selected element has an id attribute, so this cannot fail.
            del extra.attrib['id']
        container.dirty(self.name)
        return True


def check_ids(container):
    """Report ids that occur on more than one element within a single file.

    Only files whose media type in ``container.mime_map`` is one of
    ``OEB_DOCS``, or is the type ``guess_type`` returns for an ``.opf`` or
    ``.ncx`` file name, are examined.

    :param container: the book container to check; ``mime_map`` and
        ``parsed(name)`` are the only members used here.
    :return: list of :class:`DuplicateId` errors, one per duplicated id per
        file. Within one file the errors are ordered by the source line of
        the first element that uses the id (ties broken by the id itself).
    """
    errors = []
    mts = set(OEB_DOCS) | {guess_type('a.opf'), guess_type('a.ncx')}
    for name, mt in container.mime_map.iteritems():
        if mt not in mts:
            continue
        root = container.parsed(name)
        seen_ids = {}  # id -> source line of the first element using it
        dups = {}      # id -> source lines of all elements using it
        for elem in root.xpath('//*[@id]'):
            eid = elem.get('id')
            if eid in seen_ids:
                # Seed the list with the first occurrence, then add this one.
                dups.setdefault(eid, [seen_ids[eid]]).append(elem.sourceline)
            else:
                seen_ids[eid] = elem.sourceline
        # Dict iteration order is arbitrary; sort so the report is stable and
        # follows the position of each id's first occurrence.
        ordered = sorted(dups.iteritems(), key=lambda item: (item[1][0], item[0]))
        errors.extend(DuplicateId(name, eid, locs) for eid, locs in ordered)
    return errors