mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit book: Add checks for duplicate ids in HTML/OPF/NCX files
This commit is contained in:
parent
e2647b735f
commit
4cf26dd397
@ -13,7 +13,7 @@ from calibre.ebooks.oeb.polish.utils import guess_type
|
||||
from calibre.ebooks.oeb.polish.cover import is_raster_image
|
||||
from calibre.ebooks.oeb.polish.check.base import run_checkers
|
||||
from calibre.ebooks.oeb.polish.check.parsing import (
|
||||
check_filenames, check_xml_parsing, check_css_parsing, fix_style_tag, check_html_size)
|
||||
check_filenames, check_xml_parsing, check_css_parsing, fix_style_tag, check_html_size, check_ids)
|
||||
from calibre.ebooks.oeb.polish.check.images import check_raster_images
|
||||
from calibre.ebooks.oeb.polish.check.links import check_links, check_mimetypes
|
||||
from calibre.ebooks.oeb.polish.check.fonts import check_fonts
|
||||
@ -60,6 +60,7 @@ def run_checks(container):
|
||||
errors += check_links(container)
|
||||
errors += check_fonts(container)
|
||||
errors += check_filenames(container)
|
||||
errors += check_ids(container)
|
||||
|
||||
return errors
|
||||
|
||||
|
@ -14,7 +14,7 @@ import cssutils
|
||||
from calibre import force_unicode, human_readable, prepare_string_for_xml
|
||||
from calibre.ebooks.html_entities import html5_entities
|
||||
from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style as fix_style_tag
|
||||
from calibre.ebooks.oeb.polish.utils import PositionFinder
|
||||
from calibre.ebooks.oeb.polish.utils import PositionFinder, guess_type
|
||||
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, ERROR, INFO
|
||||
from calibre.ebooks.oeb.base import OEB_DOCS, XHTML_NS, urlquote, URL_SAFE
|
||||
|
||||
@ -237,6 +237,28 @@ class CSSError(BaseError):
|
||||
|
||||
pos_pats = (re.compile(r'\[(\d+):(\d+)'), re.compile(r'(\d+), (\d+)\)'))
|
||||
|
||||
class DuplicateId(BaseError):

    ''' Error raised for an id value that appears on more than one element
    of a single file. Calling the error object performs the automatic fix:
    the id attribute is dropped from every matching element except the
    first one found in document order. '''

    # One error object carries several (name, line, column) locations
    has_multiple_locations = True
    INDIVIDUAL_FIX = _(
        'Remove the duplicate ids from all but the first element')

    def __init__(self, name, eid, locs):
        '''
        :param name: name of the file containing the duplicated id
        :param eid: the id value that occurs more than once
        :param locs: iterable of line numbers at which the id occurs
        '''
        BaseError.__init__(self, _('Duplicate id: %s') % eid, name)
        help_template = _(
            'The id {0} is present on more than one element in {1}. This is'
            ' not allowed. Remove the id from all but one of the elements')
        self.HELP = help_template.format(eid, name)
        # Locations are reported in ascending line order; no column is known
        locations = []
        for line_number in sorted(locs):
            locations.append((name, line_number, None))
        self.all_locations = locations
        self.duplicate_id = eid

    def __call__(self, container):
        ''' Strip the duplicated id from all but the first element carrying
        it, then flag the file as dirty on the container. Always returns
        True. '''
        # NOTE(review): self.name is presumably stored by BaseError.__init__
        root = container.parsed(self.name)
        first_kept = False
        for node in root.xpath('//*[@id]'):
            if node.get('id') != self.duplicate_id:
                continue
            if not first_kept:
                # The first occurrence keeps its id
                first_kept = True
                continue
            node.attrib.pop('id')
        container.dirty(self.name)
        return True
|
||||
|
||||
class ErrorHandler(object):
|
||||
|
||||
' Replacement logger to get useful error/warning info out of cssutils during parsing '
|
||||
@ -289,3 +311,22 @@ def check_filenames(container):
|
||||
if urlquote(name) != name:
|
||||
errors.append(EscapedName(name))
|
||||
return errors
|
||||
|
||||
def check_ids(container):
    ''' Look for id values used on more than one element within the same
    file, examining every file whose mime type is one of OEB_DOCS or the
    type guessed for .opf / .ncx names.

    :param container: object providing ``mime_map`` (name -> mime type) and
        ``parsed(name)`` (returns a tree root supporting ``xpath``)
    :return: list of DuplicateId errors, one per duplicated id per file
    '''
    checked_types = set(OEB_DOCS)
    checked_types.add(guess_type('a.opf'))
    checked_types.add(guess_type('a.ncx'))

    found = []
    for name, mt in container.mime_map.iteritems():
        if mt not in checked_types:
            continue
        first_line = {}  # id -> source line of its first occurrence
        repeats = {}  # id -> source lines of all occurrences (only ids seen 2+ times)
        for node in container.parsed(name).xpath('//*[@id]'):
            node_id = node.get('id')
            if node_id not in first_line:
                first_line[node_id] = node.sourceline
                continue
            if node_id not in repeats:
                # Second sighting: seed the list with the first occurrence
                repeats[node_id] = [first_line[node_id]]
            repeats[node_id].append(node.sourceline)
        for node_id, lines in repeats.iteritems():
            found.append(DuplicateId(name, node_id, lines))
    return found
|
||||
|
Loading…
x
Reference in New Issue
Block a user