mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit book: Add checks for duplicate ids in HTML/OPF/NCX files
This commit is contained in:
parent
e2647b735f
commit
4cf26dd397
@ -13,7 +13,7 @@ from calibre.ebooks.oeb.polish.utils import guess_type
|
|||||||
from calibre.ebooks.oeb.polish.cover import is_raster_image
|
from calibre.ebooks.oeb.polish.cover import is_raster_image
|
||||||
from calibre.ebooks.oeb.polish.check.base import run_checkers
|
from calibre.ebooks.oeb.polish.check.base import run_checkers
|
||||||
from calibre.ebooks.oeb.polish.check.parsing import (
|
from calibre.ebooks.oeb.polish.check.parsing import (
|
||||||
check_filenames, check_xml_parsing, check_css_parsing, fix_style_tag, check_html_size)
|
check_filenames, check_xml_parsing, check_css_parsing, fix_style_tag, check_html_size, check_ids)
|
||||||
from calibre.ebooks.oeb.polish.check.images import check_raster_images
|
from calibre.ebooks.oeb.polish.check.images import check_raster_images
|
||||||
from calibre.ebooks.oeb.polish.check.links import check_links, check_mimetypes
|
from calibre.ebooks.oeb.polish.check.links import check_links, check_mimetypes
|
||||||
from calibre.ebooks.oeb.polish.check.fonts import check_fonts
|
from calibre.ebooks.oeb.polish.check.fonts import check_fonts
|
||||||
@ -60,6 +60,7 @@ def run_checks(container):
|
|||||||
errors += check_links(container)
|
errors += check_links(container)
|
||||||
errors += check_fonts(container)
|
errors += check_fonts(container)
|
||||||
errors += check_filenames(container)
|
errors += check_filenames(container)
|
||||||
|
errors += check_ids(container)
|
||||||
|
|
||||||
return errors
|
return errors
|
||||||
|
|
||||||
|
@ -14,7 +14,7 @@ import cssutils
|
|||||||
from calibre import force_unicode, human_readable, prepare_string_for_xml
|
from calibre import force_unicode, human_readable, prepare_string_for_xml
|
||||||
from calibre.ebooks.html_entities import html5_entities
|
from calibre.ebooks.html_entities import html5_entities
|
||||||
from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style as fix_style_tag
|
from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style as fix_style_tag
|
||||||
from calibre.ebooks.oeb.polish.utils import PositionFinder
|
from calibre.ebooks.oeb.polish.utils import PositionFinder, guess_type
|
||||||
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, ERROR, INFO
|
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, ERROR, INFO
|
||||||
from calibre.ebooks.oeb.base import OEB_DOCS, XHTML_NS, urlquote, URL_SAFE
|
from calibre.ebooks.oeb.base import OEB_DOCS, XHTML_NS, urlquote, URL_SAFE
|
||||||
|
|
||||||
@ -237,6 +237,28 @@ class CSSError(BaseError):
|
|||||||
|
|
||||||
pos_pats = (re.compile(r'\[(\d+):(\d+)'), re.compile(r'(\d+), (\d+)\)'))
|
pos_pats = (re.compile(r'\[(\d+):(\d+)'), re.compile(r'(\d+), (\d+)\)'))
|
||||||
|
|
||||||
|
class DuplicateId(BaseError):
    ''' Error reported when the same id value occurs on more than one element of a single file '''

    # Each occurrence of the id is listed separately in all_locations
    has_multiple_locations = True
    INDIVIDUAL_FIX = _(
        'Remove the duplicate ids from all but the first element')

    def __init__(self, name, eid, locs):
        '''
        :param name: Name of the file in which the duplicated id was found
        :param eid: The id value that occurs more than once
        :param locs: Iterable of line numbers, one per element carrying eid
        '''
        message = _('Duplicate id: %s') % eid
        BaseError.__init__(self, message, name)
        help_template = _(
            'The id {0} is present on more than one element in {1}. This is'
            ' not allowed. Remove the id from all but one of the elements')
        self.HELP = help_template.format(eid, name)
        # One (file name, line number, column) entry per occurrence, in
        # ascending line order. No column information is recorded.
        occurrences = []
        for line_number in sorted(locs):
            occurrences.append((name, line_number, None))
        self.all_locations = occurrences
        self.duplicate_id = eid

    def __call__(self, container):
        '''
        Apply the automatic fix: strip the id attribute from every element
        carrying the duplicated id except the first one found, then mark the
        file as dirty in the container. Always returns True.
        '''
        # NOTE(review): relies on self.name being set by BaseError.__init__,
        # which is not visible here -- confirm
        root = container.parsed(self.name)
        first_kept = False
        for elem in root.xpath('//*[@id]'):
            if elem.get('id') != self.duplicate_id:
                continue
            if not first_kept:
                # The first matching element keeps its id
                first_kept = True
                continue
            elem.attrib.pop('id')
        container.dirty(self.name)
        return True
|
||||||
|
|
||||||
class ErrorHandler(object):
|
class ErrorHandler(object):
|
||||||
|
|
||||||
' Replacement logger to get useful error/warning info out of cssutils during parsing '
|
' Replacement logger to get useful error/warning info out of cssutils during parsing '
|
||||||
@ -289,3 +311,22 @@ def check_filenames(container):
|
|||||||
if urlquote(name) != name:
|
if urlquote(name) != name:
|
||||||
errors.append(EscapedName(name))
|
errors.append(EscapedName(name))
|
||||||
return errors
|
return errors
|
||||||
|
|
||||||
|
def check_ids(container):
    '''
    Look for id values that are used on more than one element within a
    single file. Only files whose media type is in OEB_DOCS, or is the type
    guessed for .opf or .ncx files, are examined.

    :param container: The book container; its mime_map and parsed() are used
    :return: A list with one DuplicateId error per duplicated id per file
    '''
    errors = []
    checked_types = set(OEB_DOCS)
    checked_types.add(guess_type('a.opf'))
    checked_types.add(guess_type('a.ncx'))

    for name, mt in container.mime_map.iteritems():
        if mt not in checked_types:
            continue
        first_line = {}  # id -> source line of the first element using it
        dup_lines = {}   # id -> source lines of every element using it
        for elem in container.parsed(name).xpath('//*[@id]'):
            eid = elem.get('id')
            if eid not in first_line:
                first_line[eid] = elem.sourceline
                continue
            # On the first repeat, seed the list with the line of the
            # original occurrence so that every location gets reported
            dup_lines.setdefault(eid, [first_line[eid]]).append(elem.sourceline)
        for eid, locs in dup_lines.iteritems():
            errors.append(DuplicateId(name, eid, locs))
    return errors
|
||||||
|
Loading…
x
Reference in New Issue
Block a user