mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit Book: Check Book: Add a check for "invalid" ids
Some sad-sack ebook services barf on "invalid" ids. See #1467765 (Private bug). They should just accept all valid HTML 5 ids instead of relying on the terrible epubcheck.
This commit is contained in:
parent
f37b91ad49
commit
f46d465475
@ -328,6 +328,34 @@ class DuplicateId(BaseError):
|
||||
container.dirty(self.name)
|
||||
return True
|
||||
|
||||
class InvalidId(BaseError):
    '''
    Error reported for an id attribute that does not match the restricted
    id syntax described in HELP. Calling the instance on a container applies
    the individual fix: the offending id is swapped for a generated one.
    '''

    INDIVIDUAL_FIX = _(
        'Replace this id with a randomly generated valid id')

    def __init__(self, name, line, eid):
        '''
        :param name: Forwarded to BaseError; check_ids passes the name of the file being checked
        :param line: Forwarded to BaseError; check_ids passes the sourceline of the offending element
        :param eid: The id value that failed validation
        '''
        message = _('Invalid id: %s') % eid
        BaseError.__init__(self, message, name, line)
        help_template = _(
            'The id {0} is not a valid id. IDs must start with a letter ([A-Za-z]) and may be'
            ' followed by any number of letters, digits ([0-9]), hyphens ("-"), underscores ("_")'
            ', colons (":"), and periods ("."). This is to ensure maximum compatibility'
            ' with a wide range of devices.')
        self.HELP = help_template.format(eid)
        self.invalid_id = eid

    def __call__(self, container):
        '''
        Give every element of this file whose id equals the invalid id a newly
        generated id, then rewrite the links that pointed at the old id.

        :return: True iff at least one element was changed
        '''
        import uuid
        from calibre.ebooks.oeb.polish.replace import replace_ids
        # A hex digest may begin with a digit, so a letter is prepended
        newid = 'g' + uuid.uuid4().hex
        root = container.parsed(self.name)
        matches = [elem for elem in root.xpath('//*[@id]') if elem.get('id') == self.invalid_id]
        for elem in matches:
            elem.set('id', newid)
            container.dirty(self.name)
        if not matches:
            return False
        replace_ids(container, {self.name:{self.invalid_id:newid}})
        return True
|
||||
|
||||
class BareTextInBody(BaseError):
|
||||
|
||||
INDIVIDUAL_FIX = _('Wrap the bare text in a p tag')
|
||||
@ -416,6 +444,8 @@ def check_filenames(container):
|
||||
errors.append(EscapedName(name))
|
||||
return errors
|
||||
|
||||
# Restricted id syntax: an ASCII letter followed by any number of ASCII
# letters, digits, "_", ":", "." or "-".
# The pattern is anchored with \Z rather than $ because $ also matches just
# before a trailing newline, which would let an id such as "abc\n" pass.
valid_id = re.compile(r'^[a-zA-Z][a-zA-Z0-9_:.-]*\Z')
|
||||
|
||||
def check_ids(container):
|
||||
errors = []
|
||||
mts = set(OEB_DOCS) | {guess_type('a.opf'), guess_type('a.ncx')}
|
||||
@ -432,6 +462,8 @@ def check_ids(container):
|
||||
dups[eid].append(elem.sourceline)
|
||||
else:
|
||||
seen_ids[eid] = elem.sourceline
|
||||
if eid and valid_id.match(eid) is None:
|
||||
errors.append(InvalidId(name, elem.sourceline, eid))
|
||||
errors.extend(DuplicateId(name, eid, locs) for eid, locs in dups.iteritems())
|
||||
return errors
|
||||
|
||||
|
@ -8,7 +8,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import codecs, shutil, os, posixpath
|
||||
from urlparse import urlparse
|
||||
from urlparse import urlparse, urlunparse
|
||||
from collections import Counter, defaultdict
|
||||
|
||||
from calibre import sanitize_file_name_unicode
|
||||
@ -46,6 +46,35 @@ class LinkReplacer(object):
|
||||
self.replaced = True
|
||||
return href
|
||||
|
||||
class IdReplacer(object):
    '''
    Callable that maps a URL to the same URL with its fragment renamed
    according to id_map, a mapping of {name:{old_id:new_id}}. The replaced
    attribute becomes True once any URL has actually been altered.
    '''

    def __init__(self, base, container, id_map):
        self.base = base
        self.container = container
        self.id_map = id_map
        self.replaced = False

    def __call__(self, url):
        # Bare fragment: look the id up in the map for the base file itself
        if url and url.startswith('#'):
            old_id = url[1:]
            new_id = self.id_map.get(self.base, {}).get(old_id)
            if new_id is not None and new_id != old_id:
                self.replaced = True
                return '#' + new_id
            return url

        # Otherwise resolve the URL to a name and use that name's id map
        target = self.container.href_to_name(url, self.base)
        file_ids = self.id_map.get(target) if target else None
        if file_ids is None:
            return url

        parts = urlparse(url)
        new_fragment = file_ids.get(parts.fragment)
        if new_fragment is None:
            return url

        rewritten = urlunparse(parts._replace(fragment=new_fragment))
        if rewritten != url:
            self.replaced = True
        return rewritten
|
||||
|
||||
class LinkRebaser(object):
|
||||
|
||||
def __init__(self, container, old_name, new_name):
|
||||
@ -88,6 +117,22 @@ def replace_links(container, link_map, frag_map=lambda name, frag:frag, replace_
|
||||
repl = LinkReplacer(name, container, link_map, frag_map)
|
||||
container.replace_links(name, repl)
|
||||
|
||||
def replace_ids(container, id_map):
    '''
    Rewrite the links in every file of the container so that links to
    renamed ids use the new ids.

    :param id_map: A mapping of {name:id_map} where each id_map is a mapping of {old_id:new_id}
    :return: True iff at least one link was changed

    '''
    any_replaced = False
    for name in container.mime_map:
        # A separate replacer per file, constructed with that file's name as its base
        replacer = IdReplacer(name, container, id_map)
        container.replace_links(name, replacer)
        any_replaced = any_replaced or replacer.replaced
    return any_replaced
|
||||
|
||||
def smarten_punctuation(container, report):
|
||||
from calibre.ebooks.conversion.preprocess import smarten_punctuation
|
||||
smartened = False
|
||||
|
Loading…
x
Reference in New Issue
Block a user