mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit Book: Check Book: Add a check for "invalid" ids
Some sad-sack ebook services barf on "invalid" ids. See #1467765 (Private bug). They should just accept all valid HTML 5 ids instead of relying on the terrible epubcheck.
This commit is contained in:
parent
f37b91ad49
commit
f46d465475
@ -328,6 +328,34 @@ class DuplicateId(BaseError):
|
|||||||
container.dirty(self.name)
|
container.dirty(self.name)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
class InvalidId(BaseError):
    '''
    Error reported for an id that does not follow the conservative id
    syntax described in the help text. The individual fix swaps the id for a
    randomly generated one and updates links that referred to the old id.
    '''

    INDIVIDUAL_FIX = _(
        'Replace this id with a randomly generated valid id')

    def __init__(self, name, line, eid):
        '''
        :param name: Name of the file in which the id was found
        :param line: Source line of the element carrying the id
        :param eid: The offending id value
        '''
        message = _('Invalid id: %s') % eid
        BaseError.__init__(self, message, name, line)
        help_template = _(
            'The id {0} is not a valid id. IDs must start with a letter ([A-Za-z]) and may be'
            ' followed by any number of letters, digits ([0-9]), hyphens ("-"), underscores ("_")'
            ', colons (":"), and periods ("."). This is to ensure maximum compatibility'
            ' with a wide range of devices.')
        self.HELP = help_template.format(eid)
        self.invalid_id = eid

    def __call__(self, container):
        '''
        Apply the fix to the file this error was reported for.

        :return: True iff at least one element had its id replaced
        '''
        import uuid
        from calibre.ebooks.oeb.polish.replace import replace_ids

        # The leading letter keeps the generated id within the syntax that
        # the help text describes (ids must start with a letter).
        replacement = 'g' + uuid.uuid4().hex
        fixed_any = False
        for elem in container.parsed(self.name).xpath('//*[@id]'):
            if elem.get('id') != self.invalid_id:
                continue
            elem.set('id', replacement)
            fixed_any = True
            container.dirty(self.name)
        if not fixed_any:
            return False
        # Re-point links that referred to the old id at the new one
        replace_ids(container, {self.name:{self.invalid_id:replacement}})
        return True
|
||||||
|
|
||||||
class BareTextInBody(BaseError):
|
class BareTextInBody(BaseError):
|
||||||
|
|
||||||
INDIVIDUAL_FIX = _('Wrap the bare text in a p tag')
|
INDIVIDUAL_FIX = _('Wrap the bare text in a p tag')
|
||||||
@ -416,6 +444,8 @@ def check_filenames(container):
|
|||||||
errors.append(EscapedName(name))
|
errors.append(EscapedName(name))
|
||||||
return errors
|
return errors
|
||||||
|
|
||||||
|
# Conservative id syntax: an ASCII letter followed by any number of ASCII
# letters, digits, "_", ":", "." or "-". The pattern is anchored with \Z
# rather than $ because $ also matches just before a trailing newline, which
# would let an id ending in a newline character pass as valid.
valid_id = re.compile(r'^[a-zA-Z][a-zA-Z0-9_:.-]*\Z')
|
||||||
|
|
||||||
def check_ids(container):
|
def check_ids(container):
|
||||||
errors = []
|
errors = []
|
||||||
mts = set(OEB_DOCS) | {guess_type('a.opf'), guess_type('a.ncx')}
|
mts = set(OEB_DOCS) | {guess_type('a.opf'), guess_type('a.ncx')}
|
||||||
@ -432,6 +462,8 @@ def check_ids(container):
|
|||||||
dups[eid].append(elem.sourceline)
|
dups[eid].append(elem.sourceline)
|
||||||
else:
|
else:
|
||||||
seen_ids[eid] = elem.sourceline
|
seen_ids[eid] = elem.sourceline
|
||||||
|
if eid and valid_id.match(eid) is None:
|
||||||
|
errors.append(InvalidId(name, elem.sourceline, eid))
|
||||||
errors.extend(DuplicateId(name, eid, locs) for eid, locs in dups.iteritems())
|
errors.extend(DuplicateId(name, eid, locs) for eid, locs in dups.iteritems())
|
||||||
return errors
|
return errors
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import codecs, shutil, os, posixpath
|
import codecs, shutil, os, posixpath
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse, urlunparse
|
||||||
from collections import Counter, defaultdict
|
from collections import Counter, defaultdict
|
||||||
|
|
||||||
from calibre import sanitize_file_name_unicode
|
from calibre import sanitize_file_name_unicode
|
||||||
@ -46,6 +46,35 @@ class LinkReplacer(object):
|
|||||||
self.replaced = True
|
self.replaced = True
|
||||||
return href
|
return href
|
||||||
|
|
||||||
|
class IdReplacer(object):
    '''
    Callable that rewrites one URL at a time so that its fragment points at
    a renamed id.

    ``id_map`` maps file names to ``{old_id: new_id}`` mappings and ``base``
    is the name that URLs are resolved against. ``replaced`` becomes True as
    soon as a call returns a URL that differs from its input.
    '''

    def __init__(self, base, container, id_map):
        self.base = base
        self.container = container
        self.id_map = id_map
        self.replaced = False

    def __call__(self, url):
        '''
        Return ``url`` with its fragment remapped according to ``id_map``,
        or ``url`` unchanged when no mapping applies.
        '''
        if url and url.startswith('#'):
            # Fragment-only link: look the id up in the map for the base file
            old_id = url[1:]
            new_id = self.id_map.get(self.base, {}).get(old_id)
            if new_id is not None and new_id != old_id:
                self.replaced = True
                return '#' + new_id
            return url

        # Link to some other file: find the ids renamed in the target
        target = self.container.href_to_name(url, self.base)
        ids_for_target = self.id_map.get(target) if target else None
        if ids_for_target is None:
            return url

        parts = urlparse(url)
        new_fragment = ids_for_target.get(parts.fragment)
        if new_fragment is None:
            return url

        rewritten = urlunparse(parts._replace(fragment=new_fragment))
        if rewritten != url:
            self.replaced = True
        return rewritten
|
||||||
|
|
||||||
class LinkRebaser(object):
|
class LinkRebaser(object):
|
||||||
|
|
||||||
def __init__(self, container, old_name, new_name):
|
def __init__(self, container, old_name, new_name):
|
||||||
@ -88,6 +117,22 @@ def replace_links(container, link_map, frag_map=lambda name, frag:frag, replace_
|
|||||||
repl = LinkReplacer(name, container, link_map, frag_map)
|
repl = LinkReplacer(name, container, link_map, frag_map)
|
||||||
container.replace_links(name, repl)
|
container.replace_links(name, repl)
|
||||||
|
|
||||||
|
def replace_ids(container, id_map):
    '''
    Update every link in the container that refers to an id that was changed.

    Each name in ``container.mime_map`` is passed through
    ``container.replace_links`` with a fresh :class:`IdReplacer`.

    :param id_map: A mapping of {name:id_map} where each id_map is a mapping of {old_id:new_id}
    :return: True iff at least one link was changed

    '''
    any_replaced = False
    for name, _media_type in container.mime_map.iteritems():
        replacer = IdReplacer(name, container, id_map)
        container.replace_links(name, replacer)
        # replacer.replaced is a plain bool, so the result stays a bool
        any_replaced = any_replaced or replacer.replaced
    return any_replaced
|
||||||
|
|
||||||
def smarten_punctuation(container, report):
|
def smarten_punctuation(container, report):
|
||||||
from calibre.ebooks.conversion.preprocess import smarten_punctuation
|
from calibre.ebooks.conversion.preprocess import smarten_punctuation
|
||||||
smartened = False
|
smartened = False
|
||||||
|
Loading…
x
Reference in New Issue
Block a user