mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
When backing up metadata, automatically remove characters that are invalid in XML, instead of erroring out
This commit is contained in:
parent
83810d655d
commit
621c641396
@ -21,7 +21,7 @@ from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.utils.date import parse_date, isoformat
|
||||
from calibre.utils.localization import get_lang, canonicalize_lang
|
||||
from calibre import prints, guess_type
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
|
||||
from calibre.utils.config import tweaks
|
||||
|
||||
class Resource(object): # {{{
|
||||
@ -1436,7 +1436,10 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
|
||||
attrib['name'] = name
|
||||
if content:
|
||||
attrib['content'] = content
|
||||
elem = metadata.makeelement(tag, attrib=attrib)
|
||||
try:
|
||||
elem = metadata.makeelement(tag, attrib=attrib)
|
||||
except ValueError:
|
||||
elem = metadata.makeelement(tag, attrib={k:clean_xml_chars(v) for k, v in attrib.iteritems()})
|
||||
elem.tail = '\n'+(' '*8)
|
||||
if text:
|
||||
try:
|
||||
|
@ -28,13 +28,14 @@ def clean_ascii_chars(txt, charlist=None):
|
||||
pat = re.compile(u'|'.join(map(unichr, charlist)))
|
||||
return pat.sub('', txt)
|
||||
|
||||
def allowed(x):
    '''
    Return True if the single character ``x`` may be kept in XML text.

    The accepted code points are the XML 1.0 ``Char`` ranges: tab, line
    feed, carriage return, U+0020-U+D7FF, U+E000-U+FFFD and
    U+10000-U+10FFFF. The end points of every range are included. DEL
    (U+007F) is rejected as well, even though XML 1.0 permits it.

    :param x: a string of length one
    :return: True when the character is acceptable, False otherwise
    '''
    x = ord(x)
    if x == 127:
        # DEL is deliberately dropped along with the other control characters
        return False
    return (
        x in (9, 10, 13) or
        32 <= x <= 0xd7ff or
        0xe000 <= x <= 0xfffd or
        0x10000 <= x <= 0x10ffff
    )


def clean_xml_chars(unicode_string):
    '''
    Return ``unicode_string`` with every character rejected by
    :func:`allowed` removed.

    The relative order of the remaining characters is preserved; an empty
    input yields an empty string.

    :param unicode_string: the text to clean
    :return: a unicode string containing only the accepted characters
    '''
    # NOTE(review): on narrow Python 2 builds characters above U+FFFF are
    # presumably seen as surrogate pairs and therefore removed -- confirm.
    return u''.join(c for c in unicode_string if allowed(c))
|
||||
|
||||
##
|
||||
|
||||
# Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html
|
||||
# Removes HTML or XML character references and entities from a text string.
|
||||
#
|
||||
@ -60,6 +61,7 @@ def unescape(text, rm=False, rchar=u''):
|
||||
except KeyError:
|
||||
pass
|
||||
if rm:
|
||||
return rchar #replace by char
|
||||
return text # leave as is
|
||||
return rchar # replace by char
|
||||
return text # leave as is
|
||||
return re.sub("&#?\w+;", fixup, text)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user