mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
When backing up metadata, automatically remove XML-invalid chars instead of erroring out
This commit is contained in:
parent
83810d655d
commit
621c641396
@ -21,7 +21,7 @@ from calibre.ebooks.metadata.book.base import Metadata
|
|||||||
from calibre.utils.date import parse_date, isoformat
|
from calibre.utils.date import parse_date, isoformat
|
||||||
from calibre.utils.localization import get_lang, canonicalize_lang
|
from calibre.utils.localization import get_lang, canonicalize_lang
|
||||||
from calibre import prints, guess_type
|
from calibre import prints, guess_type
|
||||||
from calibre.utils.cleantext import clean_ascii_chars
|
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
|
||||||
from calibre.utils.config import tweaks
|
from calibre.utils.config import tweaks
|
||||||
|
|
||||||
class Resource(object): # {{{
|
class Resource(object): # {{{
|
||||||
@ -1436,7 +1436,10 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
|
|||||||
attrib['name'] = name
|
attrib['name'] = name
|
||||||
if content:
|
if content:
|
||||||
attrib['content'] = content
|
attrib['content'] = content
|
||||||
|
try:
|
||||||
elem = metadata.makeelement(tag, attrib=attrib)
|
elem = metadata.makeelement(tag, attrib=attrib)
|
||||||
|
except ValueError:
|
||||||
|
elem = metadata.makeelement(tag, attrib={k:clean_xml_chars(v) for k, v in attrib.iteritems()})
|
||||||
elem.tail = '\n'+(' '*8)
|
elem.tail = '\n'+(' '*8)
|
||||||
if text:
|
if text:
|
||||||
try:
|
try:
|
||||||
|
@ -28,13 +28,14 @@ def clean_ascii_chars(txt, charlist=None):
|
|||||||
pat = re.compile(u'|'.join(map(unichr, charlist)))
|
pat = re.compile(u'|'.join(map(unichr, charlist)))
|
||||||
return pat.sub('', txt)
|
return pat.sub('', txt)
|
||||||
|
|
||||||
def clean_xml_chars(unicode_string):
|
def allowed(x):
|
||||||
def allowed(x):
|
|
||||||
x = ord(x)
|
x = ord(x)
|
||||||
return (0x0001 < x < 0xd7ff) or (0xe000 < x < 0xfffd) or (0x10000 < x < 0x10ffff)
|
return (x != 127 and (31 < x < 0xd7ff or x in (9, 10, 13))) or (0xe000 < x < 0xfffd) or (0x10000 < x < 0x10ffff)
|
||||||
|
|
||||||
|
def clean_xml_chars(unicode_string):
|
||||||
return u''.join(filter(allowed, unicode_string))
|
return u''.join(filter(allowed, unicode_string))
|
||||||
|
|
||||||
##
|
|
||||||
# Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html
|
# Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html
|
||||||
# Removes HTML or XML character references and entities from a text string.
|
# Removes HTML or XML character references and entities from a text string.
|
||||||
#
|
#
|
||||||
@ -60,6 +61,7 @@ def unescape(text, rm=False, rchar=u''):
|
|||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
pass
|
||||||
if rm:
|
if rm:
|
||||||
return rchar #replace by char
|
return rchar # replace by char
|
||||||
return text # leave as is
|
return text # leave as is
|
||||||
return re.sub("&#?\w+;", fixup, text)
|
return re.sub("&#?\w+;", fixup, text)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user