mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Clean XML of invalid chars before parsing OPF
This commit is contained in:
parent
ebbefb5de3
commit
0b4ae4a23c
@@ -3,17 +3,17 @@
|
|||||||
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
from collections import namedtuple
|
|
||||||
from polyglot.builtins import map
|
|
||||||
|
|
||||||
|
from collections import namedtuple
|
||||||
|
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.ebooks.oeb.base import OPF
|
from calibre.ebooks.oeb.base import OPF
|
||||||
from calibre.ebooks.oeb.polish.utils import guess_type
|
from calibre.ebooks.oeb.polish.utils import guess_type
|
||||||
from calibre.spell import parse_lang_code
|
from calibre.spell import parse_lang_code
|
||||||
|
from calibre.utils.cleantext import clean_xml_chars
|
||||||
from calibre.utils.localization import lang_as_iso639_1
|
from calibre.utils.localization import lang_as_iso639_1
|
||||||
from calibre.utils.xml_parse import safe_xml_fromstring
|
from calibre.utils.xml_parse import safe_xml_fromstring
|
||||||
from polyglot.builtins import filter
|
from polyglot.builtins import filter, map
|
||||||
|
|
||||||
OPFVersion = namedtuple('OPFVersion', 'major minor patch')
|
OPFVersion = namedtuple('OPFVersion', 'major minor patch')
|
||||||
|
|
||||||
@@ -43,7 +43,7 @@ def parse_opf(stream_or_path):
|
|||||||
raise ValueError('Empty file: '+getattr(stream, 'name', 'stream'))
|
raise ValueError('Empty file: '+getattr(stream, 'name', 'stream'))
|
||||||
raw, encoding = xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True, assume_utf8=True)
|
raw, encoding = xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True, assume_utf8=True)
|
||||||
raw = raw[raw.find('<'):]
|
raw = raw[raw.find('<'):]
|
||||||
root = safe_xml_fromstring(raw)
|
root = safe_xml_fromstring(clean_xml_chars(raw))
|
||||||
if root is None:
|
if root is None:
|
||||||
raise ValueError('Not an OPF file')
|
raise ValueError('Not an OPF file')
|
||||||
return root
|
return root
|
||||||
|
Loading…
x
Reference in New Issue
Block a user