From efcd33c78db4da975880afafcaf0b56b84b53ac7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 20 May 2014 07:51:55 +0530 Subject: [PATCH] Edit book: Fix Check Book failing if a binary file such as an image or font is mislabelled in the OPF as being a text file. Fixes #1320977 [Private bug](https://bugs.launchpad.net/calibre/+bug/1320977) --- .../ebooks/oeb/polish/check/parsing.py | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/oeb/polish/check/parsing.py b/src/calibre/ebooks/oeb/polish/check/parsing.py index 2b28c0b7cc..e27345dc2d 100644 --- a/src/calibre/ebooks/oeb/polish/check/parsing.py +++ b/src/calibre/ebooks/oeb/polish/check/parsing.py @@ -25,6 +25,19 @@ ALL_ENTITIES = HTML_ENTITTIES | XML_ENTITIES replace_pat = re.compile('&(%s);' % '|'.join(re.escape(x) for x in sorted((HTML_ENTITTIES - XML_ENTITIES)))) mismatch_pat = re.compile('tag mismatch:.+?line (\d+).+?line \d+') +class DecodeError(BaseError): + + is_parsing_error = True + + HELP = _('A decoding error means that the contents of the file could not' + ' be interpreted as text. 
This usually happens if the file has' + ' an incorrect character encoding declaration or if the file is actually' + ' a binary file, like an image or font that is mislabelled with' + ' an incorrect media type in the OPF.') + + def __init__(self, name): + BaseError.__init__(self, _('Parsing of %s failed, could not decode') % name, name) + class XMLParseError(BaseError): is_parsing_error = True @@ -200,6 +213,8 @@ def check_xml_parsing(name, mt, raw): try: root = fromstring(eraw, parser=parser) + except UnicodeDecodeError: + return errors + [DecodeError(name)] except XMLSyntaxError as err: try: line, col = err.position @@ -312,7 +327,10 @@ def check_css_parsing(name, raw, line_offset=0, is_declaration=False): if is_declaration: parser.parseStyle(raw, validate=True) else: - parser.parseString(raw, validate=True) + try: + parser.parseString(raw, validate=True) + except UnicodeDecodeError: + return [DecodeError(name)] for err in log.errors: err.line += line_offset return log.errors