From 4e090c09b801377d7e3fa70c72567d544f6cd8af Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 23 Aug 2013 20:53:53 +0530 Subject: [PATCH] OEB Reader: Properly serialize OPF using utf-8 when reading metadata HTML Input: Handle malformed OPF files when converting. Fixes #1215924 [Mobipocket conversion fail with error TypeError: Invalid input object: NoneType](https://bugs.launchpad.net/calibre/+bug/1215924) When reading metadata from OPF files during conversion, the OPF was serialized as ASCII with no encoding declaration, which could break on some malformed OPF files. --- src/calibre/ebooks/metadata/opf2.py | 2 ++ src/calibre/ebooks/oeb/reader.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 25d4855980..6ae40d157d 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -554,6 +554,8 @@ class OPF(object): # {{{ resolve_entities=True, assume_utf8=True) raw = raw[raw.find('<'):] self.root = etree.fromstring(raw, self.PARSER) + if self.root is None: + raise ValueError('Not an OPF file') try: self.package_version = float(self.root.get('version', None)) except (AttributeError, TypeError, ValueError): diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index cb10b4ccce..b99f5d1087 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -127,7 +127,7 @@ class OEBReader(object): def _metadata_from_opf(self, opf): from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata - stream = cStringIO.StringIO(etree.tostring(opf)) + stream = cStringIO.StringIO(etree.tostring(opf, xml_declaration=True, encoding='utf-8')) mi = OPF(stream).to_book_metadata() if not mi.language: mi.language = get_lang().replace('_', '-')