OEB Reader: Properly serialize OPF using utf-8 when reading metadata

HTML Input: Handle malformed OPF files when converting.  Fixes #1215924 [Mobipocket conversion fail with error TypeError: Invalid input object: NoneType](https://bugs.launchpad.net/calibre/+bug/1215924)

When reading metadata from OPF files during conversion, the OPF was
serialized as ASCII with no encoding declaration, which could break for
some malformed OPF files.
This commit is contained in:
Kovid Goyal 2013-08-23 20:53:53 +05:30
parent f43f0bba89
commit 4e090c09b8
2 changed files with 3 additions and 1 deletion

View File

@ -554,6 +554,8 @@ class OPF(object): # {{{
resolve_entities=True, assume_utf8=True) resolve_entities=True, assume_utf8=True)
raw = raw[raw.find('<'):] raw = raw[raw.find('<'):]
self.root = etree.fromstring(raw, self.PARSER) self.root = etree.fromstring(raw, self.PARSER)
if self.root is None:
raise ValueError('Not an OPF file')
try: try:
self.package_version = float(self.root.get('version', None)) self.package_version = float(self.root.get('version', None))
except (AttributeError, TypeError, ValueError): except (AttributeError, TypeError, ValueError):

View File

@ -127,7 +127,7 @@ class OEBReader(object):
def _metadata_from_opf(self, opf): def _metadata_from_opf(self, opf):
from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
stream = cStringIO.StringIO(etree.tostring(opf)) stream = cStringIO.StringIO(etree.tostring(opf, xml_declaration=True, encoding='utf-8'))
mi = OPF(stream).to_book_metadata() mi = OPF(stream).to_book_metadata()
if not mi.language: if not mi.language:
mi.language = get_lang().replace('_', '-') mi.language = get_lang().replace('_', '-')