diff --git a/src/calibre/ebooks/metadata/opf.py b/src/calibre/ebooks/metadata/opf.py
new file mode 100644
index 0000000000..8854de914e
--- /dev/null
+++ b/src/calibre/ebooks/metadata/opf.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+# License: GPLv3 Copyright: 2016, Kovid Goyal
+
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+from calibre.ebooks.metadata import parse_opf_version
+from calibre.ebooks.metadata.opf2 import OPF
+from calibre.ebooks.metadata.utils import parse_opf
+
+class DummyFile(object):
+    ''' Minimal file-like wrapper exposing only read() over in-memory data. '''
+
+    def __init__(self, raw):
+        self.raw = raw
+
+    def read(self):
+        # Always hands back the complete payload; no read position is kept.
+        return self.raw
+
+def get_metadata(stream):
+    '''
+    Read metadata from an OPF document.
+
+    :param stream: The raw OPF data as bytes, or any value accepted by
+                   parse_opf().
+    :return: A 4-tuple of (opf.to_book_metadata(), the version parsed from the
+             root element's 'version' attribute, opf.raster_cover, opf.first_spine_item()).
+    '''
+    if isinstance(stream, bytes):
+        # parse_opf() treats anything without read() as a path to open, so
+        # raw bytes have to be wrapped before being handed over.
+        stream = DummyFile(stream)
+    root = parse_opf(stream)
+    ver = parse_opf_version(root.get('version'))
+    opf = OPF(None, preparsed_opf=root, read_toc=False)
+    return opf.to_book_metadata(), ver, opf.raster_cover, opf.first_spine_item()
diff --git a/src/calibre/ebooks/metadata/utils.py b/src/calibre/ebooks/metadata/utils.py
new file mode 100644
index 0000000000..7c2d9d1c3d
--- /dev/null
+++ b/src/calibre/ebooks/metadata/utils.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+# License: GPLv3 Copyright: 2016, Kovid Goyal
+
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+from calibre.ebooks.chardet import xml_to_unicode
+from lxml import etree
+
+# Shared parser: recover from malformed markup and never access the network.
+PARSER = etree.XMLParser(recover=True, no_network=True)
+
+def parse_opf(stream_or_path):
+    '''
+    Parse an OPF document and return its root element.
+
+    :param stream_or_path: An object with a read() method, or a path that is
+                           opened in binary mode.
+    :return: The root element produced by lxml.
+    :raises ValueError: If no data could be read, or if parsing yields no
+                        root element.
+    '''
+    if hasattr(stream_or_path, 'read'):
+        # Caller-owned stream: read it, but leave closing to the caller.
+        stream = stream_or_path
+        raw = stream.read()
+    else:
+        # We opened this file ourselves, so release the handle when done.
+        with open(stream_or_path, 'rb') as stream:
+            raw = stream.read()
+    if not raw:
+        raise ValueError('Empty file: '+getattr(stream, 'name', 'stream'))
+    raw, encoding = xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True, assume_utf8=True)
+    # Discard anything ahead of the first '<' before parsing.
+    raw = raw[raw.find('<'):]
+    root = etree.fromstring(raw, PARSER)
+    if root is None:
+        raise ValueError('Not an OPF file')
+    return root