diff --git a/src/calibre/ebooks/chardet/__init__.py b/src/calibre/ebooks/chardet/__init__.py index 03d8fc2ea0..8ad41c524f 100644 --- a/src/calibre/ebooks/chardet/__init__.py +++ b/src/calibre/ebooks/chardet/__init__.py @@ -30,7 +30,7 @@ def detect(aBuf): # Added by Kovid ENCODING_PATS = [ - re.compile(r'<[^<>]+encoding=[\'"](.*?)[\'"][^<>]*>', re.IGNORECASE), + re.compile(r'<\?[^<>]+encoding=[\'"](.*?)[\'"][^<>]*>', re.IGNORECASE), re.compile(r'', re.IGNORECASE) ] ENTITY_PATTERN = re.compile(r'&(\S+?);') diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 2bbd13d2ce..2d2703c799 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -273,10 +273,11 @@ class Spine(ResourceCollection): for itemref in itemrefs: idref = itemref.get('idref', None) if idref is not None: - r = Spine.Item(s.manifest.id_for_path, - s.manifest.path_for_id(idref), is_path=True) - r.is_linear = itemref.get('linear', 'yes') == 'yes' - s.append(r) + path = s.manifest.path_for_id(idref) + if path: + r = Spine.Item(s.manifest.id_for_path, path, is_path=True) + r.is_linear = itemref.get('linear', 'yes') == 'yes' + s.append(r) return s @staticmethod @@ -439,7 +440,7 @@ class OPF(object): stream = open(stream, 'rb') self.basedir = self.base_dir = basedir raw, self.encoding = xml_to_unicode(stream.read(), strip_encoding_pats=True, resolve_entities=True) - + raw = raw[raw.find('<'):] self.root = etree.fromstring(raw, self.PARSER) self.metadata = self.metadata_path(self.root) if not self.metadata: @@ -983,4 +984,4 @@ def main(args=sys.argv): if __name__ == '__main__': - sys.exit(test()) \ No newline at end of file + sys.exit(test())