From 5d9d21572eb4d1830c947425a27a7ff2cc570c5c Mon Sep 17 00:00:00 2001 From: David Li Date: Sun, 28 Jul 2024 16:00:00 +0900 Subject: [PATCH 1/2] Fix import of manga epubs --- src/calibre/ebooks/metadata/epub.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py index 1f3141b113..612ea0ec88 100644 --- a/src/calibre/ebooks/metadata/epub.py +++ b/src/calibre/ebooks/metadata/epub.py @@ -21,6 +21,9 @@ from calibre.utils.localunzip import LocalZipFile from calibre.utils.xml_parse import safe_xml_fromstring from calibre.utils.zipfile import BadZipfile, ZipFile, safe_replace +import PIL +from PIL import Image as PILImage + class EPubException(Exception): pass @@ -36,7 +39,7 @@ class ContainerException(OCFException): class Container(dict): - def __init__(self, stream=None): + def __init__(self, stream=None, archive=None): if not stream: return container = safe_xml_fromstring(stream.read()) @@ -49,6 +52,15 @@ class Container(dict): mt, fp = rootfile.get('media-type'), rootfile.get('full-path') if not mt or not fp: raise EPubException(" element malformed") + + if archive: + try: + archive.getinfo(fp) + except KeyError: + # Some Kobo epubs have multiple rootfile entries, but only + # one exists. Ignore the ones that don't exist. + continue + self[mt] = fp @@ -95,7 +107,7 @@ class OCFReader(OCF): try: with closing(self.open(OCF.CONTAINER_PATH)) as f: - self.container = Container(f) + self.container = Container(f, self.archive) except KeyError: raise EPubException("missing OCF container.xml file") self.opf_path = self.container[OPF.MIMETYPE] @@ -192,6 +204,17 @@ def render_cover(cpage, zf, reader=None): cpage = os.path.join(tdir, cpage) if not os.path.exists(cpage): return + + # In the case of manga, the first spine item may be an image + # already, so treat it as a raster cover + try: + PILImage.open(cpage) + except PIL.UnidentifiedImageError: + pass + else: + with open(cpage, "rb") as source: + return source.read() + return render_html_svg_workaround(cpage, default_log, root=tdir) From f1827e40d137ae80da59135461c1ae81fbd17cf6 Mon Sep 17 00:00:00 2001 From: David Li Date: Sun, 4 Aug 2024 17:29:02 +0900 Subject: [PATCH 2/2] Feedback --- src/calibre/ebooks/metadata/epub.py | 48 +++++++++++++++++------------ 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py index 612ea0ec88..2a7e0a3db6 100644 --- a/src/calibre/ebooks/metadata/epub.py +++ b/src/calibre/ebooks/metadata/epub.py @@ -17,14 +17,11 @@ from calibre.ebooks.metadata.opf import get_metadata as get_metadata_from_opf from calibre.ebooks.metadata.opf import set_metadata as set_metadata_opf from calibre.ebooks.metadata.opf2 import OPF from calibre.ptempfile import TemporaryDirectory +from calibre.utils.imghdr import what as what_image_type from calibre.utils.localunzip import LocalZipFile from calibre.utils.xml_parse import safe_xml_fromstring from calibre.utils.zipfile import BadZipfile, ZipFile, safe_replace -import PIL -from PIL import Image as PILImage - - class EPubException(Exception): pass @@ -39,7 +36,7 @@ class ContainerException(OCFException): class Container(dict): - def __init__(self, stream=None, archive=None): + def __init__(self, stream=None, file_exists=None): if not stream: return container = safe_xml_fromstring(stream.read()) @@ -53,13 +50,10 @@ class Container(dict): if not mt or not fp: raise EPubException(" element malformed") - if archive: - try: - archive.getinfo(fp) - except KeyError: - # Some Kobo epubs have multiple rootfile entries, but only - # one exists. Ignore the ones that don't exist. - continue + if file_exists and not file_exists(fp): + # Some Kobo epubs have multiple rootfile entries, but only one + # exists. Ignore the ones that don't exist. + continue self[mt] = fp @@ -107,7 +101,7 @@ class OCFReader(OCF): try: with closing(self.open(OCF.CONTAINER_PATH)) as f: - self.container = Container(f, self.archive) + self.container = Container(f, self.exists) except KeyError: raise EPubException("missing OCF container.xml file") self.opf_path = self.container[OPF.MIMETYPE] @@ -137,6 +131,14 @@ class OCFReader(OCF): def read_bytes(self, name): return self.open(name).read() + def exists(self, path): + try: + self.open(path) + return True + except OSError: + return False + + class OCFZipReader(OCFReader): @@ -165,6 +167,13 @@ class OCFZipReader(OCFReader): def read_bytes(self, name): return self.archive.read(name) + def exists(self, path): + try: + self.archive.getinfo(path) + return True + except KeyError: + return False + def get_zip_reader(stream, root=None): try: @@ -206,12 +215,13 @@ def render_cover(cpage, zf, reader=None): return # In the case of manga, the first spine item may be an image - # already, so treat it as a raster cover - try: - PILImage.open(cpage) - except PIL.UnidentifiedImageError: - pass - else: + # already, so treat it as a raster cover. + file_format = what_image_type(cpage) + if file_format == "jpeg": + # Only JPEG is allowed since elsewhere we assume raster covers + # are JPEG. In principle we could convert other image formats + # but this is already an out-of-spec case that happens to + # arise in books from some stores. with open(cpage, "rb") as source: return source.read()