From 3df15e222a951e683b10f1fe37f12c0a1d6a4cb8 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 10 Mar 2020 13:37:21 +0530
Subject: [PATCH] MOBI Input: Don't auto-convert images in PNG/GIF formats to
 JPEG

---
 src/calibre/ebooks/mobi/reader/mobi6.py | 41 +++++++++++++++++--------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/src/calibre/ebooks/mobi/reader/mobi6.py b/src/calibre/ebooks/mobi/reader/mobi6.py
index 6400295a39..85988dbb04 100644
--- a/src/calibre/ebooks/mobi/reader/mobi6.py
+++ b/src/calibre/ebooks/mobi/reader/mobi6.py
@@ -10,7 +10,7 @@ import shutil, os, re, struct, textwrap, io
 
 from lxml import html, etree
 
-from calibre import (xml_entity_to_unicode, entity_to_unicode)
+from calibre import xml_entity_to_unicode, entity_to_unicode, guess_type
 from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
 from calibre.ebooks import DRMError, unit_convert
 from calibre.ebooks.chardet import strip_encoding_declarations
@@ -178,7 +178,7 @@ class MobiReader(object):
         self.processed_html = strip_encoding_declarations(self.processed_html)
         self.processed_html = re.sub(r'&(\S+?);', xml_entity_to_unicode,
             self.processed_html)
-        self.extract_images(processed_records, output_dir)
+        image_name_map = self.extract_images(processed_records, output_dir)
         self.replace_page_breaks()
         self.cleanup_html()
 
@@ -272,7 +272,7 @@ class MobiReader(object):
             head.insert(0, title)
             head.text = '\n\t'
 
-        self.upshift_markup(root)
+        self.upshift_markup(root, image_name_map)
         guides = root.xpath('//guide')
         guide = guides[0] if guides else None
         metadata_elems = root.xpath('//metadata')
@@ -389,8 +389,9 @@ class MobiReader(object):
             raw += unit
         return raw
 
-    def upshift_markup(self, root):
+    def upshift_markup(self, root, image_name_map=None):
         self.log.debug('Converting style information to CSS...')
+        image_name_map = image_name_map or {}
         size_map = {
             'xx-small': '0.5',
             'x-small': '1',
@@ -510,10 +511,11 @@ class MobiReader(object):
                     recindex = attrib.pop(attr, None) or recindex
                 if recindex is not None:
                     try:
-                        recindex = '%05d'%int(recindex)
-                    except:
+                        recindex = int(recindex)
+                    except Exception:
                         pass
-                    attrib['src'] = 'images/%s.jpg' % recindex
+                    else:
+                        attrib['src'] = 'images/' + image_name_map.get(recindex, '%05d.jpg' % recindex)
                 for attr in ('width', 'height'):
                     if attr in attrib:
                         val = attrib[attr]
@@ -674,7 +676,7 @@ class MobiReader(object):
         for i in getattr(self, 'image_names', []):
             path = os.path.join(bp, 'images', i)
             added.add(path)
-            manifest.append((path, 'image/jpeg'))
+            manifest.append((path, guess_type(path)[0] or 'image/jpeg'))
         if cover_copied is not None:
             manifest.append((cover_copied, 'image/jpeg'))
 
@@ -870,6 +872,7 @@ class MobiReader(object):
             os.makedirs(output_dir)
         image_index = 0
         self.image_names = []
+        image_name_map = {}
         start = getattr(self.book_header, 'first_image_index', -1)
         if start > self.num_sections or start < 0:
             # BAEN PRC files have bad headers
@@ -882,18 +885,30 @@ class MobiReader(object):
             image_index += 1
             if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
                     b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
-                # This record is a known non image type, not need to try to
+                # This record is a known non image type, no need to try to
                 # load the image
                 continue
 
-            path = os.path.join(output_dir, '%05d.jpg' % image_index)
             try:
-                if what(None, data) not in {'jpg', 'jpeg', 'gif', 'png', 'bmp'}:
-                    continue
-                save_cover_data_to(data, path, minify_to=(10000, 10000))
+                imgfmt = what(None, data)
             except Exception:
                 continue
+            if imgfmt not in {'jpg', 'jpeg', 'gif', 'png', 'bmp'}:
+                continue
+            if imgfmt == 'jpeg':
+                imgfmt = 'jpg'
+            path = os.path.join(output_dir, '%05d.%s' % (image_index, imgfmt))
+            image_name_map[image_index] = os.path.basename(path)
+            if imgfmt in ('gif', 'png'):
+                with open(path, 'wb') as f:
+                    f.write(data)
+            else:
+                try:
+                    save_cover_data_to(data, path, minify_to=(10000, 10000))
+                except Exception:
+                    continue
             self.image_names.append(os.path.basename(path))
+        return image_name_map
 
 
 def test_mbp_regex():