Fix #1872 (IOError: cannot identify image file)

2025-07-09 03:04:10 -04:00 · 2009-02-17 13:02:18 -08:00 · 2009-02-17 13:02:18 -08:00 · 359c5e92a3
commit 359c5e92a3
parent a2745fed4c 49a55d1da6
2 changed files with 42 additions and 28 deletions
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -249,9 +249,6 @@ class MobiReader(object):
            self.processed_html = '<html><p>'+self.processed_html.replace('\n\n', '<p>')+'</html>'
        self.processed_html = self.processed_html.replace('\r\n', '\n')
        self.processed_html = self.processed_html.replace('> <', '>\n<')
-        for t, c in [('b', 'bold'), ('i', 'italic')]:
-            self.processed_html = re.sub(r'(?i)<%s>'%t, r'<span class="%s">'%c, self.processed_html)
-            self.processed_html = re.sub(r'(?i)</%s>'%t, r'</span>', self.processed_html)
        
    def upshift_markup(self, root):
        if self.verbose:
@@ -273,8 +270,6 @@ class MobiReader(object):
                for key in tag.attrib.keys():
                    tag.attrib.pop(key)
                continue
-            if tag.tag == 'pre' and not tag.text:
-                tag.tag = 'div'
            styles, attrib = [], tag.attrib
            if attrib.has_key('style'):
                style = attrib.pop('style').strip()
@@ -294,35 +289,44 @@ class MobiReader(object):
                align = attrib.pop('align').strip()
                if align:
                    styles.append('text-align: %s' % align)
-            if mobi_version == 1 and tag.tag == 'hr':
+            if tag.tag == 'hr':
+                if mobi_version == 1:
                    tag.tag = 'div'
                    styles.append('page-break-before: always')
                    styles.append('display: block')
                    styles.append('margin: 0')
-            if styles:
-                attrib['style'] = '; '.join(styles)
-                
-            if tag.tag.lower() == 'font':
+            elif tag.tag == 'i':
+                tag.tag = 'span'
+                tag.attrib['class'] = 'italic'
+            elif tag.tag == 'b':
+                tag.tag = 'span'
+                tag.attrib['class'] = 'bold'
+            elif tag.tag == 'font':
                sz = tag.get('size', '').lower()
                try:
                    float(sz)
                except ValueError:
                    if sz in size_map.keys():
                        attrib['size'] = size_map[sz]
+            elif tag.tag == 'img':
+                recindex = None
+                for attr in self.IMAGE_ATTRS:
+                    recindex = attrib.pop(attr, None) or recindex
+                if recindex is not None:
+                    attrib['src'] = 'images/%s.jpg' % recindex
+            elif tag.tag == 'pre':
+                if not tag.text:
+                    tag.tag = 'div'
+            if styles:
+                attrib['style'] = '; '.join(styles)
            if 'filepos-id' in attrib:
                attrib['id'] = attrib.pop('filepos-id')
            if 'filepos' in attrib:
                filepos = attrib.pop('filepos')
                try:
                    attrib['href'] = "#filepos%d" % int(filepos)
-                except:
-                    attrib['href'] = filepos
-            if tag.tag == 'img':
-                recindex = None
-                for attr in self.IMAGE_ATTRS:
-                    recindex = attrib.pop(attr, None) or recindex
-                if recindex is not None:
-                    attrib['src'] = 'images/%s.jpg' % recindex
+                except ValueError:
+                    pass
    
    def create_opf(self, htmlfile, guide=None):
        mi = self.book_header.exth.mi
@@ -332,7 +336,7 @@ class MobiReader(object):
        manifest = [(htmlfile, 'text/x-oeb1-document')]
        bp = os.path.dirname(htmlfile)
        for i in getattr(self, 'image_names', []):
-            manifest.append((os.path.join(bp, 'images/', i), 'image/jpg'))
+            manifest.append((os.path.join(bp, 'images/', i), 'image/jpeg'))
        
        opf.create_manifest(manifest)
        opf.create_spine([os.path.basename(htmlfile)])
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@@ -416,7 +416,11 @@ class MobiWriter(object):
        coverid = metadata.cover[0] if metadata.cover else None
        for _, href in images:
            item = self._oeb.manifest.hrefs[href]
+            try:
                data = rescale_image(item.data, self._imagemax)
+            except IOError:
+                self._oeb.logger.warn('Bad image file %r' % item.href)
+                continue
            self._records.append(data)
    
    def _generate_record0(self):
@@ -486,9 +490,11 @@ class MobiWriter(object):
            index = self._images[href] - 1
            exth.write(pack('>III', 0xc9, 0x0c, index))
            exth.write(pack('>III', 0xcb, 0x0c, 0))
-            index = self._add_thumbnail(item) - 1
-            exth.write(pack('>III', 0xca, 0x0c, index))
-            nrecs += 3
+            nrecs += 2
+            index = self._add_thumbnail(item)
+            if index is not None:
+                exth.write(pack('>III', 0xca, 0x0c, index - 1))
+                nrecs += 1
        exth = exth.getvalue()
        trail = len(exth) % 4
        pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte
@@ -496,7 +502,11 @@ class MobiWriter(object):
        return ''.join(exth)

    def _add_thumbnail(self, item):
+        try:
            data = rescale_image(item.data, MAX_THUMB_SIZE, MAX_THUMB_DIMEN)
+        except IOError:
+            self._oeb.logger.warn('Bad image file %r' % item.href)
+            return None
        manifest = self._oeb.manifest
        id, href = manifest.generate('thumbnail', 'thumbnail.jpeg')
        manifest.add(id, href, 'image/jpeg', data=data)