Fix #1872 (IOError: cannot identify image file)

This commit is contained in:
Kovid Goyal 2009-02-17 13:02:18 -08:00
commit 359c5e92a3
2 changed files with 42 additions and 28 deletions

View File

@@ -249,9 +249,6 @@ class MobiReader(object):
self.processed_html = '<html><p>'+self.processed_html.replace('\n\n', '<p>')+'</html>'
self.processed_html = self.processed_html.replace('\r\n', '\n')
self.processed_html = self.processed_html.replace('> <', '>\n<')
for t, c in [('b', 'bold'), ('i', 'italic')]:
self.processed_html = re.sub(r'(?i)<%s>'%t, r'<span class="%s">'%c, self.processed_html)
self.processed_html = re.sub(r'(?i)</%s>'%t, r'</span>', self.processed_html)
def upshift_markup(self, root):
if self.verbose:
@@ -273,8 +270,6 @@ class MobiReader(object):
for key in tag.attrib.keys():
tag.attrib.pop(key)
continue
if tag.tag == 'pre' and not tag.text:
tag.tag = 'div'
styles, attrib = [], tag.attrib
if attrib.has_key('style'):
style = attrib.pop('style').strip()
@@ -294,35 +289,44 @@ class MobiReader(object):
align = attrib.pop('align').strip()
if align:
styles.append('text-align: %s' % align)
if mobi_version == 1 and tag.tag == 'hr':
if tag.tag == 'hr':
if mobi_version == 1:
tag.tag = 'div'
styles.append('page-break-before: always')
styles.append('display: block')
styles.append('margin: 0')
if styles:
attrib['style'] = '; '.join(styles)
if tag.tag.lower() == 'font':
elif tag.tag == 'i':
tag.tag = 'span'
tag.attrib['class'] = 'italic'
elif tag.tag == 'b':
tag.tag = 'span'
tag.attrib['class'] = 'bold'
elif tag.tag == 'font':
sz = tag.get('size', '').lower()
try:
float(sz)
except ValueError:
if sz in size_map.keys():
attrib['size'] = size_map[sz]
elif tag.tag == 'img':
recindex = None
for attr in self.IMAGE_ATTRS:
recindex = attrib.pop(attr, None) or recindex
if recindex is not None:
attrib['src'] = 'images/%s.jpg' % recindex
elif tag.tag == 'pre':
if not tag.text:
tag.tag = 'div'
if styles:
attrib['style'] = '; '.join(styles)
if 'filepos-id' in attrib:
attrib['id'] = attrib.pop('filepos-id')
if 'filepos' in attrib:
filepos = attrib.pop('filepos')
try:
attrib['href'] = "#filepos%d" % int(filepos)
except:
attrib['href'] = filepos
if tag.tag == 'img':
recindex = None
for attr in self.IMAGE_ATTRS:
recindex = attrib.pop(attr, None) or recindex
if recindex is not None:
attrib['src'] = 'images/%s.jpg' % recindex
except ValueError:
pass
def create_opf(self, htmlfile, guide=None):
mi = self.book_header.exth.mi
@@ -332,7 +336,7 @@ class MobiReader(object):
manifest = [(htmlfile, 'text/x-oeb1-document')]
bp = os.path.dirname(htmlfile)
for i in getattr(self, 'image_names', []):
manifest.append((os.path.join(bp, 'images/', i), 'image/jpg'))
manifest.append((os.path.join(bp, 'images/', i), 'image/jpeg'))
opf.create_manifest(manifest)
opf.create_spine([os.path.basename(htmlfile)])

View File

@@ -416,7 +416,11 @@ class MobiWriter(object):
coverid = metadata.cover[0] if metadata.cover else None
for _, href in images:
item = self._oeb.manifest.hrefs[href]
try:
data = rescale_image(item.data, self._imagemax)
except IOError:
self._oeb.logger.warn('Bad image file %r' % item.href)
continue
self._records.append(data)
def _generate_record0(self):
@@ -486,9 +490,11 @@ class MobiWriter(object):
index = self._images[href] - 1
exth.write(pack('>III', 0xc9, 0x0c, index))
exth.write(pack('>III', 0xcb, 0x0c, 0))
index = self._add_thumbnail(item) - 1
exth.write(pack('>III', 0xca, 0x0c, index))
nrecs += 3
nrecs += 2
index = self._add_thumbnail(item)
if index is not None:
exth.write(pack('>III', 0xca, 0x0c, index - 1))
nrecs += 1
exth = exth.getvalue()
trail = len(exth) % 4
pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte
@@ -496,7 +502,11 @@ class MobiWriter(object):
return ''.join(exth)
def _add_thumbnail(self, item):
try:
data = rescale_image(item.data, MAX_THUMB_SIZE, MAX_THUMB_DIMEN)
except IOError:
self._oeb.logger.warn('Bad image file %r' % item.href)
return None
manifest = self._oeb.manifest
id, href = manifest.generate('thumbnail', 'thumbnail.jpeg')
manifest.add(id, href, 'image/jpeg', data=data)