From 2326873b7c8bfc11478b59c7c8ec10d4776e99c6 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Wed, 11 Feb 2009 00:46:48 -0500 Subject: [PATCH 1/2] Fixes for Mobipocket extraction: - Fix #1817. Ignore invalid filepos hyperlink targets. - From output of book producing #1817 noticed bug with <b> and <i> -> <span> conversion. Move conversion to post-parsing. - Correct image MIME type in output OPF. --- src/calibre/ebooks/mobi/reader.py | 33 ++++++++++++++++++------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 6811f9ccda..c7080c7eb8 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -247,9 +247,6 @@ class MobiReader(object): self.processed_html = '

'+self.processed_html.replace('\n\n', '

')+'' self.processed_html = self.processed_html.replace('\r\n', '\n') self.processed_html = self.processed_html.replace('> <', '>\n<') - for t, c in [('b', 'bold'), ('i', 'italic')]: - self.processed_html = re.sub(r'(?i)<%s>'%t, r''%c, self.processed_html) - self.processed_html = re.sub(r'(?i)'%t, r'', self.processed_html) def upshift_markup(self, root): if self.verbose: @@ -295,27 +292,35 @@ class MobiReader(object): styles.append('page-break-before: always') styles.append('display: block') styles.append('margin: 0') - if styles: - attrib['style'] = '; '.join(styles) - - if tag.tag.lower() == 'font': + elif tag.tag == 'i': + tag.tag = 'span' + tag.attrib['class'] = 'italic' + elif tag.tag == 'b': + tag.tag = 'span' + tag.attrib['class'] = 'bold' + elif tag.tag == 'font': sz = tag.get('size', '').lower() try: float(sz) except ValueError: if sz in size_map.keys(): attrib['size'] = size_map[sz] - if 'filepos-id' in attrib: - attrib['id'] = attrib.pop('filepos-id') - if 'filepos' in attrib: - filepos = int(attrib.pop('filepos')) - attrib['href'] = "#filepos%d" % filepos - if tag.tag == 'img': + elif tag.tag == 'img': recindex = None for attr in self.IMAGE_ATTRS: recindex = attrib.pop(attr, None) or recindex if recindex is not None: attrib['src'] = 'images/%s.jpg' % recindex + if styles: + attrib['style'] = '; '.join(styles) + if 'filepos-id' in attrib: + attrib['id'] = attrib.pop('filepos-id') + if 'filepos' in attrib: + filepos = attrib.pop('filepos') + try: + attrib['href'] = "#filepos%d" % int(filepos) + except ValueError: + pass def create_opf(self, htmlfile, guide=None): mi = self.book_header.exth.mi @@ -325,7 +330,7 @@ class MobiReader(object): manifest = [(htmlfile, 'text/x-oeb1-document')] bp = os.path.dirname(htmlfile) for i in getattr(self, 'image_names', []): - manifest.append((os.path.join(bp, 'images/', i), 'image/jpg')) + manifest.append((os.path.join(bp, 'images/', i), 'image/jpeg')) opf.create_manifest(manifest) 
opf.create_spine([os.path.basename(htmlfile)]) From 49a55d1da631efc23e3f08d982fbfa6f72a22593 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Tue, 17 Feb 2009 23:31:41 -0500 Subject: [PATCH 2/2] Fix #1872. Handle damaged images for Mobipocket conversion. --- src/calibre/ebooks/mobi/writer.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 380bdbf518..d67bc099ef 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -416,7 +416,11 @@ class MobiWriter(object): coverid = metadata.cover[0] if metadata.cover else None for _, href in images: item = self._oeb.manifest.hrefs[href] - data = rescale_image(item.data, self._imagemax) + try: + data = rescale_image(item.data, self._imagemax) + except IOError: + self._oeb.logger.warn('Bad image file %r' % item.href) + continue self._records.append(data) def _generate_record0(self): @@ -486,9 +490,11 @@ class MobiWriter(object): index = self._images[href] - 1 exth.write(pack('>III', 0xc9, 0x0c, index)) exth.write(pack('>III', 0xcb, 0x0c, 0)) - index = self._add_thumbnail(item) - 1 - exth.write(pack('>III', 0xca, 0x0c, index)) - nrecs += 3 + nrecs += 2 + index = self._add_thumbnail(item) + if index is not None: + exth.write(pack('>III', 0xca, 0x0c, index - 1)) + nrecs += 1 exth = exth.getvalue() trail = len(exth) % 4 pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte @@ -496,7 +502,11 @@ class MobiWriter(object): return ''.join(exth) def _add_thumbnail(self, item): - data = rescale_image(item.data, MAX_THUMB_SIZE, MAX_THUMB_DIMEN) + try: + data = rescale_image(item.data, MAX_THUMB_SIZE, MAX_THUMB_DIMEN) + except IOError: + self._oeb.logger.warn('Bad image file %r' % item.href) + return None manifest = self._oeb.manifest id, href = manifest.generate('thumbnail', 'thumbnail.jpeg') manifest.add(id, href, 'image/jpeg', data=data)