From 6a786fcba45c72ea382c6234cccca87c77a5d8c0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 27 Feb 2009 09:36:11 -0800 Subject: [PATCH] MOBI Input:Fix #1921 (Mobi to Epub conversion) --- src/calibre/ebooks/mobi/reader.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 09ce96f646..df728e400e 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal ' Read data from .mobi files ''' -import sys, struct, os, cStringIO, re +import sys, struct, os, cStringIO, re, functools try: from PIL import Image as PILImage @@ -186,7 +186,9 @@ class MobiReader(object): self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore') for pat in ENCODING_PATS: self.processed_html = pat.sub('', self.processed_html) - self.processed_html = re.sub(r'&(\S+?);', entity_to_unicode, + e2u = functools.partial(entity_to_unicode, + exceptions=['lt', 'gt', 'amp', 'apos', 'quot']) + self.processed_html = re.sub(r'&(\S+?);', e2u, self.processed_html) self.extract_images(processed_records, output_dir) self.replace_page_breaks()