From 40cb9772038e2bf889beb975eee14abeda542a55 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 16 Dec 2009 15:32:21 -0700 Subject: [PATCH] Fix #4230 (Can't view/convert certain Mobipocket files) --- src/calibre/ebooks/mobi/input.py | 12 +++++++++--- src/calibre/ebooks/mobi/reader.py | 10 ++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/calibre/ebooks/mobi/input.py b/src/calibre/ebooks/mobi/input.py index bc9409b852..487e70c04f 100644 --- a/src/calibre/ebooks/mobi/input.py +++ b/src/calibre/ebooks/mobi/input.py @@ -16,10 +16,16 @@ class MOBIInput(InputFormatPlugin): accelerators): from calibre.ebooks.mobi.reader import MobiReader from lxml import html - mr = MobiReader(stream, log, options.input_encoding, - options.debug_pipeline) parse_cache = {} - mr.extract_content('.', parse_cache) + try: + mr = MobiReader(stream, log, options.input_encoding, + options.debug_pipeline) + mr.extract_content('.', parse_cache) + except: + mr = MobiReader(stream, log, options.input_encoding, + options.debug_pipeline, try_extra_data_fix=True) + mr.extract_content('.', parse_cache) + raw = parse_cache.pop('calibre_raw_mobi_markup', False) if raw: if isinstance(raw, unicode): diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 20d31d48b4..973418204f 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -108,7 +108,7 @@ class EXTHHeader(object): class BookHeader(object): - def __init__(self, raw, ident, user_encoding, log): + def __init__(self, raw, ident, user_encoding, log, try_extra_data_fix=False): self.log = log self.compression_type = raw[:2] self.records, self.records_size = struct.unpack('>HH', raw[8:12]) @@ -141,7 +141,8 @@ class BookHeader(object): self.codec = 'cp1252' if user_encoding is None else user_encoding log.warn('Unknown codepage %d. Assuming %s' % (self.codepage, self.codec)) - if ident == 'TEXTREAD' or self.length < 0xE4 or 0xE8 < self.length: + if ident == 'TEXTREAD' or self.length < 0xE4 or 0xE8 < self.length \ + or (try_extra_data_fix and self.length == 0xE4): self.extra_flags = 0 else: self.extra_flags, = struct.unpack('>H', raw[0xF2:0xF4]) @@ -229,7 +230,8 @@ class MobiReader(object): PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE) IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex') - def __init__(self, filename_or_stream, log, user_encoding=None, debug=None): + def __init__(self, filename_or_stream, log, user_encoding=None, debug=None, + try_extra_data_fix=False): self.log = log self.debug = debug self.embedded_mi = None @@ -284,7 +286,7 @@ class MobiReader(object): self.book_header = BookHeader(self.sections[0][0], self.ident, - user_encoding, self.log) + user_encoding, self.log, try_extra_data_fix=try_extra_data_fix) self.name = self.name.decode(self.book_header.codec, 'replace') def extract_content(self, output_dir, parse_cache):