Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)
Fix #4230 (Can't view/convert certain Mobipocket files)
This commit is contained in:
parent 3737a93493
commit 40cb977203
@ -16,10 +16,16 @@ class MOBIInput(InputFormatPlugin):
|
|||||||
accelerators):
|
accelerators):
|
||||||
from calibre.ebooks.mobi.reader import MobiReader
|
from calibre.ebooks.mobi.reader import MobiReader
|
||||||
from lxml import html
|
from lxml import html
|
||||||
mr = MobiReader(stream, log, options.input_encoding,
|
|
||||||
options.debug_pipeline)
|
|
||||||
parse_cache = {}
|
parse_cache = {}
|
||||||
mr.extract_content('.', parse_cache)
|
try:
|
||||||
|
mr = MobiReader(stream, log, options.input_encoding,
|
||||||
|
options.debug_pipeline)
|
||||||
|
mr.extract_content('.', parse_cache)
|
||||||
|
except:
|
||||||
|
mr = MobiReader(stream, log, options.input_encoding,
|
||||||
|
options.debug_pipeline, try_extra_data_fix=True)
|
||||||
|
mr.extract_content('.', parse_cache)
|
||||||
|
|
||||||
raw = parse_cache.pop('calibre_raw_mobi_markup', False)
|
raw = parse_cache.pop('calibre_raw_mobi_markup', False)
|
||||||
if raw:
|
if raw:
|
||||||
if isinstance(raw, unicode):
|
if isinstance(raw, unicode):
|
||||||
|
@ -108,7 +108,7 @@ class EXTHHeader(object):
|
|||||||
|
|
||||||
class BookHeader(object):
|
class BookHeader(object):
|
||||||
|
|
||||||
def __init__(self, raw, ident, user_encoding, log):
|
def __init__(self, raw, ident, user_encoding, log, try_extra_data_fix=False):
|
||||||
self.log = log
|
self.log = log
|
||||||
self.compression_type = raw[:2]
|
self.compression_type = raw[:2]
|
||||||
self.records, self.records_size = struct.unpack('>HH', raw[8:12])
|
self.records, self.records_size = struct.unpack('>HH', raw[8:12])
|
||||||
@ -141,7 +141,8 @@ class BookHeader(object):
|
|||||||
self.codec = 'cp1252' if user_encoding is None else user_encoding
|
self.codec = 'cp1252' if user_encoding is None else user_encoding
|
||||||
log.warn('Unknown codepage %d. Assuming %s' % (self.codepage,
|
log.warn('Unknown codepage %d. Assuming %s' % (self.codepage,
|
||||||
self.codec))
|
self.codec))
|
||||||
if ident == 'TEXTREAD' or self.length < 0xE4 or 0xE8 < self.length:
|
if ident == 'TEXTREAD' or self.length < 0xE4 or 0xE8 < self.length \
|
||||||
|
or (try_extra_data_fix and self.length == 0xE4):
|
||||||
self.extra_flags = 0
|
self.extra_flags = 0
|
||||||
else:
|
else:
|
||||||
self.extra_flags, = struct.unpack('>H', raw[0xF2:0xF4])
|
self.extra_flags, = struct.unpack('>H', raw[0xF2:0xF4])
|
||||||
@ -229,7 +230,8 @@ class MobiReader(object):
|
|||||||
PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE)
|
PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE)
|
||||||
IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex')
|
IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex')
|
||||||
|
|
||||||
def __init__(self, filename_or_stream, log, user_encoding=None, debug=None):
|
def __init__(self, filename_or_stream, log, user_encoding=None, debug=None,
|
||||||
|
try_extra_data_fix=False):
|
||||||
self.log = log
|
self.log = log
|
||||||
self.debug = debug
|
self.debug = debug
|
||||||
self.embedded_mi = None
|
self.embedded_mi = None
|
||||||
@ -284,7 +286,7 @@ class MobiReader(object):
|
|||||||
|
|
||||||
|
|
||||||
self.book_header = BookHeader(self.sections[0][0], self.ident,
|
self.book_header = BookHeader(self.sections[0][0], self.ident,
|
||||||
user_encoding, self.log)
|
user_encoding, self.log, try_extra_data_fix=try_extra_data_fix)
|
||||||
self.name = self.name.decode(self.book_header.codec, 'replace')
|
self.name = self.name.decode(self.book_header.codec, 'replace')
|
||||||
|
|
||||||
def extract_content(self, output_dir, parse_cache):
|
def extract_content(self, output_dir, parse_cache):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user