Fix bug #2263 for mobi files.

This commit is contained in:
John Schember 2009-04-11 22:04:25 -04:00
parent 28a9c6868a
commit 3c70e352df

View File

@ -15,7 +15,8 @@ except ImportError:
from lxml import html, etree from lxml import html, etree
from calibre import entity_to_unicode from calibre import entity_to_unicode, sanitize_file_name
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks import DRMError from calibre.ebooks import DRMError
from calibre.ebooks.chardet import ENCODING_PATS from calibre.ebooks.chardet import ENCODING_PATS
from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi import MobiError
@ -25,7 +26,6 @@ from calibre.ebooks.mobi.langcodes import main_language, sub_language
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPFCreator, OPF from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.toc import TOC
from calibre import sanitize_file_name
class EXTHHeader(object): class EXTHHeader(object):
@ -154,6 +154,62 @@ class BookHeader(object):
self.exth.mi.language = self.language self.exth.mi.language = self.language
class MetadataHeader(BookHeader):
    """Read the book header and individual sections straight from a stream.

    Only the bytes that are asked for are read: the identity string, the
    section count, the section offset table and whichever section the
    caller requests. When the file has at least two sections the first
    section is handed to ``BookHeader.__init__``; otherwise ``self.exth``
    is set to ``None`` and no header parsing takes place.
    """

    def __init__(self, stream):
        """Probe *stream* and parse its first section when possible.

        :param stream: a seekable, readable binary stream.
        :raises MobiError: if the identity string is not recognised.
        """
        self.stream = stream
        self.ident = self.identity()
        self.num_sections = self.section_count()
        if self.num_sections >= 2:
            header = self.header()
            BookHeader.__init__(self, header, self.ident, None)
        else:
            # Too few sections to delimit the header; callers check
            # ``exth is None`` to detect this.
            self.exth = None

    def identity(self):
        """Return the upper-cased 8 bytes found at offset 60.

        :raises MobiError: if the value is neither ``BOOKMOBI`` nor
            ``TEXTREAD``.
        """
        self.stream.seek(60)
        ident = self.stream.read(8).upper()
        if ident not in ['BOOKMOBI', 'TEXTREAD']:
            raise MobiError('Unknown book type: %s' % ident)
        return ident

    def section_count(self):
        """Return the big-endian unsigned 16-bit value stored at offset 76."""
        self.stream.seek(76)
        return struct.unpack('>H', self.stream.read(2))[0]

    def section_offset(self, number):
        """Return the offset recorded for section *number*.

        Each table entry is 8 bytes long and the table starts at offset 78.
        Only the leading unsigned 32-bit field of the entry is returned.
        """
        self.stream.seek(78 + number * 8)
        return struct.unpack('>LBBBB', self.stream.read(8))[0]

    def header(self):
        """Return the raw bytes of the first section."""
        # The first section holds the metadata.
        off = self.section_offset(0)
        # The start of the second section gives the length of the first.
        end_off = self.section_offset(1)
        self.stream.seek(off)
        return self.stream.read(end_off - off)

    def section_data(self, number):
        """Return the raw bytes of section *number*.

        A section ends where the next one starts; the last section ends at
        the end of the stream.
        """
        start = self.section_offset(number)
        if number == self.num_sections - 1:
            # Measure the stream itself instead of calling os.stat() on
            # stream.name, so that streams without a backing file on disk
            # (for example in-memory buffers) work as well.
            self.stream.seek(0, 2)
            end = self.stream.tell()
        else:
            end = self.section_offset(number + 1)
        self.stream.seek(start)
        return self.stream.read(end - start)
class MobiReader(object): class MobiReader(object):
PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE) PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE)
IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex') IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex')
@ -562,26 +618,35 @@ class MobiReader(object):
self.image_names.append(os.path.basename(path)) self.image_names.append(os.path.basename(path))
im.convert('RGB').save(open(path, 'wb'), format='JPEG') im.convert('RGB').save(open(path, 'wb'), format='JPEG')
# get_metadata(stream): build a MetaInformation object for a MOBI stream and
# return it as ``mi``; the last statements visible here store JPEG bytes in
# ``mi.cover_data`` and print a traceback from a bare ``except``.
#
# NOTE(review): this region comes from a side-by-side diff. Most lines carry
# the pre-change statement immediately followed by the post-change statement,
# and all indentation has been lost. The pre-change text constructs a
# MobiReader up front and reads the cover from ``mr.sections``; the
# post-change text starts from a default MetaInformation named after
# ``os.path.basename(stream.name)``, consults MetadataHeader, and calls
# MobiReader.extract_content() inside a TemporaryDirectory.
# NOTE(review): which ``if`` each ``else:`` belongs to, and how much of the
# body the ``try`` covers, cannot be determined from this text -- confirm
# against the repository before relying on either version.
def get_metadata(stream): def get_metadata(stream):
mr = MobiReader(stream) mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
if mr.book_header.exth is None: try:
mi = MetaInformation(mr.name, [_('Unknown')]) mh = MetadataHeader(stream)
else:
mi = mr.create_opf('dummy.html') if mh.exth is not None:
try: if mh.exth.mi is not None:
if hasattr(mr.book_header.exth, 'cover_offset'): mi = mh.exth.mi
cover_index = mr.book_header.first_image_index + mr.book_header.exth.cover_offset else:
data = mr.sections[int(cover_index)][0] with TemporaryDirectory('_mobi_meta_reader') as tdir:
else: mr = MobiReader(stream)
data = mr.sections[mr.book_header.first_image_index][0] mr.extract_content(tdir)
buf = cStringIO.StringIO(data) if mr.embedded_mi is not None:
im = PILImage.open(buf) mi = mr.embedded_mi
obuf = cStringIO.StringIO()
im.convert('RGBA').save(obuf, format='JPEG') if hasattr(mh.exth, 'cover_offset'):
mi.cover_data = ('jpg', obuf.getvalue()) cover_index = mh.first_image_index + mh.exth.cover_offset
except: data = mh.section_data(int(cover_index))
import traceback else:
traceback.print_exc() data = mh.section_data(mh.first_image_index)
buf = cStringIO.StringIO(data)
im = PILImage.open(buf)
obuf = cStringIO.StringIO()
im.convert('RGBA').save(obuf, format='JPEG')
mi.cover_data = ('jpg', obuf.getvalue())
except:
import traceback
traceback.print_exc()
return mi return mi