def get_cover(pheader, eheader):
    """
    Look for the image record named ``cover.png`` and return its payload.

    :param pheader: object whose ``section_data(index)`` returns the raw
        content of the record stored at ``index``.
    :param eheader: object exposing ``image_count`` (number of image records)
        and ``image_data_offset`` (index of the first image record).
    :return: a ``('png', data)`` tuple. ``data`` is everything from offset 62
        of the first record whose name field equals ``cover.png``, or
        ``None`` when no record matches.
    """
    cover_data = None

    first = eheader.image_data_offset
    for index in range(first, first + eheader.image_count):
        raw = pheader.section_data(index)

        # Record data read from a binary stream is a byte string; stripping
        # or comparing it with text literals raises TypeError on Python 3.
        # Pick literals matching the record's type so both kinds work.
        if isinstance(raw, (bytes, bytearray)):
            padding, wanted = b'\x00', b'cover.png'
        else:
            padding, wanted = u'\x00', u'cover.png'

        # The name is read from bytes 4..35 with NUL padding removed.
        # NOTE(review): offset 62 is presumably the end of the image record
        # header -- confirm against the eReader format description.
        if raw[4:4 + 32].strip(padding) == wanted:
            cover_data = raw[62:]
            break

    return ('png', cover_data)
struct.unpack('>H', raw[0:2]) self.non_text_offset, = struct.unpack('>H', raw[12:14]) + self.chapter_count, = struct.unpack('>H', raw[14:16]) + self.image_count, = struct.unpack('>H', raw[20:22]) + self.link_count, = struct.unpack('>H', raw[22:24]) self.has_metadata, = struct.unpack('>H', raw[24:26]) - self.footnote_rec, = struct.unpack('>H', raw[28:30]) - self.sidebar_rec, = struct.unpack('>H', raw[30:32]) + self.footnote_count, = struct.unpack('>H', raw[28:30]) + self.sidebar_count, = struct.unpack('>H', raw[30:32]) + self.chapter_offset, = struct.unpack('>H', raw[32:34]) + self.small_font_page_offset, = struct.unpack('>H', raw[36:38]) + self.large_font_page_offset, = struct.unpack('>H', raw[38:40]) self.image_data_offset, = struct.unpack('>H', raw[40:42]) + self.link_offset, = struct.unpack('>H', raw[42:44]) self.metadata_offset, = struct.unpack('>H', raw[44:46]) self.footnote_offset, = struct.unpack('>H', raw[48:50]) self.sidebar_offset, = struct.unpack('>H', raw[50:52]) @@ -58,11 +65,11 @@ class Reader132(FormatReader): self.header_record = HeaderRecord(self.section_data(0)) - if self.header_record.version not in (2, 10): - if self.header_record.version in (260, 272): + if self.header_record.compression not in (2, 10): + if self.header_record.compression in (260, 272): raise DRMError('eReader DRM is not supported.') else: - raise EreaderError('Unknown book version %i.' % self.header_record.version) + raise EreaderError('Unknown book compression %i.' 
% self.header_record.compression) from calibre.ebooks.metadata.pdb import get_metadata self.mi = get_metadata(stream, False) @@ -71,9 +78,9 @@ class Reader132(FormatReader): return self.sections[number] def decompress_text(self, number): - if self.header_record.version == 2: + if self.header_record.compression == 2: return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace') - if self.header_record.version == 10: + if self.header_record.compression == 10: return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace') def get_image(self, number): @@ -115,19 +122,19 @@ class Reader132(FormatReader): pml += self.get_text_page(i) html += pml_to_html(pml) - if self.header_record.footnote_rec > 0: + if self.header_record.footnote_count > 0: html += '

%s

' % _('Footnotes') footnoteids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.footnote_offset).decode('cp1252' if self.encoding is None else self.encoding)) - for fid, i in enumerate(range(self.header_record.footnote_offset + 1, self.header_record.footnote_offset + self.header_record.footnote_rec)): + for fid, i in enumerate(range(self.header_record.footnote_offset + 1, self.header_record.footnote_offset + self.header_record.footnote_count)): self.log.debug('Extracting footnote page %i' % i) html += '
' html += footnote_sidebar_to_html(footnoteids[fid], self.decompress_text(i)) html += '
' - if self.header_record.sidebar_rec > 0: + if self.header_record.sidebar_count > 0: html += '

%s

' % _('Sidebar') sidebarids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.sidebar_offset).decode('cp1252' if self.encoding is None else self.encoding)) - for sid, i in enumerate(range(self.header_record.sidebar_offset + 1, self.header_record.sidebar_offset + self.header_record.sidebar_rec)): + for sid, i in enumerate(range(self.header_record.sidebar_offset + 1, self.header_record.sidebar_offset + self.header_record.sidebar_count)): self.log.debug('Extracting sidebar page %i' % i) html += '
' html += footnote_sidebar_to_html(sidebarids[sid], self.decompress_text(i)) diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py index 45f54f192f..5453665a55 100644 --- a/src/calibre/ebooks/pml/input.py +++ b/src/calibre/ebooks/pml/input.py @@ -27,20 +27,20 @@ class PMLInput(InputFormatPlugin): def process_pml(self, pml_path, html_path, close_all=False): pclose = False hclose = False - + if not hasattr(pml_path, 'read'): pml_stream = open(pml_path, 'rb') pclose = True else: pml_stream = pml_path pml_stream.seek(0) - + if not hasattr(html_path, 'write'): html_stream = open(html_path, 'wb') hclose = True else: html_stream = html_path - + ienc = pml_stream.encoding if pml_stream.encoding else 'cp1252' if self.options.input_encoding: ienc = self.options.input_encoding @@ -95,12 +95,12 @@ class PMLInput(InputFormatPlugin): with TemporaryDirectory('_unpmlz') as tdir: zf = ZipFile(stream) zf.extractall(tdir) - + pmls = glob.glob(os.path.join(tdir, '*.pml')) for pml in pmls: html_name = os.path.splitext(os.path.basename(pml))[0]+'.html' html_path = os.path.join(os.getcwd(), html_name) - + pages.append(html_name) log.debug('Processing PML item %s...' % pml) ttoc = self.process_pml(pml, html_path) @@ -119,7 +119,7 @@ class PMLInput(InputFormatPlugin): manifest_items = [] for item in pages+images: manifest_items.append((item, None)) - + from calibre.ebooks.metadata.meta import get_metadata log.debug('Reading metadata from input file...') mi = get_metadata(stream, 'pml') @@ -133,5 +133,5 @@ class PMLInput(InputFormatPlugin): with open('metadata.opf', 'wb') as opffile: with open('toc.ncx', 'wb') as tocfile: opf.render(opffile, tocfile, 'toc.ncx') - + return os.path.join(os.getcwd(), 'metadata.opf')