diff --git a/src/calibre/ebooks/pdb/ereader/reader.py b/src/calibre/ebooks/pdb/ereader/reader.py index f6bbc3d23f..b696005e85 100644 --- a/src/calibre/ebooks/pdb/ereader/reader.py +++ b/src/calibre/ebooks/pdb/ereader/reader.py @@ -13,6 +13,7 @@ import os, sys, struct, zlib from calibre import CurrentDir from calibre.ebooks import DRMError from calibre.ebooks.metadata import MetaInformation +from calibre.ebooks.pdb.formatreader import FormatReader from calibre.ebooks.pdb.ereader import EreaderError from calibre.ebooks.pdb.ereader.pmlconverter import pml_to_html, \ footnote_to_html, sidebar_to_html @@ -51,10 +52,11 @@ class HeaderRecord(object): self.num_sidebar_pages = self.sidebar_offset - self.last_data_offset if self.footnote_offset < self.last_data_offset else 0 -class Reader(object): +class Reader(FormatReader): - def __init__(self, header, stream, log): + def __init__(self, header, stream, log, encoding=None): self.log = log + self.encoding = encoding self.sections = [] for i in range(header.num_sections): @@ -73,9 +75,9 @@ class Reader(object): def decompress_text(self, number): if self.header_record.version == 2: - return decompress_doc(self.section_data(number)).decode('cp1252') + return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding) if self.header_record.version == 10: - return zlib.decompress(self.section_data(number)).decode('cp1252') + return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding) def get_image(self, number): diff --git a/src/calibre/ebooks/pdb/formatreader.py b/src/calibre/ebooks/pdb/formatreader.py new file mode 100644 index 0000000000..25abb462cf --- /dev/null +++ b/src/calibre/ebooks/pdb/formatreader.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +from __future__ import with_statement +''' +Interface defining the necessary public functions for a pdb format reader. +''' + +__license__ = 'GPL v3' +__copyright__ = '2009, John Schember ' +__docformat__ = 'restructuredtext en' + + +class FormatReader(object): + + def __init__(self, header, stream, log, encoding=None): + raise NotImplementedError() + + def extract_content(self, output_dir): + raise NotImplementedError() diff --git a/src/calibre/ebooks/pdb/input.py b/src/calibre/ebooks/pdb/input.py index d64e2aa51b..9d848b1c24 100644 --- a/src/calibre/ebooks/pdb/input.py +++ b/src/calibre/ebooks/pdb/input.py @@ -28,7 +28,7 @@ class PDBInput(InputFormatPlugin): log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident)) - reader = Reader(header, stream, log) + reader = Reader(header, stream, log, options.input_encoding) opf = reader.extract_content(os.getcwd()) return opf