Interface for pdb format readers. PDB: support user input encodings

This commit is contained in:
John Schember 2009-04-22 08:04:19 -04:00
parent 3bbd277d2b
commit f158c9c643
3 changed files with 25 additions and 5 deletions

View File

@ -13,6 +13,7 @@ import os, sys, struct, zlib
from calibre import CurrentDir
from calibre.ebooks import DRMError
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.pdb.ereader import EreaderError
from calibre.ebooks.pdb.ereader.pmlconverter import pml_to_html, \
footnote_to_html, sidebar_to_html
@ -51,10 +52,11 @@ class HeaderRecord(object):
self.num_sidebar_pages = self.sidebar_offset - self.last_data_offset if self.footnote_offset < self.last_data_offset else 0
class Reader(object):
class Reader(FormatReader):
def __init__(self, header, stream, log):
def __init__(self, header, stream, log, encoding=None):
self.log = log
self.encoding = encoding
self.sections = []
for i in range(header.num_sections):
@ -73,9 +75,9 @@ class Reader(object):
def decompress_text(self, number):
if self.header_record.version == 2:
return decompress_doc(self.section_data(number)).decode('cp1252')
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
if self.header_record.version == 10:
return zlib.decompress(self.section_data(number)).decode('cp1252')
return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
def get_image(self, number):

View File

@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
'''
Interface defining the necessary public functions for a pdb format reader.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
class FormatReader(object):
    '''
    Interface that every pdb format reader is expected to implement.

    Both methods are placeholders that raise ``NotImplementedError``;
    a concrete reader overrides them with a real implementation.
    '''

    def __init__(self, header, stream, log, encoding=None):
        '''
        Placeholder constructor fixing the signature readers share.

        :param header: header object describing the pdb file.
        :param stream: stream the reader pulls its data from.
        :param log: logger used for diagnostic output.
        :param encoding: optional user supplied input encoding; ``None``
            leaves the choice of encoding to the concrete reader.
        :raises NotImplementedError: always, this base is not usable
            on its own.
        '''
        raise NotImplementedError

    def extract_content(self, output_dir):
        '''
        Placeholder for extracting the book's content.

        :param output_dir: directory handed to the reader for its output.
        :raises NotImplementedError: always, concrete readers override
            this method.
        '''
        raise NotImplementedError

View File

@ -28,7 +28,7 @@ class PDBInput(InputFormatPlugin):
log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident))
reader = Reader(header, stream, log)
reader = Reader(header, stream, log, options.input_encoding)
opf = reader.extract_content(os.getcwd())
return opf