Interface for pdb format readers. PDB: support user input encodings

This commit is contained in:
John Schember 2009-04-22 08:04:19 -04:00
parent 3bbd277d2b
commit f158c9c643
3 changed files with 25 additions and 5 deletions

View File

@ -13,6 +13,7 @@ import os, sys, struct, zlib
from calibre import CurrentDir from calibre import CurrentDir
from calibre.ebooks import DRMError from calibre.ebooks import DRMError
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.pdb.ereader import EreaderError from calibre.ebooks.pdb.ereader import EreaderError
from calibre.ebooks.pdb.ereader.pmlconverter import pml_to_html, \ from calibre.ebooks.pdb.ereader.pmlconverter import pml_to_html, \
footnote_to_html, sidebar_to_html footnote_to_html, sidebar_to_html
@ -51,10 +52,11 @@ class HeaderRecord(object):
self.num_sidebar_pages = self.sidebar_offset - self.last_data_offset if self.footnote_offset < self.last_data_offset else 0 self.num_sidebar_pages = self.sidebar_offset - self.last_data_offset if self.footnote_offset < self.last_data_offset else 0
class Reader(object): class Reader(FormatReader):
def __init__(self, header, stream, log): def __init__(self, header, stream, log, encoding=None):
self.log = log self.log = log
self.encoding = encoding
self.sections = [] self.sections = []
for i in range(header.num_sections): for i in range(header.num_sections):
@ -73,9 +75,9 @@ class Reader(object):
def decompress_text(self, number): def decompress_text(self, number):
if self.header_record.version == 2: if self.header_record.version == 2:
return decompress_doc(self.section_data(number)).decode('cp1252') return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
if self.header_record.version == 10: if self.header_record.version == 10:
return zlib.decompress(self.section_data(number)).decode('cp1252') return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
def get_image(self, number): def get_image(self, number):

View File

@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
'''
Interface defining the necessary public functions for a pdb format reader.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
class FormatReader(object):
    '''
    Interface that a pdb format reader is expected to implement.

    Neither method does any work here: each one raises
    ``NotImplementedError``, so a concrete reader has to override both
    (and must not delegate to this ``__init__``).
    '''

    def __init__(self, header, stream, log, encoding=None):
        '''
        Construct a reader.

        The arguments are not used by this base class; the descriptions
        below are assumptions to be confirmed against the concrete readers.

        :param header: presumably the parsed pdb header of the input file.
        :param stream: presumably the open input file to read sections from.
        :param log: presumably the logger used for diagnostics.
        :param encoding: optional text encoding; ``None`` presumably means
            "let the reader pick its own default".
        :raises NotImplementedError: always.
        '''
        raise NotImplementedError()

    def extract_content(self, output_dir):
        '''
        Extract the book's content.

        :param output_dir: presumably the directory the extracted files
            are written into (confirm against the concrete readers).
        :raises NotImplementedError: always.
        '''
        raise NotImplementedError()

View File

@ -28,7 +28,7 @@ class PDBInput(InputFormatPlugin):
log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident)) log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident))
reader = Reader(header, stream, log) reader = Reader(header, stream, log, options.input_encoding)
opf = reader.extract_content(os.getcwd()) opf = reader.extract_content(os.getcwd())
return opf return opf