Super simple plugin for azw4 support.

This commit is contained in:
John Schember 2011-08-27 17:33:56 -04:00
parent 226bd0f871
commit a76e9dab89
4 changed files with 78 additions and 1 deletions

View File

@ -255,7 +255,7 @@ class LRXMetadataReader(MetadataReaderPlugin):
class MOBIMetadataReader(MetadataReaderPlugin):
name = 'Read MOBI metadata'
file_types = set(['mobi', 'prc', 'azw'])
file_types = set(['mobi', 'prc', 'azw', 'azw4'])
description = _('Read metadata from %s files')%'MOBI'
def get_metadata(self, stream, ftype):
@ -510,6 +510,7 @@ from calibre.ebooks.lit.input import LITInput
from calibre.ebooks.mobi.input import MOBIInput
from calibre.ebooks.odt.input import ODTInput
from calibre.ebooks.pdb.input import PDBInput
from calibre.ebooks.azw4.input import AZW4Input
from calibre.ebooks.pdf.input import PDFInput
from calibre.ebooks.pml.input import PMLInput
from calibre.ebooks.rb.input import RBInput
@ -606,6 +607,7 @@ plugins += [
MOBIInput,
ODTInput,
PDBInput,
AZW4Input,
PDFInput,
PMLInput,
RBInput,

View File

View File

@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.pdb.header import PdbHeaderReader
from calibre.ebooks.azw4.reader import Reader
class AZW4Input(InputFormatPlugin):
    '''
    Input plugin registered for the ``azw4`` file type.

    The conversion work is delegated to
    :class:`calibre.ebooks.azw4.reader.Reader`; this class only wires the
    incoming stream to it.
    '''

    name = 'AZW4 Input'
    author = 'John Schember'
    description = 'Convert AZW4 to HTML'
    file_types = set(['azw4'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the AZW4 file in ``stream`` and return the reader's result.

        :param stream: open file-like object for the AZW4 file; it is handed
            to both the PDB header parser and the AZW4 reader.
        :param options: conversion options, passed through to the reader.
        :param file_ext: not used by this method.
        :param log: logger, passed through to the reader.
        :param accelerators: not used by this method.
        :return: whatever ``Reader.extract_content`` returns (named ``opf``
            in the original code -- presumably the path of the generated
            OPF file; confirm against the reader).
        '''
        # The PDB header is parsed first, directly from the stream, and then
        # given to the reader together with that same stream.
        pdb_header = PdbHeaderReader(stream)
        azw4_reader = Reader(pdb_header, stream, log, options)
        # Content is extracted relative to the current working directory.
        return azw4_reader.extract_content(os.getcwd())

View File

@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
'''
Read content from azw4 file.
azw4 is essentially a PDF stuffed into a MOBI container.
'''
__license__ = 'GPL v3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import re
from calibre.ebooks.pdb.formatreader import FormatReader
class Reader(FormatReader):
    '''
    Pull the PDF embedded in an AZW4 container out to disk and hand it to
    the PDF input plugin for the actual conversion.
    '''

    def __init__(self, header, stream, log, options):
        '''
        Store the objects needed by :meth:`extract_content`.

        :param header: parsed PDB header for the container. It is stored but
            not read anywhere in this class.
        :param stream: seekable file-like object holding the whole AZW4
            file (assumed to be opened in binary mode -- confirm at caller).
        :param log: logger; only ``log.info`` is used here, and the object
            is also forwarded to the PDF plugin.
        :param options: conversion options object. Options the PDF plugin
            declares but that are missing here are added to it in place.
        '''
        self.header = header
        self.stream = stream
        self.log = log
        self.options = options

    def extract_content(self, output_dir):
        '''
        Extract the embedded PDF and convert it with the PDF input plugin.

        The PDF is located by scanning the raw container bytes from the
        first ``%PDF`` marker to the last ``%%EOF`` marker. It is written to
        ``tmp.pdf`` in the current working directory.

        :param output_dir: currently unused; the temporary PDF always goes
            to the current working directory.
        :return: the value returned by the PDF plugin's ``convert``.
        '''
        self.log.info('Extracting PDF from AZW4 Container...')

        self.stream.seek(0)
        raw_data = self.stream.read()

        # Greedy DOTALL match: first '%PDF' through the LAST '%%EOF', so PDFs
        # with several '%%EOF' markers (incremental updates) are kept whole.
        # The match ends exactly at '%%EOF'; requiring an extra character
        # after it would miss a PDF that ends the stream and would otherwise
        # drag one unrelated container byte into the output.
        mo = re.search(br'(?s)%PDF.*%%EOF', raw_data)
        # With no match an empty file is still written, so the PDF plugin is
        # the one to report the problem (same as the previous behaviour).
        data = mo.group() if mo else b''

        pdf_n = os.path.join(os.getcwdu(), 'tmp.pdf')
        # Context manager so the handle is closed even if the write fails.
        with open(pdf_n, 'wb') as pdf:
            pdf.write(data)

        # Imported here rather than at module level, as in the original
        # (presumably to avoid a circular import with the plugin registry).
        from calibre.customize.ui import plugin_for_input_format

        pdf_plugin = plugin_for_input_format('pdf')
        # Fill in any PDF-plugin option the caller's options object lacks,
        # using the plugin's recommended value.
        for opt in pdf_plugin.options:
            if not hasattr(self.options, opt.option.name):
                setattr(self.options, opt.option.name, opt.recommended_value)

        # Close the temporary PDF once the plugin has finished with it.
        with open(pdf_n, 'rb') as pdf_stream:
            return pdf_plugin.convert(pdf_stream, self.options, 'pdf', self.log, {})