From a76e9dab890ae1858e7f4e00e7ac58fcfcbdfcd5 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 27 Aug 2011 17:33:56 -0400 Subject: [PATCH] Super simple plugin for azw4 support. --- src/calibre/customize/builtins.py | 4 ++- src/calibre/ebooks/azw4/__init__.py | 0 src/calibre/ebooks/azw4/input.py | 26 +++++++++++++++ src/calibre/ebooks/azw4/reader.py | 49 +++++++++++++++++++++++++++++ 4 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 src/calibre/ebooks/azw4/__init__.py create mode 100644 src/calibre/ebooks/azw4/input.py create mode 100644 src/calibre/ebooks/azw4/reader.py diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 6f5a0d4d36..2f0e269444 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -255,7 +255,7 @@ class LRXMetadataReader(MetadataReaderPlugin): class MOBIMetadataReader(MetadataReaderPlugin): name = 'Read MOBI metadata' - file_types = set(['mobi', 'prc', 'azw']) + file_types = set(['mobi', 'prc', 'azw', 'azw4']) description = _('Read metadata from %s files')%'MOBI' def get_metadata(self, stream, ftype): @@ -510,6 +510,7 @@ from calibre.ebooks.lit.input import LITInput from calibre.ebooks.mobi.input import MOBIInput from calibre.ebooks.odt.input import ODTInput from calibre.ebooks.pdb.input import PDBInput +from calibre.ebooks.azw4.input import AZW4Input from calibre.ebooks.pdf.input import PDFInput from calibre.ebooks.pml.input import PMLInput from calibre.ebooks.rb.input import RBInput @@ -606,6 +607,7 @@ plugins += [ MOBIInput, ODTInput, PDBInput, + AZW4Input, PDFInput, PMLInput, RBInput, diff --git a/src/calibre/ebooks/azw4/__init__.py b/src/calibre/ebooks/azw4/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/calibre/ebooks/azw4/input.py b/src/calibre/ebooks/azw4/input.py new file mode 100644 index 0000000000..1ac7657342 --- /dev/null +++ b/src/calibre/ebooks/azw4/input.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- + 
+__license__ = 'GPL v3'
+__copyright__ = '2011, John Schember '
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.customize.conversion import InputFormatPlugin
+from calibre.ebooks.pdb.header import PdbHeaderReader
+from calibre.ebooks.azw4.reader import Reader
+
+class AZW4Input(InputFormatPlugin):
+    '''Input plugin that converts AZW4 files via the embedded PDF.'''
+
+    name = 'AZW4 Input'
+    author = 'John Schember'
+    description = 'Convert AZW4 to HTML'
+    file_types = set(['azw4'])
+
+    def convert(self, stream, options, file_ext, log, accelerators):
+        '''Extract the PDF from stream, convert it and return the result.'''
+        header = PdbHeaderReader(stream)
+        reader = Reader(header, stream, log, options)
+        # The reader hands the PDF to the PDF input plugin; pass its result on.
+        return reader.extract_content(os.getcwd())
diff --git a/src/calibre/ebooks/azw4/reader.py b/src/calibre/ebooks/azw4/reader.py
new file mode 100644
index 0000000000..5acb86b3fc
--- /dev/null
+++ b/src/calibre/ebooks/azw4/reader.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+
+'''
+Read content from azw4 file.
+
+azw4 is essentially a PDF stuffed into a MOBI container.
+'''
+
+__license__ = 'GPL v3'
+__copyright__ = '2011, John Schember '
+__docformat__ = 'restructuredtext en'
+
+
+import os
+import re
+
+from calibre.ebooks.pdb.formatreader import FormatReader
+
+class Reader(FormatReader):
+    '''
+    Pull the embedded PDF out of an AZW4 file and convert it with the PDF
+    input plugin.
+    '''
+
+    def __init__(self, header, stream, log, options):
+        '''
+        :param header: Header object for stream; stored but not read here.
+        :param stream: File-like object holding the whole AZW4 file.
+        :param log: Logger used for progress messages.
+        :param options: Conversion options; forwarded to the PDF plugin.
+        '''
+        self.header = header
+        self.stream = stream
+        self.log = log
+        self.options = options
+
+    def extract_content(self, output_dir):
+        '''
+        Write the embedded PDF to ``tmp.pdf`` in the current working
+        directory and run the PDF input plugin on it.
+
+        :param output_dir: Not used at present; the PDF is always written to
+            the current working directory.
+        :return: Whatever the PDF input plugin's ``convert`` returns.
+        :raises ValueError: If no PDF can be found in the file.
+        '''
+        self.log.info('Extracting PDF from AZW4 Container...')
+
+        self.stream.seek(0)
+        raw_data = self.stream.read()
+        # Greedy match so that we run to the last %%EOF marker; a PDF can
+        # contain more than one. The data is raw bytes, so only DOTALL is
+        # wanted: no unicode flag, and nothing is required after the marker.
+        mo = re.search(r'(?s)%PDF.*%%EOF', raw_data)
+        if mo is None:
+            raise ValueError('No PDF found in AZW4 container.')
+
+        pdf_n = os.path.join(os.getcwdu(), 'tmp.pdf')
+        with open(pdf_n, 'wb') as pdf:
+            pdf.write(mo.group())
+
+        from calibre.customize.ui import plugin_for_input_format
+
+        pdf_plugin = plugin_for_input_format('pdf')
+        # Fill in any PDF plugin options the caller's options object lacks.
+        for opt in pdf_plugin.options:
+            if not hasattr(self.options, opt.option.name):
+                setattr(self.options, opt.option.name, opt.recommended_value)
+
+        with open(pdf_n, 'rb') as pdf:
+            return pdf_plugin.convert(pdf, self.options, 'pdf', self.log, {})