# Patch summary (commentary only; everything from the first "diff --git"
# header onwards is the unmodified patch text).
#
# Files touched:
#   * src/calibre/ebooks/azw4/reader.py
#   * src/calibre/ebooks/conversion/plumber.py
#
# azw4/reader.py
#   - Adds a module-level helper unwrap(stream, output_path). It reads the
#     whole stream, searches the bytes for the span from the first "%PDF" to
#     the last "%%EOF" (greedy ".+" with re.DOTALL), raises ValueError with
#     'No embedded PDF found in AZW4 file' when nothing matches, and
#     otherwise writes the matched bytes to output_path opened in 'wb' mode.
#   - In the Reader hunk, replaces the text pattern r'(?ums)%PDF.*%%EOF.'
#     with the bytes pattern br'%PDF.+%%EOF' (flags=re.DOTALL). The new
#     pattern no longer consumes one extra character after "%%EOF" and
#     requires at least one character between the two markers.
#   - Replaces the manual open()/write()/close() sequence for tmp.pdf with a
#     "with open(...)" block.
#   - The "-"/"+" pairs that carry no visible text are presumably
#     whitespace-only cleanups of blank lines; the collapsed text no longer
#     shows the original whitespace, so this cannot be confirmed from here.
#
# conversion/plumber.py
#   - Adds a shortcut: when self.input_fmt == 'azw4' and the output plugin's
#     file_type == 'pdf', it reports progress, calls
#     unwrap(stream, self.output), reports progress 1., logs the output
#     path, calls self.flush() and returns before the
#     'Converting input to HTML...' step.
#
# NOTE(review): the unchanged context line "data = ''" leaves data as a str
#   literal when the search fails, while a successful match now yields
#   bytes; data is then written to a file opened with 'wb'. Confirm this is
#   acceptable on the Python version this code targets.
# NOTE(review): Reader calls self.stream.seek(0) before reading, but
#   unwrap() reads from the stream's current position. Confirm the stream
#   passed in from plumber.py is positioned at the start.
# NOTE(review): the new progress message in plumber.py is a plain string,
#   whereas the neighbouring 'Converting input to HTML...' message is
#   wrapped in _(). Confirm whether it should be translatable.
# NOTE(review): the line structure of this patch appears to have been
#   collapsed (hunk lines are separated by single spaces rather than
#   newlines), so it will presumably not apply as-is; regenerate it from the
#   original commit rather than re-wrapping it by hand.
diff --git a/src/calibre/ebooks/azw4/reader.py b/src/calibre/ebooks/azw4/reader.py index 5acb86b3fc..60eaaef20e 100644 --- a/src/calibre/ebooks/azw4/reader.py +++ b/src/calibre/ebooks/azw4/reader.py @@ -16,6 +16,15 @@ import re from calibre.ebooks.pdb.formatreader import FormatReader +def unwrap(stream, output_path): + raw_data = stream.read() + m = re.search(br'%PDF.+%%EOF', raw_data, flags=re.DOTALL) + if m is None: + raise ValueError('No embedded PDF found in AZW4 file') + with open(output_path, 'wb') as f: + f.write(m.group()) + + class Reader(FormatReader): def __init__(self, header, stream, log, options): @@ -30,17 +39,15 @@ class Reader(FormatReader): self.stream.seek(0) raw_data = self.stream.read() data = '' - mo = re.search(r'(?ums)%PDF.*%%EOF.', raw_data) + mo = re.search(br'%PDF.+%%EOF', raw_data, flags=re.DOTALL) if mo: data = mo.group() - + pdf_n = os.path.join(os.getcwdu(), 'tmp.pdf') - pdf = open(pdf_n, 'wb') - pdf.write(data) - pdf.close() - + with open(pdf_n, 'wb') as pdf: + pdf.write(data) from calibre.customize.ui import plugin_for_input_format - + pdf_plugin = plugin_for_input_format('pdf') for opt in pdf_plugin.options: if not hasattr(self.options, opt.option.name): diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 4c73aa8272..e027fdbcf9 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -1030,6 +1030,15 @@ OptionRecommendation(name='search_replace', if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf': self.opts.lrf = True + if self.input_fmt == 'azw4' and self.output_plugin.file_type == 'pdf': + self.ui_reporter(0.01, 'AZW4 files are simply wrappers around PDF files.' + ' Skipping the conversion and unwrapping the embedded PDF instead') + from calibre.ebooks.azw4.reader import unwrap + unwrap(stream, self.output) + self.ui_reporter(1.) 
+ self.log(self.output_fmt.upper(), 'output written to', self.output) + self.flush() + return self.ui_reporter(0.01, _('Converting input to HTML...')) ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)