mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
When converting AZW4 files to PDF, skip the conversion and simply unwrap the PDF file already embedded inside the AZW4 file.
This almost always gives better results, since otherwise we are doing a PDF to PDF conversion.
This commit is contained in:
parent
de641e723a
commit
826dc25ffd
@ -16,6 +16,15 @@ import re
|
||||
|
||||
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||
|
||||
def unwrap(stream, output_path):
    """Extract the PDF embedded in an AZW4 container and save it to disk.

    The whole of *stream* is read into memory and searched for the span that
    begins at the first ``%PDF`` marker and ends at the last ``%%EOF`` marker
    (the match is greedy and spans newlines). That span is written verbatim,
    in binary mode, to *output_path*.

    :param stream: Binary file-like object positioned where reading should
        begin; its remaining content is consumed with ``read()``.
    :param output_path: Path of the file to create (or overwrite) with the
        extracted PDF bytes.
    :raises ValueError: If no ``%PDF`` ... ``%%EOF`` span is present. The
        output file is not opened in that case.
    """
    embedded = re.compile(br'%PDF.+%%EOF', re.DOTALL).search(stream.read())
    if embedded is not None:
        with open(output_path, 'wb') as pdf_file:
            pdf_file.write(embedded.group(0))
        return
    raise ValueError('No embedded PDF found in AZW4 file')
|
||||
|
||||
|
||||
class Reader(FormatReader):
|
||||
|
||||
def __init__(self, header, stream, log, options):
|
||||
@ -30,15 +39,13 @@ class Reader(FormatReader):
|
||||
self.stream.seek(0)
|
||||
raw_data = self.stream.read()
|
||||
data = ''
|
||||
mo = re.search(r'(?ums)%PDF.*%%EOF.', raw_data)
|
||||
mo = re.search(br'%PDF.+%%EOF', raw_data, flags=re.DOTALL)
|
||||
if mo:
|
||||
data = mo.group()
|
||||
|
||||
pdf_n = os.path.join(os.getcwdu(), 'tmp.pdf')
|
||||
pdf = open(pdf_n, 'wb')
|
||||
with open(pdf_n, 'wb') as pdf:
|
||||
pdf.write(data)
|
||||
pdf.close()
|
||||
|
||||
from calibre.customize.ui import plugin_for_input_format
|
||||
|
||||
pdf_plugin = plugin_for_input_format('pdf')
|
||||
|
@ -1030,6 +1030,15 @@ OptionRecommendation(name='search_replace',
|
||||
|
||||
if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf':
|
||||
self.opts.lrf = True
|
||||
if self.input_fmt == 'azw4' and self.output_plugin.file_type == 'pdf':
|
||||
self.ui_reporter(0.01, 'AZW4 files are simply wrappers around PDF files.'
|
||||
' Skipping the conversion and unwrapping the embedded PDF instead')
|
||||
from calibre.ebooks.azw4.reader import unwrap
|
||||
unwrap(stream, self.output)
|
||||
self.ui_reporter(1.)
|
||||
self.log(self.output_fmt.upper(), 'output written to', self.output)
|
||||
self.flush()
|
||||
return
|
||||
|
||||
self.ui_reporter(0.01, _('Converting input to HTML...'))
|
||||
ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
|
||||
|
Loading…
x
Reference in New Issue
Block a user