mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Fix reading metadata from PDF broken by poppler >= 0.47.0
This commit is contained in:
parent
8f655cd610
commit
59e8070ef9
@@ -33,22 +33,17 @@ def read_info(outputdir, get_cover):
|
||||
ans = {}
|
||||
|
||||
try:
|
||||
raw = subprocess.check_output([pdfinfo, '-meta', '-enc', 'UTF-8', 'src.pdf'])
|
||||
raw = subprocess.check_output([pdfinfo, '-enc', 'UTF-8', 'src.pdf'])
|
||||
except subprocess.CalledProcessError as e:
|
||||
prints('pdfinfo errored out with return code: %d'%e.returncode)
|
||||
return None
|
||||
# The XMP metadata could be in an encoding other than UTF-8, so split it
|
||||
# out before trying to decode raw
|
||||
parts = re.split(br'^Metadata:', raw, 1, flags=re.MULTILINE)
|
||||
if len(parts) > 1:
|
||||
raw, ans['xmp_metadata'] = parts
|
||||
try:
|
||||
raw = raw.decode('utf-8')
|
||||
info_raw = raw.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
prints('pdfinfo returned no UTF-8 data')
|
||||
return None
|
||||
|
||||
for line in raw.splitlines():
|
||||
for line in info_raw.splitlines():
|
||||
if u':' not in line:
|
||||
continue
|
||||
field, val = line.partition(u':')[::2]
|
||||
@@ -56,6 +51,22 @@ def read_info(outputdir, get_cover):
|
||||
if field and val:
|
||||
ans[field] = val.strip()
|
||||
|
||||
# Now read XMP metadata
|
||||
# Versions of poppler before 0.47.0 used to print out both the Info dict and
|
||||
# XMP metadata packet together. However, since that changed in
|
||||
# https://cgit.freedesktop.org/poppler/poppler/commit/?id=c91483aceb1b640771f572cb3df9ad707e5cad0d
|
||||
# we can no longer rely on it.
|
||||
try:
|
||||
raw = subprocess.check_output([pdfinfo, '-meta', 'src.pdf']).strip()
|
||||
except subprocess.CalledProcessError as e:
|
||||
prints('pdfinfo errored out with return code: %d'%e.returncode)
|
||||
|
||||
parts = re.split(br'^Metadata:', raw, 1, flags=re.MULTILINE)
|
||||
if len(parts) > 1:
|
||||
raw, ans['xmp_metadata'] = parts
|
||||
elif raw:
|
||||
ans['xmp_metadata'] = raw
|
||||
|
||||
if get_cover:
|
||||
try:
|
||||
subprocess.check_call([pdftoppm, '-singlefile', '-jpeg', '-cropbox',
|
||||
|
Loading…
x
Reference in New Issue
Block a user