mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
PDF metadata: Workaround for PDF files with corrupted XMP metadata packets, generated by Nitro PDF. See #1541981 (Private bug)
This commit is contained in:
parent
c585ad5893
commit
1b148eb370
@ -13,8 +13,6 @@ from calibre.ebooks.metadata import (
|
||||
MetaInformation, string_to_authors, check_isbn, check_doi)
|
||||
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
|
||||
|
||||
#_isbn_pat = re.compile(r'ISBN[: ]*([-0-9Xx]+)')
|
||||
|
||||
def get_tools():
|
||||
from calibre.ebooks.pdf.pdftohtml import PDFTOHTML
|
||||
base = os.path.dirname(PDFTOHTML)
|
||||
|
@ -221,6 +221,10 @@ def metadata_from_xmp_packet(raw_bytes):
|
||||
root = parse_xmp_packet(raw_bytes)
|
||||
mi = Metadata(_('Unknown'))
|
||||
title = first_alt('//dc:title', root)
|
||||
if title.startswith(r'\376\377'):
|
||||
# corrupted XMP packet generated by Nitro PDF. See
|
||||
# https://bugs.launchpad.net/calibre/+bug/1541981
|
||||
raise ValueError('Corrupted XMP metadata packet detected, probably generated by Nitro PDF')
|
||||
if title:
|
||||
mi.title = title
|
||||
authors = multiple_sequences('//dc:creator', root)
|
||||
@ -341,7 +345,7 @@ def consolidate_metadata(info_mi, info):
|
||||
# We'll use the xmp tags/authors but fall back to the info ones if the
|
||||
# xmp does not have tags/authors. smart_update() should have taken care of
|
||||
# the rest
|
||||
info_mi.authors, info_mi.tags = xmp_mi.authors or info_mi.authors, xmp_mi.tags or info_mi.tags
|
||||
info_mi.authors, info_mi.tags = (info_authors if xmp_mi.is_null('authors') else xmp_mi.authors), xmp_mi.tags or info_tags
|
||||
return info_mi
|
||||
|
||||
def nsmap(*args):
|
||||
|
Loading…
x
Reference in New Issue
Block a user