From 1b148eb3709ed9df175ff81097ce0900c5136b43 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 5 Feb 2016 21:48:17 +0530 Subject: [PATCH] PDF metadata: Workaround for PDF files with corrupted XMP metadata packets, generated by Nitro PDF. See #1541981 (Private bug) --- src/calibre/ebooks/metadata/pdf.py | 2 -- src/calibre/ebooks/metadata/xmp.py | 6 +++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py index c76da304dc..76c88aece9 100644 --- a/src/calibre/ebooks/metadata/pdf.py +++ b/src/calibre/ebooks/metadata/pdf.py @@ -13,8 +13,6 @@ from calibre.ebooks.metadata import ( MetaInformation, string_to_authors, check_isbn, check_doi) from calibre.utils.ipc.simple_worker import fork_job, WorkerError -#_isbn_pat = re.compile(r'ISBN[: ]*([-0-9Xx]+)') - def get_tools(): from calibre.ebooks.pdf.pdftohtml import PDFTOHTML base = os.path.dirname(PDFTOHTML) diff --git a/src/calibre/ebooks/metadata/xmp.py b/src/calibre/ebooks/metadata/xmp.py index e2b4e1c58d..ab212e538a 100644 --- a/src/calibre/ebooks/metadata/xmp.py +++ b/src/calibre/ebooks/metadata/xmp.py @@ -221,6 +221,10 @@ def metadata_from_xmp_packet(raw_bytes): root = parse_xmp_packet(raw_bytes) mi = Metadata(_('Unknown')) title = first_alt('//dc:title', root) + if title.startswith(r'\376\377'): + # corrupted XMP packet generated by Nitro PDF. See + # https://bugs.launchpad.net/calibre/+bug/1541981 + raise ValueError('Corrupted XMP metadata packet detected, probably generated by Nitro PDF') if title: mi.title = title authors = multiple_sequences('//dc:creator', root) @@ -341,7 +345,7 @@ def consolidate_metadata(info_mi, info): # We'll use the xmp tags/authors but fallback to the info ones if the # xmp does not have tags/authors. smart_update() should have taken care of # the rest - info_mi.authors, info_mi.tags = xmp_mi.authors or info_mi.authors, xmp_mi.tags or info_mi.tags + info_mi.authors, info_mi.tags = (info_authors if xmp_mi.is_null('authors') else xmp_mi.authors), xmp_mi.tags or info_tags return info_mi def nsmap(*args):