mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
PDF metadata: Workaround for PDF files with corrupted XMP metadata packets generated by Nitro PDF. See #1541981 (private bug)
This commit is contained in:
parent
c585ad5893
commit
1b148eb370
@ -13,8 +13,6 @@ from calibre.ebooks.metadata import (
|
|||||||
MetaInformation, string_to_authors, check_isbn, check_doi)
|
MetaInformation, string_to_authors, check_isbn, check_doi)
|
||||||
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
|
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
|
||||||
|
|
||||||
#_isbn_pat = re.compile(r'ISBN[: ]*([-0-9Xx]+)')
|
|
||||||
|
|
||||||
def get_tools():
|
def get_tools():
|
||||||
from calibre.ebooks.pdf.pdftohtml import PDFTOHTML
|
from calibre.ebooks.pdf.pdftohtml import PDFTOHTML
|
||||||
base = os.path.dirname(PDFTOHTML)
|
base = os.path.dirname(PDFTOHTML)
|
||||||
|
@ -221,6 +221,10 @@ def metadata_from_xmp_packet(raw_bytes):
|
|||||||
root = parse_xmp_packet(raw_bytes)
|
root = parse_xmp_packet(raw_bytes)
|
||||||
mi = Metadata(_('Unknown'))
|
mi = Metadata(_('Unknown'))
|
||||||
title = first_alt('//dc:title', root)
|
title = first_alt('//dc:title', root)
|
||||||
|
if title.startswith(r'\376\377'):
|
||||||
|
# corrupted XMP packet generated by Nitro PDF. See
|
||||||
|
# https://bugs.launchpad.net/calibre/+bug/1541981
|
||||||
|
raise ValueError('Corrupted XMP metadata packet detected, probably generated by Nitro PDF')
|
||||||
if title:
|
if title:
|
||||||
mi.title = title
|
mi.title = title
|
||||||
authors = multiple_sequences('//dc:creator', root)
|
authors = multiple_sequences('//dc:creator', root)
|
||||||
@ -341,7 +345,7 @@ def consolidate_metadata(info_mi, info):
|
|||||||
# We'll use the xmp tags/authors but fall back to the info ones if the
|
# We'll use the xmp tags/authors but fall back to the info ones if the
|
||||||
# xmp does not have tags/authors. smart_update() should have taken care of
|
# xmp does not have tags/authors. smart_update() should have taken care of
|
||||||
# the rest
|
# the rest
|
||||||
info_mi.authors, info_mi.tags = xmp_mi.authors or info_mi.authors, xmp_mi.tags or info_mi.tags
|
info_mi.authors, info_mi.tags = (info_authors if xmp_mi.is_null('authors') else xmp_mi.authors), xmp_mi.tags or info_tags
|
||||||
return info_mi
|
return info_mi
|
||||||
|
|
||||||
def nsmap(*args):
|
def nsmap(*args):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user