mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement #631 (Feature Request: forcing metadata from filenames)
This commit is contained in:
parent
4bb44bd1b6
commit
2904bfdcb2
@ -20,6 +20,7 @@ from calibre.ebooks.lrf.meta import set_metadata as set_lrf_metadata
|
|||||||
from calibre.ebooks.metadata.epub import set_metadata as set_epub_metadata
|
from calibre.ebooks.metadata.epub import set_metadata as set_epub_metadata
|
||||||
|
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
from calibre.utils.config import prefs
|
||||||
|
|
||||||
_METADATA_PRIORITIES = [
|
_METADATA_PRIORITIES = [
|
||||||
'html', 'htm', 'xhtml', 'xhtm',
|
'html', 'htm', 'xhtml', 'xhtm',
|
||||||
@ -59,7 +60,7 @@ def metadata_from_formats(formats):
|
|||||||
|
|
||||||
def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
|
def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
|
||||||
if stream_type: stream_type = stream_type.lower()
|
if stream_type: stream_type = stream_type.lower()
|
||||||
if stream_type in ('html', 'html', 'xhtml', 'xhtm'):
|
if stream_type in ('html', 'html', 'xhtml', 'xhtm', 'xml'):
|
||||||
stream_type = 'html'
|
stream_type = 'html'
|
||||||
if stream_type in ('mobi', 'prc'):
|
if stream_type in ('mobi', 'prc'):
|
||||||
stream_type = 'mobi'
|
stream_type = 'mobi'
|
||||||
@ -73,18 +74,20 @@ def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
|
|||||||
if use_libprs_metadata and getattr(opf, 'application_id', None) is not None:
|
if use_libprs_metadata and getattr(opf, 'application_id', None) is not None:
|
||||||
return opf
|
return opf
|
||||||
|
|
||||||
try:
|
mi = MetaInformation(None, None)
|
||||||
func = eval(stream_type + '_metadata')
|
if prefs['read_file_metadata']:
|
||||||
mi = func(stream)
|
try:
|
||||||
except NameError:
|
func = eval(stream_type + '_metadata')
|
||||||
mi = MetaInformation(None, None)
|
mi = func(stream)
|
||||||
|
except NameError:
|
||||||
|
pass
|
||||||
|
|
||||||
name = os.path.basename(getattr(stream, 'name', ''))
|
name = os.path.basename(getattr(stream, 'name', ''))
|
||||||
base = metadata_from_filename(name)
|
base = metadata_from_filename(name)
|
||||||
if not base.authors:
|
if not base.authors:
|
||||||
base.authors = ['Unknown']
|
base.authors = [_('Unknown')]
|
||||||
if not base.title:
|
if not base.title:
|
||||||
base.title = 'Unknown'
|
base.title = _('Unknown')
|
||||||
base.smart_update(mi)
|
base.smart_update(mi)
|
||||||
if opf is not None:
|
if opf is not None:
|
||||||
base.smart_update(opf)
|
base.smart_update(opf)
|
||||||
|
@ -5,12 +5,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
import sys, os
|
import sys, os
|
||||||
|
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.ebooks.pyPdf import PdfFileReader
|
from pyPdf import PdfFileReader
|
||||||
|
|
||||||
def get_metadata(stream):
|
def get_metadata(stream):
|
||||||
""" Return metadata as a L{MetaInfo} object """
|
""" Return metadata as a L{MetaInfo} object """
|
||||||
title = 'Unknown'
|
mi = MetaInformation(_('Unknown'), [_('Unknown')])
|
||||||
mi = MetaInformation(title, ['Unknown'])
|
|
||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
try:
|
try:
|
||||||
info = PdfFileReader(stream).getDocumentInfo()
|
info = PdfFileReader(stream).getDocumentInfo()
|
||||||
|
@ -80,6 +80,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
|
|||||||
self.language.addItem(item[1], QVariant(item[0]))
|
self.language.addItem(item[1], QVariant(item[0]))
|
||||||
|
|
||||||
self.output_format.setCurrentIndex(0 if prefs['output_format'] == 'LRF' else 1)
|
self.output_format.setCurrentIndex(0 if prefs['output_format'] == 'LRF' else 1)
|
||||||
|
self.pdf_metadata.setChecked(prefs['read_file_metadata'])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -113,6 +114,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
|
|||||||
config['confirm_delete'] = bool(self.confirm_delete.isChecked())
|
config['confirm_delete'] = bool(self.confirm_delete.isChecked())
|
||||||
pattern = self.filename_pattern.commit()
|
pattern = self.filename_pattern.commit()
|
||||||
prefs['filename_pattern'] = pattern
|
prefs['filename_pattern'] = pattern
|
||||||
|
prefs['read_file_metadata'] = bool(self.pdf_metadata.isChecked())
|
||||||
config['save_to_disk_single_format'] = BOOK_EXTENSIONS[self.single_format.currentIndex()]
|
config['save_to_disk_single_format'] = BOOK_EXTENSIONS[self.single_format.currentIndex()]
|
||||||
config['cover_flow_queue_length'] = self.cover_browse.value()
|
config['cover_flow_queue_length'] = self.cover_browse.value()
|
||||||
prefs['language'] = str(self.language.itemData(self.language.currentIndex()).toString())
|
prefs['language'] = str(self.language.itemData(self.language.currentIndex()).toString())
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
<x>0</x>
|
<x>0</x>
|
||||||
<y>0</y>
|
<y>0</y>
|
||||||
<width>709</width>
|
<width>709</width>
|
||||||
<height>723</height>
|
<height>750</height>
|
||||||
</rect>
|
</rect>
|
||||||
</property>
|
</property>
|
||||||
<property name="windowTitle" >
|
<property name="windowTitle" >
|
||||||
@ -158,6 +158,19 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
<item>
|
||||||
|
<widget class="QCheckBox" name="pdf_metadata" >
|
||||||
|
<property name="toolTip" >
|
||||||
|
<string>If you disable this setting, metadatas is guessed from the filename instead. This can be configured in the Advanced section.</string>
|
||||||
|
</property>
|
||||||
|
<property name="text" >
|
||||||
|
<string>Read &metadata from files</string>
|
||||||
|
</property>
|
||||||
|
<property name="checked" >
|
||||||
|
<bool>true</bool>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
<item>
|
<item>
|
||||||
<layout class="QGridLayout" name="gridLayout_2" >
|
<layout class="QGridLayout" name="gridLayout_2" >
|
||||||
<item row="1" column="0" >
|
<item row="1" column="0" >
|
||||||
|
@ -524,6 +524,8 @@ def _prefs():
|
|||||||
help=_('The language in which to display the user interface'))
|
help=_('The language in which to display the user interface'))
|
||||||
c.add_opt('output_format', default='LRF',
|
c.add_opt('output_format', default='LRF',
|
||||||
help=_('The default output format for ebook conversions.'))
|
help=_('The default output format for ebook conversions.'))
|
||||||
|
c.add_opt('read_file_metadata', default=True,
|
||||||
|
help=_('Read metadata from files'))
|
||||||
|
|
||||||
c.add_opt('migrated', default=False, help='For Internal use. Don\'t modify.')
|
c.add_opt('migrated', default=False, help='For Internal use. Don\'t modify.')
|
||||||
return c
|
return c
|
||||||
|
@ -34,6 +34,11 @@ Implementation of stream filters for PDF.
|
|||||||
__author__ = "Mathieu Fenniak"
|
__author__ = "Mathieu Fenniak"
|
||||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||||
|
|
||||||
|
from utils import PdfReadError
|
||||||
|
try:
|
||||||
|
from cStringIO import StringIO
|
||||||
|
except ImportError:
|
||||||
|
from StringIO import StringIO
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import zlib
|
import zlib
|
||||||
@ -100,32 +105,33 @@ class FlateDecode(object):
|
|||||||
# predictor 1 == no predictor
|
# predictor 1 == no predictor
|
||||||
if predictor != 1:
|
if predictor != 1:
|
||||||
columns = decodeParms["/Columns"]
|
columns = decodeParms["/Columns"]
|
||||||
if predictor >= 10:
|
# PNG prediction:
|
||||||
newdata = ""
|
if predictor >= 10 and predictor <= 15:
|
||||||
|
output = StringIO()
|
||||||
# PNG prediction can vary from row to row
|
# PNG prediction can vary from row to row
|
||||||
rowlength = columns + 1
|
rowlength = columns + 1
|
||||||
assert len(data) % rowlength == 0
|
assert len(data) % rowlength == 0
|
||||||
prev_rowdata = "\x00"*rowlength
|
prev_rowdata = (0,) * rowlength
|
||||||
for row in range(len(data) / rowlength):
|
for row in xrange(len(data) / rowlength):
|
||||||
rowdata = list(data[(row*rowlength):((row+1)*rowlength)])
|
rowdata = [ord(x) for x in data[(row*rowlength):((row+1)*rowlength)]]
|
||||||
filterByte = ord(rowdata[0])
|
filterByte = rowdata[0]
|
||||||
if filterByte == 0:
|
if filterByte == 0:
|
||||||
pass
|
pass
|
||||||
elif filterByte == 1:
|
elif filterByte == 1:
|
||||||
for i in range(2, rowlength):
|
for i in range(2, rowlength):
|
||||||
rowdata[i] = chr((ord(rowdata[i]) + ord(rowdata[i-1])) % 256)
|
rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
|
||||||
elif filterByte == 2:
|
elif filterByte == 2:
|
||||||
for i in range(1, rowlength):
|
for i in range(1, rowlength):
|
||||||
rowdata[i] = chr((ord(rowdata[i]) + ord(prev_rowdata[i])) % 256)
|
rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
|
||||||
else:
|
else:
|
||||||
# unsupported PNG filter
|
# unsupported PNG filter
|
||||||
assert False
|
raise PdfReadError("Unsupported PNG filter %r" % filterByte)
|
||||||
prev_rowdata = rowdata
|
prev_rowdata = rowdata
|
||||||
newdata += ''.join(rowdata[1:])
|
output.write(''.join([chr(x) for x in rowdata[1:]]))
|
||||||
data = newdata
|
data = output.getvalue()
|
||||||
else:
|
else:
|
||||||
# unsupported predictor
|
# unsupported predictor
|
||||||
assert False
|
raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)
|
||||||
return data
|
return data
|
||||||
decode = staticmethod(decode)
|
decode = staticmethod(decode)
|
||||||
|
|
||||||
@ -220,9 +226,15 @@ def decodeStreamData(stream):
|
|||||||
data = ASCIIHexDecode.decode(data)
|
data = ASCIIHexDecode.decode(data)
|
||||||
elif filterType == "/ASCII85Decode":
|
elif filterType == "/ASCII85Decode":
|
||||||
data = ASCII85Decode.decode(data)
|
data = ASCII85Decode.decode(data)
|
||||||
|
elif filterType == "/Crypt":
|
||||||
|
decodeParams = stream.get("/DecodeParams", {})
|
||||||
|
if "/Name" not in decodeParams and "/Type" not in decodeParams:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
|
||||||
else:
|
else:
|
||||||
# unsupported filter
|
# unsupported filter
|
||||||
assert False
|
raise NotImplementedError("unsupported filter %s" % filterType)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
@ -237,3 +249,4 @@ if __name__ == "__main__":
|
|||||||
"""
|
"""
|
||||||
ascii85_originalText="Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure."
|
ascii85_originalText="Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure."
|
||||||
assert ASCII85Decode.decode(ascii85Test) == ascii85_originalText
|
assert ASCII85Decode.decode(ascii85Test) == ascii85_originalText
|
||||||
|
|
@ -203,6 +203,10 @@ class IndirectObject(PdfObject):
|
|||||||
|
|
||||||
|
|
||||||
class FloatObject(decimal.Decimal, PdfObject):
|
class FloatObject(decimal.Decimal, PdfObject):
|
||||||
|
def __new__(cls, value="0", context=None):
|
||||||
|
return decimal.Decimal.__new__(cls, str(value), context)
|
||||||
|
def __repr__(self):
|
||||||
|
return str(self)
|
||||||
def writeToStream(self, stream, encryption_key):
|
def writeToStream(self, stream, encryption_key):
|
||||||
stream.write(str(self))
|
stream.write(str(self))
|
||||||
|
|
||||||
@ -419,8 +423,73 @@ class NameObject(str, PdfObject):
|
|||||||
|
|
||||||
|
|
||||||
class DictionaryObject(dict, PdfObject):
|
class DictionaryObject(dict, PdfObject):
|
||||||
def __init__(self):
|
|
||||||
pass
|
def __init__(self, *args, **kwargs):
|
||||||
|
if len(args) == 0:
|
||||||
|
self.update(kwargs)
|
||||||
|
elif len(args) == 1:
|
||||||
|
arr = args[0]
|
||||||
|
# If we're passed a list/tuple, make a dict out of it
|
||||||
|
if not hasattr(arr, "iteritems"):
|
||||||
|
newarr = {}
|
||||||
|
for k, v in arr:
|
||||||
|
newarr[k] = v
|
||||||
|
arr = newarr
|
||||||
|
self.update(arr)
|
||||||
|
else:
|
||||||
|
raise TypeError("dict expected at most 1 argument, got 3")
|
||||||
|
|
||||||
|
def update(self, arr):
|
||||||
|
# note, a ValueError halfway through copying values
|
||||||
|
# will leave half the values in this dict.
|
||||||
|
for k, v in arr.iteritems():
|
||||||
|
self.__setitem__(k, v)
|
||||||
|
|
||||||
|
def raw_get(self, key):
|
||||||
|
return dict.__getitem__(self, key)
|
||||||
|
|
||||||
|
def __setitem__(self, key, value):
|
||||||
|
if not isinstance(key, PdfObject):
|
||||||
|
raise ValueError("key must be PdfObject")
|
||||||
|
if not isinstance(value, PdfObject):
|
||||||
|
raise ValueError("value must be PdfObject")
|
||||||
|
return dict.__setitem__(self, key, value)
|
||||||
|
|
||||||
|
def setdefault(self, key, value=None):
|
||||||
|
if not isinstance(key, PdfObject):
|
||||||
|
raise ValueError("key must be PdfObject")
|
||||||
|
if not isinstance(value, PdfObject):
|
||||||
|
raise ValueError("value must be PdfObject")
|
||||||
|
return dict.setdefault(self, key, value)
|
||||||
|
|
||||||
|
def __getitem__(self, key):
|
||||||
|
return dict.__getitem__(self, key).getObject()
|
||||||
|
|
||||||
|
##
|
||||||
|
# Retrieves XMP (Extensible Metadata Platform) data relevant to
|
||||||
|
# this object, if available.
|
||||||
|
# <p>
|
||||||
|
# Stability: Added in v1.12, will exist for all future v1.x releases.
|
||||||
|
# @return Returns a {@link #xmp.XmpInformation XmpInformation} instance
|
||||||
|
# that can be used to access XMP metadata from the document. Can also
|
||||||
|
# return None if no metadata was found on the document root.
|
||||||
|
def getXmpMetadata(self):
|
||||||
|
metadata = self.get("/Metadata", None)
|
||||||
|
if metadata == None:
|
||||||
|
return None
|
||||||
|
metadata = metadata.getObject()
|
||||||
|
import xmp
|
||||||
|
if not isinstance(metadata, xmp.XmpInformation):
|
||||||
|
metadata = xmp.XmpInformation(metadata)
|
||||||
|
self[NameObject("/Metadata")] = metadata
|
||||||
|
return metadata
|
||||||
|
|
||||||
|
##
|
||||||
|
# Read-only property that accesses the {@link
|
||||||
|
# #DictionaryObject.getXmpMetadata getXmpMetadata} function.
|
||||||
|
# <p>
|
||||||
|
# Stability: Added in v1.12, will exist for all future v1.x releases.
|
||||||
|
xmpMetadata = property(lambda self: self.getXmpMetadata(), None, None)
|
||||||
|
|
||||||
def writeToStream(self, stream, encryption_key):
|
def writeToStream(self, stream, encryption_key):
|
||||||
stream.write("<<\n")
|
stream.write("<<\n")
|
||||||
@ -563,7 +632,7 @@ class EncodedStreamObject(StreamObject):
|
|||||||
return self.decodedSelf.getData()
|
return self.decodedSelf.getData()
|
||||||
else:
|
else:
|
||||||
# create decoded object
|
# create decoded object
|
||||||
decoded = StreamObject()
|
decoded = DecodedStreamObject()
|
||||||
decoded._data = filters.decodeStreamData(self)
|
decoded._data = filters.decodeStreamData(self)
|
||||||
for key, value in self.items():
|
for key, value in self.items():
|
||||||
if not key in ("/Length", "/Filter", "/DecodeParms"):
|
if not key in ("/Length", "/Filter", "/DecodeParms"):
|
||||||
@ -583,8 +652,8 @@ class RectangleObject(ArrayObject):
|
|||||||
ArrayObject.__init__(self, [self.ensureIsNumber(x) for x in arr])
|
ArrayObject.__init__(self, [self.ensureIsNumber(x) for x in arr])
|
||||||
|
|
||||||
def ensureIsNumber(self, value):
|
def ensureIsNumber(self, value):
|
||||||
if not isinstance(value, NumberObject):
|
if not isinstance(value, (NumberObject, FloatObject)):
|
||||||
value = NumberObject(value)
|
value = FloatObject(value)
|
||||||
return value
|
return value
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
@ -88,7 +88,8 @@ class PdfFileWriter(object):
|
|||||||
return IndirectObject(len(self._objects), 0, self)
|
return IndirectObject(len(self._objects), 0, self)
|
||||||
|
|
||||||
def getObject(self, ido):
|
def getObject(self, ido):
|
||||||
assert ido.pdf == self
|
if ido.pdf != self:
|
||||||
|
raise ValueError("pdf must be self")
|
||||||
return self._objects[ido.idnum - 1]
|
return self._objects[ido.idnum - 1]
|
||||||
|
|
||||||
##
|
##
|
||||||
@ -105,7 +106,7 @@ class PdfFileWriter(object):
|
|||||||
page = self._addObject(page)
|
page = self._addObject(page)
|
||||||
pages = self.getObject(self._pages)
|
pages = self.getObject(self._pages)
|
||||||
pages["/Kids"].append(page)
|
pages["/Kids"].append(page)
|
||||||
pages["/Count"] = NumberObject(pages["/Count"] + 1)
|
pages[NameObject("/Count")] = NumberObject(pages["/Count"] + 1)
|
||||||
|
|
||||||
##
|
##
|
||||||
# Encrypt this PDF file with the PDF Standard encryption handler.
|
# Encrypt this PDF file with the PDF Standard encryption handler.
|
||||||
@ -272,7 +273,6 @@ class PdfFileWriter(object):
|
|||||||
class PdfFileReader(object):
|
class PdfFileReader(object):
|
||||||
def __init__(self, stream):
|
def __init__(self, stream):
|
||||||
self.flattenedPages = None
|
self.flattenedPages = None
|
||||||
self.pageNumbers = {}
|
|
||||||
self.resolvedObjects = {}
|
self.resolvedObjects = {}
|
||||||
self.read(stream)
|
self.read(stream)
|
||||||
self.stream = stream
|
self.stream = stream
|
||||||
@ -290,7 +290,7 @@ class PdfFileReader(object):
|
|||||||
def getDocumentInfo(self):
|
def getDocumentInfo(self):
|
||||||
if not self.trailer.has_key("/Info"):
|
if not self.trailer.has_key("/Info"):
|
||||||
return None
|
return None
|
||||||
obj = self.getObject(self.trailer['/Info'])
|
obj = self.trailer['/Info']
|
||||||
retval = DocumentInformation()
|
retval = DocumentInformation()
|
||||||
retval.update(obj)
|
retval.update(obj)
|
||||||
return retval
|
return retval
|
||||||
@ -302,6 +302,28 @@ class PdfFileReader(object):
|
|||||||
# Stability: Added in v1.7, will exist for all future v1.x releases.
|
# Stability: Added in v1.7, will exist for all future v1.x releases.
|
||||||
documentInfo = property(lambda self: self.getDocumentInfo(), None, None)
|
documentInfo = property(lambda self: self.getDocumentInfo(), None, None)
|
||||||
|
|
||||||
|
##
|
||||||
|
# Retrieves XMP (Extensible Metadata Platform) data from the PDF document
|
||||||
|
# root.
|
||||||
|
# <p>
|
||||||
|
# Stability: Added in v1.12, will exist for all future v1.x releases.
|
||||||
|
# @return Returns a {@link #generic.XmpInformation XmpInformation}
|
||||||
|
# instance that can be used to access XMP metadata from the document.
|
||||||
|
# Can also return None if no metadata was found on the document root.
|
||||||
|
def getXmpMetadata(self):
|
||||||
|
try:
|
||||||
|
self._override_encryption = True
|
||||||
|
return self.trailer["/Root"].getXmpMetadata()
|
||||||
|
finally:
|
||||||
|
self._override_encryption = False
|
||||||
|
|
||||||
|
##
|
||||||
|
# Read-only property that accesses the {@link #PdfFileReader.getXmpMetadata
|
||||||
|
# getXmpMetadata} function.
|
||||||
|
# <p>
|
||||||
|
# Stability: Added in v1.12, will exist for all future v1.x releases.
|
||||||
|
xmpMetadata = property(lambda self: self.getXmpMetadata(), None, None)
|
||||||
|
|
||||||
##
|
##
|
||||||
# Calculates the number of pages in this PDF file.
|
# Calculates the number of pages in this PDF file.
|
||||||
# <p>
|
# <p>
|
||||||
@ -346,43 +368,39 @@ class PdfFileReader(object):
|
|||||||
# Stability: Added in v1.10, will exist for all future v1.x releases.
|
# Stability: Added in v1.10, will exist for all future v1.x releases.
|
||||||
# @return Returns a dict which maps names to {@link #Destination
|
# @return Returns a dict which maps names to {@link #Destination
|
||||||
# destinations}.
|
# destinations}.
|
||||||
def getNamedDestinations(self, tree = None, map = None):
|
def getNamedDestinations(self, tree=None, retval=None):
|
||||||
if self.flattenedPages == None:
|
if retval == None:
|
||||||
self._flatten()
|
retval = {}
|
||||||
|
catalog = self.trailer["/Root"]
|
||||||
get = self.safeGetObject
|
|
||||||
if map == None:
|
|
||||||
map = {}
|
|
||||||
catalog = get(self.trailer["/Root"])
|
|
||||||
|
|
||||||
# get the name tree
|
# get the name tree
|
||||||
if catalog.has_key("/Dests"):
|
if catalog.has_key("/Dests"):
|
||||||
tree = get(catalog["/Dests"])
|
tree = catalog["/Dests"]
|
||||||
elif catalog.has_key("/Names"):
|
elif catalog.has_key("/Names"):
|
||||||
names = get(catalog['/Names'])
|
names = catalog['/Names']
|
||||||
if names.has_key("/Dests"):
|
if names.has_key("/Dests"):
|
||||||
tree = get(names['/Dests'])
|
tree = names['/Dests']
|
||||||
|
|
||||||
if tree == None:
|
if tree == None:
|
||||||
return map
|
return retval
|
||||||
|
|
||||||
if tree.has_key("/Kids"):
|
if tree.has_key("/Kids"):
|
||||||
# recurse down the tree
|
# recurse down the tree
|
||||||
for kid in get(tree["/Kids"]):
|
for kid in tree["/Kids"]:
|
||||||
self.getNamedDestinations(get(kid), map)
|
self.getNamedDestinations(kid.getObject(), retval)
|
||||||
|
|
||||||
if tree.has_key("/Names"):
|
if tree.has_key("/Names"):
|
||||||
names = get(tree["/Names"])
|
names = tree["/Names"]
|
||||||
for i in range(0, len(names), 2):
|
for i in range(0, len(names), 2):
|
||||||
key = get(names[i])
|
key = names[i].getObject()
|
||||||
val = get(names[i+1])
|
val = names[i+1].getObject()
|
||||||
if isinstance(val, DictionaryObject) and val.has_key('/D'):
|
if isinstance(val, DictionaryObject) and val.has_key('/D'):
|
||||||
val = get(val['/D'])
|
val = val['/D']
|
||||||
dest = self._buildDestination(val, key)
|
dest = self._buildDestination(key, val)
|
||||||
if dest != None:
|
if dest != None:
|
||||||
map[key] = dest
|
retval[key] = dest
|
||||||
|
|
||||||
return map
|
return retval
|
||||||
|
|
||||||
##
|
##
|
||||||
# Read-only property that accesses the {@link #PdfFileReader.getOutlines
|
# Read-only property that accesses the {@link #PdfFileReader.getOutlines
|
||||||
@ -396,20 +414,16 @@ class PdfFileReader(object):
|
|||||||
# <p>
|
# <p>
|
||||||
# Stability: Added in v1.10, will exist for all future v1.x releases.
|
# Stability: Added in v1.10, will exist for all future v1.x releases.
|
||||||
# @return Returns a nested list of {@link #Destination destinations}.
|
# @return Returns a nested list of {@link #Destination destinations}.
|
||||||
def getOutlines(self, node = None, outlines = None):
|
def getOutlines(self, node=None, outlines=None):
|
||||||
if self.flattenedPages == None:
|
|
||||||
self._flatten()
|
|
||||||
|
|
||||||
get = self.safeGetObject
|
|
||||||
if outlines == None:
|
if outlines == None:
|
||||||
outlines = []
|
outlines = []
|
||||||
catalog = get(self.trailer["/Root"])
|
catalog = self.trailer["/Root"]
|
||||||
|
|
||||||
# get the outline dictionary and named destinations
|
# get the outline dictionary and named destinations
|
||||||
if catalog.has_key("/Outlines"):
|
if catalog.has_key("/Outlines"):
|
||||||
lines = get(catalog["/Outlines"])
|
lines = catalog["/Outlines"]
|
||||||
if lines.has_key("/First"):
|
if lines.has_key("/First"):
|
||||||
node = get(lines["/First"])
|
node = lines["/First"]
|
||||||
self._namedDests = self.getNamedDestinations()
|
self._namedDests = self.getNamedDestinations()
|
||||||
|
|
||||||
if node == None:
|
if node == None:
|
||||||
@ -424,49 +438,44 @@ class PdfFileReader(object):
|
|||||||
# check for sub-outlines
|
# check for sub-outlines
|
||||||
if node.has_key("/First"):
|
if node.has_key("/First"):
|
||||||
subOutlines = []
|
subOutlines = []
|
||||||
self.getOutlines(get(node["/First"]), subOutlines)
|
self.getOutlines(node["/First"], subOutlines)
|
||||||
if subOutlines:
|
if subOutlines:
|
||||||
outlines.append(subOutlines)
|
outlines.append(subOutlines)
|
||||||
|
|
||||||
if not node.has_key("/Next"):
|
if not node.has_key("/Next"):
|
||||||
break
|
break
|
||||||
node = get(node["/Next"])
|
node = node["/Next"]
|
||||||
|
|
||||||
return outlines
|
return outlines
|
||||||
|
|
||||||
def _buildDestination(self, array, title):
|
def _buildDestination(self, title, array):
|
||||||
if not (isinstance(array, ArrayObject) and len(array) >= 2 and \
|
page, typ = array[0:2]
|
||||||
isinstance(array[0], IndirectObject)):
|
array = array[2:]
|
||||||
return None
|
return Destination(title, page, typ, *array)
|
||||||
|
|
||||||
pageKey = (array[0].generation, array[0].idnum)
|
|
||||||
if not self.pageNumbers.has_key(pageKey):
|
|
||||||
return None
|
|
||||||
|
|
||||||
pageNum = self.pageNumbers[pageKey]
|
|
||||||
return Destination(*([title, pageNum]+array[1:]))
|
|
||||||
|
|
||||||
def _buildOutline(self, node):
|
def _buildOutline(self, node):
|
||||||
dest, title, outline = None, None, None
|
dest, title, outline = None, None, None
|
||||||
|
|
||||||
if node.has_key("/A") and node.has_key("/Title"):
|
if node.has_key("/A") and node.has_key("/Title"):
|
||||||
# Action, section 8.5 (only type GoTo supported)
|
# Action, section 8.5 (only type GoTo supported)
|
||||||
title = self.safeGetObject(node["/Title"])
|
title = node["/Title"]
|
||||||
action = self.safeGetObject(node["/A"])
|
action = node["/A"]
|
||||||
if action["/S"] == "/GoTo":
|
if action["/S"] == "/GoTo":
|
||||||
dest = self.safeGetObject(action["/D"])
|
dest = action["/D"]
|
||||||
elif node.has_key("/Dest") and node.has_key("/Title"):
|
elif node.has_key("/Dest") and node.has_key("/Title"):
|
||||||
# Destination, section 8.2.1
|
# Destination, section 8.2.1
|
||||||
title = self.safeGetObject(node["/Title"])
|
title = node["/Title"]
|
||||||
dest = self.safeGetObject(node["/Dest"])
|
dest = node["/Dest"]
|
||||||
|
|
||||||
# if destination found, then create outline
|
# if destination found, then create outline
|
||||||
if dest:
|
if dest:
|
||||||
if isinstance(dest, ArrayObject):
|
if isinstance(dest, ArrayObject):
|
||||||
outline = self._buildDestination(dest, title)
|
outline = self._buildDestination(title, dest)
|
||||||
elif isinstance(dest, str) and self._namedDests.has_key(dest):
|
elif isinstance(dest, unicode) and self._namedDests.has_key(dest):
|
||||||
outline = self._namedDests[dest]
|
outline = self._namedDests[dest]
|
||||||
outline.title = title
|
outline[NameObject("/Title")] = title
|
||||||
|
else:
|
||||||
|
raise utils.PdfReadError("Unexpected destination %r" % dest)
|
||||||
return outline
|
return outline
|
||||||
|
|
||||||
##
|
##
|
||||||
@ -478,7 +487,7 @@ class PdfFileReader(object):
|
|||||||
pages = property(lambda self: ConvertFunctionsToVirtualList(self.getNumPages, self.getPage),
|
pages = property(lambda self: ConvertFunctionsToVirtualList(self.getNumPages, self.getPage),
|
||||||
None, None)
|
None, None)
|
||||||
|
|
||||||
def _flatten(self, pages = None, inherit = None):
|
def _flatten(self, pages=None, inherit=None):
|
||||||
inheritablePageAttributes = (
|
inheritablePageAttributes = (
|
||||||
NameObject("/Resources"), NameObject("/MediaBox"),
|
NameObject("/Resources"), NameObject("/MediaBox"),
|
||||||
NameObject("/CropBox"), NameObject("/Rotate")
|
NameObject("/CropBox"), NameObject("/Rotate")
|
||||||
@ -487,37 +496,25 @@ class PdfFileReader(object):
|
|||||||
inherit = dict()
|
inherit = dict()
|
||||||
if pages == None:
|
if pages == None:
|
||||||
self.flattenedPages = []
|
self.flattenedPages = []
|
||||||
catalog = self.getObject(self.trailer["/Root"])
|
catalog = self.trailer["/Root"].getObject()
|
||||||
pages = self.getObject(catalog["/Pages"])
|
pages = catalog["/Pages"].getObject()
|
||||||
indirectReference = None
|
|
||||||
if isinstance(pages, IndirectObject):
|
|
||||||
indirectReference = pages
|
|
||||||
pages = self.getObject(pages)
|
|
||||||
t = pages["/Type"]
|
t = pages["/Type"]
|
||||||
if t == "/Pages":
|
if t == "/Pages":
|
||||||
for attr in inheritablePageAttributes:
|
for attr in inheritablePageAttributes:
|
||||||
if pages.has_key(attr):
|
if pages.has_key(attr):
|
||||||
inherit[attr] = pages[attr]
|
inherit[attr] = pages[attr]
|
||||||
for page in self.safeGetObject(pages["/Kids"]):
|
for page in pages["/Kids"]:
|
||||||
self._flatten(page, inherit)
|
self._flatten(page.getObject(), inherit)
|
||||||
elif t == "/Page":
|
elif t == "/Page":
|
||||||
for attr,value in inherit.items():
|
for attr,value in inherit.items():
|
||||||
# if the page has its own value, it does not inherit the
|
# if the page has its own value, it does not inherit the
|
||||||
# parent's value:
|
# parent's value:
|
||||||
if not pages.has_key(attr):
|
if not pages.has_key(attr):
|
||||||
pages[attr] = value
|
pages[attr] = value
|
||||||
pageObj = PageObject(self, indirectReference)
|
pageObj = PageObject(self)
|
||||||
pageObj.update(pages)
|
pageObj.update(pages)
|
||||||
if indirectReference:
|
|
||||||
key = (indirectReference.generation, indirectReference.idnum)
|
|
||||||
self.pageNumbers[key] = len(self.flattenedPages)
|
|
||||||
self.flattenedPages.append(pageObj)
|
self.flattenedPages.append(pageObj)
|
||||||
|
|
||||||
def safeGetObject(self, obj):
|
|
||||||
if isinstance(obj, IndirectObject):
|
|
||||||
return self.safeGetObject(self.getObject(obj))
|
|
||||||
return obj
|
|
||||||
|
|
||||||
def getObject(self, indirectReference):
|
def getObject(self, indirectReference):
|
||||||
retval = self.resolvedObjects.get(indirectReference.generation, {}).get(indirectReference.idnum, None)
|
retval = self.resolvedObjects.get(indirectReference.generation, {}).get(indirectReference.idnum, None)
|
||||||
if retval != None:
|
if retval != None:
|
||||||
@ -527,7 +524,7 @@ class PdfFileReader(object):
|
|||||||
# indirect reference to object in object stream
|
# indirect reference to object in object stream
|
||||||
# read the entire object stream into memory
|
# read the entire object stream into memory
|
||||||
stmnum,idx = self.xref_objStm[indirectReference.idnum]
|
stmnum,idx = self.xref_objStm[indirectReference.idnum]
|
||||||
objStm = self.getObject(IndirectObject(stmnum, 0, self))
|
objStm = IndirectObject(stmnum, 0, self).getObject()
|
||||||
assert objStm['/Type'] == '/ObjStm'
|
assert objStm['/Type'] == '/ObjStm'
|
||||||
assert idx < objStm['/N']
|
assert idx < objStm['/N']
|
||||||
streamData = StringIO(objStm.getData())
|
streamData = StringIO(objStm.getData())
|
||||||
@ -619,7 +616,7 @@ class PdfFileReader(object):
|
|||||||
# read all cross reference tables and their trailers
|
# read all cross reference tables and their trailers
|
||||||
self.xref = {}
|
self.xref = {}
|
||||||
self.xref_objStm = {}
|
self.xref_objStm = {}
|
||||||
self.trailer = {}
|
self.trailer = DictionaryObject()
|
||||||
while 1:
|
while 1:
|
||||||
# load the xref table
|
# load the xref table
|
||||||
stream.seek(startxref, 0)
|
stream.seek(startxref, 0)
|
||||||
@ -641,6 +638,16 @@ class PdfFileReader(object):
|
|||||||
cnt = 0
|
cnt = 0
|
||||||
while cnt < size:
|
while cnt < size:
|
||||||
line = stream.read(20)
|
line = stream.read(20)
|
||||||
|
# It's very clear in section 3.4.3 of the PDF spec
|
||||||
|
# that all cross-reference table lines are a fixed
|
||||||
|
# 20 bytes. However... some malformed PDF files
|
||||||
|
# use a single character EOL without a preceeding
|
||||||
|
# space. Detect that case, and seek the stream
|
||||||
|
# back one character. (0-9 means we've bled into
|
||||||
|
# the next xref entry, t means we've bled into the
|
||||||
|
# text "trailer"):
|
||||||
|
if line[-1] in "0123456789t":
|
||||||
|
stream.seek(-1, 1)
|
||||||
offset, generation = line[:16].split(" ")
|
offset, generation = line[:16].split(" ")
|
||||||
offset, generation = int(offset), int(generation)
|
offset, generation = int(offset), int(generation)
|
||||||
if not self.xref.has_key(generation):
|
if not self.xref.has_key(generation):
|
||||||
@ -669,8 +676,8 @@ class PdfFileReader(object):
|
|||||||
for key, value in newTrailer.items():
|
for key, value in newTrailer.items():
|
||||||
if not self.trailer.has_key(key):
|
if not self.trailer.has_key(key):
|
||||||
self.trailer[key] = value
|
self.trailer[key] = value
|
||||||
if newTrailer.has_key(NameObject("/Prev")):
|
if newTrailer.has_key("/Prev"):
|
||||||
startxref = newTrailer[NameObject("/Prev")]
|
startxref = newTrailer["/Prev"]
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
elif x.isdigit():
|
elif x.isdigit():
|
||||||
@ -681,43 +688,46 @@ class PdfFileReader(object):
|
|||||||
assert xrefstream["/Type"] == "/XRef"
|
assert xrefstream["/Type"] == "/XRef"
|
||||||
self.cacheIndirectObject(generation, idnum, xrefstream)
|
self.cacheIndirectObject(generation, idnum, xrefstream)
|
||||||
streamData = StringIO(xrefstream.getData())
|
streamData = StringIO(xrefstream.getData())
|
||||||
num, size = xrefstream.get("/Index", [0, xrefstream.get("/Size")])
|
idx_pairs = xrefstream.get("/Index", [0, xrefstream.get("/Size")])
|
||||||
entrySizes = xrefstream.get("/W")
|
entrySizes = xrefstream.get("/W")
|
||||||
cnt = 0
|
for num, size in self._pairs(idx_pairs):
|
||||||
while cnt < size:
|
cnt = 0
|
||||||
for i in range(len(entrySizes)):
|
while cnt < size:
|
||||||
d = streamData.read(entrySizes[i])
|
for i in range(len(entrySizes)):
|
||||||
di = convertToInt(d, entrySizes[i])
|
d = streamData.read(entrySizes[i])
|
||||||
if i == 0:
|
di = convertToInt(d, entrySizes[i])
|
||||||
xref_type = di
|
if i == 0:
|
||||||
elif i == 1:
|
xref_type = di
|
||||||
if xref_type == 0:
|
elif i == 1:
|
||||||
next_free_object = di
|
if xref_type == 0:
|
||||||
elif xref_type == 1:
|
next_free_object = di
|
||||||
byte_offset = di
|
elif xref_type == 1:
|
||||||
elif xref_type == 2:
|
byte_offset = di
|
||||||
objstr_num = di
|
elif xref_type == 2:
|
||||||
elif i == 2:
|
objstr_num = di
|
||||||
if xref_type == 0:
|
elif i == 2:
|
||||||
next_generation = di
|
if xref_type == 0:
|
||||||
elif xref_type == 1:
|
next_generation = di
|
||||||
generation = di
|
elif xref_type == 1:
|
||||||
elif xref_type == 2:
|
generation = di
|
||||||
obstr_idx = di
|
elif xref_type == 2:
|
||||||
if xref_type == 0:
|
obstr_idx = di
|
||||||
pass
|
if xref_type == 0:
|
||||||
elif xref_type == 1:
|
pass
|
||||||
if not self.xref.has_key(generation):
|
elif xref_type == 1:
|
||||||
self.xref[generation] = {}
|
if not self.xref.has_key(generation):
|
||||||
self.xref[generation][num] = byte_offset
|
self.xref[generation] = {}
|
||||||
elif xref_type == 2:
|
if not num in self.xref[generation]:
|
||||||
self.xref_objStm[num] = [objstr_num, obstr_idx]
|
self.xref[generation][num] = byte_offset
|
||||||
cnt += 1
|
elif xref_type == 2:
|
||||||
num += 1
|
if not num in self.xref_objStm:
|
||||||
|
self.xref_objStm[num] = [objstr_num, obstr_idx]
|
||||||
|
cnt += 1
|
||||||
|
num += 1
|
||||||
trailerKeys = "/Root", "/Encrypt", "/Info", "/ID"
|
trailerKeys = "/Root", "/Encrypt", "/Info", "/ID"
|
||||||
for key in trailerKeys:
|
for key in trailerKeys:
|
||||||
if xrefstream.has_key(key) and not self.trailer.has_key(key):
|
if xrefstream.has_key(key) and not self.trailer.has_key(key):
|
||||||
self.trailer[NameObject(key)] = xrefstream[key]
|
self.trailer[NameObject(key)] = xrefstream.raw_get(key)
|
||||||
if xrefstream.has_key("/Prev"):
|
if xrefstream.has_key("/Prev"):
|
||||||
startxref = xrefstream["/Prev"]
|
startxref = xrefstream["/Prev"]
|
||||||
else:
|
else:
|
||||||
@ -737,6 +747,14 @@ class PdfFileReader(object):
|
|||||||
assert False
|
assert False
|
||||||
break
|
break
|
||||||
|
|
||||||
|
def _pairs(self, array):
    """Yield consecutive, non-overlapping ``(first, second)`` pairs.

    Used to walk the flat ``/Index`` array of a cross-reference stream,
    which is laid out as ``[start0, count0, start1, count1, ...]``.

    A trailing unpaired element is ignored.  An empty or one-element
    array yields nothing (previously this raised IndexError because the
    first pair was indexed before the length was checked).

    @param array Any indexable sequence supporting len().
    @return A generator of 2-tuples.
    """
    # Stop at len - 1 so that array[start + 1] is always in range.
    for start in range(0, len(array) - 1, 2):
        yield array[start], array[start + 1]
|
||||||
|
|
||||||
def readNextEndLine(self, stream):
|
def readNextEndLine(self, stream):
|
||||||
line = ""
|
line = ""
|
||||||
while True:
|
while True:
|
||||||
@ -778,7 +796,7 @@ class PdfFileReader(object):
|
|||||||
self._override_encryption = False
|
self._override_encryption = False
|
||||||
|
|
||||||
def _decrypt(self, password):
|
def _decrypt(self, password):
|
||||||
encrypt = self.safeGetObject(self.trailer['/Encrypt'])
|
encrypt = self.trailer['/Encrypt'].getObject()
|
||||||
if encrypt['/Filter'] != '/Standard':
|
if encrypt['/Filter'] != '/Standard':
|
||||||
raise NotImplementedError, "only Standard PDF encryption handler is available"
|
raise NotImplementedError, "only Standard PDF encryption handler is available"
|
||||||
if not (encrypt['/V'] in (1, 2)):
|
if not (encrypt['/V'] in (1, 2)):
|
||||||
@ -788,13 +806,13 @@ class PdfFileReader(object):
|
|||||||
self._decryption_key = key
|
self._decryption_key = key
|
||||||
return 1
|
return 1
|
||||||
else:
|
else:
|
||||||
rev = self.safeGetObject(encrypt['/R'])
|
rev = encrypt['/R'].getObject()
|
||||||
if rev == 2:
|
if rev == 2:
|
||||||
keylen = 5
|
keylen = 5
|
||||||
else:
|
else:
|
||||||
keylen = self.safeGetObject(encrypt['/Length']) / 8
|
keylen = encrypt['/Length'].getObject() / 8
|
||||||
key = _alg33_1(password, rev, keylen)
|
key = _alg33_1(password, rev, keylen)
|
||||||
real_O = self.safeGetObject(encrypt["/O"])
|
real_O = encrypt["/O"].getObject()
|
||||||
if rev == 2:
|
if rev == 2:
|
||||||
userpass = utils.RC4_encrypt(key, real_O)
|
userpass = utils.RC4_encrypt(key, real_O)
|
||||||
else:
|
else:
|
||||||
@ -812,20 +830,20 @@ class PdfFileReader(object):
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
def _authenticateUserPassword(self, password):
|
def _authenticateUserPassword(self, password):
|
||||||
encrypt = self.safeGetObject(self.trailer['/Encrypt'])
|
encrypt = self.trailer['/Encrypt'].getObject()
|
||||||
rev = self.safeGetObject(encrypt['/R'])
|
rev = encrypt['/R'].getObject()
|
||||||
owner_entry = self.safeGetObject(encrypt['/O']).original_bytes
|
owner_entry = encrypt['/O'].getObject().original_bytes
|
||||||
p_entry = self.safeGetObject(encrypt['/P'])
|
p_entry = encrypt['/P'].getObject()
|
||||||
id_entry = self.safeGetObject(self.trailer['/ID'])
|
id_entry = self.trailer['/ID'].getObject()
|
||||||
id1_entry = self.safeGetObject(id_entry[0])
|
id1_entry = id_entry[0].getObject()
|
||||||
if rev == 2:
|
if rev == 2:
|
||||||
U, key = _alg34(password, owner_entry, p_entry, id1_entry)
|
U, key = _alg34(password, owner_entry, p_entry, id1_entry)
|
||||||
elif rev >= 3:
|
elif rev >= 3:
|
||||||
U, key = _alg35(password, rev,
|
U, key = _alg35(password, rev,
|
||||||
self.safeGetObject(encrypt["/Length"]) / 8, owner_entry,
|
encrypt["/Length"].getObject() / 8, owner_entry,
|
||||||
p_entry, id1_entry,
|
p_entry, id1_entry,
|
||||||
self.safeGetObject(encrypt.get("/EncryptMetadata", False)))
|
encrypt.get("/EncryptMetadata", BooleanObject(False)).getObject())
|
||||||
real_U = self.safeGetObject(encrypt['/U']).original_bytes
|
real_U = encrypt['/U'].getObject().original_bytes
|
||||||
return U == real_U, key
|
return U == real_U, key
|
||||||
|
|
||||||
def getIsEncrypted(self):
|
def getIsEncrypted(self):
|
||||||
@ -874,10 +892,9 @@ def createRectangleAccessor(name, fallback):
|
|||||||
# will be created by accessing the {@link #PdfFileReader.getPage getPage}
|
# will be created by accessing the {@link #PdfFileReader.getPage getPage}
|
||||||
# function of the {@link #PdfFileReader PdfFileReader} class.
|
# function of the {@link #PdfFileReader PdfFileReader} class.
|
||||||
class PageObject(DictionaryObject):
|
class PageObject(DictionaryObject):
|
||||||
def __init__(self, pdf, indirectReference = None):
|
def __init__(self, pdf):
|
||||||
DictionaryObject.__init__(self)
|
DictionaryObject.__init__(self)
|
||||||
self.pdf = pdf
|
self.pdf = pdf
|
||||||
self.indirectReference = indirectReference
|
|
||||||
|
|
||||||
##
|
##
|
||||||
# Rotates a page clockwise by increments of 90 degrees.
|
# Rotates a page clockwise by increments of 90 degrees.
|
||||||
@ -1058,7 +1075,7 @@ class PageObject(DictionaryObject):
|
|||||||
# implementation-defined manner. Default value: same as MediaBox.
|
# implementation-defined manner. Default value: same as MediaBox.
|
||||||
# <p>
|
# <p>
|
||||||
# Stability: Added in v1.4, will exist for all future v1.x releases.
|
# Stability: Added in v1.4, will exist for all future v1.x releases.
|
||||||
cropBox = createRectangleAccessor("/CropBox", ("/CropBox",))
|
cropBox = createRectangleAccessor("/CropBox", ("/MediaBox",))
|
||||||
|
|
||||||
##
|
##
|
||||||
# A rectangle (RectangleObject), expressed in default user space units,
|
# A rectangle (RectangleObject), expressed in default user space units,
|
||||||
@ -1110,7 +1127,15 @@ class ContentStream(DecodedStreamObject):
|
|||||||
break
|
break
|
||||||
stream.seek(-1, 1)
|
stream.seek(-1, 1)
|
||||||
if peek.isalpha() or peek == "'" or peek == '"':
|
if peek.isalpha() or peek == "'" or peek == '"':
|
||||||
operator = readUntilWhitespace(stream, maxchars=2)
|
operator = ""
|
||||||
|
while True:
|
||||||
|
tok = stream.read(1)
|
||||||
|
if tok.isspace() or tok in NameObject.delimiterCharacters:
|
||||||
|
stream.seek(-1, 1)
|
||||||
|
break
|
||||||
|
elif tok == '':
|
||||||
|
break
|
||||||
|
operator += tok
|
||||||
if operator == "BI":
|
if operator == "BI":
|
||||||
# begin inline image - a completely different parsing
|
# begin inline image - a completely different parsing
|
||||||
# mechanism is required, of course... thanks buddy...
|
# mechanism is required, of course... thanks buddy...
|
||||||
@ -1120,6 +1145,14 @@ class ContentStream(DecodedStreamObject):
|
|||||||
else:
|
else:
|
||||||
self.operations.append((operands, operator))
|
self.operations.append((operands, operator))
|
||||||
operands = []
|
operands = []
|
||||||
|
elif peek == '%':
|
||||||
|
# If we encounter a comment in the content stream, we have to
|
||||||
|
# handle it here. Typically, readObject will handle
|
||||||
|
# encountering a comment -- but readObject assumes that
|
||||||
|
# following the comment must be the object we're trying to
|
||||||
|
# read. In this case, it could be an operator instead.
|
||||||
|
while peek not in ('\r', '\n'):
|
||||||
|
peek = stream.read(1)
|
||||||
else:
|
else:
|
||||||
operands.append(readObject(stream, None))
|
operands.append(readObject(stream, None))
|
||||||
|
|
||||||
@ -1251,86 +1284,74 @@ class DocumentInformation(DictionaryObject):
|
|||||||
# See section 8.2.1 of the PDF 1.6 reference.
|
# See section 8.2.1 of the PDF 1.6 reference.
|
||||||
# Stability: Added in v1.10, will exist for all v1.x releases.
|
# Stability: Added in v1.10, will exist for all v1.x releases.
|
||||||
class Destination(DictionaryObject):
|
class Destination(DictionaryObject):
|
||||||
def __init__(self, *args):
|
def __init__(self, title, page, typ, *args):
|
||||||
DictionaryObject.__init__(self)
|
DictionaryObject.__init__(self)
|
||||||
self.title = args[0]
|
self[NameObject("/Title")] = title
|
||||||
self["/Page"], self["/Type"] = args[1], args[2]
|
self[NameObject("/Page")] = page
|
||||||
|
self[NameObject("/Type")] = typ
|
||||||
|
|
||||||
# from table 8.2 of the PDF 1.6 reference.
|
# from table 8.2 of the PDF 1.6 reference.
|
||||||
mapNull = lambda x: {True: None, False: x}[isinstance(x, NullObject)]
|
if typ == "/XYZ":
|
||||||
params = map(mapNull, args[3:])
|
(self[NameObject("/Left")], self[NameObject("/Top")],
|
||||||
type = self["/Type"]
|
self[NameObject("/Zoom")]) = args
|
||||||
|
elif typ == "/FitR":
|
||||||
if type == "/XYZ":
|
(self[NameObject("/Left")], self[NameObject("/Bottom")],
|
||||||
self["/Left"], self["/Top"], self["/Zoom"] = params
|
self[NameObject("/Right")], self[NameObject("/Top")]) = args
|
||||||
elif type == "/FitR":
|
elif typ in ["/FitH", "FitBH"]:
|
||||||
self["/Left"], self["/Bottom"], \
|
self[NameObject("/Top")], = args
|
||||||
self["/Right"], self["/Top"] = params
|
elif typ in ["/FitV", "FitBV"]:
|
||||||
elif type in ["/FitH", "FitBH"]:
|
self[NameObject("/Left")], = args
|
||||||
self["/Top"], = params
|
elif typ in ["/Fit", "FitB"]:
|
||||||
elif type in ["/FitV", "FitBV"]:
|
|
||||||
self["/Left"], = params
|
|
||||||
elif type in ["/Fit", "FitB"]:
|
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
raise utils.PdfReadError, "Unknown Destination Type: " + type
|
raise utils.PdfReadError("Unknown Destination Type: %r" % typ)
|
||||||
|
|
||||||
def setTitle(self, title):
|
|
||||||
self["/Title"] = title.strip()
|
|
||||||
|
|
||||||
##
|
##
|
||||||
# Read-write property accessing the destination title.
|
# Read-only property accessing the destination title.
|
||||||
# @return A string.
|
# @return A string.
|
||||||
title = property(lambda self: self.get("/Title"), setTitle, None)
|
title = property(lambda self: self.get("/Title"))
|
||||||
|
|
||||||
##
|
##
|
||||||
# Read-only property accessing the destination page.
|
# Read-only property accessing the destination page.
|
||||||
# @return An integer.
|
# @return An integer.
|
||||||
page = property(lambda self: self.get("/Page"), None, None)
|
page = property(lambda self: self.get("/Page"))
|
||||||
|
|
||||||
##
|
##
|
||||||
# Read-only property accessing the destination type.
|
# Read-only property accessing the destination type.
|
||||||
# @return A string.
|
# @return A string.
|
||||||
type = property(lambda self: self.get("/Type"), None, None)
|
typ = property(lambda self: self.get("/Type"))
|
||||||
|
|
||||||
##
|
##
|
||||||
# Read-only property accessing the zoom factor.
|
# Read-only property accessing the zoom factor.
|
||||||
# @return A number, or None if not available.
|
# @return A number, or None if not available.
|
||||||
zoom = property(lambda self: self.get("/Zoom", None), None, None)
|
zoom = property(lambda self: self.get("/Zoom", None))
|
||||||
|
|
||||||
##
|
##
|
||||||
# Read-only property accessing the left horizontal coordinate.
|
# Read-only property accessing the left horizontal coordinate.
|
||||||
# @return A number, or None if not available.
|
# @return A number, or None if not available.
|
||||||
left = property(lambda self: self.get("/Left", None), None, None)
|
left = property(lambda self: self.get("/Left", None))
|
||||||
|
|
||||||
##
|
##
|
||||||
# Read-only property accessing the right horizontal coordinate.
|
# Read-only property accessing the right horizontal coordinate.
|
||||||
# @return A number, or None if not available.
|
# @return A number, or None if not available.
|
||||||
right = property(lambda self: self.get("/Right", None), None, None)
|
right = property(lambda self: self.get("/Right", None))
|
||||||
|
|
||||||
##
|
##
|
||||||
# Read-only property accessing the top vertical coordinate.
|
# Read-only property accessing the top vertical coordinate.
|
||||||
# @return A number, or None if not available.
|
# @return A number, or None if not available.
|
||||||
top = property(lambda self: self.get("/Top", None), None, None)
|
top = property(lambda self: self.get("/Top", None))
|
||||||
|
|
||||||
##
|
##
|
||||||
# Read-only property accessing the bottom vertical coordinate.
|
# Read-only property accessing the bottom vertical coordinate.
|
||||||
# @return A number, or None if not available.
|
# @return A number, or None if not available.
|
||||||
bottom = property(lambda self: self.get("/Bottom", None), None, None)
|
bottom = property(lambda self: self.get("/Bottom", None))
|
||||||
|
|
||||||
|
|
||||||
def convertToInt(d, size):
|
def convertToInt(d, size):
|
||||||
if size <= 4:
|
if size > 8:
|
||||||
d = "\x00\x00\x00\x00" + d
|
raise utils.PdfReadError("invalid size in convertToInt")
|
||||||
d = d[-4:]
|
d = "\x00\x00\x00\x00\x00\x00\x00\x00" + d
|
||||||
return struct.unpack(">l", d)[0]
|
d = d[-8:]
|
||||||
elif size <= 8:
|
return struct.unpack(">q", d)[0]
|
||||||
d = "\x00\x00\x00\x00\x00\x00\x00\x00" + d
|
|
||||||
d = d[-8:]
|
|
||||||
return struct.unpack(">q", d)[0]
|
|
||||||
else:
|
|
||||||
# size too big
|
|
||||||
assert False
|
|
||||||
|
|
||||||
# ref: pdf1.8 spec section 3.5.2 algorithm 3.2
|
# ref: pdf1.8 spec section 3.5.2 algorithm 3.2
|
||||||
_encryption_padding = '\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56' + \
|
_encryption_padding = '\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56' + \
|
@ -34,6 +34,19 @@ Utility functions for PDF library.
|
|||||||
__author__ = "Mathieu Fenniak"
|
__author__ = "Mathieu Fenniak"
|
||||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||||
|
|
||||||
|
#ENABLE_PSYCO = False
|
||||||
|
#if ENABLE_PSYCO:
|
||||||
|
# try:
|
||||||
|
# import psyco
|
||||||
|
# except ImportError:
|
||||||
|
# ENABLE_PSYCO = False
|
||||||
|
#
|
||||||
|
#if not ENABLE_PSYCO:
|
||||||
|
# class psyco:
|
||||||
|
# def proxy(func):
|
||||||
|
# return func
|
||||||
|
# proxy = staticmethod(proxy)
|
||||||
|
|
||||||
def readUntilWhitespace(stream, maxchars=None):
|
def readUntilWhitespace(stream, maxchars=None):
|
||||||
txt = ""
|
txt = ""
|
||||||
while True:
|
while True:
|
355
src/pyPdf/xmp.py
Normal file
355
src/pyPdf/xmp.py
Normal file
@ -0,0 +1,355 @@
|
|||||||
|
import re
|
||||||
|
import datetime
|
||||||
|
import decimal
|
||||||
|
from generic import PdfObject
|
||||||
|
from xml.dom import getDOMImplementation
|
||||||
|
from xml.dom.minidom import parseString
|
||||||
|
|
||||||
|
# XML namespace URIs of the metadata schemas read from an XMP packet.
RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"
PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"
XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"

# What is the PDFX namespace, you might ask?  I might ask that too.  It's
# a completely undocumented namespace used to place "custom metadata"
# properties, which are arbitrary metadata properties with no semantic or
# documented meaning.  Elements in the namespace are key/value-style storage,
# where the element name is the key and the content is the value.  The keys
# are transformed into valid XML identifiers by substituting an invalid
# identifier character with \u2182 followed by the unicode hex ID of the
# original character.  A key like "my car" is therefore "my\u21820020car".
#
# \u2182, in case you're wondering, is the unicode character
# \u{ROMAN NUMERAL TEN THOUSAND}, a straightforward and obvious choice for
# escaping characters.
#
# Intentional use of the pdfx namespace is strongly discouraged.  A custom
# data schema and sensible XML elements could be used instead, as is
# suggested by Adobe's own documentation on XMP (under "Extensibility of
# Schemas").
#
# Information presented here on the /pdfx/ schema is a result of limited
# reverse engineering, and does not constitute a full specification.
PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"
|
||||||
|
|
||||||
|
# Pattern for the ISO 8601 date forms accepted for XMP date values:
# YYYY, YYYY-MM, YYYY-MM-DD and YYYY-MM-DDThh:mm[:ss[.fraction]]TZD, where
# TZD is "Z" or a signed hh:mm offset.  Each nested group is optional, so
# the named groups of the parts that are absent come back as None.
# The pattern is not anchored at the end; use it with match().
iso8601 = re.compile(r"""
        (?P<year>[0-9]{4})
        (-
            (?P<month>[0-9]{2})
            (-
                (?P<day>[0-9]+)
                (T
                    (?P<hour>[0-9]{2}):
                    (?P<minute>[0-9]{2})
                    (:(?P<second>[0-9]{2}(\.[0-9]+)?))?
                    (?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
                )?
            )?
        )?
        """, re.VERBOSE)
|
||||||
|
|
||||||
|
##
|
||||||
|
# An object that represents Adobe XMP metadata.
|
||||||
|
class XmpInformation(PdfObject):
    """Read-only accessors for the Adobe XMP metadata held in a stream.

    The XML returned by ``stream.getData()`` is parsed once on
    construction.  Every metadata property looks its element or attribute
    up in the ``rdf:Description`` nodes whose ``rdf:about`` attribute is
    the empty string, converts the text, and memoizes the result in
    ``self.cache`` (a dict keyed by namespace, then by property name).
    """

    def __init__(self, stream):
        # The stream is kept so the object can be written back unchanged
        # by writeToStream().
        self.stream = stream
        docRoot = parseString(self.stream.getData())
        self.rdfRoot = docRoot.getElementsByTagNameNS(RDF_NAMESPACE, "RDF")[0]
        self.cache = {}

    def writeToStream(self, stream, encryption_key):
        """Write the wrapped stream object out; the XML is not regenerated."""
        self.stream.writeToStream(stream, encryption_key)

    def getElement(self, aboutUri, namespace, name):
        """Yield the attribute node and/or elements named namespace:name.

        Only rdf:Description nodes whose rdf:about equals aboutUri are
        searched.  For each such node the matching attribute (if present)
        is yielded first, then every matching descendant element.
        """
        for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
                attr = desc.getAttributeNodeNS(namespace, name)
                if attr is not None:
                    yield attr
                for element in desc.getElementsByTagNameNS(namespace, name):
                    yield element

    def getNodesInNamespace(self, aboutUri, namespace):
        """Yield every attribute and direct child in the given namespace.

        Only rdf:Description nodes whose rdf:about equals aboutUri are
        searched; attributes are yielded before child nodes.
        """
        for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
                for i in range(desc.attributes.length):
                    attr = desc.attributes.item(i)
                    if attr.namespaceURI == namespace:
                        yield attr
                for child in desc.childNodes:
                    if child.namespaceURI == namespace:
                        yield child

    def _getText(self, element):
        """Return the concatenated data of element's direct text children."""
        return "".join([child.data for child in element.childNodes
                        if child.nodeType == child.TEXT_NODE])

    # The helpers below are plain functions evaluated while the class body
    # runs (they take no ``self``); they are used to build the properties.

    def _converter_string(value):
        """Identity converter: the text value is already a string."""
        return value

    def _converter_date(value):
        """Convert an ISO 8601 date string into a naive UTC datetime.

        Missing month/day default to 1 and missing time parts to 0.  A
        missing time zone designator is treated as "Z" (UTC); an explicit
        offset is removed so that the returned value is in UTC.
        """
        m = iso8601.match(value)
        year = int(m.group("year"))
        month = int(m.group("month") or "1")
        day = int(m.group("day") or "1")
        hour = int(m.group("hour") or "0")
        minute = int(m.group("minute") or "0")
        # Decimal keeps the fractional seconds exact while splitting them.
        second = decimal.Decimal(m.group("second") or "0")
        whole_seconds = second.to_integral(decimal.ROUND_FLOOR)
        microseconds = (second - whole_seconds) * 1000000
        tzd = m.group("tzd") or "Z"
        # datetime only accepts integers, so convert the Decimals.
        dt = datetime.datetime(year, month, day, hour, minute,
                               int(whole_seconds), int(microseconds))
        if tzd != "Z":
            # tzd is "+hh:mm" or "-hh:mm".  Take the sign from the text
            # itself so that offsets such as "+00:30" (hour part zero)
            # are applied in the right direction.
            tzd_hours, tzd_minutes = [int(x) for x in tzd[1:].split(":")]
            offset = datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
            if tzd[0] == "-":
                dt = dt + offset
            else:
                dt = dt - offset
        return dt
    _test_converter_date = staticmethod(_converter_date)

    def _getter_bag(namespace, name, converter):
        """Build a getter returning the converted rdf:li items of rdf:Bag."""
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached is not None:
                return cached
            retval = []
            for element in self.getElement("", namespace, name):
                bags = element.getElementsByTagNameNS(RDF_NAMESPACE, "Bag")
                if len(bags):
                    for bag in bags:
                        for item in bag.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval.append(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_seq(namespace, name, converter):
        """Build a getter returning the converted rdf:li items of rdf:Seq.

        When the element contains no rdf:Seq, its own text is converted
        and returned as the single list item.
        """
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached is not None:
                return cached
            retval = []
            for element in self.getElement("", namespace, name):
                seqs = element.getElementsByTagNameNS(RDF_NAMESPACE, "Seq")
                if len(seqs):
                    for seq in seqs:
                        for item in seq.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval.append(value)
                else:
                    value = converter(self._getText(element))
                    retval.append(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_langalt(namespace, name, converter):
        """Build a getter returning a dict of xml:lang -> converted text.

        The items come from rdf:Alt; when the element contains no rdf:Alt,
        its own text is stored under the key "x-default".
        """
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached is not None:
                return cached
            retval = {}
            for element in self.getElement("", namespace, name):
                alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
                if len(alts):
                    for alt in alts:
                        for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval[item.getAttribute("xml:lang")] = value
                else:
                    retval["x-default"] = converter(self._getText(element))
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_single(namespace, name, converter):
        """Build a getter returning the first match converted, or None."""
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached is not None:
                return cached
            value = None
            # Only the first node yielded by getElement() is used.
            for element in self.getElement("", namespace, name):
                if element.nodeType == element.ATTRIBUTE_NODE:
                    value = element.nodeValue
                else:
                    value = self._getText(element)
                break
            if value is not None:
                value = converter(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = value
            return value
        return get

    ##
    # Contributors to the resource (other than the authors).  An unsorted
    # array of names.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_contributor = property(_getter_bag(DC_NAMESPACE, "contributor", _converter_string))

    ##
    # Text describing the extent or scope of the resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_coverage = property(_getter_single(DC_NAMESPACE, "coverage", _converter_string))

    ##
    # A sorted array of names of the authors of the resource, listed in order
    # of precedence.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_creator = property(_getter_seq(DC_NAMESPACE, "creator", _converter_string))

    ##
    # A sorted array of dates (datetime.datetime instances) of significance to
    # the resource.  The dates and times are in UTC.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_date = property(_getter_seq(DC_NAMESPACE, "date", _converter_date))

    ##
    # A language-keyed dictionary of textual descriptions of the content of the
    # resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_description = property(_getter_langalt(DC_NAMESPACE, "description", _converter_string))

    ##
    # The mime-type of the resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_format = property(_getter_single(DC_NAMESPACE, "format", _converter_string))

    ##
    # Unique identifier of the resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_identifier = property(_getter_single(DC_NAMESPACE, "identifier", _converter_string))

    ##
    # An unordered array specifying the languages used in the resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_language = property(_getter_bag(DC_NAMESPACE, "language", _converter_string))

    ##
    # An unordered array of publisher names.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_publisher = property(_getter_bag(DC_NAMESPACE, "publisher", _converter_string))

    ##
    # An unordered array of text descriptions of relationships to other
    # documents.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_relation = property(_getter_bag(DC_NAMESPACE, "relation", _converter_string))

    ##
    # A language-keyed dictionary of textual descriptions of the rights the
    # user has to this resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_rights = property(_getter_langalt(DC_NAMESPACE, "rights", _converter_string))

    ##
    # Unique identifier of the work from which this resource was derived.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_source = property(_getter_single(DC_NAMESPACE, "source", _converter_string))

    ##
    # An unordered array of descriptive phrases or keywords that specify the
    # topic of the content of the resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_subject = property(_getter_bag(DC_NAMESPACE, "subject", _converter_string))

    ##
    # A language-keyed dictionary of the title of the resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_title = property(_getter_langalt(DC_NAMESPACE, "title", _converter_string))

    ##
    # An unordered array of textual descriptions of the document type.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    dc_type = property(_getter_bag(DC_NAMESPACE, "type", _converter_string))

    ##
    # An unformatted text string representing document keywords.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    pdf_keywords = property(_getter_single(PDF_NAMESPACE, "Keywords", _converter_string))

    ##
    # The PDF file version, for example 1.0, 1.3.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    pdf_pdfversion = property(_getter_single(PDF_NAMESPACE, "PDFVersion", _converter_string))

    ##
    # The name of the tool that created the PDF document.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    pdf_producer = property(_getter_single(PDF_NAMESPACE, "Producer", _converter_string))

    ##
    # The date and time the resource was originally created.  The date and
    # time are returned as a UTC datetime.datetime object.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    xmp_createDate = property(_getter_single(XMP_NAMESPACE, "CreateDate", _converter_date))

    ##
    # The date and time the resource was last modified.  The date and time
    # are returned as a UTC datetime.datetime object.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    xmp_modifyDate = property(_getter_single(XMP_NAMESPACE, "ModifyDate", _converter_date))

    ##
    # The date and time that any metadata for this resource was last
    # changed.  The date and time are returned as a UTC datetime.datetime
    # object.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    xmp_metadataDate = property(_getter_single(XMP_NAMESPACE, "MetadataDate", _converter_date))

    ##
    # The name of the first known tool used to create the resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    xmp_creatorTool = property(_getter_single(XMP_NAMESPACE, "CreatorTool", _converter_string))

    ##
    # The common identifier for all versions and renditions of this resource.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    xmpmm_documentId = property(_getter_single(XMPMM_NAMESPACE, "DocumentID", _converter_string))

    ##
    # An identifier for a specific incarnation of a document, updated each
    # time a file is saved.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    xmpmm_instanceId = property(_getter_single(XMPMM_NAMESPACE, "InstanceID", _converter_string))

    def custom_properties(self):
        if not hasattr(self, "_custom_properties"):
            try:
                # Python 2: chr() only covers 0-255, escapes may exceed it.
                to_char = unichr
            except NameError:
                to_char = chr
            props = {}
            for node in self.getNodesInNamespace("", PDFX_NAMESPACE):
                key = node.localName
                search_from = 0
                while True:
                    # An escape is \u2182 followed by four hex digits; see
                    # the PDFX_NAMESPACE notes earlier in the file.
                    idx = key.find(u"\u2182", search_from)
                    if idx == -1:
                        break
                    key = key[:idx] + to_char(int(key[idx+1:idx+5], 16)) + key[idx+5:]
                    # Resume after the decoded character so that a decoded
                    # \u2182 is not mistaken for another escape.
                    search_from = idx + 1
                if node.nodeType == node.ATTRIBUTE_NODE:
                    value = node.nodeValue
                else:
                    value = self._getText(node)
                props[key] = value
            # Publish only a completely built dictionary, so a failure
            # part-way through cannot leave a partial result cached.
            self._custom_properties = props
        return self._custom_properties

    ##
    # Retrieves custom metadata properties defined in the undocumented pdfx
    # metadata schema.
    # <p>Stability: Added in v1.12, will exist for all future v1.x releases.
    # @return Returns a dictionary of key/value items for custom metadata
    # properties.
    custom_properties = property(custom_properties)
|
||||||
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user