mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Add a timeout to the PDF metadata writer as it hangs on some PDF files
This commit is contained in:
parent
cf77ec2c4a
commit
4e1f851a44
@ -2,7 +2,8 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
'''Read meta information from PDF files'''
|
'''Read meta information from PDF files'''
|
||||||
|
|
||||||
import sys, os, StringIO
|
import sys, os, cStringIO
|
||||||
|
from threading import Thread
|
||||||
|
|
||||||
from calibre.ebooks.metadata import MetaInformation, authors_to_string, get_parser
|
from calibre.ebooks.metadata import MetaInformation, authors_to_string, get_parser
|
||||||
from pyPdf import PdfFileReader, PdfFileWriter
|
from pyPdf import PdfFileReader, PdfFileWriter
|
||||||
@ -29,24 +30,45 @@ def get_metadata(stream):
|
|||||||
print >>sys.stderr, msg.encode('utf8')
|
print >>sys.stderr, msg.encode('utf8')
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
|
class MetadataWriter(Thread):
    '''
    Daemon thread that calls ``out_pdf.write(buf)`` in the background.

    Running the write on a separate thread lets the caller wait with
    ``join(timeout)`` instead of blocking for as long as the write takes.

    A :class:`RuntimeError` raised by the write is treated as a request to
    abort and is not propagated out of the thread. The outcome is recorded
    on the instance so the caller can tell the two cases apart:

    ``completed``
        True once ``out_pdf.write(buf)`` has returned normally.
    ``aborted``
        True if the write was cut short by a :class:`RuntimeError`.

    Both flags are False while the write is still in progress.
    '''

    def __init__(self, out_pdf, buf):
        '''
        :param out_pdf: Object with a ``write(stream)`` method.
        :param buf: Stream object that is handed to ``out_pdf.write``.
        '''
        Thread.__init__(self)
        self.out_pdf = out_pdf
        self.buf = buf
        # Daemon thread, so a write that never returns cannot keep the
        # interpreter alive at exit.
        self.daemon = True
        self.completed = False
        self.aborted = False

    def run(self):
        '''Perform the write and record whether it finished or was aborted.'''
        try:
            self.out_pdf.write(self.buf)
        except RuntimeError:
            # Deliberately not re-raised: an aborted write is an expected
            # outcome. Record it instead of silently discarding it.
            self.aborted = True
        else:
            self.completed = True
|
||||||
|
|
||||||
def set_metadata(stream, mi):
|
def set_metadata(stream, mi):
|
||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
|
|
||||||
# Use a StringIO object for the pdf because we will want to over
|
# Use a StringIO object for the pdf because we will want to over
|
||||||
# write it later and if we are working on the stream directly it
|
# write it later and if we are working on the stream directly it
|
||||||
# could cause some issues.
|
# could cause some issues.
|
||||||
raw = StringIO.StringIO(stream.read())
|
raw = cStringIO.StringIO(stream.read())
|
||||||
orig_pdf = PdfFileReader(raw)
|
orig_pdf = PdfFileReader(raw)
|
||||||
|
|
||||||
title = mi.title if mi.title else orig_pdf.documentInfo.title
|
title = mi.title if mi.title else orig_pdf.documentInfo.title
|
||||||
author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author
|
author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author
|
||||||
|
|
||||||
out_pdf = PdfFileWriter(title=title, author=author)
|
out_pdf = PdfFileWriter(title=title, author=author)
|
||||||
|
out_str = cStringIO.StringIO()
|
||||||
|
writer = MetadataWriter(out_pdf, out_str)
|
||||||
for page in orig_pdf.pages:
|
for page in orig_pdf.pages:
|
||||||
out_pdf.addPage(page)
|
out_pdf.addPage(page)
|
||||||
|
|
||||||
out_str = StringIO.StringIO()
|
writer.start()
|
||||||
out_pdf.write(out_str)
|
writer.join(10) # Wait 10 secs for writing to complete
|
||||||
|
out_pdf.killed = True
|
||||||
|
writer.join()
|
||||||
|
if out_pdf.killed:
|
||||||
|
print 'Failed to set metadata: took too long'
|
||||||
|
return
|
||||||
|
|
||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
stream.truncate()
|
stream.truncate()
|
||||||
|
@ -299,7 +299,7 @@ def readStringFromStream(stream):
|
|||||||
elif tok == "t":
|
elif tok == "t":
|
||||||
tok = "\t"
|
tok = "\t"
|
||||||
elif tok == "b":
|
elif tok == "b":
|
||||||
tok == "\b"
|
tok = "\b"
|
||||||
elif tok == "f":
|
elif tok == "f":
|
||||||
tok = "\f"
|
tok = "\f"
|
||||||
elif tok == "(":
|
elif tok == "(":
|
||||||
|
@ -39,15 +39,12 @@ __author__ = "Mathieu Fenniak"
|
|||||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||||
|
|
||||||
import struct
|
import struct
|
||||||
try:
|
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
except ImportError:
|
|
||||||
from StringIO import StringIO
|
|
||||||
|
|
||||||
import filters
|
from generic import DictionaryObject, NameObject, NumberObject, \
|
||||||
import utils
|
createStringObject, ArrayObject, ByteStringObject, StreamObject, \
|
||||||
import warnings
|
IndirectObject, utils, readObject, TextStringObject, BooleanObject, \
|
||||||
from generic import *
|
RectangleObject, DecodedStreamObject
|
||||||
from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
|
from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
|
||||||
|
|
||||||
|
|
||||||
@ -56,6 +53,7 @@ from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirt
|
|||||||
# class (typically {@link #PdfFileReader PdfFileReader}).
|
# class (typically {@link #PdfFileReader PdfFileReader}).
|
||||||
class PdfFileWriter(object):
|
class PdfFileWriter(object):
|
||||||
def __init__(self,title=u"Unknown",author=u"Unknown"):
|
def __init__(self,title=u"Unknown",author=u"Unknown"):
|
||||||
|
self.killed = False
|
||||||
self._header = "%PDF-1.3"
|
self._header = "%PDF-1.3"
|
||||||
self._objects = [] # array of indirect objects
|
self._objects = [] # array of indirect objects
|
||||||
|
|
||||||
@ -162,7 +160,7 @@ class PdfFileWriter(object):
|
|||||||
# @param stream An object to write the file to. The object must support
|
# @param stream An object to write the file to. The object must support
|
||||||
# the write method, and the tell method, similar to a file object.
|
# the write method, and the tell method, similar to a file object.
|
||||||
def write(self, stream):
|
def write(self, stream):
|
||||||
import struct, md5
|
import md5
|
||||||
|
|
||||||
externalReferenceMap = {}
|
externalReferenceMap = {}
|
||||||
self.stack = []
|
self.stack = []
|
||||||
@ -214,6 +212,8 @@ class PdfFileWriter(object):
|
|||||||
stream.write("\nstartxref\n%s\n%%%%EOF\n" % (xref_location))
|
stream.write("\nstartxref\n%s\n%%%%EOF\n" % (xref_location))
|
||||||
|
|
||||||
def _sweepIndirectReferences(self, externMap, data):
|
def _sweepIndirectReferences(self, externMap, data):
|
||||||
|
if self.killed:
|
||||||
|
raise RuntimeError('Writer killed')
|
||||||
if isinstance(data, DictionaryObject):
|
if isinstance(data, DictionaryObject):
|
||||||
for key, value in data.items():
|
for key, value in data.items():
|
||||||
origvalue = value
|
origvalue = value
|
||||||
|
Loading…
x
Reference in New Issue
Block a user