PDF metadata: Support reading/writing of tags

This commit is contained in:
Kovid Goyal 2011-08-02 12:08:34 -06:00
parent fbe7f346ea
commit a5e8b23bfe

View File

@@ -13,7 +13,7 @@ from calibre.ebooks.metadata import MetaInformation, string_to_authors, \
 authors_to_string
 from calibre.utils.ipc.job import ParallelJob
 from calibre.utils.ipc.server import Server
-from calibre.ptempfile import PersistentTemporaryFile
+from calibre.ptempfile import PersistentTemporaryFile, TemporaryFile
 from calibre import prints
 podofo, podofo_err = plugins['podofo']
@@ -38,7 +38,7 @@ def get_metadata(stream, cpath=None):
 server.close()
 if job.result is None:
 raise ValueError('Failed to read metadata: ' + job.details)
-title, authors, creator, ok = job.result
+title, authors, creator, tags, ok = job.result
 if not ok:
 print 'Failed to extract cover:'
 print job.details
@@ -49,6 +49,8 @@ def get_metadata(stream, cpath=None):
 mi = MetaInformation(title, authors)
 if creator:
 mi.book_producer = creator
+if tags:
+mi.tags = tags
 if os.path.exists(pt.name): os.remove(pt.name)
 if ok:
 mi.cover = cpath
@@ -63,9 +65,17 @@ def get_metadata_quick(raw):
 author = p.author
 authors = string_to_authors(author) if author else [_('Unknown')]
 creator = p.creator
+try:
+tags = [x.strip() for x in p.keywords.split(u',')]
+tags = [x for x in tags if x]
+except:
+tags = []
 mi = MetaInformation(title, authors)
 if creator:
 mi.book_producer = creator
+if tags:
+mi.tags = tags
 return mi
 def get_metadata_(path, cpath=None):
@@ -77,6 +87,11 @@ def get_metadata_(path, cpath=None):
 author = p.author
 authors = string_to_authors(author) if author else [_('Unknown')]
 creator = p.creator
+try:
+tags = [x.strip() for x in p.keywords.split(u',')]
+tags = [x for x in tags if x]
+except:
+tags = []
 ok = True
 try:
 if cpath is not None:
@@ -93,7 +108,7 @@ def get_metadata_(path, cpath=None):
 traceback.print_exc()
 ok = False
-return (title, authors, creator, ok)
+return (title, authors, creator, tags, ok)
 def prep(val):
 if not val:
@@ -105,58 +120,66 @@ def prep(val):
 def set_metadata(stream, mi):
 if not podofo:
 raise Unavailable(podofo_err)
-pt = PersistentTemporaryFile('_podofo.pdf')
-pt.write(stream.read())
-pt.close()
-server = Server(pool_size=1)
-job = ParallelJob('write_pdf_metadata', 'Write pdf metadata',
-lambda x,y:x, args=[pt.name, mi.title, mi.authors, mi.book_producer])
-server.add_job(job)
-while not job.is_finished:
-time.sleep(0.1)
+with TemporaryFile('_podofo_read.pdf') as inputf, \
+TemporaryFile('_podofo_write.pdf') as outputf:
+server = Server(pool_size=1)
+with open(inputf, 'wb') as f:
+shutil.copyfileobj(stream, f)
+job = ParallelJob('write_pdf_metadata', 'Write pdf metadata',
+lambda x,y:x, args=[inputf, outputf, mi.title, mi.authors,
+mi.book_producer, mi.tags])
+server.add_job(job)
+while not job.is_finished:
+time.sleep(0.1)
+job.update()
 job.update()
-job.update()
-server.close()
-if job.failed:
-prints(job.details)
-elif job.result is not None:
-stream.seek(0)
-stream.truncate()
-stream.write(job.result)
-stream.flush()
-stream.seek(0)
-try:
-os.remove(pt.name)
-except:
-pass
+server.close()
+if job.failed:
+prints(job.details)
+elif job.result:
+with open(outputf, 'rb') as f:
+f.seek(0, 2)
+if f.tell() > 100:
+f.seek(0)
+stream.seek(0)
+stream.truncate()
+shutil.copyfileobj(f, stream)
+stream.flush()
+stream.seek(0)
-def set_metadata_(path, title, authors, bkp):
+def set_metadata_(path, opath, title, authors, bkp, tags):
 p = podofo.PDFDoc()
 p.open(path)
 title = prep(title)
 touched = False
-if title:
+if title and title != p.title:
 p.title = title
 touched = True
 author = prep(authors_to_string(authors))
-if author:
+if author and author != p.author:
 p.author = author
 touched = True
 bkp = prep(bkp)
-if bkp:
+if bkp and bkp != p.creator:
 p.creator = bkp
 touched = True
+try:
+tags = prep(u', '.join([x.strip() for x in tags if x.strip()]))
+if tags != p.keywords:
+p.keywords = tags
+touched = True
+except:
+pass
 if touched:
-from calibre.ptempfile import TemporaryFile
-with TemporaryFile('_pdf_set_metadata.pdf') as f:
-p.save(f)
-return open(f, 'rb').read()
+p.save(opath)
+return True
+return False
 if __name__ == '__main__':
 f = '/tmp/t.pdf'