PDF metadata: Support reading/writing of tags

This commit is contained in:
Kovid Goyal 2011-08-02 12:08:34 -06:00
parent fbe7f346ea
commit a5e8b23bfe

View File

@ -13,7 +13,7 @@ from calibre.ebooks.metadata import MetaInformation, string_to_authors, \
authors_to_string authors_to_string
from calibre.utils.ipc.job import ParallelJob from calibre.utils.ipc.job import ParallelJob
from calibre.utils.ipc.server import Server from calibre.utils.ipc.server import Server
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile, TemporaryFile
from calibre import prints from calibre import prints
podofo, podofo_err = plugins['podofo'] podofo, podofo_err = plugins['podofo']
@ -38,7 +38,7 @@ def get_metadata(stream, cpath=None):
server.close() server.close()
if job.result is None: if job.result is None:
raise ValueError('Failed to read metadata: ' + job.details) raise ValueError('Failed to read metadata: ' + job.details)
title, authors, creator, ok = job.result title, authors, creator, tags, ok = job.result
if not ok: if not ok:
print 'Failed to extract cover:' print 'Failed to extract cover:'
print job.details print job.details
@ -49,6 +49,8 @@ def get_metadata(stream, cpath=None):
mi = MetaInformation(title, authors) mi = MetaInformation(title, authors)
if creator: if creator:
mi.book_producer = creator mi.book_producer = creator
if tags:
mi.tags = tags
if os.path.exists(pt.name): os.remove(pt.name) if os.path.exists(pt.name): os.remove(pt.name)
if ok: if ok:
mi.cover = cpath mi.cover = cpath
@ -63,9 +65,17 @@ def get_metadata_quick(raw):
author = p.author author = p.author
authors = string_to_authors(author) if author else [_('Unknown')] authors = string_to_authors(author) if author else [_('Unknown')]
creator = p.creator creator = p.creator
try:
tags = [x.strip() for x in p.keywords.split(u',')]
tags = [x for x in tags if x]
except:
tags = []
mi = MetaInformation(title, authors) mi = MetaInformation(title, authors)
if creator: if creator:
mi.book_producer = creator mi.book_producer = creator
if tags:
mi.tags = tags
return mi return mi
def get_metadata_(path, cpath=None): def get_metadata_(path, cpath=None):
@ -77,6 +87,11 @@ def get_metadata_(path, cpath=None):
author = p.author author = p.author
authors = string_to_authors(author) if author else [_('Unknown')] authors = string_to_authors(author) if author else [_('Unknown')]
creator = p.creator creator = p.creator
try:
tags = [x.strip() for x in p.keywords.split(u',')]
tags = [x for x in tags if x]
except:
tags = []
ok = True ok = True
try: try:
if cpath is not None: if cpath is not None:
@ -93,7 +108,7 @@ def get_metadata_(path, cpath=None):
traceback.print_exc() traceback.print_exc()
ok = False ok = False
return (title, authors, creator, ok) return (title, authors, creator, tags, ok)
def prep(val): def prep(val):
if not val: if not val:
@ -105,58 +120,66 @@ def prep(val):
def set_metadata(stream, mi): def set_metadata(stream, mi):
if not podofo: if not podofo:
raise Unavailable(podofo_err) raise Unavailable(podofo_err)
pt = PersistentTemporaryFile('_podofo.pdf') with TemporaryFile('_podofo_read.pdf') as inputf, \
pt.write(stream.read()) TemporaryFile('_podofo_write.pdf') as outputf:
pt.close() server = Server(pool_size=1)
server = Server(pool_size=1) with open(inputf, 'wb') as f:
job = ParallelJob('write_pdf_metadata', 'Write pdf metadata', shutil.copyfileobj(stream, f)
lambda x,y:x, args=[pt.name, mi.title, mi.authors, mi.book_producer]) job = ParallelJob('write_pdf_metadata', 'Write pdf metadata',
server.add_job(job) lambda x,y:x, args=[inputf, outputf, mi.title, mi.authors,
while not job.is_finished: mi.book_producer, mi.tags])
time.sleep(0.1) server.add_job(job)
while not job.is_finished:
time.sleep(0.1)
job.update()
job.update() job.update()
server.close()
job.update() if job.failed:
server.close() prints(job.details)
if job.failed: elif job.result:
prints(job.details) with open(outputf, 'rb') as f:
elif job.result is not None: f.seek(0, 2)
stream.seek(0) if f.tell() > 100:
stream.truncate() f.seek(0)
stream.write(job.result) stream.seek(0)
stream.flush() stream.truncate()
stream.seek(0) shutil.copyfileobj(f, stream)
try: stream.flush()
os.remove(pt.name) stream.seek(0)
except:
pass
def set_metadata_(path, opath, title, authors, bkp, tags):
def set_metadata_(path, title, authors, bkp):
p = podofo.PDFDoc() p = podofo.PDFDoc()
p.open(path) p.open(path)
title = prep(title) title = prep(title)
touched = False touched = False
if title: if title and title != p.title:
p.title = title p.title = title
touched = True touched = True
author = prep(authors_to_string(authors)) author = prep(authors_to_string(authors))
if author: if author and author != p.author:
p.author = author p.author = author
touched = True touched = True
bkp = prep(bkp) bkp = prep(bkp)
if bkp: if bkp and bkp != p.creator:
p.creator = bkp p.creator = bkp
touched = True touched = True
try:
tags = prep(u', '.join([x.strip() for x in tags if x.strip()]))
if tags != p.keywords:
p.keywords = tags
touched = True
except:
pass
if touched: if touched:
from calibre.ptempfile import TemporaryFile p.save(opath)
with TemporaryFile('_pdf_set_metadata.pdf') as f: return True
p.save(f) return False
return open(f, 'rb').read()
if __name__ == '__main__': if __name__ == '__main__':
f = '/tmp/t.pdf' f = '/tmp/t.pdf'