diff --git a/src/calibre/utils/podofo/__init__.py b/src/calibre/utils/podofo/__init__.py index 284deb7c43..78f250dd0e 100644 --- a/src/calibre/utils/podofo/__init__.py +++ b/src/calibre/utils/podofo/__init__.py @@ -13,7 +13,7 @@ from calibre.ebooks.metadata import MetaInformation, string_to_authors, \ authors_to_string from calibre.utils.ipc.job import ParallelJob from calibre.utils.ipc.server import Server -from calibre.ptempfile import PersistentTemporaryFile +from calibre.ptempfile import PersistentTemporaryFile, TemporaryFile from calibre import prints podofo, podofo_err = plugins['podofo'] @@ -38,7 +38,7 @@ def get_metadata(stream, cpath=None): server.close() if job.result is None: raise ValueError('Failed to read metadata: ' + job.details) - title, authors, creator, ok = job.result + title, authors, creator, tags, ok = job.result if not ok: print 'Failed to extract cover:' print job.details @@ -49,6 +49,8 @@ def get_metadata(stream, cpath=None): mi = MetaInformation(title, authors) if creator: mi.book_producer = creator + if tags: + mi.tags = tags if os.path.exists(pt.name): os.remove(pt.name) if ok: mi.cover = cpath @@ -63,9 +65,17 @@ def get_metadata_quick(raw): author = p.author authors = string_to_authors(author) if author else [_('Unknown')] creator = p.creator + try: + tags = [x.strip() for x in p.keywords.split(u',')] + tags = [x for x in tags if x] + except: + tags = [] + mi = MetaInformation(title, authors) if creator: mi.book_producer = creator + if tags: + mi.tags = tags return mi def get_metadata_(path, cpath=None): @@ -77,6 +87,11 @@ def get_metadata_(path, cpath=None): author = p.author authors = string_to_authors(author) if author else [_('Unknown')] creator = p.creator + try: + tags = [x.strip() for x in p.keywords.split(u',')] + tags = [x for x in tags if x] + except: + tags = [] ok = True try: if cpath is not None: @@ -93,7 +108,7 @@ def get_metadata_(path, cpath=None): traceback.print_exc() ok = False - return (title, authors, creator, ok) + return (title, authors, creator, tags, ok) def prep(val): if not val: @@ -105,58 +120,66 @@ def prep(val): def set_metadata(stream, mi): if not podofo: raise Unavailable(podofo_err) - pt = PersistentTemporaryFile('_podofo.pdf') - pt.write(stream.read()) - pt.close() - server = Server(pool_size=1) - job = ParallelJob('write_pdf_metadata', 'Write pdf metadata', - lambda x,y:x, args=[pt.name, mi.title, mi.authors, mi.book_producer]) - server.add_job(job) - while not job.is_finished: - time.sleep(0.1) + with TemporaryFile('_podofo_read.pdf') as inputf, \ + TemporaryFile('_podofo_write.pdf') as outputf: + server = Server(pool_size=1) + with open(inputf, 'wb') as f: + shutil.copyfileobj(stream, f) + job = ParallelJob('write_pdf_metadata', 'Write pdf metadata', + lambda x,y:x, args=[inputf, outputf, mi.title, mi.authors, + mi.book_producer, mi.tags]) + server.add_job(job) + while not job.is_finished: + time.sleep(0.1) + job.update() + job.update() - - job.update() - server.close() - if job.failed: - prints(job.details) - elif job.result is not None: - stream.seek(0) - stream.truncate() - stream.write(job.result) - stream.flush() - stream.seek(0) - try: - os.remove(pt.name) - except: - pass + server.close() + if job.failed: + prints(job.details) + elif job.result: + with open(outputf, 'rb') as f: + f.seek(0, 2) + if f.tell() > 100: + f.seek(0) + stream.seek(0) + stream.truncate() + shutil.copyfileobj(f, stream) + stream.flush() + stream.seek(0) - -def set_metadata_(path, title, authors, bkp): +def set_metadata_(path, opath, title, authors, bkp, tags): p = podofo.PDFDoc() p.open(path) title = prep(title) touched = False - if title: + if title and title != p.title: p.title = title touched = True author = prep(authors_to_string(authors)) - if author: + if author and author != p.author: p.author = author touched = True bkp = prep(bkp) - if bkp: + if bkp and bkp != p.creator: p.creator = bkp touched = True + try: + tags = prep(u', '.join([x.strip() for x in tags if x.strip()])) + if tags != p.keywords: + p.keywords = tags + touched = True + except: + pass + if touched: - from calibre.ptempfile import TemporaryFile - with TemporaryFile('_pdf_set_metadata.pdf') as f: - p.save(f) - return open(f, 'rb').read() + p.save(opath) + return True + return False if __name__ == '__main__': f = '/tmp/t.pdf'