From a48dd172db1e35f5cf54628051e8e7d44852217d Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 12 Apr 2009 10:03:46 -0400 Subject: [PATCH] PDFMetadataWriter working --- src/calibre/customize/builtins.py | 11 +++++++++++ src/calibre/ebooks/metadata/pdf.py | 31 +++++++++++++++++++----------- src/calibre/ebooks/oeb/iterator.py | 1 + 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 484d46dc36..a9fc342059 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -262,6 +262,17 @@ class MOBIMetadataWriter(MetadataWriterPlugin): def set_metadata(self, stream, mi, type): from calibre.ebooks.metadata.mobi import set_metadata set_metadata(stream, mi) + +class PDFMetadataWriter(MetadataWriterPlugin): + + name = 'Set PDF metadata' + file_types = set(['pdf']) + description = _('Set metadata in %s files') % 'PDF' + author = 'John Schember' + + def set_metadata(self, stream, mi, type): + from calibre.ebooks.metadata.pdf import set_metadata + set_metadata(stream, mi) from calibre.ebooks.epub.input import EPUBInput diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py index 6b94b07275..06a02939ba 100644 --- a/src/calibre/ebooks/metadata/pdf.py +++ b/src/calibre/ebooks/metadata/pdf.py @@ -5,7 +5,7 @@ from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' -import sys, os, re, StringIO +import sys, os, StringIO from calibre.ebooks.metadata import MetaInformation, authors_to_string from calibre.ptempfile import TemporaryDirectory @@ -52,18 +52,27 @@ def get_metadata(stream, extract_cover=True): def set_metadata(stream, mi): stream.seek(0) - raw = stream.read() - if mi.title: - tit = mi.title.encode('utf-8') if isinstance(mi.title, unicode) else mi.title - raw = re.compile(r'<<.*?/Title\((.+?)\)', re.DOTALL).sub(lambda m: m.group().replace(m.group(1), tit), raw) - if mi.authors: - au = authors_to_string(mi.authors) - if isinstance(au, unicode): - au = au.encode('utf-8') - raw = re.compile(r'<<.*?/Author\((.+?)\)', re.DOTALL).sub(lambda m: m.group().replace(m.group(1), au), raw) + + # Use a StringIO object for the pdf because we will want to over + # write it later and if we are working on the stream directly it + # could cause some issues. + raw = StringIO.StringIO(stream.read()) + orig_pdf = PdfFileReader(raw) + + title = mi.title if mi.title else orig_pdf.documentInfo.title + author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author + + out_pdf = PdfFileWriter(title=title, author=author) + for page in orig_pdf.pages: + out_pdf.addPage(page) + + out_str = StringIO.StringIO() + out_pdf.write(out_str) + stream.seek(0) stream.truncate() - stream.write(raw) + out_str.seek(0) + stream.write(out_str.read()) stream.seek(0) def get_cover(stream): diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py index ec0eda908a..88fffc604a 100644 --- a/src/calibre/ebooks/oeb/iterator.py +++ b/src/calibre/ebooks/oeb/iterator.py @@ -1,3 +1,4 @@ +from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008 Kovid Goyal '