diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index a087e7f36d..d4470b16fd 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -251,6 +251,17 @@ class MOBIMetadataWriter(MetadataWriterPlugin): def set_metadata(self, stream, mi, type): from calibre.ebooks.metadata.mobi import set_metadata set_metadata(stream, mi) + +class PDFMetadataWriter(MetadataWriterPlugin): + + name = 'Set PDF metadata' + file_types = set(['pdf']) + description = _('Set metadata in %s files') % 'PDF' + author = 'John Schember' + + def set_metadata(self, stream, mi, type): + from calibre.ebooks.metadata.pdf import set_metadata + set_metadata(stream, mi) plugins = [HTML2ZIP] diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py index ad59351248..80cdc82070 100644 --- a/src/calibre/ebooks/metadata/pdf.py +++ b/src/calibre/ebooks/metadata/pdf.py @@ -2,10 +2,10 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' '''Read meta information from PDF files''' -import sys, os, re +import sys, os, StringIO from calibre.ebooks.metadata import MetaInformation, authors_to_string, get_parser -from pyPdf import PdfFileReader +from pyPdf import PdfFileReader, PdfFileWriter def get_metadata(stream): """ Return metadata as a L{MetaInfo} object """ @@ -31,18 +31,27 @@ def get_metadata(stream): def set_metadata(stream, mi): stream.seek(0) - raw = stream.read() - if mi.title: - tit = mi.title.encode('utf-8') if isinstance(mi.title, unicode) else mi.title - raw = re.compile(r'<<.*?/Title\((.+?)\)', re.DOTALL).sub(lambda m: m.group().replace(m.group(1), tit), raw) - if mi.authors: - au = authors_to_string(mi.authors) - if isinstance(au, unicode): - au = au.encode('utf-8') - raw = re.compile(r'<<.*?/Author\((.+?)\)', re.DOTALL).sub(lambda m: m.group().replace(m.group(1), au), raw) + + # Use a StringIO object for the pdf because we will want to over + # write it later and if we are working on the stream directly it + # could cause some issues. + raw = StringIO.StringIO(stream.read()) + orig_pdf = PdfFileReader(raw) + + title = mi.title if mi.title else orig_pdf.documentInfo.title + author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author + + out_pdf = PdfFileWriter(title=title, author=author) + for page in orig_pdf.pages: + out_pdf.addPage(page) + + out_str = StringIO.StringIO() + out_pdf.write(out_str) + stream.seek(0) stream.truncate() - stream.write(raw) + out_str.seek(0) + stream.write(out_str.read()) stream.seek(0) def option_parser():