From a16ecea6eee65b82cce8e485cbdd16d6cabf646a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 24 Aug 2007 04:52:15 +0000 Subject: [PATCH] pdf-meta on linux. --- src/libprs500/ebooks/metadata/__init__.py | 4 +- src/libprs500/ebooks/metadata/meta.py | 3 + src/libprs500/ebooks/metadata/pdf.py | 97 +++++++++++++++++++++++ 3 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 src/libprs500/ebooks/metadata/pdf.py diff --git a/src/libprs500/ebooks/metadata/__init__.py b/src/libprs500/ebooks/metadata/__init__.py index bd05fe1940..6c999195fa 100644 --- a/src/libprs500/ebooks/metadata/__init__.py +++ b/src/libprs500/ebooks/metadata/__init__.py @@ -27,7 +27,7 @@ def get_parser(extension): parser = OptionParser(version='libprs500 version: '+VERSION, usage='''%prog [options] myfile.'''+extension) parser.add_option("-t", "--title", action="store", type="string", \ - dest="title", help="Set the book title") + dest="title", help="Set the book title", default=None) parser.add_option("-a", "--authors", action="store", type="string", \ dest="authors", help="Set the authors", default=None) parser.add_option("-c", "--category", action="store", type="string", \ @@ -62,7 +62,7 @@ class MetaInformation(object): def __str__(self): ans = '' ans += 'Title : ' + str(self.title) + '\n' - ans += 'Author : ' + str(self.author) + '\n' + ans += 'Author : ' + ', '.join(self.authors) + '\n' ans += 'Category: ' + str(self.category) + '\n' ans += 'Comments: ' + str(self.comments) + '\n' return ans.strip() diff --git a/src/libprs500/ebooks/metadata/meta.py b/src/libprs500/ebooks/metadata/meta.py index 001e034443..ef5ad5d23d 100644 --- a/src/libprs500/ebooks/metadata/meta.py +++ b/src/libprs500/ebooks/metadata/meta.py @@ -15,6 +15,7 @@ from libprs500.ebooks.metadata.rtf import get_metadata as rtf_metadata from libprs500.ebooks.lrf.meta import get_metadata as lrf_metadata +from libprs500.ebooks.metadata.pdf import get_metadata as pdf_metadata from libprs500.ebooks.metadata import 
MetaInformation def get_metadata(stream, stream_type='lrf'): @@ -22,5 +23,7 @@ def get_metadata(stream, stream_type='lrf'): return rtf_metadata(stream) if stream_type == 'lrf': return lrf_metadata(stream) + if stream_type == 'pdf': + return pdf_metadata(stream) return MetaInformation(None, None) diff --git a/src/libprs500/ebooks/metadata/pdf.py b/src/libprs500/ebooks/metadata/pdf.py new file mode 100644 index 0000000000..eeae0bb378 --- /dev/null +++ b/src/libprs500/ebooks/metadata/pdf.py @@ -0,0 +1,97 @@ +## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License along +## with this program; if not, write to the Free Software Foundation, Inc., +## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
'''Read and write meta information in PDF files via the podofo extension'''

import copy
import os
import sys

from libprs500.ebooks.metadata import MetaInformation, get_parser
from libprs500.ptempfile import PersistentTemporaryFile


def get_metadata(stream):
    """
    Return the metadata of the PDF in C{stream} as a L{MetaInformation} object.

    The stream is rewound and copied in full to a temporary C{.pdf} file,
    because the actual extraction (L{get_metadata_from_file}) works on a
    filesystem path.

    @param stream: A seekable, readable file-like object holding a PDF. If it
                   has a C{name} attribute, that name is the fallback title,
                   otherwise the fallback title is C{'Unknown'}.
    @return: A L{MetaInformation} object. Fields that cannot be read from the
             PDF keep their fallback values.
    """
    if hasattr(stream, 'name'):
        title = stream.name
    else:
        title = 'Unknown'
    # NOTE(review): MetaInformation.__str__ joins C{authors} with ', ';
    # confirm the constructor accepts a plain string for the author argument.
    mi = MetaInformation(title, 'Unknown')

    stream.seek(0)
    pt = PersistentTemporaryFile('.pdf')
    try:
        pt.write(stream.read())
    finally:
        # Close even if the copy fails, so the handle is never left open.
        pt.close()
    return get_metadata_from_file(pt.name, mi)


def set_metadata(path, options):
    """
    Write title/authors/category from C{options} into the PDF at C{path}.

    Only the options that are set (truthy) are written. The document is first
    written to a temporary file and then copied back over C{path}.

    @param path: Path to the PDF file, which is modified in place.
    @param options: Object with C{title}, C{authors} and C{category}
                    attributes (as produced by the parser from L{get_parser}).
    @return: C{False} if the podofo extension is not installed (the file is
             left untouched), C{True} otherwise.
    """
    # Only the import can legitimately fail with ImportError, so only the
    # import is guarded.
    try:
        import podofo
    except ImportError:
        return False

    doc = podofo.PdfDocument()
    doc.Load(path)
    info = doc.GetInfo()
    if options.title:
        info.SetTitle(options.title)
    if options.authors:
        info.SetAuthor(options.authors)
    if options.category:
        info.SetSubject(options.category)

    pt = PersistentTemporaryFile('.pdf')
    pt.close()
    doc.Write(pt.name)

    # Read the rewritten document completely *before* opening the destination
    # for writing: opening with 'wb' truncates it, so a failed read must not
    # be able to destroy the original file.
    src = open(pt.name, 'rb')
    try:
        data = src.read()
    finally:
        src.close()

    dest = open(path, 'wb')
    try:
        dest.write(data)
    finally:
        dest.close()
    return True


def _split_authors(raw):
    """ Split a comma separated author string into a list of stripped, non-empty names """
    return [name.strip() for name in raw.split(',') if name.strip()]


def get_metadata_from_file(path, default_mi=None):
    """
    Return the metadata of the PDF at C{path} as a L{MetaInformation} object.

    This is a best-effort reader: if the podofo extension is missing, or the
    document cannot be parsed, the fallback metadata is returned (with whatever
    fields were successfully read before the failure) instead of raising.

    @param path: Path to the PDF file.
    @param default_mi: Optional L{MetaInformation} supplying fallback values.
                       It is copied, never modified. When omitted, the fallback
                       title is the file name without its extension and the
                       fallback author is C{'Unknown'}.
    @return: A L{MetaInformation} object.
    """
    if not default_mi:
        title = os.path.splitext(os.path.basename(path))[0]
        mi = MetaInformation(title, 'Unknown')
    else:
        mi = copy.copy(default_mi)

    try:
        import podofo
    except ImportError:
        return mi

    try:
        doc = podofo.PdfDocument()
        doc.Load(path)
        info = doc.GetInfo()

        title = info.GetTitle()
        if title:
            mi.title = title

        author = info.GetAuthor()
        if author:
            authors = _split_authors(author)
            if authors:
                mi.authors = authors

        subject = info.GetSubject()
        if subject:
            mi.category = subject
    except Exception:
        # Deliberately best-effort: an unreadable PDF yields the fallback
        # metadata. Unlike a ``return`` inside ``finally``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
    return mi


def main(args=sys.argv):
    """
    Command line entry point: set metadata on a PDF file, then print it.

    @param args: Argument list in C{sys.argv} form (program name first,
                 followed by options and exactly one file name).
    @return: Process exit code: 0 on success, 1 if the file name is missing
             or the podofo extension is not installed.
    """
    parser = get_parser('pdf')
    options, args = parser.parse_args(args)
    if len(args) != 2:
        sys.stderr.write('No filename specified.\n')
        return 1

    path = os.path.abspath(os.path.expanduser(args[1]))
    if not set_metadata(path, options):
        sys.stderr.write('You do not have the podofo python extension installed. Cannot read PDF files.\n')
        return 1

    sys.stdout.write(str(get_metadata_from_file(path)) + '\n')
    return 0


if __name__ == '__main__':
    sys.exit(main())