From 08aad56c046ef4f45e112fbb5d2465c2cbabfc3f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 29 Jan 2007 08:52:47 +0000 Subject: [PATCH] Added support for reading RTF metadata. --- src/libprs500/gui/database.py | 39 ++++++-------- src/libprs500/lrf/meta.py | 25 ++++++++- src/libprs500/metadata/__init__.py | 22 ++++++++ src/libprs500/metadata/meta.py | 26 +++++++++ src/libprs500/metadata/rtf.py | 85 ++++++++++++++++++++++++++++++ 5 files changed, 174 insertions(+), 23 deletions(-) create mode 100644 src/libprs500/metadata/meta.py create mode 100644 src/libprs500/metadata/rtf.py diff --git a/src/libprs500/gui/database.py b/src/libprs500/gui/database.py index bc3d04f5a4..889be76eb0 100644 --- a/src/libprs500/gui/database.py +++ b/src/libprs500/gui/database.py @@ -17,6 +17,7 @@ import os from zlib import compress, decompress from stat import ST_SIZE from libprs500.lrf.meta import LRFMetaFile, LRFException +from libprs500.metadata.meta import get_metadata from cStringIO import StringIO as cStringIO class LibraryDatabase(object): @@ -54,28 +55,21 @@ class LibraryDatabase(object): def add_book(self, path): _file = os.path.abspath(path) - title, author, publisher, size, cover = os.path.basename(_file), \ - None, None, os.stat(_file)[ST_SIZE], None + title, size, cover = os.path.basename(_file), \ + os.stat(_file)[ST_SIZE], None ext = title[title.rfind(".")+1:].lower() if title.find(".") > -1 else None - comments, tags = None, None - if ext == "lrf": - lrf = LRFMetaFile(open(_file, "r+b")) - title, author, cover, publisher = lrf.title, lrf.author.strip(), \ - lrf.thumbnail, lrf.publisher.strip() - if "unknown" in publisher.lower() or 'some publisher' in publisher.lower(): - publisher = None - if "unknown" in author.lower(): - author = None - comments = lrf.free_text - if not comments: - comments = None - classification, category = lrf.classification, lrf.category - if 'unknown' in classification.lower(): - classification = '' - if 'unknown' in category.lower(): - 
category = '' - if classification or category: - tags = ", ".join((classification, category)) + mi = get_metadata(open(_file, "r+b"), ext) + tags = [] + if not mi.title: + mi.title = title + if mi.category: + tags.append(mi.category) + if mi.classification: + tags.append(mi.classification) + if tags: + tags = ', '.join(tags) + else: + tags = None data = open(_file).read() usize = len(data) data = compress(data) @@ -86,7 +80,8 @@ class LibraryDatabase(object): self.con.execute("insert into books_meta (title, authors, publisher, "+\ "size, tags, comments, rating) values "+\ "(?,?,?,?,?,?,?)", \ - (title, author, publisher, size, tags, comments, None)) + (mi.title, mi.author, mi.publisher, size, tags, \ + mi.comments, None)) _id = self.con.execute("select max(id) from books_meta").next()[0] self.con.execute("insert into books_data values (?,?,?,?)", \ (_id, ext, usize, sqlite.Binary(data))) diff --git a/src/libprs500/lrf/meta.py b/src/libprs500/lrf/meta.py index e59d91512d..a013ad51f5 100644 --- a/src/libprs500/lrf/meta.py +++ b/src/libprs500/lrf/meta.py @@ -32,6 +32,7 @@ import xml.dom.minidom as dom from functools import wraps from libprs500.prstypes import field +from libprs500.metadata import MetaInformation BYTE = "= 0: + found = True + stream.seek(stream.tell() - block_size + idx - len(prefix)) + else: + stream.seek(stream.tell()) + if not found: + return None, 0 + data, count, = cStringIO.StringIO(), 0 + pos = stream.tell() + while True: + ch = stream.read(1) + if ch == '{': + count += 1 + elif ch == '}': + count -= 1 + data.write(ch) + if count == 0: + break + return data.getvalue(), pos + +def get_metadata(stream): + stream.seek(0) + if stream.read(5) != r'{\rtf': + raise Exception('Not a valid RTF file') + block, pos = get_document_info(stream) + if not block: + return MetaInformation(None, None) + title, author, comment, category = None, None, None, None + title_match = title_pat.search(block) + if title_match: + title = title_match.group(1).strip() + 
author_match = author_pat.search(block) + if author_match: + author = author_match.group(1).strip() + comment_match = comment_pat.search(block) + if comment_match: + comment = comment_match.group(1).strip() + category_match = category_pat.search(block) + if category_match: + category = category_match.group(1).strip() + mi = MetaInformation(title, author) + mi.comments = comment + mi.category = category + return mi + +def main(): + import sys + print get_metadata(open(sys.argv[1])) + +if __name__ == '__main__': + main() \ No newline at end of file