mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Added support for reading RTF metadata.
This commit is contained in:
parent
8327ce9c1e
commit
08aad56c04
@ -17,6 +17,7 @@ import os
|
|||||||
from zlib import compress, decompress
|
from zlib import compress, decompress
|
||||||
from stat import ST_SIZE
|
from stat import ST_SIZE
|
||||||
from libprs500.lrf.meta import LRFMetaFile, LRFException
|
from libprs500.lrf.meta import LRFMetaFile, LRFException
|
||||||
|
from libprs500.metadata.meta import get_metadata
|
||||||
from cStringIO import StringIO as cStringIO
|
from cStringIO import StringIO as cStringIO
|
||||||
|
|
||||||
class LibraryDatabase(object):
|
class LibraryDatabase(object):
|
||||||
@ -54,28 +55,21 @@ class LibraryDatabase(object):
|
|||||||
|
|
||||||
def add_book(self, path):
|
def add_book(self, path):
|
||||||
_file = os.path.abspath(path)
|
_file = os.path.abspath(path)
|
||||||
title, author, publisher, size, cover = os.path.basename(_file), \
|
title, size, cover = os.path.basename(_file), \
|
||||||
None, None, os.stat(_file)[ST_SIZE], None
|
os.stat(_file)[ST_SIZE], None
|
||||||
ext = title[title.rfind(".")+1:].lower() if title.find(".") > -1 else None
|
ext = title[title.rfind(".")+1:].lower() if title.find(".") > -1 else None
|
||||||
comments, tags = None, None
|
mi = get_metadata(open(_file, "r+b"), ext)
|
||||||
if ext == "lrf":
|
tags = []
|
||||||
lrf = LRFMetaFile(open(_file, "r+b"))
|
if not mi.title:
|
||||||
title, author, cover, publisher = lrf.title, lrf.author.strip(), \
|
mi.title = title
|
||||||
lrf.thumbnail, lrf.publisher.strip()
|
if mi.category:
|
||||||
if "unknown" in publisher.lower() or 'some publisher' in publisher.lower():
|
tags.append(mi.category)
|
||||||
publisher = None
|
if mi.classification:
|
||||||
if "unknown" in author.lower():
|
tags.append(mi.classification)
|
||||||
author = None
|
if tags:
|
||||||
comments = lrf.free_text
|
tags = ', '.join(tags)
|
||||||
if not comments:
|
else:
|
||||||
comments = None
|
tags = None
|
||||||
classification, category = lrf.classification, lrf.category
|
|
||||||
if 'unknown' in classification.lower():
|
|
||||||
classification = ''
|
|
||||||
if 'unknown' in category.lower():
|
|
||||||
category = ''
|
|
||||||
if classification or category:
|
|
||||||
tags = ", ".join((classification, category))
|
|
||||||
data = open(_file).read()
|
data = open(_file).read()
|
||||||
usize = len(data)
|
usize = len(data)
|
||||||
data = compress(data)
|
data = compress(data)
|
||||||
@ -86,7 +80,8 @@ class LibraryDatabase(object):
|
|||||||
self.con.execute("insert into books_meta (title, authors, publisher, "+\
|
self.con.execute("insert into books_meta (title, authors, publisher, "+\
|
||||||
"size, tags, comments, rating) values "+\
|
"size, tags, comments, rating) values "+\
|
||||||
"(?,?,?,?,?,?,?)", \
|
"(?,?,?,?,?,?,?)", \
|
||||||
(title, author, publisher, size, tags, comments, None))
|
(mi.title, mi.author, mi.publisher, size, tags, \
|
||||||
|
mi.comments, None))
|
||||||
_id = self.con.execute("select max(id) from books_meta").next()[0]
|
_id = self.con.execute("select max(id) from books_meta").next()[0]
|
||||||
self.con.execute("insert into books_data values (?,?,?,?)", \
|
self.con.execute("insert into books_data values (?,?,?,?)", \
|
||||||
(_id, ext, usize, sqlite.Binary(data)))
|
(_id, ext, usize, sqlite.Binary(data)))
|
||||||
|
@ -32,6 +32,7 @@ import xml.dom.minidom as dom
|
|||||||
from functools import wraps
|
from functools import wraps
|
||||||
|
|
||||||
from libprs500.prstypes import field
|
from libprs500.prstypes import field
|
||||||
|
from libprs500.metadata import MetaInformation
|
||||||
|
|
||||||
BYTE = "<B" #: Unsigned char little endian encoded in 1 byte
|
BYTE = "<B" #: Unsigned char little endian encoded in 1 byte
|
||||||
WORD = "<H" #: Unsigned short little endian encoded in 2 bytes
|
WORD = "<H" #: Unsigned short little endian encoded in 2 bytes
|
||||||
@ -186,7 +187,29 @@ def insert_into_file(fileobj, data, start, end):
|
|||||||
return delta
|
return delta
|
||||||
|
|
||||||
|
|
||||||
|
def get_metadata(stream):
    """
    Return basic meta-data about the LRF file in C{stream} as a
    L{MetaInformation} object.

    Every value is stripped of surrounding whitespace. The title, author,
    category, classification and publisher are replaced by C{None} when
    they are empty or contain a known placeholder string; the comments
    (taken from the LRF free text) are stored as read.

    @param stream: Stream handed unchanged to L{LRFMetaFile}.
    @return: A L{MetaInformation} instance.
    """
    lrf = LRFMetaFile(stream)
    mi = MetaInformation(lrf.title.strip(), lrf.author.strip())
    mi.comments = lrf.free_text.strip()
    mi.category = lrf.category.strip()
    mi.classification = lrf.classification.strip()
    mi.publisher = lrf.publisher.strip()

    # Field name -> lower-case substrings that mark the value as a
    # placeholder rather than real meta-data.
    placeholders = (
        ('title', ('unknown',)),
        ('author', ('unknown',)),
        ('category', ('unknown',)),
        ('classification', ('unknown',)),
        ('publisher', ('unknown', 'some publisher')),
    )
    for name, markers in placeholders:
        value = getattr(mi, name)
        lowered = value.lower() if value else ''
        if not value or [m for m in markers if m in lowered]:
            setattr(mi, name, None)
    return mi
|
||||||
|
|
||||||
class LRFMetaFile(object):
|
class LRFMetaFile(object):
|
||||||
""" Has properties to read and write all Meta information in a LRF file. """
|
""" Has properties to read and write all Meta information in a LRF file. """
|
||||||
|
@ -18,3 +18,25 @@ the L{libprs500.lrf.meta} module.
|
|||||||
"""
|
"""
|
||||||
__docformat__ = "epytext"
|
__docformat__ = "epytext"
|
||||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||||
|
|
||||||
|
|
||||||
|
class MetaInformation(object):
    """
    Plain container for the basic meta-data of a book.

    C{title} and C{author} are supplied to the constructor; C{comments},
    C{category}, C{classification} and C{publisher} start out as C{None}
    and are meant to be assigned by the code that reads the meta-data.
    """

    def __init__(self, title, author):
        """
        @param title: Title of the book, or C{None} if unknown.
        @param author: Author of the book, or C{None} if unknown.
        """
        self.title = title
        self.author = author
        self.comments = None
        self.category = None
        self.classification = None
        self.publisher = None

    def __str__(self):
        """
        Return a four line, human readable summary (title, author,
        category, comments). Missing values are rendered as C{None}.
        """
        ans = ''
        ans += 'Title : ' + str(self.title) + '\n'
        ans += 'Author : ' + str(self.author) + '\n'
        ans += 'Category: ' + str(self.category) + '\n'
        ans += 'Comments: ' + str(self.comments) + '\n'
        return ans.strip()

    def __nonzero__(self):
        """
        Truth value: C{True} if any of title, author, comments or
        category is set to a non-empty value.
        """
        # Python 2 requires __nonzero__ to return a bool or an int. The
        # bare or-chain returned a string (or None), which made
        # bool(mi) / "if mi:" raise TypeError.
        return bool(self.title or self.author or self.comments or
                    self.category)

    # Python 3 spells the truth-value hook __bool__; keep both in sync.
    __bool__ = __nonzero__
|
26
src/libprs500/metadata/meta.py
Normal file
26
src/libprs500/metadata/meta.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
|
||||||
|
## This program is free software; you can redistribute it and/or modify
|
||||||
|
## it under the terms of the GNU General Public License as published by
|
||||||
|
## the Free Software Foundation; either version 2 of the License, or
|
||||||
|
## (at your option) any later version.
|
||||||
|
##
|
||||||
|
## This program is distributed in the hope that it will be useful,
|
||||||
|
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
## GNU General Public License for more details.
|
||||||
|
##
|
||||||
|
## You should have received a copy of the GNU General Public License along
|
||||||
|
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
from libprs500.metadata.rtf import get_metadata as rtf_metadata
|
||||||
|
from libprs500.lrf.meta import get_metadata as lrf_metadata
|
||||||
|
from libprs500.metadata import MetaInformation
|
||||||
|
|
||||||
|
def get_metadata(stream, stream_type='lrf'):
    """
    Read the meta-data of C{stream} with the reader that matches
    C{stream_type}.

    @param stream: Stream passed unchanged to the format specific reader.
    @param stream_type: C{'rtf'} or C{'lrf'}; any other value selects no
                        reader.
    @return: Whatever the selected reader returns, or an empty
             L{MetaInformation} (title and author both C{None}) when the
             type is not supported.
    """
    if stream_type == 'rtf':
        reader = rtf_metadata
    elif stream_type == 'lrf':
        reader = lrf_metadata
    else:
        # Unsupported format: report "nothing known" instead of failing.
        return MetaInformation(None, None)
    return reader(stream)
|
||||||
|
|
85
src/libprs500/metadata/rtf.py
Normal file
85
src/libprs500/metadata/rtf.py
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
|
||||||
|
## This program is free software; you can redistribute it and/or modify
|
||||||
|
## it under the terms of the GNU General Public License as published by
|
||||||
|
## the Free Software Foundation; either version 2 of the License, or
|
||||||
|
## (at your option) any later version.
|
||||||
|
##
|
||||||
|
## This program is distributed in the hope that it will be useful,
|
||||||
|
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
## GNU General Public License for more details.
|
||||||
|
##
|
||||||
|
## You should have received a copy of the GNU General Public License along
|
||||||
|
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
import re, cStringIO
|
||||||
|
|
||||||
|
from libprs500.metadata import MetaInformation
|
||||||
|
|
||||||
|
def _info_field_pattern(keyword):
    """
    Compile the regular expression that captures the text following the
    control word C{keyword} inside an RTF C{{\\info ...}} group, up to the
    first closing brace. C{re.DOTALL} lets the match span several lines.
    """
    return re.compile(r'\{\\info.*?\{\\' + keyword + r'(.*?)\}', re.DOTALL)


# One pattern per document-information field that is extracted.
title_pat = _info_field_pattern('title')
author_pat = _info_field_pattern('author')
comment_pat = _info_field_pattern('subject')
category_pat = _info_field_pattern('category')
|
||||||
|
|
||||||
|
def get_document_info(stream):
    """
    Locate and extract the C{{\\info ...}} group of an RTF document.

    The stream is scanned from the start in fixed size blocks for the
    marker C{{\\info}; the group is then copied character by character
    until its braces balance.

    @param stream: Seekable stream positioned anywhere; it is rewound.
    @return: A tuple C{(text, pos)}. C{text} is the info group including
             its outer braces and C{pos} is its offset in the stream. If
             no info group exists the result is C{(None, 0)}. If the
             stream ends before the group is closed, the text collected
             up to the end of the stream is returned.
    """
    block_size = 4096
    marker = r'{\info'
    # Characters carried over between blocks so that a marker straddling
    # a block boundary is still found.
    overlap = len(marker) - 1

    stream.seek(0)
    block = ""
    while True:
        prefix = block[-overlap:]
        read_start = stream.tell()
        chunk = stream.read(block_size)
        if not chunk:
            return None, 0
        block = prefix + chunk
        idx = block.find(marker)
        if idx >= 0:
            # Derive the offset from where this read started, not from
            # block_size: the final read of a stream is usually shorter
            # than a full block.
            pos = read_start - len(prefix) + idx
            break

    stream.seek(pos)
    pieces, depth = [], 0
    while True:
        ch = stream.read(1)
        if not ch:
            # End of stream with unbalanced braces: stop instead of
            # spinning forever on empty reads.
            break
        pieces.append(ch)
        if ch == '\\':
            # The character after a backslash belongs to a control word
            # or control symbol (e.g. \{ or \}); it never opens or closes
            # a group, so copy it without counting it.
            escaped = stream.read(1)
            if not escaped:
                break
            pieces.append(escaped)
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                break
    return ''.join(pieces), pos
|
||||||
|
|
||||||
|
def get_metadata(stream):
    """
    Return basic meta-data about the RTF file in C{stream} as a
    L{MetaInformation} object.

    The title, author, subject and category are read from the document's
    C{{\\info ...}} group; the subject is stored as the comments. Fields
    that are absent are left as C{None}.

    @param stream: Seekable stream containing the RTF document.
    @return: A L{MetaInformation} instance; all fields are C{None} when
             the document has no info group.
    @raise Exception: If the stream does not start with C{{\\rtf}.
    """
    stream.seek(0)
    if stream.read(5) != r'{\rtf':
        raise Exception('Not a valid RTF file')
    block = get_document_info(stream)[0]
    if not block:
        return MetaInformation(None, None)

    def extract(pattern):
        # Stripped text captured by pattern, or None if it does not match.
        match = pattern.search(block)
        if match:
            return match.group(1).strip()
        return None

    mi = MetaInformation(extract(title_pat), extract(author_pat))
    # The subject was previously assigned to the title by mistake, which
    # clobbered the real title and left the comments permanently empty.
    mi.comments = extract(comment_pat)
    mi.category = extract(category_pat)
    return mi
|
||||||
|
|
||||||
|
def main():
    """
    Print the meta-data of the RTF file named by the first command line
    argument.
    """
    import sys
    # Binary mode: the parser does offset arithmetic with tell()/seek(),
    # which is only reliable when no newline translation takes place.
    stream = open(sys.argv[1], 'rb')
    try:
        print(get_metadata(stream))
    finally:
        # Close the file even if the document turns out to be invalid.
        stream.close()


if __name__ == '__main__':
    main()
|
Loading…
x
Reference in New Issue
Block a user