mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
This commit is contained in:
parent
08aad56c04
commit
6b0df0e357
@ -12,17 +12,25 @@
|
|||||||
## You should have received a copy of the GNU General Public License along
|
## You should have received a copy of the GNU General Public License along
|
||||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
"""
|
||||||
|
Read metadata from RTF files.
|
||||||
|
"""
|
||||||
import re, cStringIO
|
import re, cStringIO
|
||||||
|
|
||||||
from libprs500.metadata import MetaInformation
|
from libprs500.metadata import MetaInformation
|
||||||
|
|
||||||
title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)\}', re.DOTALL)
|
title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
|
||||||
author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)\}', re.DOTALL)
|
author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
|
||||||
comment_pat = re.compile(r'\{\\info.*?\{\\subject(.*?)\}', re.DOTALL)
|
comment_pat = re.compile(r'\{\\info.*?\{\\subject(.*?)(?<!\\)\}', re.DOTALL)
|
||||||
category_pat = re.compile(r'\{\\info.*?\{\\category(.*?)\}', re.DOTALL)
|
category_pat = re.compile(r'\{\\info.*?\{\\category(.*?)(?<!\\)\}', re.DOTALL)
|
||||||
|
|
||||||
def get_document_info(stream):
|
def get_document_info(stream):
|
||||||
|
"""
|
||||||
|
Extract the \info block from an RTF file.
|
||||||
|
Return the info block as a string and the position in the file at which it
|
||||||
|
starts.
|
||||||
|
@param stream: File like object pointing to the RTF file.
|
||||||
|
"""
|
||||||
block_size = 4096
|
block_size = 4096
|
||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
found, block = False, ""
|
found, block = False, ""
|
||||||
@ -36,13 +44,17 @@ def get_document_info(stream):
|
|||||||
found = True
|
found = True
|
||||||
stream.seek(stream.tell() - block_size + idx - len(prefix))
|
stream.seek(stream.tell() - block_size + idx - len(prefix))
|
||||||
else:
|
else:
|
||||||
stream.seek(stream.tell())
|
if block.find(r'\sect') > -1:
|
||||||
|
break
|
||||||
if not found:
|
if not found:
|
||||||
return None, 0
|
return None, 0
|
||||||
data, count, = cStringIO.StringIO(), 0
|
data, count, = cStringIO.StringIO(), 0
|
||||||
pos = stream.tell()
|
pos = stream.tell()
|
||||||
while True:
|
while True:
|
||||||
ch = stream.read(1)
|
ch = stream.read(1)
|
||||||
|
if ch == '\\':
|
||||||
|
data.write(ch + stream.read(1))
|
||||||
|
continue
|
||||||
if ch == '{':
|
if ch == '{':
|
||||||
count += 1
|
count += 1
|
||||||
elif ch == '}':
|
elif ch == '}':
|
||||||
@ -53,6 +65,7 @@ def get_document_info(stream):
|
|||||||
return data.getvalue(), pos
|
return data.getvalue(), pos
|
||||||
|
|
||||||
def get_metadata(stream):
|
def get_metadata(stream):
|
||||||
|
""" Return metadata as a L{MetaInformation} object """
|
||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
if stream.read(5) != r'{\rtf':
|
if stream.read(5) != r'{\rtf':
|
||||||
raise Exception('Not a valid RTF file')
|
raise Exception('Not a valid RTF file')
|
||||||
@ -79,6 +92,9 @@ def get_metadata(stream):
|
|||||||
|
|
||||||
def main():
|
def main():
|
||||||
import sys
|
import sys
|
||||||
|
if len(sys.argv) != 2:
|
||||||
|
print >> sys.stderr, "Usage:", sys.argv[0], " mybook.rtf"
|
||||||
|
sys.exit(1)
|
||||||
print get_metadata(open(sys.argv[1]))
|
print get_metadata(open(sys.argv[1]))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Loading…
x
Reference in New Issue
Block a user