Add metadata to info in RTF metadata plugin

This commit is contained in:
Sengian 2011-01-09 14:47:23 +01:00
parent df85aacb47
commit 4043009433

View File

@ -11,6 +11,8 @@ title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL) author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
comment_pat = re.compile(r'\{\\info.*?\{\\subject(.*?)(?<!\\)\}', re.DOTALL) comment_pat = re.compile(r'\{\\info.*?\{\\subject(.*?)(?<!\\)\}', re.DOTALL)
category_pat = re.compile(r'\{\\info.*?\{\\category(.*?)(?<!\\)\}', re.DOTALL) category_pat = re.compile(r'\{\\info.*?\{\\category(.*?)(?<!\\)\}', re.DOTALL)
tags_pat = re.compile(r'\{\\info.*?\{\\keywords(.*?)(?<!\\)\}', re.DOTALL)
publisher_pat = re.compile(r'\{\\info.*?\{\\manager(.*?)(?<!\\)\}', re.DOTALL)
def get_document_info(stream): def get_document_info(stream):
""" """
@ -93,50 +95,70 @@ def get_metadata(stream):
stream.seek(0) stream.seek(0)
cpg = detect_codepage(stream) cpg = detect_codepage(stream)
stream.seek(0) stream.seek(0)
title_match = title_pat.search(block) title_match = title_pat.search(block)
if title_match: if title_match:
title = decode(title_match.group(1).strip(), cpg) title = decode(title_match.group(1).strip(), cpg)
else:
title = _('Unknown')
author_match = author_pat.search(block) author_match = author_pat.search(block)
if author_match: if author_match:
author = decode(author_match.group(1).strip(), cpg) author = decode(author_match.group(1).strip(), cpg)
comment_match = comment_pat.search(block) else:
if comment_match: author = None
comment = decode(comment_match.group(1).strip(), cpg)
category_match = category_pat.search(block)
if category_match:
category = decode(category_match.group(1).strip(), cpg)
mi = MetaInformation(title, author) mi = MetaInformation(title, author)
if author: if author:
mi.authors = string_to_authors(author) mi.authors = string_to_authors(author)
mi.comments = comment
mi.category = category comment_match = comment_pat.search(block)
if comment_match:
comment = decode(comment_match.group(1).strip(), cpg)
mi.comments = comment
category_match = category_pat.search(block)
if category_match:
category = decode(category_match.group(1).strip(), cpg)
mi.category = category
tags_match = tags_pat.search(block)
if tags_match:
tags = decode(tags_match.group(1).strip(), cpg)
mi.tags = tags
publisher_match = publisher_pat.search(block)
if publisher_match:
publisher = decode(publisher_match.group(1).strip(), cpg)
mi.publisher = publisher
return mi return mi
def create_metadata(stream, options): def create_metadata(stream, options):
md = r'{\info' md = [r'{\info']
if options.title: if options.title:
title = options.title.encode('ascii', 'ignore') title = options.title.encode('ascii', 'ignore')
md += r'{\title %s}'%(title,) md.append(r'{\title %s}'%(title,))
if options.authors: if options.authors:
au = options.authors au = options.authors
if not isinstance(au, basestring): if not isinstance(au, basestring):
au = u', '.join(au) au = u', '.join(au)
author = au.encode('ascii', 'ignore') author = au.encode('ascii', 'ignore')
md += r'{\author %s}'%(author,) md.append(r'{\author %s}'%(author,))
if options.get('category', None): if options.get('category', None):
category = options.category.encode('ascii', 'ignore') category = options.category.encode('ascii', 'ignore')
md += r'{\category %s}'%(category,) md.append(r'{\category %s}'%(category,))
comp = options.comment if hasattr(options, 'comment') else options.comments comp = options.comment if hasattr(options, 'comment') else options.comments
if comp: if comp:
comment = comp.encode('ascii', 'ignore') comment = comp.encode('ascii', 'ignore')
md += r'{\subject %s}'%(comment,) md.append(r'{\subject %s}'%(comment,))
if len(md) > 6: if options.publisher:
md += '}' publisher = options.publisher.encode('ascii', 'ignore')
md.append(r'{\manager %s}'%(publisher,))
if options.tags:
tags = u', '.join(options.tags)
tags = tags.encode('ascii', 'ignore')
md.append(r'{\keywords %s}'%(tags,))
if len(md) > 1:
md.append('}')
stream.seek(0) stream.seek(0)
src = stream.read() src = stream.read()
ans = src[:6] + md + src[6:] ans = src[:6] + ''.join(md) + src[6:]
stream.seek(0) stream.seek(0)
stream.write(ans) stream.write(ans)
@ -149,14 +171,15 @@ def set_metadata(stream, options):
index = src.rindex('}') index = src.rindex('}')
return src[:index] + r'{\ '[:-1] + name + ' ' + val + '}}' return src[:index] + r'{\ '[:-1] + name + ' ' + val + '}}'
src, pos = get_document_info(stream) src, pos = get_document_info(stream)
if not src: print 'I was thre'
if src is not None:
create_metadata(stream, options) create_metadata(stream, options)
else: else:
olen = len(src) olen = len(src)
base_pat = r'\{\\name(.*?)(?<!\\)\}' base_pat = r'\{\\name(.*?)(?<!\\)\}'
title = options.title title = options.title
if title != None: if title is not None:
title = title.encode('ascii', 'replace') title = title.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'title'), re.DOTALL) pat = re.compile(base_pat.replace('name', 'title'), re.DOTALL)
if pat.search(src): if pat.search(src):
@ -164,7 +187,7 @@ def set_metadata(stream, options):
else: else:
src = add_metadata_item(src, 'title', title) src = add_metadata_item(src, 'title', title)
comment = options.comments comment = options.comments
if comment != None: if comment is not None:
comment = comment.encode('ascii', 'replace') comment = comment.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'subject'), re.DOTALL) pat = re.compile(base_pat.replace('name', 'subject'), re.DOTALL)
if pat.search(src): if pat.search(src):
@ -172,7 +195,7 @@ def set_metadata(stream, options):
else: else:
src = add_metadata_item(src, 'subject', comment) src = add_metadata_item(src, 'subject', comment)
author = options.authors author = options.authors
if author != None: if author is not None:
author = ', '.join(author) author = ', '.join(author)
author = author.encode('ascii', 'ignore') author = author.encode('ascii', 'ignore')
pat = re.compile(base_pat.replace('name', 'author'), re.DOTALL) pat = re.compile(base_pat.replace('name', 'author'), re.DOTALL)
@ -181,13 +204,30 @@ def set_metadata(stream, options):
else: else:
src = add_metadata_item(src, 'author', author) src = add_metadata_item(src, 'author', author)
category = options.get('category', None) category = options.get('category', None)
if category != None: if category is not None:
category = category.encode('ascii', 'replace') category = category.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'category'), re.DOTALL) pat = re.compile(base_pat.replace('name', 'category'), re.DOTALL)
if pat.search(src): if pat.search(src):
src = pat.sub(r'{\\category ' + category + r'}', src) src = pat.sub(r'{\\category ' + category + r'}', src)
else: else:
src = add_metadata_item(src, 'category', category) src = add_metadata_item(src, 'category', category)
tags = options.tags
if tags is not None:
tags = ', '.join(tags)
tags = tags.encode('ascii', 'ignore')
pat = re.compile(base_pat.replace('name', 'keywords'), re.DOTALL)
if pat.search(src):
src = pat.sub(r'{\\keywords ' + tags + r'}', src)
else:
src = add_metadata_item(src, 'keywords', tags)
publisher = options.publisher
if publisher is not None:
publisher = publisher.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'manager'), re.DOTALL)
if pat.search(src):
src = pat.sub(r'{\\manager ' + publisher + r'}', src)
else:
src = add_metadata_item(src, 'manager', publisher)
stream.seek(pos + olen) stream.seek(pos + olen)
after = stream.read() after = stream.read()
stream.seek(pos) stream.seek(pos)