mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #3373 (I have problems with characters in title and book author name)
This commit is contained in:
parent
63e13101be
commit
48b99181d1
@ -3,7 +3,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
"""
|
||||
Edit metadata in RTF files.
|
||||
"""
|
||||
import re, cStringIO, sys
|
||||
import re, cStringIO, codecs
|
||||
|
||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||
|
||||
@ -52,6 +52,32 @@ def get_document_info(stream):
|
||||
break
|
||||
return data.getvalue(), pos
|
||||
|
||||
def detect_codepage(stream):
    r"""Return the Python codec named by the RTF ``\ansicpgN`` declaration.

    Reads up to 512 characters from *stream*, starting at its current
    position, and searches them for an ``\ansicpgN`` control word.  The
    stream position is not restored; callers must seek afterwards.

    :param stream: object with a ``read(size)`` method returning text.
    :return: ``'cp' + N`` when the declaration is found and Python has a
             codec of that name, otherwise ``None``.
    """
    match = re.search(r'\\ansicpg(\d+)', stream.read(512))
    if match is None:
        return None

    num = match.group(1)
    if num == '0':
        # Codepage 0 is not a usable codec name; cp1250 is used in its place.
        num = '1250'
    codec = 'cp' + num

    # Only an unknown codec name is an expected failure here; anything else
    # (e.g. KeyboardInterrupt) must not be silently swallowed.
    try:
        codecs.lookup(codec)
    except LookupError:
        return None
    return codec
|
||||
|
||||
def decode(raw, codec):
    r"""Expand RTF character escapes in *raw* and return the resulting text.

    Two kinds of escape are handled:

    * ``\'hh`` (two hex digits): only when *codec* is not ``None``.  Each
      escape is replaced by the character with that code and the whole
      string is then decoded with *codec*.
    * ``\uN`` followed by one fallback character: replaced by the Unicode
      character with code point N.  N is a decimal number of one to five
      digits, optionally negative; negative values are wrapped by adding
      65536.  Escapes whose value falls outside 0..0xFFFF are left as is.

    :param raw: the metadata string as it appears in the RTF file.
    :param codec: a codec name, or ``None`` to skip the ``\'hh`` step.
    :return: *raw* with the escapes expanded.
    """
    if codec is not None:
        def codepage(match):
            return chr(int(match.group(1), 16))
        raw = re.sub(r"\\'([a-fA-F0-9]{2})", codepage, raw)
        # 'replace' keeps a byte the codepage does not define from aborting
        # the whole metadata read with a UnicodeDecodeError.
        raw = raw.decode(codec, 'replace')

    def uni(match):
        code = int(match.group(1))
        if code < 0:
            # Values above 32767 are written as negative 16-bit numbers.
            code += 0x10000
        if not 0 <= code <= 0xFFFF:
            return match.group(0)
        return unichr(code)

    # (?![0-9]) stops the engine from backtracking into a shorter number and
    # treating the last digit as the fallback character.
    return re.sub(r'\\u(-?[0-9]{1,5})(?![0-9]).', uni, raw)
|
||||
|
||||
def get_metadata(stream):
|
||||
""" Return metadata as a L{MetaInfo} object """
|
||||
title, author, comment, category = None, None, None, None
|
||||
@ -62,18 +88,22 @@ def get_metadata(stream):
|
||||
if not block:
|
||||
return MetaInformation(None, None)
|
||||
|
||||
stream.seek(0)
|
||||
cpg = detect_codepage(stream)
|
||||
stream.seek(0)
|
||||
|
||||
title_match = title_pat.search(block)
|
||||
if title_match:
|
||||
title = title_match.group(1).strip()
|
||||
title = decode(title_match.group(1).strip(), cpg)
|
||||
author_match = author_pat.search(block)
|
||||
if author_match:
|
||||
author = author_match.group(1).strip()
|
||||
author = decode(author_match.group(1).strip(), cpg)
|
||||
comment_match = comment_pat.search(block)
|
||||
if comment_match:
|
||||
comment = comment_match.group(1).strip()
|
||||
comment = decode(comment_match.group(1).strip(), cpg)
|
||||
category_match = category_pat.search(block)
|
||||
if category_match:
|
||||
category = category_match.group(1).strip()
|
||||
category = decode(category_match.group(1).strip(), cpg)
|
||||
mi = MetaInformation(title, author)
|
||||
if author:
|
||||
mi.authors = string_to_authors(author)
|
||||
|
Loading…
x
Reference in New Issue
Block a user