MOBI Output: Normalize unicode strings when writing metadata to MOBI files as the Kindle cannot handle non-normalized unicode. Fixes #8229 (Diacritical mark in MOBI title)

This commit is contained in:
Kovid Goyal 2011-02-14 11:23:22 -07:00
parent c4f06e39af
commit ecbcb38ead
3 changed files with 23 additions and 11 deletions

View File

@ -152,8 +152,17 @@ def check_ebook_format(stream, current_guess):
stream.seek(0) stream.seek(0)
return ans return ans
def normalize(x):
    """Return *x* converted to Unicode normalization form NFKC.

    Only ``unicode`` objects are normalized; any other value (byte
    strings, ``None``, numbers, ...) is handed back untouched, so callers
    may pass optional metadata fields without checking them first.
    """
    if not isinstance(x, unicode):
        return x
    # Imported lazily so the module is only loaded when a unicode value
    # actually needs normalizing.
    from unicodedata import normalize as to_normal_form
    return to_normal_form('NFKC', x)
def calibre_cover(title, author_string, series_string=None, def calibre_cover(title, author_string, series_string=None,
output_format='jpg', title_size=46, author_size=36): output_format='jpg', title_size=46, author_size=36):
title = normalize(title)
author_string = normalize(author_string)
series_string = normalize(series_string)
from calibre.utils.magick.draw import create_cover_page, TextLine from calibre.utils.magick.draw import create_cover_page, TextLine
lines = [TextLine(title, title_size), TextLine(author_string, author_size)] lines = [TextLine(title, title_size), TextLine(author_string, author_size)]
if series_string: if series_string:

View File

@ -12,6 +12,7 @@ __docformat__ = 'restructuredtext en'
from struct import pack, unpack from struct import pack, unpack
from cStringIO import StringIO from cStringIO import StringIO
from calibre.ebooks import normalize
from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.ebooks.mobi.langcodes import iana2mobi
@ -311,6 +312,7 @@ class MetadataUpdater(object):
return StreamSlicer(self.stream, start, stop) return StreamSlicer(self.stream, start, stop)
def update(self, mi): def update(self, mi):
mi.title = normalize(mi.title)
def update_exth_record(rec): def update_exth_record(rec):
recs.append(rec) recs.append(rec)
if rec[0] in self.original_exth_records: if rec[0] in self.original_exth_records:
@ -331,12 +333,12 @@ class MetadataUpdater(object):
kindle_pdoc = None kindle_pdoc = None
if mi.author_sort and pas: if mi.author_sort and pas:
authors = mi.author_sort authors = mi.author_sort
update_exth_record((100, authors.encode(self.codec, 'replace'))) update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
elif mi.authors: elif mi.authors:
authors = ';'.join(mi.authors) authors = ';'.join(mi.authors)
update_exth_record((100, authors.encode(self.codec, 'replace'))) update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
if mi.publisher: if mi.publisher:
update_exth_record((101, mi.publisher.encode(self.codec, 'replace'))) update_exth_record((101, normalize(mi.publisher).encode(self.codec, 'replace')))
if mi.comments: if mi.comments:
# Strip user annotations # Strip user annotations
a_offset = mi.comments.find('<div class="user_annotations">') a_offset = mi.comments.find('<div class="user_annotations">')
@ -345,12 +347,12 @@ class MetadataUpdater(object):
mi.comments = mi.comments[:a_offset] mi.comments = mi.comments[:a_offset]
if ad_offset >= 0: if ad_offset >= 0:
mi.comments = mi.comments[:ad_offset] mi.comments = mi.comments[:ad_offset]
update_exth_record((103, mi.comments.encode(self.codec, 'replace'))) update_exth_record((103, normalize(mi.comments).encode(self.codec, 'replace')))
if mi.isbn: if mi.isbn:
update_exth_record((104, mi.isbn.encode(self.codec, 'replace'))) update_exth_record((104, mi.isbn.encode(self.codec, 'replace')))
if mi.tags: if mi.tags:
subjects = '; '.join(mi.tags) subjects = '; '.join(mi.tags)
update_exth_record((105, subjects.encode(self.codec, 'replace'))) update_exth_record((105, normalize(subjects).encode(self.codec, 'replace')))
if kindle_pdoc and kindle_pdoc in mi.tags: if kindle_pdoc and kindle_pdoc in mi.tags:
update_exth_record((501, str('PDOC'))) update_exth_record((501, str('PDOC')))

View File

@ -14,8 +14,9 @@ import re
from struct import pack from struct import pack
import time import time
from urlparse import urldefrag from urlparse import urldefrag
from cStringIO import StringIO from cStringIO import StringIO
from calibre.ebooks import normalize
from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.ebooks.mobi.mobiml import MBP_NS from calibre.ebooks.mobi.mobiml import MBP_NS
from calibre.ebooks.oeb.base import OEB_DOCS from calibre.ebooks.oeb.base import OEB_DOCS
@ -1365,7 +1366,7 @@ class MobiWriter(object):
self._text_length, self._text_length,
self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf) self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
uid = random.randint(0, 0xffffffff) uid = random.randint(0, 0xffffffff)
title = unicode(metadata.title[0]).encode('utf-8') title = normalize(unicode(metadata.title[0])).encode('utf-8')
# The MOBI Header # The MOBI Header
# 0x0 - 0x3 # 0x0 - 0x3
@ -1523,12 +1524,12 @@ class MobiWriter(object):
items = oeb.metadata[term] items = oeb.metadata[term]
if term == 'creator': if term == 'creator':
if self._prefer_author_sort: if self._prefer_author_sort:
creators = [unicode(c.file_as or c) for c in items] creators = [normalize(unicode(c.file_as or c)) for c in items]
else: else:
creators = [unicode(c) for c in items] creators = [normalize(unicode(c)) for c in items]
items = ['; '.join(creators)] items = ['; '.join(creators)]
for item in items: for item in items:
data = self.COLLAPSE_RE.sub(' ', unicode(item)) data = self.COLLAPSE_RE.sub(' ', normalize(unicode(item)))
if term == 'identifier': if term == 'identifier':
if data.lower().startswith('urn:isbn:'): if data.lower().startswith('urn:isbn:'):
data = data[9:] data = data[9:]
@ -1542,7 +1543,7 @@ class MobiWriter(object):
nrecs += 1 nrecs += 1
if term == 'rights' : if term == 'rights' :
try: try:
rights = unicode(oeb.metadata.rights[0]).encode('utf-8') rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8')
except: except:
rights = 'Unknown' rights = 'Unknown'
exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8)) exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))