mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
MOBI Output: Normalize Unicode strings when writing metadata to MOBI files, as the Kindle cannot handle non-normalized Unicode. Fixes #8229 (Diacritical mark in MOBI title)
This commit is contained in:
parent
c4f06e39af
commit
ecbcb38ead
@ -152,8 +152,17 @@ def check_ebook_format(stream, current_guess):
|
|||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
def normalize(x):
    """Return *x* in Unicode normalization form NFKC.

    Only ``unicode`` objects are normalized; any other value (byte
    strings, ``None``, ...) is handed back unchanged, so callers may pass
    optional fields without checking them first.

    Note that NFKC is a *compatibility* form: besides composing base
    characters with their combining marks, it also replaces compatibility
    characters (ligatures, for example) with their plain equivalents.
    """
    if not isinstance(x, unicode):
        return x
    # Imported lazily, only on the path that actually needs it.
    import unicodedata
    return unicodedata.normalize('NFKC', x)
|
||||||
|
|
||||||
def calibre_cover(title, author_string, series_string=None,
|
def calibre_cover(title, author_string, series_string=None,
|
||||||
output_format='jpg', title_size=46, author_size=36):
|
output_format='jpg', title_size=46, author_size=36):
|
||||||
|
title = normalize(title)
|
||||||
|
author_string = normalize(author_string)
|
||||||
|
series_string = normalize(series_string)
|
||||||
from calibre.utils.magick.draw import create_cover_page, TextLine
|
from calibre.utils.magick.draw import create_cover_page, TextLine
|
||||||
lines = [TextLine(title, title_size), TextLine(author_string, author_size)]
|
lines = [TextLine(title, title_size), TextLine(author_string, author_size)]
|
||||||
if series_string:
|
if series_string:
|
||||||
|
@ -12,6 +12,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
from struct import pack, unpack
|
from struct import pack, unpack
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
|
|
||||||
|
from calibre.ebooks import normalize
|
||||||
from calibre.ebooks.mobi import MobiError
|
from calibre.ebooks.mobi import MobiError
|
||||||
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
|
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
|
||||||
from calibre.ebooks.mobi.langcodes import iana2mobi
|
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||||
@ -311,6 +312,7 @@ class MetadataUpdater(object):
|
|||||||
return StreamSlicer(self.stream, start, stop)
|
return StreamSlicer(self.stream, start, stop)
|
||||||
|
|
||||||
def update(self, mi):
|
def update(self, mi):
|
||||||
|
mi.title = normalize(mi.title)
|
||||||
def update_exth_record(rec):
|
def update_exth_record(rec):
|
||||||
recs.append(rec)
|
recs.append(rec)
|
||||||
if rec[0] in self.original_exth_records:
|
if rec[0] in self.original_exth_records:
|
||||||
@ -331,12 +333,12 @@ class MetadataUpdater(object):
|
|||||||
kindle_pdoc = None
|
kindle_pdoc = None
|
||||||
if mi.author_sort and pas:
|
if mi.author_sort and pas:
|
||||||
authors = mi.author_sort
|
authors = mi.author_sort
|
||||||
update_exth_record((100, authors.encode(self.codec, 'replace')))
|
update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
|
||||||
elif mi.authors:
|
elif mi.authors:
|
||||||
authors = ';'.join(mi.authors)
|
authors = ';'.join(mi.authors)
|
||||||
update_exth_record((100, authors.encode(self.codec, 'replace')))
|
update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
|
||||||
if mi.publisher:
|
if mi.publisher:
|
||||||
update_exth_record((101, mi.publisher.encode(self.codec, 'replace')))
|
update_exth_record((101, normalize(mi.publisher).encode(self.codec, 'replace')))
|
||||||
if mi.comments:
|
if mi.comments:
|
||||||
# Strip user annotations
|
# Strip user annotations
|
||||||
a_offset = mi.comments.find('<div class="user_annotations">')
|
a_offset = mi.comments.find('<div class="user_annotations">')
|
||||||
@ -345,12 +347,12 @@ class MetadataUpdater(object):
|
|||||||
mi.comments = mi.comments[:a_offset]
|
mi.comments = mi.comments[:a_offset]
|
||||||
if ad_offset >= 0:
|
if ad_offset >= 0:
|
||||||
mi.comments = mi.comments[:ad_offset]
|
mi.comments = mi.comments[:ad_offset]
|
||||||
update_exth_record((103, mi.comments.encode(self.codec, 'replace')))
|
update_exth_record((103, normalize(mi.comments).encode(self.codec, 'replace')))
|
||||||
if mi.isbn:
|
if mi.isbn:
|
||||||
update_exth_record((104, mi.isbn.encode(self.codec, 'replace')))
|
update_exth_record((104, mi.isbn.encode(self.codec, 'replace')))
|
||||||
if mi.tags:
|
if mi.tags:
|
||||||
subjects = '; '.join(mi.tags)
|
subjects = '; '.join(mi.tags)
|
||||||
update_exth_record((105, subjects.encode(self.codec, 'replace')))
|
update_exth_record((105, normalize(subjects).encode(self.codec, 'replace')))
|
||||||
|
|
||||||
if kindle_pdoc and kindle_pdoc in mi.tags:
|
if kindle_pdoc and kindle_pdoc in mi.tags:
|
||||||
update_exth_record((501, str('PDOC')))
|
update_exth_record((501, str('PDOC')))
|
||||||
|
@ -14,8 +14,9 @@ import re
|
|||||||
from struct import pack
|
from struct import pack
|
||||||
import time
|
import time
|
||||||
from urlparse import urldefrag
|
from urlparse import urldefrag
|
||||||
|
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
|
|
||||||
|
from calibre.ebooks import normalize
|
||||||
from calibre.ebooks.mobi.langcodes import iana2mobi
|
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||||
from calibre.ebooks.mobi.mobiml import MBP_NS
|
from calibre.ebooks.mobi.mobiml import MBP_NS
|
||||||
from calibre.ebooks.oeb.base import OEB_DOCS
|
from calibre.ebooks.oeb.base import OEB_DOCS
|
||||||
@ -1365,7 +1366,7 @@ class MobiWriter(object):
|
|||||||
self._text_length,
|
self._text_length,
|
||||||
self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
|
self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
|
||||||
uid = random.randint(0, 0xffffffff)
|
uid = random.randint(0, 0xffffffff)
|
||||||
title = unicode(metadata.title[0]).encode('utf-8')
|
title = normalize(unicode(metadata.title[0])).encode('utf-8')
|
||||||
# The MOBI Header
|
# The MOBI Header
|
||||||
|
|
||||||
# 0x0 - 0x3
|
# 0x0 - 0x3
|
||||||
@ -1523,12 +1524,12 @@ class MobiWriter(object):
|
|||||||
items = oeb.metadata[term]
|
items = oeb.metadata[term]
|
||||||
if term == 'creator':
|
if term == 'creator':
|
||||||
if self._prefer_author_sort:
|
if self._prefer_author_sort:
|
||||||
creators = [unicode(c.file_as or c) for c in items]
|
creators = [normalize(unicode(c.file_as or c)) for c in items]
|
||||||
else:
|
else:
|
||||||
creators = [unicode(c) for c in items]
|
creators = [normalize(unicode(c)) for c in items]
|
||||||
items = ['; '.join(creators)]
|
items = ['; '.join(creators)]
|
||||||
for item in items:
|
for item in items:
|
||||||
data = self.COLLAPSE_RE.sub(' ', unicode(item))
|
data = self.COLLAPSE_RE.sub(' ', normalize(unicode(item)))
|
||||||
if term == 'identifier':
|
if term == 'identifier':
|
||||||
if data.lower().startswith('urn:isbn:'):
|
if data.lower().startswith('urn:isbn:'):
|
||||||
data = data[9:]
|
data = data[9:]
|
||||||
@ -1542,7 +1543,7 @@ class MobiWriter(object):
|
|||||||
nrecs += 1
|
nrecs += 1
|
||||||
if term == 'rights' :
|
if term == 'rights' :
|
||||||
try:
|
try:
|
||||||
rights = unicode(oeb.metadata.rights[0]).encode('utf-8')
|
rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8')
|
||||||
except:
|
except:
|
||||||
rights = 'Unknown'
|
rights = 'Unknown'
|
||||||
exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))
|
exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user