From ecbcb38ead93745a82b652d6e3f3194ddc04b62f Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 14 Feb 2011 11:23:22 -0700
Subject: [PATCH] MOBI Output: Normalize unicode strings when writing metadata
 to MOBI files as the Kindle cannot handle non-normalized unicode. Fixes #8229
 (Diacritical mark in MOBI title)

---
 src/calibre/ebooks/__init__.py      |  9 +++++++++
 src/calibre/ebooks/metadata/mobi.py | 12 +++++++-----
 src/calibre/ebooks/mobi/writer.py   | 13 +++++++------
 3 files changed, 23 insertions(+), 11 deletions(-)
diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py
index 49604ae682..dcd32811b3 100644
--- a/src/calibre/ebooks/__init__.py
+++ b/src/calibre/ebooks/__init__.py
@@ -152,8 +152,17 @@ def check_ebook_format(stream, current_guess):
         stream.seek(0)
     return ans
 
+def normalize(x):  # Return x in Unicode NFKC form if it is a unicode string; any other value (None, bytes, ...) is returned unchanged.
+    if isinstance(x, unicode):
+        import unicodedata  # imported here rather than at module level, so it is only loaded when a unicode value is actually passed
+        x = unicodedata.normalize('NFKC', x)  # NFKC = compatibility decomposition followed by canonical composition
+    return x
+
 def calibre_cover(title, author_string, series_string=None,
         output_format='jpg', title_size=46, author_size=36):
+    title = normalize(title)
+    author_string = normalize(author_string)
+    series_string = normalize(series_string)
     from calibre.utils.magick.draw import create_cover_page, TextLine
     lines = [TextLine(title, title_size), TextLine(author_string, author_size)]
     if series_string:
diff --git a/src/calibre/ebooks/metadata/mobi.py b/src/calibre/ebooks/metadata/mobi.py
index 30668d70f7..963391dcf8 100644
--- a/src/calibre/ebooks/metadata/mobi.py
+++ b/src/calibre/ebooks/metadata/mobi.py
@@ -12,6 +12,7 @@ __docformat__ = 'restructuredtext en'
 from struct import pack, unpack
 from cStringIO import StringIO
 
+from calibre.ebooks import normalize
 from calibre.ebooks.mobi import MobiError
 from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
 from calibre.ebooks.mobi.langcodes import iana2mobi
@@ -311,6 +312,7 @@ class MetadataUpdater(object):
         return StreamSlicer(self.stream, start, stop)
 
     def update(self, mi):
+        mi.title = normalize(mi.title)
         def update_exth_record(rec):
             recs.append(rec)
             if rec[0] in self.original_exth_records:
@@ -331,12 +333,12 @@ class MetadataUpdater(object):
             kindle_pdoc = None
         if mi.author_sort and pas:
             authors = mi.author_sort
-            update_exth_record((100, authors.encode(self.codec, 'replace')))
+            update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
         elif mi.authors:
             authors = ';'.join(mi.authors)
-            update_exth_record((100, authors.encode(self.codec, 'replace')))
+            update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
         if mi.publisher:
-            update_exth_record((101, mi.publisher.encode(self.codec, 'replace')))
+            update_exth_record((101, normalize(mi.publisher).encode(self.codec, 'replace')))
         if mi.comments:
             # Strip user annotations
             a_offset = mi.comments.find('<div class="user_annotations">')
@@ -345,12 +347,12 @@ class MetadataUpdater(object):
                 mi.comments = mi.comments[:a_offset]
             if ad_offset >= 0:
                 mi.comments = mi.comments[:ad_offset]
-            update_exth_record((103, mi.comments.encode(self.codec, 'replace')))
+            update_exth_record((103, normalize(mi.comments).encode(self.codec, 'replace')))
         if mi.isbn:
             update_exth_record((104, mi.isbn.encode(self.codec, 'replace')))
         if mi.tags:
             subjects = '; '.join(mi.tags)
-            update_exth_record((105, subjects.encode(self.codec, 'replace')))
+            update_exth_record((105, normalize(subjects).encode(self.codec, 'replace')))
 
             if kindle_pdoc and kindle_pdoc in mi.tags:
                 update_exth_record((501, str('PDOC')))
diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py
index abba173d69..b3f8160e3a 100644
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@@ -14,8 +14,9 @@ import re
 from struct import pack
 import time
 from urlparse import urldefrag
-
 from cStringIO import StringIO
+
+from calibre.ebooks import normalize
 from calibre.ebooks.mobi.langcodes import iana2mobi
 from calibre.ebooks.mobi.mobiml import MBP_NS
 from calibre.ebooks.oeb.base import OEB_DOCS
@@ -1365,7 +1366,7 @@ class MobiWriter(object):
             self._text_length,
             self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
         uid = random.randint(0, 0xffffffff)
-        title = unicode(metadata.title[0]).encode('utf-8')
+        title = normalize(unicode(metadata.title[0])).encode('utf-8')
         # The MOBI Header
 
         # 0x0 - 0x3
@@ -1523,12 +1524,12 @@ class MobiWriter(object):
             items = oeb.metadata[term]
             if term == 'creator':
                 if self._prefer_author_sort:
-                    creators = [unicode(c.file_as or c) for c in items]
+                    creators = [normalize(unicode(c.file_as or c)) for c in items]
                 else:
-                    creators = [unicode(c) for c in items]
+                    creators = [normalize(unicode(c)) for c in items]
                 items = ['; '.join(creators)]
             for item in items:
-                data = self.COLLAPSE_RE.sub(' ', unicode(item))
+                data = self.COLLAPSE_RE.sub(' ', normalize(unicode(item)))
                 if term == 'identifier':
                     if data.lower().startswith('urn:isbn:'):
                         data = data[9:]
@@ -1542,7 +1543,7 @@ class MobiWriter(object):
                 nrecs += 1
             if term == 'rights' :
                 try:
-                    rights = unicode(oeb.metadata.rights[0]).encode('utf-8')
+                    rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8')
                 except:
                     rights = 'Unknown'
                 exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))