PDF Output: Serialize ToC string as UTF-16

PDF Output: Fix non-breaking space characters incorrectly rendered in
PDF outline. Fixes #1223862 [PDF TOC -    appears sometimes as €](https://bugs.launchpad.net/calibre/+bug/1223862)
This commit is contained in:
Kovid Goyal 2013-09-11 21:32:00 +05:30
parent ccf65bf82e
commit 725bb967e5
2 changed files with 10 additions and 3 deletions

View File

@ -100,11 +100,18 @@ class String(unicode):
try: try:
raw = s.encode('latin1') raw = s.encode('latin1')
if raw.startswith(codecs.BOM_UTF16_BE): if raw.startswith(codecs.BOM_UTF16_BE):
raise UnicodeEncodeError('') raw = codecs.BOM_UTF16_BE + s.encode('utf-16-be')
except UnicodeEncodeError: except UnicodeEncodeError:
raw = codecs.BOM_UTF16_BE + s.encode('utf-16-be') raw = codecs.BOM_UTF16_BE + s.encode('utf-16-be')
stream.write(b'('+raw+b')') stream.write(b'('+raw+b')')
class UTF16String(unicode):
    """Text that is always serialized as a UTF-16BE PDF literal string."""

    def pdf_serialize(self, stream):
        """Write this text to *stream* as a parenthesised PDF literal string.

        The text is encoded as UTF-16BE, prefixed with the UTF-16BE byte
        order mark, and wrapped in ``(`` ... ``)``.

        :param stream: object with a ``write()`` method accepting bytes.
        """
        raw = codecs.BOM_UTF16_BE + self.encode('utf-16-be')
        # Escape AFTER encoding. A PDF reader scans the literal string byte
        # by byte, so the escaping backslash has to sit directly in front of
        # the offending byte. Escaping the text before encoding would emit
        # the backslash as the two bytes 00 5C and would also miss delimiter
        # bytes that are one half of another code unit (U+2028 -> 20 28).
        # The backslash must be handled first so that the backslashes added
        # by the later replacements are not doubled.
        for special, escaped in (
                (b'\\', b'\\\\'),
                (b'(', b'\\('),
                (b')', b'\\)'),
                # A bare CR byte inside a literal string is read back as LF,
                # which would corrupt code units such as U+010D (01 0D).
                (b'\r', b'\\r'),
        ):
            raw = raw.replace(special, escaped)
        stream.write(b'(' + raw + b')')
class Dictionary(dict): class Dictionary(dict):
def pdf_serialize(self, stream): def pdf_serialize(self, stream):

View File

@ -11,7 +11,7 @@ import os
from urlparse import urlparse from urlparse import urlparse
from urllib2 import unquote from urllib2 import unquote
from calibre.ebooks.pdf.render.common import Array, Name, Dictionary, String from calibre.ebooks.pdf.render.common import Array, Name, Dictionary, String, UTF16String
class Destination(Array): class Destination(Array):
@ -131,7 +131,7 @@ class Links(object):
a = self.anchors[path] a = self.anchors[path]
dest = a.get(frag, a[None]) dest = a.get(frag, a[None])
item = Dictionary({'Parent':parentref, 'Dest':dest, item = Dictionary({'Parent':parentref, 'Dest':dest,
'Title':String(toc.text or _('Unknown'))}) 'Title':UTF16String(toc.text or _('Unknown'))})
return self.pdf.objects.add(item) return self.pdf.objects.add(item)