mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More robust implementation of glyph to unicode mapping
This commit is contained in:
parent
302ce66f76
commit
8352098513
@ -91,7 +91,7 @@ class GlyphIndex(object):
|
|||||||
else:
|
else:
|
||||||
byts = bytearray(pack(b'>H', self.code))
|
byts = bytearray(pack(b'>H', self.code))
|
||||||
stream.write('<%s>'%''.join(map(
|
stream.write('<%s>'%''.join(map(
|
||||||
lambda x: bytes(hex(x)[2:]).rjust(2, b'0'), byts)))
|
lambda x: bytes(hex(int(x))[2:]).rjust(2, b'0'), byts)))
|
||||||
|
|
||||||
class Dictionary(dict):
|
class Dictionary(dict):
|
||||||
|
|
||||||
|
@ -205,6 +205,12 @@ class GraphicsState(object): # {{{
|
|||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
class Font(FontMetrics):
    '''
    Font metrics for a single font, extended with a per-font glyph map.
    '''

    def __init__(self, sfnt):
        FontMetrics.__init__(self, sfnt)
        # Starts out empty; entries are added by code outside this class
        self.glyph_map = dict()
|
||||||
|
|
||||||
class PdfEngine(QPaintEngine):
|
class PdfEngine(QPaintEngine):
|
||||||
|
|
||||||
def __init__(self, file_object, page_width, page_height, left_margin,
|
def __init__(self, file_object, page_width, page_height, left_margin,
|
||||||
@ -235,7 +241,7 @@ class PdfEngine(QPaintEngine):
|
|||||||
self.scale = sqrt(sy**2 + sx**2)
|
self.scale = sqrt(sy**2 + sx**2)
|
||||||
self.xscale, self.yscale = sx, sy
|
self.xscale, self.yscale = sx, sy
|
||||||
self.graphics_state = GraphicsState()
|
self.graphics_state = GraphicsState()
|
||||||
self.errors = []
|
self.errors, self.debug = [], []
|
||||||
self.text_option = QTextOption()
|
self.text_option = QTextOption()
|
||||||
self.text_option.setWrapMode(QTextOption.NoWrap)
|
self.text_option.setWrapMode(QTextOption.NoWrap)
|
||||||
self.fonts = {}
|
self.fonts = {}
|
||||||
@ -354,15 +360,67 @@ class PdfEngine(QPaintEngine):
|
|||||||
self.pdf.draw_rect(bl.x(), bl.y(), rect.width(), rect.height(),
|
self.pdf.draw_rect(bl.x(), bl.y(), rect.width(), rect.height(),
|
||||||
stroke=self.do_stroke, fill=self.do_fill)
|
stroke=self.do_stroke, fill=self.do_fill)
|
||||||
|
|
||||||
|
def get_text_layout(self, text_item, text):
    '''
    Create a QTextLayout for ``text`` using the font of ``text_item`` and
    this engine's paint device.

    The text direction of the shared ``self.text_option`` is set from the
    RightToLeft render flag of ``text_item`` before the option is applied
    to the new layout.

    :param text_item: The text item supplying the font and render flags
    :param text: The text to be laid out
    :return: The newly created QTextLayout
    '''
    layout = QTextLayout(text, text_item.font(), self.paintDevice())
    is_rtl = text_item.renderFlags() & text_item.RightToLeft
    if is_rtl:
        direction = Qt.RightToLeft
    else:
        direction = Qt.LeftToRight
    self.text_option.setTextDirection(direction)
    layout.setTextOption(self.text_option)
    return layout
|
||||||
|
|
||||||
|
def update_glyph_map(self, text, indices, text_item, glyph_map):
    '''
    Map glyphs back to the unicode text they represent.

    The glyph indices are walked in order while ``text`` is consumed from
    the left. A glyph that is already present in ``glyph_map`` is accepted
    as is when its recorded string matches the text at the current
    position. Otherwise successively longer prefixes of the remaining text
    (one to nine characters) are laid out until one is found whose first
    glyph is the glyph being looked up; that prefix is then recorded in
    ``glyph_map``. If no prefix matches, a message is appended to
    ``self.debug`` and processing stops.

    :param text: The unicode text the glyphs were generated from
    :param indices: Iterable of glyph indices
    :param text_item: The text item being drawn, it is only passed on to
        :meth:`get_text_layout`
    :param glyph_map: dict mapping glyph index to text, updated in place
    :return: The trailing part of ``text`` that was not consumed
    '''
    max_prefix = 9  # longest run of characters tried for a single glyph
    layout = self.get_text_layout(text_item, '')

    def get_glyphs(string):
        # Lay out string as a single, effectively unbounded, line and
        # return the glyph indices of all resulting glyph runs
        layout.setText(string)
        layout.beginLayout()
        line = layout.createLine()
        valid = line.isValid()
        if valid:
            line.setLineWidth(int(1e12))
        layout.endLayout()
        if not valid:
            return []
        glyphs = []
        for run in layout.glyphRuns():
            glyphs.extend(run.glyphIndexes())
        return glyphs

    pos = 0
    for glyph in indices:
        if glyph in glyph_map:
            known = glyph_map[glyph]
            if known == text[pos:pos+len(known)]:
                pos += len(known)
                continue

        # Prefixes longer than the remaining text are identical to the
        # last one tried, so there is no point in laying them out again
        longest = min(max_prefix, len(text) - pos)
        for length in range(1, longest + 1):
            candidate = text[pos:pos+length]
            glyphs = get_glyphs(candidate)
            if glyphs and glyphs[0] == glyph:
                glyph_map[glyph] = candidate
                pos += length
                break
        else:
            self.debug.append(
                'Failed to find glyph->unicode mapping for text: %s'%text)
            break

    return text[pos:]
|
||||||
|
|
||||||
@store_error
|
@store_error
|
||||||
def drawTextItem(self, point, text_item):
|
def drawTextItem(self, point, text_item):
|
||||||
# super(PdfEngine, self).drawTextItem(point+QPoint(0, 0), text_item)
|
# super(PdfEngine, self).drawTextItem(point+QPoint(0, 0), text_item)
|
||||||
text = type(u'')(text_item.text()).replace('\n', ' ')
|
text = type(u'')(text_item.text()).replace('\n', ' ')
|
||||||
text = unicodedata.normalize('NFKC', text)
|
text = unicodedata.normalize('NFKC', text)
|
||||||
tl = QTextLayout(text, text_item.font(), self.paintDevice())
|
tl = self.get_text_layout(text_item, text)
|
||||||
self.text_option.setTextDirection(Qt.RightToLeft if
|
|
||||||
text_item.renderFlags() & text_item.RightToLeft else Qt.LeftToRight)
|
|
||||||
tl.setTextOption(self.text_option)
|
|
||||||
tl.setPosition(point)
|
tl.setPosition(point)
|
||||||
tl.beginLayout()
|
tl.beginLayout()
|
||||||
line = tl.createLine()
|
line = tl.createLine()
|
||||||
@ -375,9 +433,10 @@ class PdfEngine(QPaintEngine):
|
|||||||
rf = run.rawFont()
|
rf = run.rawFont()
|
||||||
name = hash(bytes(rf.fontTable('name')))
|
name = hash(bytes(rf.fontTable('name')))
|
||||||
if name not in self.fonts:
|
if name not in self.fonts:
|
||||||
self.fonts[name] = FontMetrics(Sfnt(rf))
|
self.fonts[name] = Font(Sfnt(rf))
|
||||||
metrics = self.fonts[name]
|
metrics = self.fonts[name]
|
||||||
indices = run.glyphIndexes()
|
indices = run.glyphIndexes()
|
||||||
|
text = self.update_glyph_map(text, indices, text_item, metrics.glyph_map)
|
||||||
glyphs = []
|
glyphs = []
|
||||||
pdf_pos = point
|
pdf_pos = point
|
||||||
first_baseline = None
|
first_baseline = None
|
||||||
@ -489,7 +548,7 @@ if __name__ == '__main__':
|
|||||||
# f.setUnderline(True)
|
# f.setUnderline(True)
|
||||||
# f.setOverline(True)
|
# f.setOverline(True)
|
||||||
# f.setStrikeOut(True)
|
# f.setStrikeOut(True)
|
||||||
f.setFamily('OpenDyslexic')
|
f.setFamily('Calibri')
|
||||||
p.setFont(f)
|
p.setFont(f)
|
||||||
# p.scale(2, 2)
|
# p.scale(2, 2)
|
||||||
# p.rotate(45)
|
# p.rotate(45)
|
||||||
@ -497,6 +556,8 @@ if __name__ == '__main__':
|
|||||||
p.drawText(QPoint(100, 300), 'Some text ū --- Д AV ff ff')
|
p.drawText(QPoint(100, 300), 'Some text ū --- Д AV ff ff')
|
||||||
finally:
|
finally:
|
||||||
p.end()
|
p.end()
|
||||||
|
for line in dev.engine.debug:
|
||||||
|
print (line)
|
||||||
if dev.engine.errors:
|
if dev.engine.errors:
|
||||||
for err in dev.engine.errors: print (err)
|
for err in dev.engine.errors: print (err)
|
||||||
raise SystemExit(1)
|
raise SystemExit(1)
|
||||||
|
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re, unicodedata
|
import re
|
||||||
from itertools import izip, groupby
|
from itertools import izip, groupby
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from collections import Counter, OrderedDict
|
from collections import Counter, OrderedDict
|
||||||
@ -58,7 +58,7 @@ class FontStream(Stream):
|
|||||||
d['Subtype'] = Name('CIDFontType0C')
|
d['Subtype'] = Name('CIDFontType0C')
|
||||||
|
|
||||||
def to_hex_string(c):
    '''
    Return the integer ``c`` as a lowercase hexadecimal string, padded
    with leading zeros to a minimum of four digits.

    :param c: A non-negative integer, or anything int() accepts
    :return: The hexadecimal digits as a text string, without any prefix
    '''
    # %-formatting is used instead of slicing the output of hex(), as it
    # never emits the trailing 'L' that Python 2 adds for long values and,
    # unlike bytes(hex(...)), it also works on Python 3
    return u'%04x' % int(c)
|
||||||
|
|
||||||
class CMap(Stream):
|
class CMap(Stream):
|
||||||
|
|
||||||
@ -154,19 +154,17 @@ class Font(object):
|
|||||||
self.font_descriptor['FontFile'+('3' if self.is_otf else '2')
|
self.font_descriptor['FontFile'+('3' if self.is_otf else '2')
|
||||||
] = objects.add(self.font_stream)
|
] = objects.add(self.font_stream)
|
||||||
self.write_widths(objects)
|
self.write_widths(objects)
|
||||||
glyph_map = self.metrics.sfnt['cmap'].get_char_codes(self.used_glyphs)
|
self.write_to_unicode(objects)
|
||||||
self.write_to_unicode(objects, glyph_map)
|
pdf_subset(self.metrics.sfnt, self.used_glyphs)
|
||||||
pdf_subset(self.metrics.sfnt, set(glyph_map))
|
|
||||||
if self.is_otf:
|
if self.is_otf:
|
||||||
self.font_stream.write(self.metrics.sfnt['CFF '].raw)
|
self.font_stream.write(self.metrics.sfnt['CFF '].raw)
|
||||||
else:
|
else:
|
||||||
self.metrics.os2.zero_fstype()
|
self.metrics.os2.zero_fstype()
|
||||||
self.metrics.sfnt(self.font_stream)
|
self.metrics.sfnt(self.font_stream)
|
||||||
|
|
||||||
def write_to_unicode(self, objects):
    '''
    Build a CMap from the glyph map stored on this font's metrics, add it
    to ``objects`` and store the value returned by ``objects.add()`` as
    the ToUnicode entry of the font dictionary.
    '''
    metrics = self.metrics
    to_unicode = CMap(metrics.postscript_name, metrics.glyph_map,
            compress=self.compress)
    self.font_dict['ToUnicode'] = objects.add(to_unicode)
|
||||||
|
|
||||||
def write_widths(self, objects):
|
def write_widths(self, objects):
|
||||||
|
@ -13,7 +13,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
from struct import unpack_from, calcsize, pack
|
from struct import unpack_from, calcsize, pack
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
|
||||||
from calibre.utils.fonts.utils import get_bmp_glyph_ids, read_bmp_prefix
|
from calibre.utils.fonts.utils import get_bmp_glyph_ids
|
||||||
from calibre.utils.fonts.sfnt import UnknownTable, max_power_of_two
|
from calibre.utils.fonts.sfnt import UnknownTable, max_power_of_two
|
||||||
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
|
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
|
||||||
|
|
||||||
@ -165,33 +165,6 @@ class CmapTable(UnknownTable):
|
|||||||
ans[chars[i]] = glyph_id
|
ans[chars[i]] = glyph_id
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def get_char_codes(self, glyph_ids):
    '''
    Return a dict mapping glyph id to character code for those of the
    specified glyph ids that are reachable via the BMP cmap subtable.

    Every character code of every segment of the subtable is visited, so
    when several character codes map to the same glyph the last one
    visited is the one reported.

    :param glyph_ids: Iterable of the glyph ids of interest
    :return: dict of glyph id -> character code
    :raises UnsupportedFont: If there is no BMP cmap subtable
    '''
    if self.bmp_table is None:
        raise UnsupportedFont('This font has no Windows BMP cmap subtable.'
                ' Most likely a special purpose font.')
    ans = {}
    (start_count, end_count, range_offset, id_delta, glyph_id_len,
            glyph_id_map, array_len) = read_bmp_prefix(self.bmp_table, 0)

    glyph_ids = frozenset(glyph_ids)

    for i, ec in enumerate(end_count):
        sc = start_count[i]
        ro = range_offset[i]
        for code in xrange(sc, ec+1):
            if ro == 0:
                glyph_id = id_delta[i] + code
            else:
                idx = ro//2 + (code - sc) + i - array_len
                glyph_id = glyph_id_map[idx]
                if glyph_id != 0:
                    glyph_id += id_delta[i]
            # Glyph ids are 16 bit quantities, so the delta arithmetic
            # wraps around at 65536 (this was previously 0x1000, which
            # mangled every glyph id >= 4096)
            glyph_id %= 0x10000
            if glyph_id in glyph_ids:
                ans[glyph_id] = code

    return ans
|
|
||||||
|
|
||||||
def set_character_map(self, cmap):
|
def set_character_map(self, cmap):
|
||||||
self.version, self.num_tables = 0, 1
|
self.version, self.num_tables = 0, 1
|
||||||
fmt = b'>7H'
|
fmt = b'>7H'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user