PDF Output: Strip soft hyphen (U+00AD) and zero-width space (U+200B) characters from the text, as their presence prevents text search from working in the generated PDF

This commit is contained in:
Kovid Goyal 2018-05-24 13:57:26 +05:30
parent a5b807ea0f
commit c6c689f9e3
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -246,9 +246,12 @@ class PdfEngine(QPaintEngine):
text_item.font().family(), e)) text_item.font().family(), e))
glyph_map = self.qt_hack.get_glyph_map(text_item) glyph_map = self.qt_hack.get_glyph_map(text_item)
gm = {} gm = {}
ans.ignore_glyphs = set()
for uc, glyph_id in enumerate(glyph_map): for uc, glyph_id in enumerate(glyph_map):
if glyph_id not in gm: if glyph_id not in gm:
gm[glyph_id] = unichr(uc) gm[glyph_id] = unichr(uc)
if uc in (0xad, 0x200b):
ans.ignore_glyphs.add(glyph_id)
ans.full_glyph_map = gm ans.full_glyph_map = gm
return ans return ans
@ -273,20 +276,25 @@ class PdfEngine(QPaintEngine):
except UnsupportedFont: except UnsupportedFont:
self.debug('Failed to load font: %s, drawing text as outlines...' % names) self.debug('Failed to load font: %s, drawing text as outlines...' % names)
return super(PdfEngine, self).drawTextItem(point, text_item) return super(PdfEngine, self).drawTextItem(point, text_item)
for glyph_id in gi.indices: indices, positions = [], []
ignore_glyphs = metrics.ignore_glyphs
for glyph_id, gpos in zip(gi.indices, gi.positions):
if glyph_id not in ignore_glyphs:
indices.append(glyph_id), positions.append(gpos)
for glyph_id in indices:
try: try:
metrics.glyph_map[glyph_id] = metrics.full_glyph_map[glyph_id] metrics.glyph_map[glyph_id] = metrics.full_glyph_map[glyph_id]
except (KeyError, ValueError): except (KeyError, ValueError):
pass pass
glyphs = [] glyphs = []
last_x = last_y = 0 last_x = last_y = 0
for glyph_index, (x, y) in zip(gi.indices, gi.positions): for glyph_index, (x, y) in zip(indices, positions):
glyphs.append((x-last_x, last_y - y, glyph_index)) glyphs.append((x-last_x, last_y - y, glyph_index))
last_x, last_y = x, y last_x, last_y = x, y
if not self.content_written_to_current_page: if not self.content_written_to_current_page:
dy = self.graphics.current_state.transform.dy() dy = self.graphics.current_state.transform.dy()
ypositions = [y + dy for x, y in gi.positions] ypositions = [y + dy for x, y in positions]
miny = min(ypositions or (0,)) miny = min(ypositions or (0,))
maxy = max(ypositions or (self.pixel_height,)) maxy = max(ypositions or (self.pixel_height,))
page_top = self.header_height if self.has_headers else 0 page_top = self.header_height if self.has_headers else 0