mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Ensure pmlz output is converting unicode characters to character codes.
This commit is contained in:
parent
02c7fb0209
commit
2df4d01923
@ -88,7 +88,7 @@ HTML_PML_RULES = [
|
|||||||
(re.compile('<a.*?href="(?P<target>#.+?).*?">(?P<text>)</a>', re.DOTALL), lambda match: '\\q="%s"%s\\q' % (match.group('target'), match.group('text'))),
|
(re.compile('<a.*?href="(?P<target>#.+?).*?">(?P<text>)</a>', re.DOTALL), lambda match: '\\q="%s"%s\\q' % (match.group('target'), match.group('text'))),
|
||||||
#(re.compile('<img.*?src="images/(?P<name>.+?)".*?>'), lambda match: '\\m="%s"' % match.group('name')),
|
#(re.compile('<img.*?src="images/(?P<name>.+?)".*?>'), lambda match: '\\m="%s"' % match.group('name')),
|
||||||
(re.compile('<img.*?src="(?P<name>.+?)".*?>(.*?</img>)*'), lambda match: '\\m="%s"' % image_name(match.group('name').strip('\x00'))),
|
(re.compile('<img.*?src="(?P<name>.+?)".*?>(.*?</img>)*'), lambda match: '\\m="%s"' % image_name(match.group('name').strip('\x00'))),
|
||||||
#(re.compile('&#(?P<num>\d\d\d\d);'), lambda match: '\\U%s' % int(match.group('num'))),
|
(re.compile('&#(?P<num>\d\d\d\d);'), lambda match: '\\U%s' % int(match.group('num'))),
|
||||||
(re.compile('&#(?P<num>\d\d\d);'), lambda match: '\\a%s' % match.group('num')),
|
(re.compile('&#(?P<num>\d\d\d);'), lambda match: '\\a%s' % match.group('num')),
|
||||||
(re.compile('<small .*?>(?P<text>.+?)</small>', re.DOTALL), lambda match: '\\k%s\\k' % match.group('text')),
|
(re.compile('<small .*?>(?P<text>.+?)</small>', re.DOTALL), lambda match: '\\k%s\\k' % match.group('text')),
|
||||||
(re.compile('<small>(?P<text>.+?)</small>', re.DOTALL), lambda match: '\\k%s\\k' % match.group('text')),
|
(re.compile('<small>(?P<text>.+?)</small>', re.DOTALL), lambda match: '\\k%s\\k' % match.group('text')),
|
||||||
@ -163,5 +163,12 @@ def html_to_pml(html):
|
|||||||
pml += body
|
pml += body
|
||||||
|
|
||||||
# Replace symbols outside of cp1252 with \Uxxxx
|
# Replace symbols outside of cp1252 with \Uxxxx
|
||||||
|
chars = set(pml)
|
||||||
|
unichars = []
|
||||||
|
for c in chars:
|
||||||
|
if ord(c) > 128:
|
||||||
|
unichars.append(c)
|
||||||
|
for u in unichars:
|
||||||
|
pml = pml.replace(u, '\U%s' % hex(ord(u))[2:].rjust(4, '0'))
|
||||||
|
|
||||||
return pml
|
return pml
|
||||||
|
Loading…
x
Reference in New Issue
Block a user