IGN:PML Output: Replace non-breaking spaces with normal spaces

This commit is contained in:
Kovid Goyal 2009-10-27 13:07:37 -06:00
parent 53f7cdec0b
commit 1d7c3277ec

View File

@@ -153,6 +153,10 @@ class PMLMLizer(object):
for unused in anchors.difference(links):
text = text.replace('\\Q="%s"' % unused, '')
# Replace bad characters.
text = text.replace(u'\xc2', '')
text = text.replace(u'\xa0', ' ')
# Turn all html entities into unicode. This should not be necessary as
# lxml should have already done this but we want to be sure it happens.
for entity in set(re.findall('&.+?;', text)):