Handle nbsp in all its incarnations.

This commit is contained in:
Kovid Goyal 2007-10-09 23:02:28 +00:00
parent 59a1af6050
commit d4b540b8cf

View File

@@ -70,7 +70,7 @@ class HTMLConverter(object):
(re.compile(r'<a.*?>(.*?)</a\s*>', re.DOTALL|re.IGNORECASE), (re.compile(r'<a.*?>(.*?)</a\s*>', re.DOTALL|re.IGNORECASE),
lambda match: re.compile(r'<\s*?p.*?>', re.IGNORECASE).sub('', match.group())), lambda match: re.compile(r'<\s*?p.*?>', re.IGNORECASE).sub('', match.group())),
# Workaround bug in BeautifulSoup &nbsp; handling # Workaround bug in BeautifulSoup &nbsp; handling
(re.compile(r'&nbsp;', re.IGNORECASE), lambda match : u'\uffff') (re.compile(u'&nbsp;|&#160;|&#xa0;|\xa0', re.IGNORECASE), lambda match : u'\uffff')
] ]
# Fix Baen markup # Fix Baen markup
BAEN = [ BAEN = [