Handle nbsp in all its incarnations.

2025-07-09 03:04:10 -04:00 · 2007-10-09 23:02:28 +00:00 · 2007-10-09 23:02:28 +00:00 · d4b540b8cf
commit d4b540b8cf
parent 59a1af6050
1 changed file with 1 addition and 1 deletion
--- a/src/libprs500/ebooks/lrf/html/convert_from.py
+++ b/src/libprs500/ebooks/lrf/html/convert_from.py
@@ -70,7 +70,7 @@ class HTMLConverter(object):
                        (re.compile(r'<a.*?>(.*?)</a\s*>', re.DOTALL|re.IGNORECASE),
                         lambda match: re.compile(r'<\s*?p.*?>', re.IGNORECASE).sub('', match.group())),
                        # Workaround bug in BeautifulSoup &nbsp; handling
-                        (re.compile(r'&nbsp;', re.IGNORECASE), lambda match : u'\uffff')
+                        (re.compile(u'&nbsp;|&#160;|&#xa0;|\xa0', re.IGNORECASE), lambda match : u'\uffff')
                         ]
    # Fix Baen markup
    BAEN = [