Handle &nbsp; correctly

2025-07-09 03:04:10 -04:00 · 2007-07-13 23:21:29 +00:00 · 2007-07-13 23:21:29 +00:00 · 847030ea5b
commit 847030ea5b
parent 8f38a29165
1 changed file with 4 additions and 2 deletions
--- a/src/libprs500/ebooks/lrf/html/convert_from.py
+++ b/src/libprs500/ebooks/lrf/html/convert_from.py
@@ -223,7 +223,8 @@ class HTMLConverter(object):
    PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE)
    IGNORED_TAGS   = (Comment, Declaration, ProcessingInstruction)
    # Fix <a /> elements 
-    MARKUP_MASSAGE   = [(re.compile("(<\s*[aA]\s+.*\/)\s*>"), #Close <a /> tags
+    MARKUP_MASSAGE   = [(re.compile('&nbsp;'), lambda match : ' '), # Convert &nbsp; into a normal space as the default conversion converts it into \xa0 which is not a space in LRF
                        (re.compile("(<\s*[aA]\s+.*\/)\s*>"), #Close <a /> tags
                         lambda match: match.group(1)+"></a>"),
                         # Strip comments from <style> tags. This is needed as 
                         # sometimes there are unterminated comments
@@ -1003,6 +1004,7 @@ class HTMLConverter(object):
                dropcaps = tag.has_key('class') and tag['class'] == 'libprs500_dropcaps'
                self.process_image(path, tag_css, width, height, dropcaps=dropcaps)
            else:
                if self.verbose:
                    print >>sys.stderr, "Failed to process:", tag
        elif tagname in ['style', 'link']:
            def update_css(ncss):