Handle &nbsp; correctly

This commit is contained in:
Kovid Goyal 2007-07-13 23:21:29 +00:00
parent 8f38a29165
commit 847030ea5b

View File

@@ -223,7 +223,8 @@ class HTMLConverter(object):
PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE) PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE)
IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction) IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction)
# Fix <a /> elements # Fix <a /> elements
MARKUP_MASSAGE = [(re.compile("(<\s*[aA]\s+.*\/)\s*>"), #Close <a /> tags MARKUP_MASSAGE = [(re.compile('&nbsp;'), lambda match : ' '), # Convert &nbsp; into a normal space as the default conversion converts it into \xa0 which is not a space in LRF
(re.compile("(<\s*[aA]\s+.*\/)\s*>"), #Close <a /> tags
lambda match: match.group(1)+"></a>"), lambda match: match.group(1)+"></a>"),
# Strip comments from <style> tags. This is needed as # Strip comments from <style> tags. This is needed as
# sometimes there are unterminated comments # sometimes there are unterminated comments
@@ -1003,6 +1004,7 @@ class HTMLConverter(object):
dropcaps = tag.has_key('class') and tag['class'] == 'libprs500_dropcaps' dropcaps = tag.has_key('class') and tag['class'] == 'libprs500_dropcaps'
self.process_image(path, tag_css, width, height, dropcaps=dropcaps) self.process_image(path, tag_css, width, height, dropcaps=dropcaps)
else: else:
if self.verbose:
print >>sys.stderr, "Failed to process:", tag print >>sys.stderr, "Failed to process:", tag
elif tagname in ['style', 'link']: elif tagname in ['style', 'link']:
def update_css(ncss): def update_css(ncss):