Fix detection of <a/> tags

This commit is contained in:
Kovid Goyal 2007-07-30 06:24:16 +00:00
parent a79376875c
commit 1a36564830

View File

@@ -233,7 +233,7 @@ class HTMLConverter(object):
# conversion converts it into \xa0 which is not a space in LRF # conversion converts it into \xa0 which is not a space in LRF
(re.compile('&nbsp;'), lambda match : ' '), (re.compile('&nbsp;'), lambda match : ' '),
# Close <a /> tags # Close <a /> tags
(re.compile("(<\s*[aA]\s+.*\/)\s*>"), (re.compile("(<a\s+.*?)/>|<a/>", re.IGNORECASE),
lambda match: match.group(1)+"></a>"), lambda match: match.group(1)+"></a>"),
# Strip comments from <style> tags. This is needed as # Strip comments from <style> tags. This is needed as
# sometimes there are unterminated comments # sometimes there are unterminated comments
@@ -395,6 +395,7 @@ class HTMLConverter(object):
self.soup = BeautifulSoup(raw, self.soup = BeautifulSoup(raw,
convertEntities=BeautifulSoup.HTML_ENTITIES, convertEntities=BeautifulSoup.HTML_ENTITIES,
markupMassage=nmassage) markupMassage=nmassage)
#print self.soup
print 'done\n\tConverting to BBeB...', print 'done\n\tConverting to BBeB...',
sys.stdout.flush() sys.stdout.flush()
self.verbose = verbose self.verbose = verbose