This commit is contained in:
ldolse 2010-09-28 16:21:34 +08:00
parent b7f6d820a7
commit 443d45c560

View File

@ -191,13 +191,13 @@ class Dehyphenator(object):
lookupword = self.removesuffixes.sub('', dehyphenated) lookupword = self.removesuffixes.sub('', dehyphenated)
if self.prefixes.match(firsthalf) is None: if self.prefixes.match(firsthalf) is None:
lookupword = self.removeprefix.sub('', lookupword) lookupword = self.removeprefix.sub('', lookupword)
print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated) #print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
try: try:
searchresult = self.html.find(str.lower(lookupword)) searchresult = self.html.find(str.lower(lookupword))
except: except:
return hyphenated return hyphenated
if self.format == 'html_cleanup': if self.format == 'html_cleanup':
if self.html.find(lookupword) != -1 or self.html.find(str.lower(lookupword)) != -1: if self.html.find(lookupword) != -1 or searchresult != -1:
#print "Cleanup:returned dehyphenated word: " + str(dehyphenated) #print "Cleanup:returned dehyphenated word: " + str(dehyphenated)
return dehyphenated return dehyphenated
elif self.html.find(hyphenated) != -1: elif self.html.find(hyphenated) != -1:
@ -208,11 +208,11 @@ class Dehyphenator(object):
return firsthalf+u'\u2014'+wraptags+secondhalf return firsthalf+u'\u2014'+wraptags+secondhalf
else: else:
if self.html.find(lookupword) != -1 or self.html.find(str.lower(lookupword)) != -1: if self.html.find(lookupword) != -1 or searchresult != -1:
#print "returned dehyphenated word: " + str(dehyphenated) #print "returned dehyphenated word: " + str(dehyphenated)
return dehyphenated return dehyphenated
else: else:
#print "returned hyphenated word: " + str(hyphenated) #print " returned hyphenated word: " + str(hyphenated)
return hyphenated return hyphenated
def __call__(self, html, format, length=1): def __call__(self, html, format, length=1):