false positive tuning in txt input and dehyphenate

This commit is contained in:
ldolse 2011-01-21 08:35:14 +08:00
parent d5f485d71b
commit d3bd5b07e8
2 changed files with 6 additions and 2 deletions

View File

@@ -224,6 +224,10 @@ class Dehyphenator(object):
return firsthalf+u'\u2014'+wraptags+secondhalf return firsthalf+u'\u2014'+wraptags+secondhalf
else: else:
if self.format == 'individual_words' and len(firsthalf) + len(secondhalf) <= 6:
if self.verbose > 2:
self.log("too short, returned hyphenated word: " + str(hyphenated))
return hyphenated
if len(firsthalf) <= 2 and len(secondhalf) <= 2: if len(firsthalf) <= 2 and len(secondhalf) <= 2:
if self.verbose > 2: if self.verbose > 2:
self.log("too short, returned hyphenated word: " + str(hyphenated)) self.log("too short, returned hyphenated word: " + str(hyphenated))

View File

@@ -175,9 +175,9 @@ def detect_formatting_type(txt):
# Block quote. # Block quote.
textile_count += len(re.findall(r'(?mu)^bq\.', txt)) textile_count += len(re.findall(r'(?mu)^bq\.', txt))
# Images # Images
textile_count += len(re.findall(r'\![^\s]+(:[^\s]+)*', txt)) textile_count += len(re.findall(r'\![^\s]+(?=.*?/)(:[^\s]+)*', txt))
# Links # Links
textile_count += len(re.findall(r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt)) textile_count += len(re.findall(r'"(?=".*?\()(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt))
if markdown_count > 5 or textile_count > 5: if markdown_count > 5 or textile_count > 5:
if markdown_count > textile_count: if markdown_count > textile_count: