Tweaked the minimum word-count threshold for preprocessing (raised from 20 to 50 words)

This commit is contained in:
ldolse 2011-01-08 09:23:32 +08:00
parent dd96c645f0
commit 90177a4205

View File

@@ -194,7 +194,7 @@ class PreProcessor(object):
totalwords = 0
totalwords = self.get_word_count(html)
if totalwords < 20:
if totalwords < 50:
self.log("not enough text, not preprocessing")
return html