improved negative lookaheads in heuristics

This commit is contained in:
ldolse 2011-01-24 13:47:53 +08:00
parent 984a7510fc
commit a20cd3336d

View File

@ -203,8 +203,8 @@ class HeuristicProcessor(object):
blank_lines = "" blank_lines = ""
opt_title_open = "(" opt_title_open = "("
opt_title_close = ")?" opt_title_close = ")?"
n_lookahead_open = "\s+(?!" n_lookahead_open = "(?!\s*"
n_lookahead_close = ")" n_lookahead_close = ")\s*"
default_title = r"(<[ibu][^>]*>)?\s{0,3}(?!Chapter)([\w\:\'\"-]+\s{0,3}){1,5}?(</[ibu][^>]*>)?(?=<)" default_title = r"(<[ibu][^>]*>)?\s{0,3}(?!Chapter)([\w\:\'\"-]+\s{0,3}){1,5}?(</[ibu][^>]*>)?(?=<)"
simple_title = r"(<[ibu][^>]*>)?\s{0,3}(?!(Chapter|\s+<)).{0,65}?(</[ibu][^>]*>)?(?=<)" simple_title = r"(<[ibu][^>]*>)?\s{0,3}(?!(Chapter|\s+<)).{0,65}?(</[ibu][^>]*>)?(?=<)"