Rework Italicize patterns to match fewer false positives.

This commit is contained in:
John Schember 2011-01-18 06:45:53 -05:00
parent 152fac4782
commit cff26ebcbb

View File

@@ -139,17 +139,17 @@ class HeuristicProcessor(object):
] ]
ITALICIZE_STYLE_PATS = [ ITALICIZE_STYLE_PATS = [
r'(?msu)_(?P<words>[^\s][^<>_]+?[^\s])?_', r'(?msu)(?<=\s)_(?P<words>\S[^_]{0,40}?\S)?_(?=\s)',
r'(?msu)/(?P<words>[^\s][^<>/]+?[^\s])?/', r'(?msu)(?<=\s)/(?P<words>\S[^/]{0,40}?\S)?/(?=\s)',
r'(?msu)~~(?P<words>[^\s][^<>~]+?[^\s])?~~', r'(?msu)(?<=\s)~~(?P<words>\S[^~]{0,40}?\S)?~~(?=\s)',
r'(?msu)\*(?P<words>[^\s][^<>\*]+?[^\s])?\*', r'(?msu)(?<=\s)\*(?P<words>\S[^\*]{0,40}?\S)?\*(?=\s)',
r'(?msu)~(?P<words>[^\s][^<>~]+?[^\s])?~', r'(?msu)(?<=\s)~(?P<words>\S[^~]{0,40}?\S)?~(?=\s)',
r'(?msu)_/(?P<words>[^\s][^<>/_]+?[^\s])?/_', r'(?msu)(?<=\s)_/(?P<words>\S[^/_]{0,40}?\S)?/_(?=\s)',
r'(?msu)_\*(?P<words>[^\s][^<>\*_]+?[^\s])?\*_', r'(?msu)(?<=\s)_\*(?P<words>\S[^\*_]{0,40}?\S)?\*_(?=\s)',
r'(?msu)\*/(?P<words>[^\s][^<>/\*]+?[^\s])?/\*', r'(?msu)(?<=\s)\*/(?P<words>\S[^/\*]{0,40}?\S)?/\*(?=\s)',
r'(?msu)_\*/(?P<words>[^\s][^<>\*_]+?[^\s])?/\*_', r'(?msu)(?<=\s)_\*/(?P<words>\S[^\*_]{0,40}?\S)?/\*_(?=\s)',
r'(?msu)/:(?P<words>[^\s][^<>:/]+?[^\s])?:/', r'(?msu)(?<=\s)/:(?P<words>\S[^:/]{0,40}?\S)?:/(?=\s)',
r'(?msu)\|:(?P<words>[^\s][^<>:\|]+?[^\s])?:\|', r'(?msu)(?<=\s)\|:(?P<words>\S[^:\|]{0,40}?\S)?:\|(?=\s)',
] ]
for word in ITALICIZE_WORDS: for word in ITALICIZE_WORDS: