Heuristics: Tweak italicize patterns to make them more robust.

This commit is contained in:
John Schember 2011-02-05 10:46:32 -05:00
parent 17206061f7
commit ed2b94ac9d

View File

@@ -149,17 +149,17 @@ class HeuristicProcessor(object):
]
ITALICIZE_STYLE_PATS = [
r'(?msu)(?<=\s)_(?P<words>\S[^_]{0,40}?\S)?_(?=[\s\.,\!\?])',
r'(?msu)(?<=\s)/(?P<words>\S[^/]{0,40}?\S)?/(?=[\s\.,\!\?])',
r'(?msu)(?<=\s)~~(?P<words>\S[^~]{0,40}?\S)?~~(?=[\s\.,\!\?])',
r'(?msu)(?<=\s)\*(?P<words>\S[^\*]{0,40}?\S)?\*(?=[\s\.,\!\?])',
r'(?msu)(?<=\s)~(?P<words>\S[^~]{0,40}?\S)?~(?=[\s\.,\!\?])',
r'(?msu)(?<=\s)_/(?P<words>\S[^/_]{0,40}?\S)?/_(?=[\s\.,\!\?])',
r'(?msu)(?<=\s)_\*(?P<words>\S[^\*_]{0,40}?\S)?\*_(?=[\s\.,\!\?])',
r'(?msu)(?<=\s)\*/(?P<words>\S[^/\*]{0,40}?\S)?/\*(?=[\s\.,\!\?])',
r'(?msu)(?<=\s)_\*/(?P<words>\S[^\*_]{0,40}?\S)?/\*_(?=[\s\.,\!\?])',
r'(?msu)(?<=\s)/:(?P<words>\S[^:/]{0,40}?\S)?:/(?=[\s\.,\!\?])',
r'(?msu)(?<=\s)\|:(?P<words>\S[^:\|]{0,40}?\S)?:\|(?=[\s\.,\!\?])',
r'(?msu)(?<=[\s>"])_(?P<words>[^_]+)?_',
r'(?msu)(?<=[\s>"])/(?P<words>[^/]+)?/',
r'(?msu)(?<=[\s>"])~~(?P<words>[^~]+)?~~',
r'(?msu)(?<=[\s>"])\*(?P<words>[^\*]+)?\*',
r'(?msu)(?<=[\s>"])~(?P<words>[^~]+)?~',
r'(?msu)(?<=[\s>"])_/(?P<words>[^/_]+)?/_',
r'(?msu)(?<=[\s>"])_\*(?P<words>[^\*_]+)?\*_',
r'(?msu)(?<=[\s>"])\*/(?P<words>[^/\*]+)?/\*',
r'(?msu)(?<=[\s>"])_\*/(?P<words>[^\*_]+)?/\*_',
r'(?msu)(?<=[\s>"])/:(?P<words>[^:/]+)?:/',
r'(?msu)(?<=[\s>"])\|:(?P<words>[^:\|]+)?:\|',
]
for word in ITALICIZE_WORDS: