Heuristics: Tweak italicize patterns to make them more robust.

This commit is contained in:
John Schember 2011-02-05 10:46:32 -05:00
parent 17206061f7
commit ed2b94ac9d

View File

@@ -149,17 +149,17 @@ class HeuristicProcessor(object):
] ]
ITALICIZE_STYLE_PATS = [ ITALICIZE_STYLE_PATS = [
r'(?msu)(?<=\s)_(?P<words>\S[^_]{0,40}?\S)?_(?=[\s\.,\!\?])', r'(?msu)(?<=[\s>"])_(?P<words>[^_]+)?_',
r'(?msu)(?<=\s)/(?P<words>\S[^/]{0,40}?\S)?/(?=[\s\.,\!\?])', r'(?msu)(?<=[\s>"])/(?P<words>[^/]+)?/',
r'(?msu)(?<=\s)~~(?P<words>\S[^~]{0,40}?\S)?~~(?=[\s\.,\!\?])', r'(?msu)(?<=[\s>"])~~(?P<words>[^~]+)?~~',
r'(?msu)(?<=\s)\*(?P<words>\S[^\*]{0,40}?\S)?\*(?=[\s\.,\!\?])', r'(?msu)(?<=[\s>"])\*(?P<words>[^\*]+)?\*',
r'(?msu)(?<=\s)~(?P<words>\S[^~]{0,40}?\S)?~(?=[\s\.,\!\?])', r'(?msu)(?<=[\s>"])~(?P<words>[^~]+)?~',
r'(?msu)(?<=\s)_/(?P<words>\S[^/_]{0,40}?\S)?/_(?=[\s\.,\!\?])', r'(?msu)(?<=[\s>"])_/(?P<words>[^/_]+)?/_',
r'(?msu)(?<=\s)_\*(?P<words>\S[^\*_]{0,40}?\S)?\*_(?=[\s\.,\!\?])', r'(?msu)(?<=[\s>"])_\*(?P<words>[^\*_]+)?\*_',
r'(?msu)(?<=\s)\*/(?P<words>\S[^/\*]{0,40}?\S)?/\*(?=[\s\.,\!\?])', r'(?msu)(?<=[\s>"])\*/(?P<words>[^/\*]+)?/\*',
r'(?msu)(?<=\s)_\*/(?P<words>\S[^\*_]{0,40}?\S)?/\*_(?=[\s\.,\!\?])', r'(?msu)(?<=[\s>"])_\*/(?P<words>[^\*_]+)?/\*_',
r'(?msu)(?<=\s)/:(?P<words>\S[^:/]{0,40}?\S)?:/(?=[\s\.,\!\?])', r'(?msu)(?<=[\s>"])/:(?P<words>[^:/]+)?:/',
r'(?msu)(?<=\s)\|:(?P<words>\S[^:\|]{0,40}?\S)?:\|(?=[\s\.,\!\?])', r'(?msu)(?<=[\s>"])\|:(?P<words>[^:\|]+)?:\|',
] ]
for word in ITALICIZE_WORDS: for word in ITALICIZE_WORDS: