From cff26ebcbba92ba1bf9d65e7dcc4393b156677f2 Mon Sep 17 00:00:00 2001
From: John Schember
Date: Tue, 18 Jan 2011 06:45:53 -0500
Subject: [PATCH] Rework Italicize patterns to match less false positives.

---
 src/calibre/ebooks/conversion/utils.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index bfb5f1c153..5fc986b7d8 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -139,17 +139,17 @@ class HeuristicProcessor(object):
         ]
 
         ITALICIZE_STYLE_PATS = [
-            r'(?msu)_(?P<words>[^\s][^<>_]+?[^\s])?_',
-            r'(?msu)/(?P<words>[^\s][^<>/]+?[^\s])?/',
-            r'(?msu)~~(?P<words>[^\s][^<>~]+?[^\s])?~~',
-            r'(?msu)\*(?P<words>[^\s][^<>\*]+?[^\s])?\*',
-            r'(?msu)~(?P<words>[^\s][^<>~]+?[^\s])?~',
-            r'(?msu)_/(?P<words>[^\s][^<>/_]+?[^\s])?/_',
-            r'(?msu)_\*(?P<words>[^\s][^<>\*_]+?[^\s])?\*_',
-            r'(?msu)\*/(?P<words>[^\s][^<>/\*]+?[^\s])?/\*',
-            r'(?msu)_\*/(?P<words>[^\s][^<>\*_]+?[^\s])?/\*_',
-            r'(?msu)/:(?P<words>[^\s][^<>:/]+?[^\s])?:/',
-            r'(?msu)\|:(?P<words>[^\s][^<>:\|]+?[^\s])?:\|',
+            r'(?msu)(?<=\s)_(?P<words>\S[^_]{0,40}?\S)?_(?=\s)',
+            r'(?msu)(?<=\s)/(?P<words>\S[^/]{0,40}?\S)?/(?=\s)',
+            r'(?msu)(?<=\s)~~(?P<words>\S[^~]{0,40}?\S)?~~(?=\s)',
+            r'(?msu)(?<=\s)\*(?P<words>\S[^\*]{0,40}?\S)?\*(?=\s)',
+            r'(?msu)(?<=\s)~(?P<words>\S[^~]{0,40}?\S)?~(?=\s)',
+            r'(?msu)(?<=\s)_/(?P<words>\S[^/_]{0,40}?\S)?/_(?=\s)',
+            r'(?msu)(?<=\s)_\*(?P<words>\S[^\*_]{0,40}?\S)?\*_(?=\s)',
+            r'(?msu)(?<=\s)\*/(?P<words>\S[^/\*]{0,40}?\S)?/\*(?=\s)',
+            r'(?msu)(?<=\s)_\*/(?P<words>\S[^\*_]{0,40}?\S)?/\*_(?=\s)',
+            r'(?msu)(?<=\s)/:(?P<words>\S[^:/]{0,40}?\S)?:/(?=\s)',
+            r'(?msu)(?<=\s)\|:(?P<words>\S[^:\|]{0,40}?\S)?:\|(?=\s)',
         ]
 
         for word in ITALICIZE_WORDS: