From e01af280545bdcafce22efea5e16500324eac09f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 13 Feb 2013 09:44:59 +0530 Subject: [PATCH] Transforming to titlecase - handle typographic hyphens in all caps phrases --- src/calibre/utils/titlecase.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/utils/titlecase.py b/src/calibre/utils/titlecase.py index 97daed23a9..8a9d1ecd1a 100755 --- a/src/calibre/utils/titlecase.py +++ b/src/calibre/utils/titlecase.py @@ -15,7 +15,7 @@ __all__ = ['titlecase'] __version__ = '0.5' SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\.?|via|vs\.?' -PUNCT = r"""!"#$%&'‘()*+,\-./:;?@[\\\]_`{|}~""" +PUNCT = r"""!"#$%&'‘()*+,‒\–—―./:;?@[\\\]_`{|}~""" SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I) INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I) @@ -26,7 +26,7 @@ SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I) SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I) SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL) APOS_SECOND = re.compile(r"^[dol]{1}['‘]{1}[a-z]+$", re.I) -ALL_CAPS = re.compile(r'^[A-Z\s%s]+$' % PUNCT) +ALL_CAPS = re.compile(r'^[A-Z0-9\s%s]+$' % PUNCT) UC_INITIALS = re.compile(r"^(?:[A-Z]{1}\.{1}|[A-Z]{1}\.{1}[A-Z]{1})+$") _lang = None