From 2f711f84fa1099948077b23235e5594270233db6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 22 Dec 2013 08:47:51 +0530 Subject: [PATCH] Title casing: Fix presence of some non-English characters or smart punctuation causing all-caps text to not be properly lowercased --- src/calibre/utils/titlecase.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/calibre/utils/titlecase.py b/src/calibre/utils/titlecase.py index 44c0357300..3e01d3a526 100755 --- a/src/calibre/utils/titlecase.py +++ b/src/calibre/utils/titlecase.py @@ -9,13 +9,13 @@ License: http://www.opensource.org/licenses/mit-license.php import re -from calibre.utils.icu import capitalize +from calibre.utils.icu import capitalize, upper __all__ = ['titlecase'] __version__ = '0.5' SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\.?|via|vs\.?' -PUNCT = r"""!"#$%&'‘()*+,\-‒–—―./:;?@[\\\]_`{|}~""" +PUNCT = r"""!"#$%&'‘’()*+,\-‒–—―./:;?@[\\\]_`{|}~""" SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I) INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I) @@ -26,7 +26,6 @@ SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I|re.U) SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I|re.U) SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL) APOS_SECOND = re.compile(r"^[dol]{1}['‘]{1}[a-z]+$", re.I) -ALL_CAPS = re.compile(r'^[A-Z0-9\s%s]+$' % PUNCT) UC_INITIALS = re.compile(r"^(?:[A-Z]{1}\.{1}|[A-Z]{1}\.{1}[A-Z]{1})+$") _lang = None @@ -51,7 +50,7 @@ def titlecase(text): """ - all_caps = ALL_CAPS.match(text) + all_caps = upper(text) == text words = re.split('\s+', text) line = []