From b0acd1bd1997c5b6e057523311168e4e41a33218 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 21 Nov 2013 22:01:34 +0530 Subject: [PATCH] Titlecase: Fix incorrect title casing for some non-English words. Fixes #1253692 [Change Case capitalizes letter after accented character](https://bugs.launchpad.net/calibre/+bug/1253692) --- src/calibre/utils/titlecase.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/calibre/utils/titlecase.py b/src/calibre/utils/titlecase.py index 6eda2e93e4..44c0357300 100755 --- a/src/calibre/utils/titlecase.py +++ b/src/calibre/utils/titlecase.py @@ -21,9 +21,9 @@ SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I) INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I) UC_ELSEWHERE = re.compile(r'[%s]*?[a-zA-Z]+[A-Z]+?' % PUNCT) CAPFIRST = re.compile(r"^[%s]*?([A-Za-z])" % PUNCT) -SMALL_FIRST = re.compile(r'^([%s]*)(%s)\b' % (PUNCT, SMALL), re.I) -SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I) -SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I) +SMALL_FIRST = re.compile(r'^([%s]*)(%s)\b' % (PUNCT, SMALL), re.I|re.U) +SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I|re.U) +SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I|re.U) SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL) APOS_SECOND = re.compile(r"^[dol]{1}['‘]{1}[a-z]+$", re.I) ALL_CAPS = re.compile(r'^[A-Z0-9\s%s]+$' % PUNCT) @@ -80,7 +80,6 @@ def titlecase(text): hyphenated.append(CAPFIRST.sub(lambda m: icu_upper(m.group(0)), item)) line.append("-".join(hyphenated)) - result = " ".join(line) result = SMALL_FIRST.sub(lambda m: '%s%s' % (