Titlecase: Fix incorrect title casing for some non-English words. Fixes #1253692 [Change Case captalizes letter after accented character](https://bugs.launchpad.net/calibre/+bug/1253692)

This commit is contained in:
Kovid Goyal 2013-11-21 22:01:34 +05:30
parent 2de03bb4b4
commit b0acd1bd19

View File

@ -21,9 +21,9 @@ SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I)
UC_ELSEWHERE = re.compile(r'[%s]*?[a-zA-Z]+[A-Z]+?' % PUNCT)
CAPFIRST = re.compile(r"^[%s]*?([A-Za-z])" % PUNCT)
SMALL_FIRST = re.compile(r'^([%s]*)(%s)\b' % (PUNCT, SMALL), re.I)
SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I)
SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I)
SMALL_FIRST = re.compile(r'^([%s]*)(%s)\b' % (PUNCT, SMALL), re.I|re.U)
SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I|re.U)
SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I|re.U)
SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL)
APOS_SECOND = re.compile(r"^[dol]{1}[']{1}[a-z]+$", re.I)
ALL_CAPS = re.compile(r'^[A-Z0-9\s%s]+$' % PUNCT)
@ -80,7 +80,6 @@ def titlecase(text):
hyphenated.append(CAPFIRST.sub(lambda m: icu_upper(m.group(0)), item))
line.append("-".join(hyphenated))
result = " ".join(line)
result = SMALL_FIRST.sub(lambda m: '%s%s' % (