mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Various fixes to Title Casing. Fixes #7846 (Title Case doesn't quite work)
This commit is contained in:
parent
dd2aa48916
commit
826221c0f5
@ -77,6 +77,9 @@ def py_strcmp(a, b):
|
||||
def icu_case_sensitive_strcmp(collator, a, b):
|
||||
return collator.strcmp(a, b)
|
||||
|
||||
def icu_capitalize(s):
    '''
    Return ``s`` lower-cased, with only its first character upper-cased.

    Case conversion is done by the module-level ``lower`` and ``upper``
    helpers. An empty value is returned unchanged.
    '''
    if not s:
        # Nothing to capitalize; indexing s[0] below would raise IndexError.
        return s
    s = lower(s)
    # count=1: replace only the leading occurrence, not every later
    # occurrence of the same character.
    return s.replace(s[0], upper(s[0]), 1)
|
||||
|
||||
load_icu()
|
||||
load_collator()
|
||||
@ -104,10 +107,6 @@ lower = (lambda s: s.lower()) if _icu_not_ok else \
|
||||
title_case = (lambda s: s.title()) if _icu_not_ok else \
|
||||
partial(_icu.title, get_locale())
|
||||
|
||||
def icu_capitalize(s):
    '''
    Return ``s`` lower-cased, with only its first character upper-cased.

    Case conversion is done by the module-level ``lower`` and ``upper``
    helpers. An empty value is returned unchanged.
    '''
    if not s:
        # Nothing to capitalize; indexing s[0] below would raise IndexError.
        return s
    s = lower(s)
    # Limit the replacement to the first occurrence. Without the count
    # argument every later occurrence of the first character would be
    # upper-cased as well (e.g. "alaska" -> "AlAskA").
    return s.replace(s[0], upper(s[0]), 1)
|
||||
|
||||
capitalize = (lambda s: s.capitalize()) if _icu_not_ok else \
|
||||
(lambda s: icu_capitalize(s))
|
||||
|
||||
@ -226,12 +225,16 @@ pêché'''
|
||||
test_strcmp(german + french)
|
||||
|
||||
print '\nTesting case transforms in current locale'
|
||||
for x in ('a', 'Alice\'s code'):
|
||||
from calibre.utils.titlecase import titlecase
|
||||
for x in ('a', 'Alice\'s code', 'macdonald\'s machine', '02 the wars'):
|
||||
print 'Upper: ', x, '->', 'py:', x.upper().encode('utf-8'), 'icu:', upper(x).encode('utf-8')
|
||||
print 'Lower: ', x, '->', 'py:', x.lower().encode('utf-8'), 'icu:', lower(x).encode('utf-8')
|
||||
print 'Title: ', x, '->', 'py:', x.title().encode('utf-8'), 'icu:', title_case(x).encode('utf-8')
|
||||
print 'Title: ', x, '->', 'py:', x.title().encode('utf-8'), 'icu:', title_case(x).encode('utf-8'), 'titlecase:', titlecase(x).encode('utf-8')
|
||||
print 'Capitalize:', x, '->', 'py:', x.capitalize().encode('utf-8'), 'icu:', capitalize(x).encode('utf-8')
|
||||
print
|
||||
|
||||
# }}}
|
||||
|
||||
if __name__ == '__main__':
|
||||
test()
|
||||
|
||||
|
@ -23,11 +23,12 @@ UC_ELSEWHERE = re.compile(r'[%s]*?[a-zA-Z]+[A-Z]+?' % PUNCT)
|
||||
CAPFIRST = re.compile(r"^[%s]*?([A-Za-z])" % PUNCT)
|
||||
SMALL_FIRST = re.compile(r'^([%s]*)(%s)\b' % (PUNCT, SMALL), re.I)
|
||||
SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I)
|
||||
SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I)
|
||||
SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL)
|
||||
APOS_SECOND = re.compile(r"^[dol]{1}['‘]{1}[a-z]+$", re.I)
|
||||
ALL_CAPS = re.compile(r'^[A-Z\s%s]+$' % PUNCT)
|
||||
UC_INITIALS = re.compile(r"^(?:[A-Z]{1}\.{1}|[A-Z]{1}\.{1}[A-Z]{1})+$")
|
||||
MAC_MC = re.compile(r"^([Mm]a?c)(\w+)")
|
||||
MAC_MC = re.compile(r"^([Mm]a?c)(.+)")
|
||||
|
||||
def titlecase(text):
|
||||
|
||||
@ -44,7 +45,7 @@ def titlecase(text):
|
||||
|
||||
all_caps = ALL_CAPS.match(text)
|
||||
|
||||
words = re.split('\s', text)
|
||||
words = re.split('\s+', text)
|
||||
line = []
|
||||
for word in words:
|
||||
if all_caps:
|
||||
@ -55,8 +56,8 @@ def titlecase(text):
|
||||
word = icu_lower(word)
|
||||
|
||||
if APOS_SECOND.match(word):
|
||||
word = word.replace(word[0], icu_upper(word[0]))
|
||||
word = word.replace(word[2], icu_upper(word[2]))
|
||||
word = word.replace(word[0], icu_upper(word[0]), 1)
|
||||
word = word[:2] + icu_upper(word[2]) + word[3:]
|
||||
line.append(word)
|
||||
continue
|
||||
if INLINE_PERIOD.search(word) or UC_ELSEWHERE.match(word):
|
||||
@ -67,7 +68,7 @@ def titlecase(text):
|
||||
continue
|
||||
|
||||
match = MAC_MC.match(word)
|
||||
if match:
|
||||
if match and not match.group(2).startswith('hin'):
|
||||
line.append("%s%s" % (capitalize(match.group(1)),
|
||||
capitalize(match.group(2))))
|
||||
continue
|
||||
@ -85,6 +86,10 @@ def titlecase(text):
|
||||
capitalize(m.group(2))
|
||||
), result)
|
||||
|
||||
result = SMALL_AFTER_NUM.sub(lambda m: '%s%s' % (m.group(1),
|
||||
capitalize(m.group(2))
|
||||
), result)
|
||||
|
||||
result = SMALL_LAST.sub(lambda m: capitalize(m.group(0)), result)
|
||||
|
||||
result = SUBPHRASE.sub(lambda m: '%s%s' % (
|
||||
|
Loading…
x
Reference in New Issue
Block a user