Fix titlecasing failing to capitalize the first letter of a word when that letter is not an ASCII (English) letter. Fixes #1265202 [ToC Editor - remaining bug with titlecase](https://bugs.launchpad.net/calibre/+bug/1265202)

This commit is contained in:
Kovid Goyal 2013-12-31 22:25:24 +05:30
parent f11a7a1d25
commit c1920935d5

View File

@@ -4,6 +4,7 @@
"""
Original Perl version by: John Gruber http://daringfireball.net/ 10 May 2008
Python version by Stuart Colville http://muffinresearch.co.uk
Modifications to make it work with non-ascii chars by Kovid Goyal
License: http://www.opensource.org/licenses/mit-license.php
"""
@@ -20,7 +21,7 @@ PUNCT = r"""!"#$%&'()*+,\-‒–—―./:;?@[\\\]_`{|}~"""
# Matches a string that, in its entirety, matches the SMALL sub-pattern
# (SMALL is defined outside this excerpt); case-insensitive.
SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
# Matches a letter, a literal period, then another letter (e.g. the "e.c"
# in "example.com"); case-insensitive, ASCII letters only.
INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I)
# Matches optional PUNCT characters, then one or more ASCII letters followed
# by at least one uppercase ASCII letter (an uppercase letter that is not the
# first letter).
UC_ELSEWHERE = re.compile(r'[%s]*?[a-zA-Z]+[A-Z]+?' % PUNCT)
# Pre-change pattern (the line this diff removes): skips leading PUNCT
# characters and captures the first character only if it is an ASCII letter.
CAPFIRST = re.compile(r"^[%s]*?([A-Za-z])" % PUNCT)
# Replacement pattern (the line this diff adds): skips leading PUNCT
# characters and captures the first word character; with re.UNICODE, \w also
# matches non-ASCII letters, as well as digits and the underscore.
# The `ur` literal prefix is Python 2 syntax; Python 3 does not accept it.
# NOTE(review): `_` is in PUNCT and is also matched by \w, so with the lazy
# `*?` a leading underscore is itself captured as group 1 -- confirm intended.
CAPFIRST = re.compile(ur"^[%s]*?(\w)" % PUNCT, flags=re.UNICODE)
# At the start of the string: optional PUNCT characters (group 1) followed by
# a SMALL word (group 2) ending at a word boundary; case-insensitive, Unicode.
SMALL_FIRST = re.compile(r'^([%s]*)(%s)\b' % (PUNCT, SMALL), re.I|re.U)
# At the end of the string: a SMALL word (group 1) starting at a word
# boundary, followed by at most one PUNCT character.
SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I|re.U)
# Digits plus trailing whitespace (group 1) followed by the article
# "a", "an" or "the" (group 2) ending at a word boundary; case-insensitive.
SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I|re.U)