diff --git a/src/calibre/utils/titlecase.py b/src/calibre/utils/titlecase.py index 3e01d3a526..1877b2c248 100755 --- a/src/calibre/utils/titlecase.py +++ b/src/calibre/utils/titlecase.py @@ -4,6 +4,7 @@ """ Original Perl version by: John Gruber http://daringfireball.net/ 10 May 2008 Python version by Stuart Colville http://muffinresearch.co.uk +Modifications to make it work with non-ascii chars by Kovid Goyal License: http://www.opensource.org/licenses/mit-license.php """ @@ -20,7 +21,7 @@ PUNCT = r"""!"#$%&'‘’()*+,\-‒–—―./:;?@[\\\]_`{|}~""" SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I) INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I) UC_ELSEWHERE = re.compile(r'[%s]*?[a-zA-Z]+[A-Z]+?' % PUNCT) -CAPFIRST = re.compile(r"^[%s]*?([A-Za-z])" % PUNCT) +CAPFIRST = re.compile(ur"^[%s]*?(\w)" % PUNCT, flags=re.UNICODE) SMALL_FIRST = re.compile(r'^([%s]*)(%s)\b' % (PUNCT, SMALL), re.I|re.U) SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I|re.U) SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I|re.U)