mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Title casing: Fix presence of some non-English characters or smart punctuation causing all-caps text to not be properly lowercased
This commit is contained in:
parent
f588e2d9e2
commit
2f711f84fa
@@ -9,13 +9,13 @@ License: http://www.opensource.org/licenses/mit-license.php
|
||||
|
||||
import re
|
||||
|
||||
from calibre.utils.icu import capitalize
|
||||
from calibre.utils.icu import capitalize, upper
|
||||
|
||||
__all__ = ['titlecase']
|
||||
__version__ = '0.5'
|
||||
|
||||
SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\.?|via|vs\.?'
|
||||
PUNCT = r"""!"#$%&'‘()*+,\-‒–—―./:;?@[\\\]_`{|}~"""
|
||||
PUNCT = r"""!"#$%&'‘’()*+,\-‒–—―./:;?@[\\\]_`{|}~"""
|
||||
|
||||
SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
|
||||
INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I)
|
||||
@@ -26,7 +26,6 @@ SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I|re.U)
|
||||
SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I|re.U)
|
||||
SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL)
|
||||
APOS_SECOND = re.compile(r"^[dol]{1}['‘]{1}[a-z]+$", re.I)
|
||||
ALL_CAPS = re.compile(r'^[A-Z0-9\s%s]+$' % PUNCT)
|
||||
UC_INITIALS = re.compile(r"^(?:[A-Z]{1}\.{1}|[A-Z]{1}\.{1}[A-Z]{1})+$")
|
||||
|
||||
_lang = None
|
||||
@@ -51,7 +50,7 @@ def titlecase(text):
|
||||
|
||||
"""
|
||||
|
||||
all_caps = ALL_CAPS.match(text)
|
||||
all_caps = upper(text) == text
|
||||
|
||||
words = re.split('\s+', text)
|
||||
line = []
|
||||
|
Loading…
x
Reference in New Issue
Block a user