mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Title casing: Fix presence of some non-English characters or smart punctuation causing all-caps text to not be properly lowercased
This commit is contained in:
parent
f588e2d9e2
commit
2f711f84fa
@ -9,13 +9,13 @@ License: http://www.opensource.org/licenses/mit-license.php
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from calibre.utils.icu import capitalize
|
from calibre.utils.icu import capitalize, upper
|
||||||
|
|
||||||
__all__ = ['titlecase']
|
__all__ = ['titlecase']
|
||||||
__version__ = '0.5'
|
__version__ = '0.5'
|
||||||
|
|
||||||
SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\.?|via|vs\.?'
|
SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\.?|via|vs\.?'
|
||||||
PUNCT = r"""!"#$%&'‘()*+,\-‒–—―./:;?@[\\\]_`{|}~"""
|
PUNCT = r"""!"#$%&'‘’()*+,\-‒–—―./:;?@[\\\]_`{|}~"""
|
||||||
|
|
||||||
SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
|
SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
|
||||||
INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I)
|
INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I)
|
||||||
@ -26,7 +26,6 @@ SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I|re.U)
|
|||||||
SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I|re.U)
|
SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I|re.U)
|
||||||
SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL)
|
SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL)
|
||||||
APOS_SECOND = re.compile(r"^[dol]{1}['‘]{1}[a-z]+$", re.I)
|
APOS_SECOND = re.compile(r"^[dol]{1}['‘]{1}[a-z]+$", re.I)
|
||||||
ALL_CAPS = re.compile(r'^[A-Z0-9\s%s]+$' % PUNCT)
|
|
||||||
UC_INITIALS = re.compile(r"^(?:[A-Z]{1}\.{1}|[A-Z]{1}\.{1}[A-Z]{1})+$")
|
UC_INITIALS = re.compile(r"^(?:[A-Z]{1}\.{1}|[A-Z]{1}\.{1}[A-Z]{1})+$")
|
||||||
|
|
||||||
_lang = None
|
_lang = None
|
||||||
@ -51,7 +50,7 @@ def titlecase(text):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
all_caps = ALL_CAPS.match(text)
|
all_caps = upper(text) == text
|
||||||
|
|
||||||
words = re.split('\s+', text)
|
words = re.split('\s+', text)
|
||||||
line = []
|
line = []
|
||||||
|
Loading…
x
Reference in New Issue
Block a user