Transforming to titlecase - handle typographic hyphens in all-caps phrases

This commit is contained in:
Kovid Goyal 2013-02-13 09:44:59 +05:30
parent 24bbe04875
commit e01af28054

View File

@ -15,7 +15,7 @@ __all__ = ['titlecase']
# Version string of this module.
__version__ = '0.5'
# Regex alternation of short words (articles, conjunctions, prepositions and
# the abbreviations "v"/"vs" with an optional trailing period). It is
# interpolated as a sub-pattern into the compiled regexes that follow.
SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\.?|via|vs\.?'
# Punctuation characters, escaped so the string can be interpolated inside a
# regex character class ("[...]"). This first binding includes the ASCII
# hyphen (written "\-"); it is rebound by the assignment on the next line.
PUNCT = r"""!"#$%&'()*+,\-./:;?@[\\\]_`{|}~"""
# Same set, but with typographic dashes (en dash, em dash, horizontal bar) in
# the position where the previous binding had "\-".
# NOTE(review): the ASCII hyphen itself no longer appears in this string, so
# patterns built from PUNCT would not treat "-" as punctuation -- confirm
# that this is intended.
PUNCT = r"""!"#$%&'()*+,\–—―./:;?@[\\\]_`{|}~"""
# Matches a string that is exactly one of the SMALL words, ignoring case.
SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
# Matches a period sitting directly between two letters a-z (e.g. "a.b"),
# ignoring case.
INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I)
@ -26,7 +26,7 @@ SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I)
# Matches a run of digits plus following whitespace (group 1) and then the
# whole word "a", "an" or "the" (group 2), ignoring case.
SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I)
# Matches one of ": . ; ? !" followed by a single space (group 1) and then a
# SMALL word (group 2). Compiled without re.I, so only the lowercase
# spellings listed in SMALL match.
SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL)
# Matches a whole string of the form d'xxx, o'xxx or l'xxx: one of the letters
# d/o/l, an apostrophe, then one or more letters a-z, ignoring case.
APOS_SECOND = re.compile(r"^[dol]{1}[']{1}[a-z]+$", re.I)
# Matches a non-empty string made up only of uppercase A-Z, whitespace and
# PUNCT characters. This binding is replaced by the assignment on the next
# line.
ALL_CAPS = re.compile(r'^[A-Z\s%s]+$' % PUNCT)
# Same as the previous binding, but digits 0-9 are also allowed, so a string
# containing numbers can still match as all-caps.
ALL_CAPS = re.compile(r'^[A-Z0-9\s%s]+$' % PUNCT)
# Matches a whole string built from one or more repetitions of either
# "X." or "X.Y" (X, Y uppercase A-Z), e.g. "U.S." or "A.B". Case-sensitive.
UC_INITIALS = re.compile(r"^(?:[A-Z]{1}\.{1}|[A-Z]{1}\.{1}[A-Z]{1})+$")
# Module-level name, initially None.
# NOTE(review): presumably holds a language setting populated elsewhere; its
# use is outside this view -- confirm.
_lang = None