mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Function to remove surrounding punctuation
This commit is contained in:
parent
c344ef0775
commit
aa1f52460e
@ -6,3 +6,28 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
||||
|
||||
# TODO: lowercase the word? Remove trailing punctuation. Abort early if the word contains '=' or its length is < 4 or > 99
|
||||
# TODO: test with replacement words
|
||||
|
||||
|
||||
import regex
|
||||
REGEX_FLAGS = regex.VERSION1 | regex.WORD | regex.FULLCASE | regex.UNICODE
|
||||
|
||||
|
||||
def pats():
    """Return the ``(leading, trailing)`` punctuation patterns.

    The two patterns are compiled on the first call and stored on the
    function object as ``pats.ans``; later calls return that same tuple.
    """
    cached = getattr(pats, 'ans', None)
    if cached is not None:
        return cached
    # One pattern anchored at the start of the string, one at the end.
    at_start = regex.compile(r'^\p{P}+', REGEX_FLAGS)
    at_end = regex.compile(r'\p{P}+$', REGEX_FLAGS)
    pats.ans = cached = (at_start, at_end)
    return cached
|
||||
|
||||
|
||||
def remove_punctuation(word):
    """Split ``word`` into a ``(prefix, core, suffix)`` tuple.

    ``prefix`` is the run of punctuation at the start of ``word`` and
    ``suffix`` is the run of punctuation at its end, as matched by the
    patterns from ``pats()``. Either may be the empty string. The three
    parts always concatenate back to the original ``word``.
    """
    leading, trailing = pats()
    prefix = suffix = ''
    found = leading.match(word)
    if found is not None:
        cut = found.end()
        prefix, word = word[:cut], word[cut:]
    found = trailing.search(word)
    if found is not None:
        # Slice at the match offset instead of by length difference: ``$``
        # can match just before a final newline, in which case the matched
        # punctuation is not the last thing in the string and a
        # ``word[-count:]`` slice would pick up the wrong characters.
        cut = found.start()
        suffix, word = word[cut:], word[:cut]
    return prefix, word, suffix
|
||||
|
@ -4,13 +4,15 @@
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import shutil, os
|
||||
import os
|
||||
import shutil
|
||||
import unittest
|
||||
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from calibre.utils.hyphenation.dictionaries import (
|
||||
dictionary_name_for_locale, path_to_dictionary
|
||||
)
|
||||
from calibre.utils.hyphenation.hyphenate import remove_punctuation
|
||||
|
||||
|
||||
class TestHyphenation(unittest.TestCase):
|
||||
@ -61,6 +63,11 @@ class TestHyphenation(unittest.TestCase):
|
||||
)
|
||||
self.assertFalse(cache[0])
|
||||
|
||||
def test_remove_punctuation(self):
    # Each entry pairs an input word with the expected
    # (prefix, word, suffix) result of remove_punctuation().
    cases = (
        ('word', ('', 'word', '')),
        ('wo.rd.', ('', 'wo.rd', '.')),
        ('"«word!!', ('"«', 'word', '!!')),
    )
    for raw, expected in cases:
        self.ae(remove_punctuation(raw), expected)
|
||||
|
||||
|
||||
def find_tests():
    """Return a test suite built from the TestHyphenation test case."""
    loader = unittest.defaultTestLoader
    suite = loader.loadTestsFromTestCase(TestHyphenation)
    return suite
|
||||
|
Loading…
x
Reference in New Issue
Block a user