mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Function to remove surrounding punctuation
This commit is contained in:
parent
c344ef0775
commit
aa1f52460e
@ -6,3 +6,28 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
|||||||
|
|
||||||
# TODO: lower case word? remove trailing punctuation. abort early if contains = or length < 4 or length > 99
|
# TODO: lower case word? remove trailing punctuation. abort early if contains = or length < 4 or length > 99
|
||||||
# TODO: test with replacement words
|
# TODO: test with replacement words
|
||||||
|
|
||||||
|
|
||||||
|
import regex
|
||||||
|
REGEX_FLAGS = regex.VERSION1 | regex.WORD | regex.FULLCASE | regex.UNICODE
|
||||||
|
|
||||||
|
|
||||||
|
def pats():
    """Return the cached ``(leading, trailing)`` punctuation patterns.

    Both patterns are compiled on the first call and stored on the function
    object as ``pats.ans``; later calls return that stored tuple. The first
    pattern matches a run of Unicode punctuation at the start of a string,
    the second a run at the end.
    """
    cached = getattr(pats, 'ans', None)
    if cached is not None:
        return cached
    leading = regex.compile(r'^\p{P}+', REGEX_FLAGS)
    trailing = regex.compile(r'\p{P}+$', REGEX_FLAGS)
    pats.ans = cached = (leading, trailing)
    return cached
|
||||||
|
|
||||||
|
|
||||||
|
def remove_punctuation(word):
    """Split *word* into leading punctuation, core text and trailing punctuation.

    Returns a ``(prefix, word, suffix)`` tuple of strings. ``prefix`` is the
    run of punctuation at the start of the input (``''`` if there is none),
    ``suffix`` is the text from the start of the trailing punctuation run to
    the end of the input (``''`` if there is none), and ``word`` is whatever
    lies between. Punctuation inside the word is left alone. Joining the
    three parts always gives back the original input.
    """
    leading, trailing = pats()
    prefix = suffix = ''

    found = leading.match(word)
    if found is not None:
        cut = found.end()
        prefix, word = word[:cut], word[cut:]

    found = trailing.search(word)
    if found is not None:
        # Slice at the match position instead of computing the suffix from a
        # length difference: ``$`` also matches just before a final newline,
        # and the length arithmetic then returned the newline as the suffix
        # while dropping the punctuation itself.
        cut = found.start()
        word, suffix = word[:cut], word[cut:]

    return prefix, word, suffix
|
||||||
|
@ -4,13 +4,15 @@
|
|||||||
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
import shutil, os
|
import os
|
||||||
|
import shutil
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||||
from calibre.utils.hyphenation.dictionaries import (
|
from calibre.utils.hyphenation.dictionaries import (
|
||||||
dictionary_name_for_locale, path_to_dictionary
|
dictionary_name_for_locale, path_to_dictionary
|
||||||
)
|
)
|
||||||
|
from calibre.utils.hyphenation.hyphenate import remove_punctuation
|
||||||
|
|
||||||
|
|
||||||
class TestHyphenation(unittest.TestCase):
|
class TestHyphenation(unittest.TestCase):
|
||||||
@ -61,6 +63,11 @@ class TestHyphenation(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
self.assertFalse(cache[0])
|
self.assertFalse(cache[0])
|
||||||
|
|
||||||
|
def test_remove_punctuation(self):
    # Each case pairs an input with the expected (prefix, word, suffix).
    cases = (
        ('word', ('', 'word', '')),
        ('wo.rd.', ('', 'wo.rd', '.')),
        ('"«word!!', ('"«', 'word', '!!')),
    )
    for text, expected in cases:
        self.ae(remove_punctuation(text), expected)
|
||||||
|
|
||||||
|
|
||||||
def find_tests():
    """Return a test suite built from the ``TestHyphenation`` test case."""
    loader = unittest.defaultTestLoader
    return loader.loadTestsFromTestCase(TestHyphenation)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user