mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DRYer
This commit is contained in:
parent
15a0112b47
commit
1eba328f26
@ -14,37 +14,36 @@ _iterators = {}
|
|||||||
_lock = Lock()
|
_lock = Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def get_iterator(lang):
|
||||||
|
it = _iterators.get(lang)
|
||||||
|
if it is None:
|
||||||
|
it = _iterators[lang] = _icu.BreakIterator(_icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
|
||||||
|
return it
|
||||||
|
|
||||||
|
|
||||||
def split_into_words(text, lang='en'):
|
def split_into_words(text, lang='en'):
|
||||||
with _lock:
|
with _lock:
|
||||||
it = _iterators.get(lang, None)
|
it = get_iterator(lang)
|
||||||
if it is None:
|
|
||||||
it = _iterators[lang] = _icu.BreakIterator(_icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
|
|
||||||
it.set_text(text)
|
it.set_text(text)
|
||||||
return [text[p:p+s] for p, s in it.split2()]
|
return [text[p:p+s] for p, s in it.split2()]
|
||||||
|
|
||||||
|
|
||||||
def split_into_words_and_positions(text, lang='en'):
|
def split_into_words_and_positions(text, lang='en'):
|
||||||
with _lock:
|
with _lock:
|
||||||
it = _iterators.get(lang, None)
|
it = get_iterator(lang)
|
||||||
if it is None:
|
|
||||||
it = _iterators[lang] = _icu.BreakIterator(_icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
|
|
||||||
it.set_text(text)
|
it.set_text(text)
|
||||||
return it.split2()
|
return it.split2()
|
||||||
|
|
||||||
|
|
||||||
def index_of(needle, haystack, lang='en'):
|
def index_of(needle, haystack, lang='en'):
|
||||||
with _lock:
|
with _lock:
|
||||||
it = _iterators.get(lang, None)
|
it = get_iterator(lang)
|
||||||
if it is None:
|
|
||||||
it = _iterators[lang] = _icu.BreakIterator(_icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
|
|
||||||
it.set_text(haystack)
|
it.set_text(haystack)
|
||||||
return it.index(needle)
|
return it.index(needle)
|
||||||
|
|
||||||
|
|
||||||
def count_words(text, lang='en'):
|
def count_words(text, lang='en'):
|
||||||
with _lock:
|
with _lock:
|
||||||
it = _iterators.get(lang, None)
|
it = get_iterator(lang)
|
||||||
if it is None:
|
|
||||||
it = _iterators[lang] = _icu.BreakIterator(_icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
|
|
||||||
it.set_text(text)
|
it.set_text(text)
|
||||||
return it.count_words()
|
return it.count_words()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user