mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Implement loading of both builtin and user installed dictionaries
This commit is contained in:
parent
ba894573e4
commit
0c397d32e5
2
resources/dictionaries/en-US/locales
Normal file
2
resources/dictionaries/en-US/locales
Normal file
@ -0,0 +1,2 @@
|
||||
en-US
|
||||
en-PH
|
173
src/calibre/spell/dictionary.py
Normal file
173
src/calibre/spell/dictionary.py
Normal file
@ -0,0 +1,173 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import cPickle, os, glob
|
||||
from collections import namedtuple
|
||||
from operator import attrgetter
|
||||
|
||||
from calibre.constants import plugins, config_dir
|
||||
from calibre.utils.config import JSONConfig
|
||||
from calibre.utils.localization import get_lang, canonicalize_lang
|
||||
|
||||
DictionaryLocale = namedtuple('DictionaryLocale', 'langcode countrycode')
|
||||
Dictionary = namedtuple('Dictionary', 'primary_locale locales dicpath affpath builtin name')
|
||||
LoadedDictionary = namedtuple('Dictionary', 'primary_locale locales obj builtin name')
|
||||
hunspell = plugins['hunspell'][0]
|
||||
if hunspell is None:
|
||||
raise RuntimeError('Failed to load hunspell: %s' % plugins[1])
|
||||
dprefs = JSONConfig('dictionaries/prefs.json')
|
||||
dprefs.defaults['preferred_dictionaries'] = {}
|
||||
dprefs.defaults['preferred_locales'] = {}
|
||||
not_present = object()
|
||||
|
||||
ccodes, ccodemap, country_names = None, None, None
|
||||
def get_codes():
|
||||
global ccodes, ccodemap
|
||||
if ccodes is None:
|
||||
data = cPickle.loads(P('localization/iso3166.pickle', allow_user_override=False, data=True))
|
||||
ccodes, ccodemap, country_names = data['codes'], data['three_map'], data['names']
|
||||
return ccodes, ccodemap
|
||||
|
||||
def parse_lang_code(raw):
|
||||
parts = raw.replace('_', '-').split('-')
|
||||
lc = canonicalize_lang(parts[0])
|
||||
if lc is None:
|
||||
raise ValueError('Invalid language code: %r' % raw)
|
||||
cc = None
|
||||
if len(parts) > 1:
|
||||
ccodes, ccodemap = get_codes()[:2]
|
||||
q = parts[1].upper()
|
||||
if q in ccodes:
|
||||
cc = q
|
||||
else:
|
||||
cc = ccodemap.get(q, None)
|
||||
return DictionaryLocale(lc, cc)
|
||||
|
||||
_builtins = _custom = None
|
||||
|
||||
def builtin_dictionaries():
|
||||
global _builtins
|
||||
if _builtins is None:
|
||||
dics = []
|
||||
for lc in glob.glob(os.path.join(P('dictionaries', allow_user_override=False), '*/locales')):
|
||||
locales = filter(None, open(lc, 'rb').read().decode('utf-8').splitlines())
|
||||
locale = locales[0]
|
||||
base = os.path.dirname(lc)
|
||||
dics.append(Dictionary(
|
||||
parse_lang_code(locale), frozenset(map(parse_lang_code, locales)), os.path.join(base, '%s.dic' % locale),
|
||||
os.path.join(base, '%s.aff' % locale), True, None))
|
||||
_builtins = frozenset(dics)
|
||||
return _builtins
|
||||
|
||||
def custom_dictionaries(reread=False):
|
||||
global _custom
|
||||
if reread:
|
||||
_custom = None
|
||||
if _custom is None:
|
||||
dics = []
|
||||
for lc in glob.glob(os.path.join(config_dir, 'dictionaries', '*/locales')):
|
||||
locales = filter(None, open(lc, 'rb').read().decode('utf-8').splitlines())
|
||||
name, locale, locales = locales[0], locales[1], locales[1:]
|
||||
base = os.path.dirname(lc)
|
||||
dics.append(Dictionary(
|
||||
parse_lang_code(locale), frozenset(map(parse_lang_code, locales)), os.path.join(base, '%s.dic' % locale),
|
||||
os.path.join(base, '%s.aff' % locale), False, name))
|
||||
_custom = frozenset(dics)
|
||||
return _custom
|
||||
|
||||
_default_lang_codes = {'eng':parse_lang_code('en-US'), 'deu':parse_lang_code('de-DE'), 'spa':parse_lang_code('es-ES'), 'fra':parse_lang_code('fr-FR')}
|
||||
|
||||
def get_dictionary(locale, exact_match=False):
|
||||
preferred = {parse_lang_code(k):v for k, v in dprefs['preferred_dictionaries']}.get(locale, None)
|
||||
# First find all dictionaries that match locale exactly
|
||||
exact_matches = {}
|
||||
for collection in (custom_dictionaries(), builtin_dictionaries()):
|
||||
for d in collection:
|
||||
if d.primary_locale == locale:
|
||||
exact_matches[d.name] = d
|
||||
for d in collection:
|
||||
for q in d.locales:
|
||||
if q == locale and d.name not in exact_matches:
|
||||
exact_matches[d.name] = d
|
||||
|
||||
# If the user has specified a preferred dictionary for this locale, use it,
|
||||
# otherwise, if a builtin dictionary exists, use that
|
||||
if preferred in exact_matches:
|
||||
return exact_matches[preferred]
|
||||
# Return one of the exactly matching dictionaries, preferring user
|
||||
# installed to builtin ones
|
||||
for k in sorted(exact_matches, key=lambda x: (1, None) if x is None else (0, x)):
|
||||
return exact_matches[k]
|
||||
|
||||
if exact_match:
|
||||
return
|
||||
|
||||
# No dictionary matched the locale exactly, we will now fallback to
|
||||
# matching only on language. First see if a dictionary matching the
|
||||
# preferred locale for the language exists.
|
||||
best_locale = dprefs['preferred_locales'].get(locale.langcode, _default_lang_codes.get(locale.langcode, None))
|
||||
if best_locale is not None:
|
||||
ans = get_dictionary(best_locale, exact_match=True)
|
||||
if ans is not None:
|
||||
return ans
|
||||
|
||||
# Now just return any dictionary that matches the language, preferring user
|
||||
# installed ones to builtin ones
|
||||
for collection in (custom_dictionaries(), builtin_dictionaries()):
|
||||
for d in sorted(collection, key=attrgetter('name')):
|
||||
if d.primary_locale.langcode == locale.langcode:
|
||||
return d
|
||||
|
||||
def load_dictionary(dictionary):
|
||||
with open(dictionary.dicpath, 'rb') as dic, open(dictionary.affpath, 'rb') as aff:
|
||||
obj = hunspell.Dictionary(dic.read(), aff.read())
|
||||
return LoadedDictionary(dictionary.primary_locale, dictionary.locales, obj, dictionary.builtin, dictionary.name)
|
||||
|
||||
class Dictionaries(object):
|
||||
|
||||
def __init__(self):
|
||||
self.dictionaries = {}
|
||||
self.word_cache = {}
|
||||
try:
|
||||
self.default_locale = parse_lang_code(get_lang())
|
||||
except ValueError:
|
||||
self.default_locale = parse_lang_code('en-US')
|
||||
|
||||
def clear_caches(self):
|
||||
self.dictionaries.clear(), self.word_cache.clear()
|
||||
|
||||
def dictionary_for_locale(self, locale):
|
||||
ans = self.dictionaries.get(locale, not_present)
|
||||
if ans is not_present:
|
||||
ans = get_dictionary(locale)
|
||||
if ans is not None:
|
||||
ans = load_dictionary(ans)
|
||||
self.dictionaries[locale] = ans
|
||||
return ans
|
||||
|
||||
def recognized(self, word, locale=None):
|
||||
locale = locale or self.default_locale
|
||||
if not isinstance(locale, DictionaryLocale):
|
||||
locale = parse_lang_code(locale)
|
||||
key = (word, locale)
|
||||
ans = self.word_cache.get(key, None)
|
||||
if ans is None:
|
||||
ans = False
|
||||
d = self.dictionary_for_locale(locale)
|
||||
if d is not None:
|
||||
try:
|
||||
ans = d.obj.recognized(word)
|
||||
except ValueError:
|
||||
pass
|
||||
self.word_cache[key] = ans
|
||||
return ans
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
dictionaries = Dictionaries()
|
||||
print (dictionaries.recognized('recognized', 'en'))
|
@ -30,13 +30,6 @@ def parse_xcu(raw, origin='%origin%'):
|
||||
ans[(dic, aff)] = locales
|
||||
return ans
|
||||
|
||||
def generate_locale_map(dictionaries):
|
||||
ans = {}
|
||||
for (dic, aff), locales in dictionaries.iteritems():
|
||||
for locale in locales:
|
||||
if locale not in ans:
|
||||
ans[locale] = (dic, aff)
|
||||
|
||||
def import_from_libreoffice_source_tree(source_path):
|
||||
dictionaries = {}
|
||||
for x in glob.glob(os.path.join(source_path, '*', 'dictionaries.xcu')):
|
||||
@ -58,6 +51,8 @@ def import_from_libreoffice_source_tree(source_path):
|
||||
for src in (dic, aff):
|
||||
df = os.path.join(dest, locale + os.path.splitext(src)[1])
|
||||
shutil.copyfile(src, df)
|
||||
with open(os.path.join(dest, 'locales'), 'wb') as f:
|
||||
f.write(('\n'.join(locales)).encode('utf-8'))
|
||||
|
||||
if want_locales:
|
||||
raise Exception('Failed to find dictionaries for some wanted locales: %s' % want_locales)
|
||||
|
Loading…
x
Reference in New Issue
Block a user