mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Implement loading of both builtin and user installed dictionaries
This commit is contained in:
parent
ba894573e4
commit
0c397d32e5
2
resources/dictionaries/en-US/locales
Normal file
2
resources/dictionaries/en-US/locales
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
en-US
|
||||||
|
en-PH
|
173
src/calibre/spell/dictionary.py
Normal file
173
src/calibre/spell/dictionary.py
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
import cPickle, os, glob
|
||||||
|
from collections import namedtuple
|
||||||
|
from operator import attrgetter
|
||||||
|
|
||||||
|
from calibre.constants import plugins, config_dir
|
||||||
|
from calibre.utils.config import JSONConfig
|
||||||
|
from calibre.utils.localization import get_lang, canonicalize_lang
|
||||||
|
|
||||||
|
# A locale is a language code plus an optional country code (see
# parse_lang_code(), which builds these).
DictionaryLocale = namedtuple('DictionaryLocale', 'langcode countrycode')
# Description of a dictionary on disk: the paths of its .dic/.aff files, the
# locales it serves, whether it ships with calibre and its display name.
Dictionary = namedtuple('Dictionary', 'primary_locale locales dicpath affpath builtin name')
# Same metadata as Dictionary, but carrying the instantiated hunspell object
# instead of the file paths. The typename now matches the variable name so
# that repr() output distinguishes it from Dictionary.
LoadedDictionary = namedtuple('LoadedDictionary', 'primary_locale locales obj builtin name')
hunspell = plugins['hunspell'][0]
if hunspell is None:
    # The plugin registry is indexed by plugin name (see the lookup above), so
    # the failure detail has to be read from the same entry; plugins[1] would
    # fail with a lookup error and hide the real problem.
    # NOTE(review): element 1 is presumably the load error message -- confirm
    raise RuntimeError('Failed to load hunspell: %s' % plugins['hunspell'][1])
dprefs = JSONConfig('dictionaries/prefs.json')
dprefs.defaults['preferred_dictionaries'] = {}
dprefs.defaults['preferred_locales'] = {}
# Sentinel used to tell "no cache entry" apart from a cached None
not_present = object()
|
||||||
|
|
||||||
|
# Lazily populated by get_codes()
ccodes, ccodemap, country_names = None, None, None


def get_codes():
    '''
    Return the tuple ``(ccodes, ccodemap)`` of country code data, loading it
    from the bundled ``localization/iso3166.pickle`` resource on first use.

    The loaded values are cached in the module-level ``ccodes``, ``ccodemap``
    and ``country_names`` variables, so the resource is read at most once.
    '''
    # country_names must be declared global as well, otherwise the assignment
    # below only creates a local and the module-level value stays None.
    global ccodes, ccodemap, country_names
    if ccodes is None:
        # The pickle is read from calibre's own resources with user overrides
        # disabled; it must never be pointed at externally supplied data.
        data = cPickle.loads(P('localization/iso3166.pickle', allow_user_override=False, data=True))
        ccodes, ccodemap, country_names = data['codes'], data['three_map'], data['names']
    return ccodes, ccodemap
|
||||||
|
|
||||||
|
def parse_lang_code(raw):
    '''
    Convert a locale string such as ``en-US`` or ``en_US`` into a
    :class:`DictionaryLocale`.

    The first component is passed through ``canonicalize_lang()``; the second
    component, if present, is upper-cased and looked up among the known
    country codes. The country code of the result is None when there is no
    second component or when it is not recognised.

    :raises ValueError: if the language component cannot be canonicalized
    '''
    pieces = raw.replace('_', '-').split('-')
    langcode = canonicalize_lang(pieces[0])
    if langcode is None:
        raise ValueError('Invalid language code: %r' % raw)
    if len(pieces) < 2:
        # Language only, no country information
        return DictionaryLocale(langcode, None)
    known_codes, alternate_codes = get_codes()[:2]
    candidate = pieces[1].upper()
    if candidate in known_codes:
        countrycode = candidate
    else:
        # Fall back to the secondary map (presumably three-letter country
        # codes, judging by its 'three_map' source key); unknown -> None
        countrycode = alternate_codes.get(candidate, None)
    return DictionaryLocale(langcode, countrycode)
|
||||||
|
|
||||||
|
# Caches for builtin_dictionaries() and custom_dictionaries()
_builtins = _custom = None


def builtin_dictionaries():
    '''
    Return a frozenset of :class:`Dictionary` tuples describing the
    dictionaries found in the ``dictionaries`` resource directory.

    Every sub-directory containing a ``locales`` file is one dictionary. The
    file lists one locale code per line; the first is the primary locale and
    also the base name of the ``.dic``/``.aff`` files in that directory. The
    result is computed once and cached for the life of the process.
    '''
    global _builtins
    if _builtins is None:
        dics = []
        for lc in glob.glob(os.path.join(P('dictionaries', allow_user_override=False), '*/locales')):
            # Read inside a with block so the file handle is closed promptly
            # instead of being left for the garbage collector
            with open(lc, 'rb') as f:
                locales = [x for x in f.read().decode('utf-8').splitlines() if x]
            locale = locales[0]
            base = os.path.dirname(lc)
            dics.append(Dictionary(
                parse_lang_code(locale), frozenset(map(parse_lang_code, locales)), os.path.join(base, '%s.dic' % locale),
                os.path.join(base, '%s.aff' % locale), True, None))
        _builtins = frozenset(dics)
    return _builtins
|
||||||
|
|
||||||
|
def custom_dictionaries(reread=False):
    '''
    Return a frozenset of :class:`Dictionary` tuples describing the
    dictionaries found under ``dictionaries`` in the calibre config directory.

    Every sub-directory containing a ``locales`` file is one dictionary. The
    first non-empty line of that file is the dictionary name, the remaining
    lines are locale codes, the first of which is the primary locale and the
    base name of the ``.dic``/``.aff`` files.

    :param reread: If True, discard the cached result and scan the disk again
    '''
    global _custom
    if reread:
        _custom = None
    if _custom is None:
        dics = []
        for lc in glob.glob(os.path.join(config_dir, 'dictionaries', '*/locales')):
            # Read inside a with block so the file handle is closed promptly
            with open(lc, 'rb') as f:
                lines = [x for x in f.read().decode('utf-8').splitlines() if x]
            if len(lines) < 2:
                # A name plus at least one locale are required. Skip a
                # malformed entry rather than letting one broken directory
                # make every dictionary lookup fail with IndexError.
                continue
            name, locale, locales = lines[0], lines[1], lines[1:]
            base = os.path.dirname(lc)
            dics.append(Dictionary(
                parse_lang_code(locale), frozenset(map(parse_lang_code, locales)), os.path.join(base, '%s.dic' % locale),
                os.path.join(base, '%s.aff' % locale), False, name))
        _custom = frozenset(dics)
    return _custom
|
||||||
|
|
||||||
|
# Locale to try first when only the language of a requested locale is usable
_default_lang_codes = {'eng':parse_lang_code('en-US'), 'deu':parse_lang_code('de-DE'), 'spa':parse_lang_code('es-ES'), 'fra':parse_lang_code('fr-FR')}


def get_dictionary(locale, exact_match=False):
    '''
    Find the best available :class:`Dictionary` for ``locale``.

    Dictionaries that list ``locale`` among their locales are tried first. If
    there are none and ``exact_match`` is False, the search falls back to the
    preferred locale for the language and finally to any dictionary whose
    primary locale has the same language.

    :param locale: The :class:`DictionaryLocale` to find a dictionary for
    :param exact_match: If True, never fall back to matching on language only
    :return: A :class:`Dictionary`, or None if nothing suitable was found
    '''
    # .items() is required: iterating the mapping directly yields only its
    # keys, which cannot be unpacked into (locale code, dictionary name).
    preferred = {parse_lang_code(k):v for k, v in dprefs['preferred_dictionaries'].items()}.get(locale, None)
    # First find all dictionaries that match locale exactly, keyed by name.
    # Within a collection a primary locale match wins over a secondary one.
    exact_matches = {}
    for collection in (custom_dictionaries(), builtin_dictionaries()):
        for d in collection:
            if d.primary_locale == locale:
                exact_matches[d.name] = d
        for d in collection:
            for q in d.locales:
                if q == locale and d.name not in exact_matches:
                    exact_matches[d.name] = d

    # If the user has specified a preferred dictionary for this locale, use it,
    # otherwise, if a builtin dictionary exists, use that. Builtin dictionaries
    # are stored under the name None, which is also the value of preferred
    # when there is no preference, so this one test covers both cases.
    if preferred in exact_matches:
        return exact_matches[preferred]
    # Return one of the exactly matching dictionaries, preferring user
    # installed to builtin ones (named entries sort before the None entry)
    for k in sorted(exact_matches, key=lambda x: (1, None) if x is None else (0, x)):
        return exact_matches[k]

    if exact_match:
        return

    # No dictionary matched the locale exactly, we will now fallback to
    # matching only on language. First see if a dictionary matching the
    # preferred locale for the language exists.
    best_locale = dprefs['preferred_locales'].get(locale.langcode, _default_lang_codes.get(locale.langcode, None))
    if best_locale is not None:
        if not isinstance(best_locale, DictionaryLocale):
            # A value coming from the JSON prefs cannot be a DictionaryLocale
            # and would never compare equal to one; presumably it is stored
            # as a locale string such as 'en-GB' -- TODO confirm
            best_locale = parse_lang_code(best_locale)
        ans = get_dictionary(best_locale, exact_match=True)
        if ans is not None:
            return ans

    # Now just return any dictionary that matches the language, preferring user
    # installed ones to builtin ones
    for collection in (custom_dictionaries(), builtin_dictionaries()):
        for d in sorted(collection, key=attrgetter('name')):
            if d.primary_locale.langcode == locale.langcode:
                return d
|
||||||
|
|
||||||
|
def load_dictionary(dictionary):
    '''
    Read the ``.dic`` and ``.aff`` files of ``dictionary`` and build the
    hunspell object for them.

    :param dictionary: A :class:`Dictionary` tuple
    :return: A :class:`LoadedDictionary` carrying the same metadata plus the
             hunspell object in its ``obj`` field
    '''
    with open(dictionary.dicpath, 'rb') as dic_file, open(dictionary.affpath, 'rb') as aff_file:
        dic_data = dic_file.read()
        aff_data = aff_file.read()
        speller = hunspell.Dictionary(dic_data, aff_data)
    return LoadedDictionary(
        primary_locale=dictionary.primary_locale,
        locales=dictionary.locales,
        obj=speller,
        builtin=dictionary.builtin,
        name=dictionary.name,
    )
|
||||||
|
|
||||||
|
class Dictionaries(object):

    '''
    Spell checking front end that loads dictionaries on demand and caches
    both the loaded dictionaries and the result for every word checked.
    '''

    def __init__(self):
        # locale -> LoadedDictionary, or None if no dictionary was found
        self.dictionaries = {}
        # (word, locale) -> result of the spell check
        self.word_cache = {}
        try:
            default = parse_lang_code(get_lang())
        except ValueError:
            # The interface language is not usable as a locale
            default = parse_lang_code('en-US')
        self.default_locale = default

    def clear_caches(self):
        ''' Forget all loaded dictionaries and all cached word results '''
        self.dictionaries.clear()
        self.word_cache.clear()

    def dictionary_for_locale(self, locale):
        '''
        Return the loaded dictionary for ``locale``, loading it on first use.
        Returns None when no dictionary is available; that outcome is cached
        as well, so the search is not repeated.
        '''
        cached = self.dictionaries.get(locale, not_present)
        if cached is not not_present:
            return cached
        found = get_dictionary(locale)
        loaded = None if found is None else load_dictionary(found)
        self.dictionaries[locale] = loaded
        return loaded

    def recognized(self, word, locale=None):
        '''
        Check ``word`` against the dictionary for ``locale``.

        :param locale: A :class:`DictionaryLocale` or a locale string. When
                       empty or None, the default locale is used.
        :return: The result of the dictionary's own check, or False when no
                 dictionary is available or the check raises ValueError
        '''
        locale = locale or self.default_locale
        if not isinstance(locale, DictionaryLocale):
            locale = parse_lang_code(locale)
        key = (word, locale)
        cached = self.word_cache.get(key, None)
        if cached is not None:
            return cached
        result = False
        speller = self.dictionary_for_locale(locale)
        if speller is not None:
            try:
                result = speller.obj.recognized(word)
            except ValueError:
                # The dictionary could not process this word (presumably an
                # encoding problem); treat it as not recognized
                pass
        self.word_cache[key] = result
        return result
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Manual smoke test: check one English word using whichever dictionary
    # is selected for the bare language code 'en'
    dictionaries = Dictionaries()
    is_known = dictionaries.recognized('recognized', 'en')
    print (is_known)
|
@ -30,13 +30,6 @@ def parse_xcu(raw, origin='%origin%'):
|
|||||||
ans[(dic, aff)] = locales
|
ans[(dic, aff)] = locales
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def generate_locale_map(dictionaries):
|
|
||||||
ans = {}
|
|
||||||
for (dic, aff), locales in dictionaries.iteritems():
|
|
||||||
for locale in locales:
|
|
||||||
if locale not in ans:
|
|
||||||
ans[locale] = (dic, aff)
|
|
||||||
|
|
||||||
def import_from_libreoffice_source_tree(source_path):
|
def import_from_libreoffice_source_tree(source_path):
|
||||||
dictionaries = {}
|
dictionaries = {}
|
||||||
for x in glob.glob(os.path.join(source_path, '*', 'dictionaries.xcu')):
|
for x in glob.glob(os.path.join(source_path, '*', 'dictionaries.xcu')):
|
||||||
@ -58,6 +51,8 @@ def import_from_libreoffice_source_tree(source_path):
|
|||||||
for src in (dic, aff):
|
for src in (dic, aff):
|
||||||
df = os.path.join(dest, locale + os.path.splitext(src)[1])
|
df = os.path.join(dest, locale + os.path.splitext(src)[1])
|
||||||
shutil.copyfile(src, df)
|
shutil.copyfile(src, df)
|
||||||
|
with open(os.path.join(dest, 'locales'), 'wb') as f:
|
||||||
|
f.write(('\n'.join(locales)).encode('utf-8'))
|
||||||
|
|
||||||
if want_locales:
|
if want_locales:
|
||||||
raise Exception('Failed to find dictionaries for some wanted locales: %s' % want_locales)
|
raise Exception('Failed to find dictionaries for some wanted locales: %s' % want_locales)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user