From cd4f231d74ddd88f99d54216168d82ee0f0215e4 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 1 Dec 2019 13:29:19 +0530
Subject: [PATCH] Code to get path to dictionaries

---
 src/calibre/utils/hyphenation/dictionaries.py | 57 +++++++++++++++++++
 .../utils/hyphenation/test_hyphenation.py     | 32 ++++++++++-
 2 files changed, 87 insertions(+), 2 deletions(-)

diff --git a/src/calibre/utils/hyphenation/dictionaries.py b/src/calibre/utils/hyphenation/dictionaries.py
index 0e683740e0..8622cbda46 100644
--- a/src/calibre/utils/hyphenation/dictionaries.py
+++ b/src/calibre/utils/hyphenation/dictionaries.py
@@ -4,8 +4,14 @@
 
 from __future__ import absolute_import, division, print_function, unicode_literals
 
+import errno
 import json
+import os
+import tarfile
+from io import BytesIO
 
+from calibre.constants import cache_dir
+from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.localization import lang_as_iso639_1
 from polyglot.builtins import iteritems
 from polyglot.functools import lru_cache
@@ -38,7 +44,58 @@ def dictionary_name_for_locale(loc):
         return lmap['en_us']
     if loc == 'de':
         return lmap['de_de']
+    if loc == 'es':
+        return lmap['es_es']
     q = loc + '_'
     for k, v in iteritems(lmap):
         if k.startswith(q):
             return lmap[k]
+
+
+def extract_dicts(cache_path):
+    with TemporaryDirectory(dir=cache_path) as tdir:
+        try:
+            from calibre_lzma.xz import decompress
+        except ImportError:
+            tf = tarfile.open(P('hyphenation/dictionaries.tar.xz'))
+        else:
+            buf = BytesIO()
+            decompress(P('hyphenation/dictionaries.tar.xz', data=True), outfile=buf)
+            buf.seek(0)
+            tf = tarfile.TarFile(fileobj=buf)
+        with tf:
+            tf.extractall(tdir)
+        dest = os.path.join(cache_path, 'f')
+        with TemporaryDirectory(dir=cache_path) as trash:
+            try:
+                os.rename(dest, os.path.join(trash, 'f'))
+            except EnvironmentError as err:
+                if err.errno != errno.ENOENT:
+                    raise
+            os.rename(tdir, dest)
+    is_cache_up_to_date.updated = True
+
+
+def is_cache_up_to_date(cache_path):
+    if hasattr(is_cache_up_to_date, 'updated'):
+        return True
+    hsh = P('hyphenation/sha1sum', data=True)
+    try:
+        with open(os.path.join(cache_path, 'f', 'sha1sum'), 'rb') as f:
+            return f.read() == hsh
+    except EnvironmentError:
+        pass
+    return False
+
+
+def path_to_dictionary(dictionary_name):
+    cd = getattr(path_to_dictionary, 'cache_dir', None) or cache_dir()
+    cache_path = os.path.join(cd, 'hyphenation')
+    try:
+        os.makedirs(cache_path)
+    except EnvironmentError as err:
+        if err.errno != errno.EEXIST:
+            raise
+    if not is_cache_up_to_date(cache_path):
+        extract_dicts(cache_path)
+    return os.path.join(cache_path, 'f', dictionary_name)
diff --git a/src/calibre/utils/hyphenation/test_hyphenation.py b/src/calibre/utils/hyphenation/test_hyphenation.py
index b65c78b89a..502250994c 100644
--- a/src/calibre/utils/hyphenation/test_hyphenation.py
+++ b/src/calibre/utils/hyphenation/test_hyphenation.py
@@ -4,22 +4,50 @@
 
 from __future__ import absolute_import, division, print_function, unicode_literals
 
+import shutil, os
 import unittest
-from calibre.utils.hyphenation.dictionaries import dictionary_name_for_locale
+
+from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre.utils.hyphenation.dictionaries import (
+    dictionary_name_for_locale, path_to_dictionary
+)
 
 
 class TestHyphenation(unittest.TestCase):
 
     ae = unittest.TestCase.assertEqual
 
+    @classmethod
+    def setUpClass(cls):
+        tdir = PersistentTemporaryDirectory()
+        path_to_dictionary.cache_dir = tdir
+
+    @classmethod
+    def tearDownClass(cls):
+        try:
+            shutil.rmtree(path_to_dictionary.cache_dir)
+        except EnvironmentError:
+            pass
+        path_to_dictionary.cache_dir = None
+
     def test_locale_to_hyphen_dictionary(self):
+
         def t(x, expected=None):
-            self.ae(dictionary_name_for_locale(x), 'hyph_{}.dic'.format(expected) if expected else None)
+            self.ae(
+                dictionary_name_for_locale(x),
+                'hyph_{}.dic'.format(expected) if expected else None
+            )
+
         t('en', 'en_US')
         t('en_IN', 'en_GB')
         t('de', 'de_DE')
+        t('es', 'es_ANY')
+        t('nl', 'nl_NL')
         t('fr', 'fr')
         t('XXX')
+        self.assertTrue(
+            os.path.exists(path_to_dictionary(dictionary_name_for_locale('en')))
+        )
 
 
 def find_tests():