From a28d2bc1fc8e652e01fd731687b1c12bc84e7b2f Mon Sep 17 00:00:00 2001
From: Piero Toffanin
Date: Mon, 22 Dec 2025 13:18:16 -0500
Subject: [PATCH] Add --translation-cache

---
 libretranslate/app.py            | 17 ++++--
 libretranslate/cache.py          | 95 ++++++++++++++++++++++++++++++++
 libretranslate/default_values.py |  5 ++
 libretranslate/main.py           |  7 +++
 libretranslate/storage.py        |  9 ++-
 scripts/gunicorn_conf.py         |  3 +-
 6 files changed, 128 insertions(+), 8 deletions(-)
 create mode 100644 libretranslate/cache.py

diff --git a/libretranslate/app.py b/libretranslate/app.py
--- a/libretranslate/app.py
+++ b/libretranslate/app.py
@@ -22,7 +22,7 @@ from werkzeug.exceptions import HTTPException
 from werkzeug.http import http_date
 from werkzeug.utils import secure_filename
 
-from libretranslate import flood, remove_translated_files, scheduler, secret, security, storage
+from libretranslate import flood, remove_translated_files, scheduler, secret, security, storage, cache
 from libretranslate.language import model2iso, iso2model, detect_languages, improve_translation_formatting
 from libretranslate.locales import (
     _,
@@ -199,6 +199,7 @@ def create_app(args):
     bp = Blueprint('Main app', __name__)
 
     storage.setup(args.shared_storage)
+    trans_cache = cache.setup(args.translation_cache)
 
     if not args.disable_files_translation:
         remove_translated_files.setup(get_upload_dir())
@@ -765,6 +766,13 @@ def create_app(args):
 
         src_texts = q if batch else [q]
 
+        ak = get_req_api_key()
+        cache_key = None
+        if trans_cache.should_check(ak):
+            cache_key, hit = trans_cache.hit(q, source_lang, target_lang, text_format, num_alternatives)
+            if hit is not None:
+                return Response(hit, status=200, mimetype="application/json")
+
         if char_limit != -1:
             for text in src_texts:
                 if len(text) > char_limit:
@@ -832,8 +840,6 @@ def create_app(args):
                     result["detectedLanguage"] = [model2iso(detected_src_lang)] * len(q)
                 if num_alternatives > 0:
                     result["alternatives"] = batch_alternatives
-
-                return jsonify(result)
             else:
                 translator = src_lang.get_translation(tgt_lang)
                 if translator is None:
@@ -857,8 +863,11 @@ def create_app(args):
                     result["detectedLanguage"] = model2iso(detected_src_lang)
                 if num_alternatives > 0:
                     result["alternatives"] = alternatives
+
+            if cache_key is not None:
+                trans_cache.cache(cache_key, result)
 
-                return jsonify(result)
+            return jsonify(result)
         except Exception as e:
             raise e
             abort(500, description=_("Cannot translate text: %(text)s", text=str(e)))
diff --git a/libretranslate/cache.py b/libretranslate/cache.py
new file mode 100644
--- /dev/null
+++ b/libretranslate/cache.py
@@ -0,0 +1,95 @@
+import gzip
+import hashlib
+import json
+import logging
+
+from libretranslate.storage import get_storage
+
+logger = logging.getLogger(__name__)
+
+cache = None
+
+
+def get_translation_cache():
+    """Return the TranslationCache created by setup(), or None before that."""
+    return cache
+
+
+class TranslationCache:
+    """Best-effort cache of JSON translation responses, gzip-compressed in storage."""
+
+    def __init__(self, translation_cache_aks):
+        """
+        :param translation_cache_aks: API keys whose translations are cached, as a
+            list or a comma-separated string; "all" (any case) caches everything.
+        """
+        if translation_cache_aks is None:
+            translation_cache_aks = []
+        elif isinstance(translation_cache_aks, str):
+            translation_cache_aks = translation_cache_aks.split(",")
+
+        # An unset option arrives as [''] because argparse applies type= to the ''
+        # default. Drop blank entries so that does not turn caching on for
+        # requests made without an API key.
+        aks = [ak.strip() for ak in translation_cache_aks if ak and ak.strip()]
+
+        self.enabled = len(aks) > 0
+        self.api_keys = [ak for ak in aks if ak.lower() != "all"]
+        self.cache_all = any(ak.lower() == "all" for ak in aks)
+        self.expire = 604800  # 7 days
+        self.storage = get_storage()
+
+        # Raise instead of assert: asserts are stripped when running with -O.
+        if self.storage is None:
+            raise RuntimeError("Storage is none")
+
+    def should_check(self, ak):
+        """Return True when requests made with API key ``ak`` use the cache."""
+        return self.enabled and (self.cache_all or ak in self.api_keys)
+
+    def hit(self, src_texts, source_lang, target_lang, text_format, num_alternatives):
+        """
+        Look up the cached response for a translation request.
+
+        :param src_texts: request text: a string, or a list for batch requests
+        :return: (cache_key, cached JSON string or None on a miss); cache_key is
+            None when the request cannot be fingerprinted
+        """
+        try:
+            # JSON keeps item boundaries and the str/list distinction, so "a|b" vs
+            # ["a", "b"] and "a" vs ["a"] (different response shapes) never collide.
+            fingerprint = json.dumps([src_texts, source_lang, target_lang, text_format, num_alternatives])
+        except (TypeError, ValueError):
+            return None, None
+
+        cache_key = "tcache_" + hashlib.sha256(fingerprint.encode('utf-8')).hexdigest()
+
+        try:
+            cached = self.storage.get_str(cache_key, raw=True)
+            if not cached:
+                return cache_key, None
+            return cache_key, gzip.decompress(cached).decode('utf-8')
+        except Exception:
+            # Unreadable or corrupt entry: report a miss so it is regenerated,
+            # never hand the raw compressed bytes back as the response body.
+            logger.exception("Cannot read translation cache entry %s", cache_key)
+            return cache_key, None
+
+    def cache(self, cache_key, content):
+        """Store ``content`` (dict or JSON string) under ``cache_key``; errors are logged."""
+        try:
+            if isinstance(content, dict):
+                content = json.dumps(content)
+            compressed = gzip.compress(content.encode('utf-8'))
+
+            self.storage.set_str(cache_key, compressed, self.expire)
+        except Exception:
+            logger.exception("Cannot write translation cache entry %s", cache_key)
+
+
+def setup(translation_cache_aks):
+    """Create the module-level TranslationCache and return it."""
+    global cache
+
+    cache = TranslationCache(translation_cache_aks)
+    return cache
diff --git a/libretranslate/default_values.py b/libretranslate/default_values.py
index 1b1d72d..6b7485c 100644
--- a/libretranslate/default_values.py
+++ b/libretranslate/default_values.py
@@ -231,6 +231,11 @@ _default_options_objects = [
         'default_value': '',
         'value_type': 'str'
     },
+    {
+        'name': 'TRANSLATION_CACHE',
+        'default_value': '',
+        'value_type': 'str'
+    },
     {
         'name': 'URL_PREFIX',
         'default_value': '',
diff --git a/libretranslate/main.py b/libretranslate/main.py
index ff8901b..2d13c6b 100644
--- a/libretranslate/main.py
+++ b/libretranslate/main.py
@@ -236,6 +236,13 @@ def get_parser():
         type=str,
         help="Protect the /metrics endpoint by allowing only clients that have a valid Authorization Bearer token (%(default)s)",
     )
+    parser.add_argument(
+        "--translation-cache",
+        type=operator.methodcaller("split", ","),
+        default=DEFARGS['TRANSLATION_CACHE'],
+        metavar="",
+        help="Cache translation output for users with a particular API key (or 'all' to cache all translations)",
+    )
     parser.add_argument(
         "--url-prefix",
         default=DEFARGS['URL_PREFIX'],
diff --git a/libretranslate/storage.py b/libretranslate/storage.py
index 715315f..0a2fefc 100644
--- a/libretranslate/storage.py
+++ b/libretranslate/storage.py
@@ -63,7 +63,7 @@ class MemoryStorage(Storage):
             'ex': None if ex is None else time.time() + ex
         }
 
-    def get_str(self, key):
+    def get_str(self, key, raw=False):
         d = self.store.get(key, {'value': '', 'ex': None})
         if d['ex'] is None:
             return d['value']
@@ -138,12 +138,15 @@ class RedisStorage(Storage):
     def set_str(self, key, value, ex=None):
         self.conn.set(key, value, ex=ex)
 
-    def get_str(self, key):
+    def get_str(self, key, raw=False):
         v = self.conn.get(key)
         if v is None:
             return ""
         else:
-            return v.decode('utf-8')
+            if raw:
+                return v
+            else:
+                return v.decode('utf-8')
 
     def get_hash_int(self, ns, key):
         v = self.conn.hget(ns, key)
diff --git a/scripts/gunicorn_conf.py b/scripts/gunicorn_conf.py
index b1eacc7..591ca3f 100644
--- a/scripts/gunicorn_conf.py
+++ b/scripts/gunicorn_conf.py
@@ -40,8 +40,9 @@ def on_starting(server):
 
     args = get_args()
 
-    from libretranslate import flood, scheduler, secret, storage
+    from libretranslate import flood, scheduler, secret, storage, cache
     storage.setup(args.shared_storage)
+    cache.setup(args.translation_cache)
     scheduler.setup(args)
     flood.setup(args)
     secret.setup(args)
\ No newline at end of file