From d55ffcf46abd3a4fb40d26853dde76d24c7156cd Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 18 Jan 2026 10:13:33 +0530
Subject: [PATCH] Start work on replacing python gettext with native code version

Needed to use translations for both python and Qt without invoking the GIL when used from Qt.
---
 setup/extensions.json                       |   9 +-
 src/calibre/constants.py                    |   1 +
 src/calibre/utils/run_tests.py              |   2 +
 src/calibre/utils/translator/__init__.py    |   0
 src/calibre/utils/translator/main.cpp       | 108 +++++
 src/calibre/utils/translator/mo_parser.cpp  | 269 ++++++++++++
 src/calibre/utils/translator/mo_parser.h    |  91 ++++
 .../translator/plural_expression_parser.cpp | 405 ++++++++++++++++++
 .../translator/plural_expression_parser.h   |  94 ++++
 .../utils/translator/test_translator.py     |  32 ++
 10 files changed, 1010 insertions(+), 1 deletion(-)
 create mode 100644 src/calibre/utils/translator/__init__.py
 create mode 100644 src/calibre/utils/translator/main.cpp
 create mode 100644 src/calibre/utils/translator/mo_parser.cpp
 create mode 100644 src/calibre/utils/translator/mo_parser.h
 create mode 100644 src/calibre/utils/translator/plural_expression_parser.cpp
 create mode 100644 src/calibre/utils/translator/plural_expression_parser.h
 create mode 100644 src/calibre/utils/translator/test_translator.py

diff --git a/setup/extensions.json b/setup/extensions.json
index d16e03fa89..4454d6550e 100644
--- a/setup/extensions.json
+++ b/setup/extensions.json
@@ -176,11 +176,18 @@
     },
     {
         "name": "progress_indicator",
-        "sources": "calibre/gui2/progress_indicator/QProgressIndicator.cpp calibre/gui2/progress_indicator/CalibreStyle.cpp",
+        "sources": "calibre/gui2/progress_indicator/QProgressIndicator.cpp calibre/gui2/progress_indicator/CalibreStyle.cpp calibre/gui2/progress_indicator/MoTranslator.cpp calibre/gui2/progress_indicator/PluralExpressionParser.cpp",
         "headers": "calibre/gui2/progress_indicator/QProgressIndicator.h",
         "sip_files": "calibre/gui2/progress_indicator/QProgressIndicator.sip",
         "inc_dirs": "calibre/gui2/progress_indicator"
     },
+    {
+        "name": "translator",
+        "sources": "calibre/utils/translator/mo_parser.cpp calibre/utils/translator/plural_expression_parser.cpp calibre/utils/translator/main.cpp",
+        "headers": "calibre/utils/translator/mo_parser.h calibre/utils/translator/plural_expression_parser.h",
+        "inc_dirs": "calibre/utils/translator",
+        "needs_c++": "17"
+    },
     {
         "name": "imageops",
         "sources": "calibre/utils/imageops/imageops.cpp calibre/utils/imageops/quantize.cpp calibre/utils/imageops/ordered_dither.cpp",
diff --git a/src/calibre/constants.py b/src/calibre/constants.py
index 39a4a7bdfe..4b12554fe4 100644
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@@ -261,6 +261,7 @@ class ExtensionsImporter:
             'piper',
             'html_as_json',
             'fast_css_transform',
+            'translator',
             'fast_html_entities',
             'unicode_names',
             'html_syntax_highlighter',
diff --git a/src/calibre/utils/run_tests.py b/src/calibre/utils/run_tests.py
index 5a568739c6..7088c3e643 100644
--- a/src/calibre/utils/run_tests.py
+++ b/src/calibre/utils/run_tests.py
@@ -322,6 +322,8 @@ def find_tests(which_tests=None, exclude_tests=None):
             from calibre.utils.windows.wintest import find_tests
             a(find_tests())
         a(unittest.defaultTestLoader.loadTestsFromTestCase(TestImports))
+        from calibre.utils.translator.test_translator import find_tests
+        a(find_tests())
     if ok('dbcli'):
         from calibre.db.cli.tests import find_tests
         a(find_tests())
diff --git a/src/calibre/utils/translator/__init__.py b/src/calibre/utils/translator/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/calibre/utils/translator/main.cpp b/src/calibre/utils/translator/main.cpp
new file mode 100644
index 0000000000..6e6cca1c10
--- /dev/null
+++ b/src/calibre/utils/translator/main.cpp
@@ -0,0 +1,108 @@
+/*
+ * main.cpp
+ * Copyright (C) 2026 Kovid Goyal
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+#define PY_SSIZE_T_CLEAN
+#define UNICODE
+#define _UNICODE
+
+#include <Python.h>
+#include "mo_parser.h"
+
+// Python object wrapping a native MOParser. The parser is constructed and
+// destroyed explicitly because the storage is obtained from tp_alloc.
+typedef struct {
+    PyObject_HEAD
+
+    PyObject *fallback;
+    MOParser parser;
+} Translator;
+
+extern PyTypeObject Translator_Type;
+
+// Translator(mo_data=None): create a translator, optionally loading the
+// contents of a .mo file. Raises ValueError if the data cannot be parsed.
+static PyObject *
+new_translator(PyTypeObject *type, PyObject *args, PyObject *kwds) {
+    const char *mo_data = NULL; Py_ssize_t sz = 0;
+    if (!PyArg_ParseTuple(args, "|z#", &mo_data, &sz)) return NULL;
+    Translator *self = (Translator *)(&Translator_Type)->tp_alloc(&Translator_Type, 0);
+    if (self != NULL) {
+        new (&self->parser) MOParser();
+        if (mo_data != NULL) {
+            std::string err = self->parser.load(mo_data, sz);
+            if (err.size()) {
+                Py_CLEAR(self);
+                PyErr_SetString(PyExc_ValueError, err.c_str()); return NULL;
+            }
+        }
+    }
+    return (PyObject*) self;
+}
+
+static void
+dealloc_translator(Translator* self) {
+    Py_CLEAR(self->fallback);
+    self->parser.~MOParser();
+    Py_TYPE(self)->tp_free((PyObject*)self);
+}
+
+static PyObject*
+plural(PyObject *self_, PyObject *pn) {
+    if (!PyLong_Check(pn)) { PyErr_SetString(PyExc_TypeError, "n must be an integer"); return NULL; }
+    unsigned long n = PyLong_AsUnsignedLong(pn);
+    // (unsigned long)-1 together with a set exception signals a negative or too large n
+    if (n == (unsigned long)-1 && PyErr_Occurred()) return NULL;
+    Translator *self = (Translator*)self_;
+    return PyLong_FromUnsignedLong(self->parser.plural(n));
+}
+
+static PyMethodDef translator_methods[] = {
+    {"plural", plural, METH_O, "plural(n: int) -> int:\n\n"
+            "Get the message catalog index based on the plural form specification."
+    },
+    {NULL}  /* Sentinel */
+};
+
+PyTypeObject Translator_Type = {
+    .ob_base = PyVarObject_HEAD_INIT(NULL, 0)
+    .tp_name = "translator.Translator",
+    .tp_basicsize = sizeof(Translator),
+    .tp_dealloc = (destructor)dealloc_translator,
+    .tp_flags = Py_TPFLAGS_DEFAULT,
+    .tp_doc = "Translator",
+    .tp_methods = translator_methods,
+    .tp_new = new_translator,
+};
+
+
+static PyMethodDef methods[] = {
+    {NULL, NULL, 0, NULL}
+};
+
+static int
+exec_module(PyObject *m) {
+    if (PyType_Ready(&Translator_Type) < 0) return -1;
+    // PyModule_AddObject() steals a reference, but only on success
+    Py_INCREF(&Translator_Type);
+    if (PyModule_AddObject(m, "Translator", (PyObject *)&Translator_Type) != 0) {
+        Py_DECREF(&Translator_Type);
+        return -1;
+    }
+    return 0;
+}
+
+static PyModuleDef_Slot slots[] = { {Py_mod_exec, (void*)exec_module}, {0, NULL} };
+
+static struct PyModuleDef module_def = {PyModuleDef_HEAD_INIT};
+
+CALIBRE_MODINIT_FUNC PyInit_translator(void) {
+    module_def.m_name = "translator";
+    module_def.m_doc = "Support for GNU gettext translations without holding the GIL so that it can be used in Qt as well";
+    module_def.m_methods = methods;
+    module_def.m_slots = slots;
+    return PyModuleDef_Init(&module_def);
+}
diff --git a/src/calibre/utils/translator/mo_parser.cpp b/src/calibre/utils/translator/mo_parser.cpp
new file mode 100644
index 0000000000..c43d23f02c
--- /dev/null
+++ b/src/calibre/utils/translator/mo_parser.cpp
@@ -0,0 +1,269 @@
+#include <algorithm>
+#include <cctype>
+#include <charconv>
+#include <cstdlib>
+#include <cstring>
+#include "mo_parser.h"
+
+// Magic numbers for .mo files
+constexpr uint32_t MO_MAGIC_LE = 0x950412de;
+constexpr uint32_t MO_MAGIC_BE = 0xde120495;
+
+// Plural rule used when a catalog does not specify one
+static const char DEFAULT_PLURAL_EXPR[] = "n != 1";
+
+MOParser::MOParser() : swap_bytes_(false), loaded_(false), data(NULL), sz(0), num_plurals_(2), plural_expr_(DEFAULT_PLURAL_EXPR) {
+    // Make plural() usable even before a catalog is loaded
+    plural_parser_.parse(plural_expr_);
+}
+
+MOParser::~MOParser() {
+    std::free((void*)data); data = NULL;
+}
+
+uint32_t MOParser::swap32(uint32_t value) const {
+    return ((value & 0x000000FF) << 24) |
+           ((value & 0x0000FF00) << 8)  |
+           ((value & 0x00FF0000) >> 8)  |
+           ((value & 0xFF000000) >> 24);
+}
+
+// The magic number is read in host byte order, so seeing the byte-reversed
+// value means the file was written with the opposite endianness.
+bool MOParser::needsSwap(uint32_t magic) const {
+    return magic == MO_MAGIC_BE;
+}
+
+// Copy the .mo data and parse it. Returns an empty string on success and a
+// description of the problem otherwise. Any previously loaded catalog is discarded.
+std::string MOParser::load(const char *data, size_t sz) {
+    // The views in translations_ point into the old buffer, so drop them before freeing it
+    translations_.clear();
+    std::free((void*)this->data); this->data = NULL; this->sz = 0;
+    loaded_ = false;
+    num_plurals_ = 2;
+    plural_expr_ = DEFAULT_PLURAL_EXPR;
+    plural_parser_.parse(plural_expr_);
+    if (data == NULL) return ".mo data is missing";
+
+    char *copy = (char*)std::malloc(sz ? sz : 1);
+    if (copy == NULL) return "out of memory while copying .mo data";
+    std::memcpy(copy, data, sz);
+    this->data = copy;
+    this->sz = sz;
+    std::string err = parseHeader();
+    if (err.empty()) err = parseStrings();
+    if (err.empty()) loaded_ = true;
+    else translations_.clear();  // do not expose a partially parsed catalog
+    return err;
+}
+
+std::string MOParser::parseHeader() {
+    if (sz < sizeof(MOHeader)) return ".mo data too small (" + std::to_string(sz) + ")";
+
+    // Read magic number to determine endianness
+    uint32_t magic; std::memcpy(&magic, data, sizeof(uint32_t));
+
+    if (magic != MO_MAGIC_LE && magic != MO_MAGIC_BE) {
+        return ".mo data has unrecognised magic bytes";
+    }
+
+    swap_bytes_ = needsSwap(magic);
+
+    // Read header
+    std::memcpy(&header_, data, sizeof(MOHeader));
+
+    // Swap bytes if needed
+    if (swap_bytes_) {
+        header_.magic = swap32(header_.magic);
+        header_.revision = swap32(header_.revision);
+        header_.num_strings = swap32(header_.num_strings);
+        header_.offset_original = swap32(header_.offset_original);
+        header_.offset_translation = swap32(header_.offset_translation);
+        header_.hash_table_size = swap32(header_.hash_table_size);
+        header_.hash_table_offset = swap32(header_.hash_table_offset);
+    }
+
+    return "";
+}
+
+// True if the range [offset, offset + length) lies inside a buffer of total
+// bytes. Written without additions so that hostile values cannot overflow.
+static bool
+in_bounds(uint64_t offset, uint64_t length, size_t total) {
+    return offset <= total && length <= total - offset;
+}
+
+std::string MOParser::parseStrings() {
+    // Validate both descriptor tables up front so the loop below can read them unchecked
+    const uint64_t table_size = static_cast<uint64_t>(header_.num_strings) * sizeof(StringDescriptor);
+    if (!in_bounds(header_.offset_original, table_size, sz)) return ".mo data too small for string descriptor";
+    if (!in_bounds(header_.offset_translation, table_size, sz)) return ".mo data too small for translation string descriptor";
+    translations_.reserve(header_.num_strings);
+
+    for (uint32_t i = 0; i < header_.num_strings; ++i) {
+        const size_t desc_offset = i * sizeof(StringDescriptor);
+
+        // Read original string descriptor
+        StringDescriptor orig_desc;
+        std::memcpy(&orig_desc, data + header_.offset_original + desc_offset, sizeof(StringDescriptor));
+
+        if (swap_bytes_) {
+            orig_desc.length = swap32(orig_desc.length);
+            orig_desc.offset = swap32(orig_desc.offset);
+        }
+
+        // Read translation string descriptor
+        StringDescriptor trans_desc;
+        std::memcpy(&trans_desc, data + header_.offset_translation + desc_offset, sizeof(StringDescriptor));
+
+        if (swap_bytes_) {
+            trans_desc.length = swap32(trans_desc.length);
+            trans_desc.offset = swap32(trans_desc.offset);
+        }
+
+        // Read original string
+        if (!in_bounds(orig_desc.offset, orig_desc.length, sz)) return ".mo data too small for msgid";
+        std::string_view msgid(data + orig_desc.offset, orig_desc.length);
+
+        // Read translation string
+        if (!in_bounds(trans_desc.offset, trans_desc.length, sz)) return ".mo data too small for msg";
+        std::string_view msgstr(data + trans_desc.offset, trans_desc.length);
+
+        // First entry (empty msgid) contains metadata
+        if (msgid.empty() && i == 0) {
+            std::string err = parseMetadata(msgstr);
+            if (err.size()) return err;
+        } else translations_[msgid] = msgstr;
+    }
+
+    return "";
+}
+
+static bool
+starts_with(std::string_view sv, std::string_view prefix) {
+    return sv.size() >= prefix.size() &&
+        sv.substr(0, prefix.size()) == prefix;
+}
+
+// Parse a Plural-Forms header line, updating the number of plural forms and
+// the plural rule. Returns an empty string on success, otherwise an error message.
+std::string
+MOParser::parsePluralForms(std::string_view plural_forms_line) {
+    // Extract nplurals
+    size_t nplurals_pos = plural_forms_line.find("nplurals=");
+    if (nplurals_pos != std::string_view::npos) {
+        std::string_view digits = plural_forms_line.substr(nplurals_pos + 9);  // strlen("nplurals=")
+        while (!digits.empty() && (digits.front() == ' ' || digits.front() == '\t')) digits.remove_prefix(1);
+        // Unlike atoi(), from_chars() never reads past the end of the view
+        int count = 0;
+        auto res = std::from_chars(digits.data(), digits.data() + digits.size(), count);
+        if (res.ec == std::errc() && count > 0) num_plurals_ = count;
+    }
+
+    // Extract plural expression
+    size_t plural_pos = plural_forms_line.find("plural=");
+    if (plural_pos == std::string_view::npos) {
+        // No plural expression, use default
+        plural_parser_.parse(plural_expr_);
+        return "";
+    }
+    plural_pos += 7;  // strlen("plural=")
+    // The expression ends at the semicolon, or at the end of the line if there is none
+    std::string_view expr = plural_forms_line.substr(plural_pos, plural_forms_line.find(';', plural_pos) - plural_pos);
+
+    // Trim whitespace
+    size_t first = expr.find_first_not_of(" \t\r\n");
+    size_t last = expr.find_last_not_of(" \t\r\n");
+    if (first != std::string_view::npos) expr = expr.substr(first, last - first + 1);
+    plural_expr_ = std::string(expr);
+
+    // Parse the expression
+    if (!plural_parser_.parse(plural_expr_)) {
+        std::string err = "failed to parse plural forms expression: " + plural_expr_;
+        // Report the failure, but leave the parser usable with the default rule
+        plural_expr_ = DEFAULT_PLURAL_EXPR;
+        plural_parser_.parse(plural_expr_);
+        return err;
+    }
+    return "";
+}
+
+static std::string
+to_ascii_lower(std::string_view sv) {
+    std::string result;
+    result.resize(sv.size());
+    std::transform(sv.begin(), sv.end(), result.begin(),
+        [](unsigned char c) { return std::tolower(c); });
+    return result;
+}
+
+// Parse the catalog metadata (the translation of the empty msgid), one header
+// per line. Returns an empty string on success, otherwise an error message.
+std::string
+MOParser::parseMetadata(std::string_view header) {
+    bool found_plural_forms = false;
+    size_t start = 0;
+    while (start < header.size()) {
+        size_t end = header.find('\n', start);
+        if (end == std::string_view::npos) end = header.size();  // the last line need not end with a newline
+        std::string_view line = header.substr(start, end - start);
+        start = end + 1;
+        if (starts_with(line, "Plural-Forms:")) {
+            std::string err = parsePluralForms(line);
+            if (err.size()) return err;
+            found_plural_forms = true;
+        } else if (starts_with(line, "Content-Type:")) {
+            size_t ctpos = line.find("charset=");
+            if (ctpos != std::string_view::npos) {
+                std::string charset = to_ascii_lower(line.substr(ctpos + 8));  // strlen("charset=")
+                // Ignore trailing whitespace such as the \r of a CRLF line ending
+                charset.erase(charset.find_last_not_of(" \t\r") + 1);
+                if (charset != "utf8" && charset != "utf-8") {
+                    return "unsupported charset in .mo file: " + charset;
+                }
+            }
+        }
+    }
+    if (!found_plural_forms) plural_parser_.parse(plural_expr_);
+    return "";
+}
+
+std::string_view MOParser::gettext(std::string_view msgid) const {
+    auto it = translations_.find(msgid);
+    if (it != translations_.end() && !it->second.empty()) {
+        // Return first translation (before any null byte)
+        size_t null_pos = it->second.find('\0');
+        return (null_pos != std::string_view::npos) ? it->second.substr(0, null_pos) : it->second;
+    }
+    return msgid;  // Return original if no translation found
+}
+
+std::string_view MOParser::ngettext(std::string_view msgid, std::string_view msgid_plural, unsigned long n) const {
+    // Create composite key for plural forms (msgid\0msgid_plural)
+    std::string key = std::string(msgid) + '\0' + std::string(msgid_plural);
+
+    auto it = translations_.find(key);
+    if (it != translations_.end() && !it->second.empty()) {
+        // Determine which plural form to use
+        unsigned long plural_index = plural(n);
+
+        // Ensure index is within bounds
+        if (plural_index >= static_cast<unsigned long>(num_plurals_)) plural_index = num_plurals_ - 1;
+
+        // Walk the null separated forms. The last form has no trailing null
+        // byte, so it ends at the end of the string instead.
+        std::string_view forms = it->second;
+        size_t start = 0;
+        while (true) {
+            size_t pos = forms.find('\0', start);
+            if (plural_index == 0) return forms.substr(start, pos == std::string_view::npos ? pos : pos - start);
+            if (pos == std::string_view::npos) break;  // catalog has fewer forms than the rule asks for
+            start = pos + 1;
+            plural_index--;
+        }
+    }
+
+    // Fallback to English-style pluralization
+    return n == 1 ? msgid : msgid_plural;
+}
diff --git a/src/calibre/utils/translator/mo_parser.h b/src/calibre/utils/translator/mo_parser.h
new file mode 100644
index 0000000000..cb83618b0c
--- /dev/null
+++ b/src/calibre/utils/translator/mo_parser.h
@@ -0,0 +1,91 @@
+/*
+ * mo_parser.h
+ * Copyright (C) 2026 Kovid Goyal
+ *
+ * Distributed under terms of the GPL3 license.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include "plural_expression_parser.h"
+
+// Parser for GNU gettext .mo message catalogs. Keeps a private copy of the
+// catalog data; the translations it returns are views into that copy and are
+// invalidated when the parser is destroyed or load() is called again.
+class MOParser {
+public:
+    MOParser();
+    ~MOParser();
+
+    // The parser owns a malloc()ed buffer, so copying it would double free
+    MOParser(const MOParser&) = delete;
+    MOParser& operator=(const MOParser&) = delete;
+
+    // Load a .mo file. Returns an empty string on success, otherwise an error message
+    std::string load(const char *data, size_t sz);
+
+    // Get translation for a simple string
+    std::string_view gettext(std::string_view msgid) const;
+
+    // Get translation for plural forms
+    std::string_view ngettext(std::string_view msgid, const std::string_view msgid_plural, unsigned long n) const;
+
+    // Check if file is loaded
+    bool isLoaded() const { return loaded_; }
+
+    // Get the number of strings in the catalog
+    size_t size() const { return translations_.size(); }
+
+    // Get plural expression string (for debugging)
+    std::string getPluralExpression() const { return plural_expr_; }
+
+    // Get number of plural forms
+    int getNumPlurals() const { return num_plurals_; }
+
+    // Get plural message index
+    unsigned long plural(unsigned long n) const { return plural_parser_.evaluate(n); }
+
+private:
+    struct MOHeader {
+        uint32_t magic;
+        uint32_t revision;
+        uint32_t num_strings;
+        uint32_t offset_original;
+        uint32_t offset_translation;
+        uint32_t hash_table_size;
+        uint32_t hash_table_offset;
+    };
+
+    struct StringDescriptor {
+        uint32_t length;
+        uint32_t offset;
+    };
+
+    std::string parseHeader();
+    std::string parseStrings();
+    std::string parseMetadata(std::string_view header);
+    std::string parsePluralForms(std::string_view line);
+
+    uint32_t swap32(uint32_t value) const;
+    bool needsSwap(uint32_t magic) const;
+
+    MOHeader header_;
+    bool swap_bytes_;
+    bool loaded_;
+    const char *data;
+    size_t sz;
+
+    // Map from msgid to translation(s)
+    // For plural forms, translations are separated by null bytes
+    std::unordered_map<std::string_view, std::string_view> translations_;
+
+    // Plural forms support
+    int num_plurals_;
+    std::string plural_expr_;
+    PluralExpressionParser plural_parser_;
+};
diff --git a/src/calibre/utils/translator/plural_expression_parser.cpp b/src/calibre/utils/translator/plural_expression_parser.cpp
new file mode 100644
index 0000000000..bf584aaae4
--- /dev/null
+++ b/src/calibre/utils/translator/plural_expression_parser.cpp
@@ -0,0 +1,405 @@
+#include "plural_expression_parser.h"
+#include <cctype>
+#include <initializer_list>
+#include <utility>
+
+// AST Node implementations
+
+class NumberNode : public ASTNode {
+public:
+    explicit NumberNode(unsigned long value) : value_(value) {}
+    unsigned long evaluate(unsigned long) const override { return value_; }
+private:
+    unsigned long value_;
+};
+
+class VariableNode : public ASTNode {
+public:
+    unsigned long evaluate(unsigned long n) const override { return n; }
+};
+
+class BinaryOpNode : public ASTNode {
+public:
+    enum class Op { ADD, SUB, MUL, DIV, MOD, EQ, NE, LT, LE, GT, GE, AND, OR };
+
+    BinaryOpNode(Op op, std::unique_ptr<ASTNode> left, std::unique_ptr<ASTNode> right)
+        : op_(op), left_(std::move(left)), right_(std::move(right)) {}
+
+    unsigned long evaluate(unsigned long n) const override {
+        unsigned long left_val = left_->evaluate(n);
+        unsigned long right_val = right_->evaluate(n);
+
+        switch (op_) {
+            case Op::ADD: return left_val + right_val;
+            case Op::SUB: return left_val - right_val;
+            case Op::MUL: return left_val * right_val;
+            // Division by zero yields 0 rather than crashing on a bad catalog
+            case Op::DIV: return right_val != 0 ? left_val / right_val : 0;
+            case Op::MOD: return right_val != 0 ? left_val % right_val : 0;
+            case Op::EQ: return left_val == right_val ? 1 : 0;
+            case Op::NE: return left_val != right_val ? 1 : 0;
+            case Op::LT: return left_val < right_val ? 1 : 0;
+            case Op::LE: return left_val <= right_val ? 1 : 0;
+            case Op::GT: return left_val > right_val ? 1 : 0;
+            case Op::GE: return left_val >= right_val ? 1 : 0;
+            case Op::AND: return (left_val && right_val) ? 1 : 0;
+            case Op::OR: return (left_val || right_val) ? 1 : 0;
+        }
+        return 0;
+    }
+
+private:
+    Op op_;
+    std::unique_ptr<ASTNode> left_;
+    std::unique_ptr<ASTNode> right_;
+};
+
+class UnaryOpNode : public ASTNode {
+public:
+    enum class Op { NOT, NEG };
+
+    UnaryOpNode(Op op, std::unique_ptr<ASTNode> operand)
+        : op_(op), operand_(std::move(operand)) {}
+
+    unsigned long evaluate(unsigned long n) const override {
+        unsigned long val = operand_->evaluate(n);
+        switch (op_) {
+            case Op::NOT: return !val ? 1 : 0;
+            // Unsigned arithmetic: negation wraps around, as in C
+            case Op::NEG: return 0UL - val;
+        }
+        return 0;
+    }
+
+private:
+    Op op_;
+    std::unique_ptr<ASTNode> operand_;
+};
+
+class TernaryNode : public ASTNode {
+public:
+    TernaryNode(std::unique_ptr<ASTNode> condition,
+                std::unique_ptr<ASTNode> true_expr,
+                std::unique_ptr<ASTNode> false_expr)
+        : condition_(std::move(condition))
+        , true_expr_(std::move(true_expr))
+        , false_expr_(std::move(false_expr)) {}
+
+    unsigned long evaluate(unsigned long n) const override {
+        unsigned long cond = condition_->evaluate(n);
+        return cond ? true_expr_->evaluate(n) : false_expr_->evaluate(n);
+    }
+
+private:
+    std::unique_ptr<ASTNode> condition_;
+    std::unique_ptr<ASTNode> true_expr_;
+    std::unique_ptr<ASTNode> false_expr_;
+};
+
+namespace {
+
+// Pairs an operator token with the AST operation it produces
+struct LevelOp {
+    TokenType token;
+    BinaryOpNode::Op op;
+};
+
+// Parse one precedence level of left-associative binary operators:
+//     operand (op operand)*
+// match(token) consumes the next token if it is the given one and operand()
+// parses the next higher precedence level, returning nullptr on error.
+template <typename Match, typename Operand>
+std::unique_ptr<ASTNode>
+parse_left_assoc(std::initializer_list<LevelOp> ops, Match match, Operand operand) {
+    auto left = operand();
+    while (left) {
+        const LevelOp *found = nullptr;
+        for (const LevelOp &candidate : ops) {
+            if (match(candidate.token)) { found = &candidate; break; }
+        }
+        if (!found) break;
+        auto right = operand();
+        if (!right) return nullptr;
+        left = std::make_unique<BinaryOpNode>(found->op, std::move(left), std::move(right));
+    }
+    return left;
+}
+
+}  // namespace
+
+// PluralExpressionParser implementation
+
+PluralExpressionParser::PluralExpressionParser()
+    : current_(0)
+    , has_error_(false) {
+}
+
+PluralExpressionParser::~PluralExpressionParser() {
+}
+
+// Split expr into tokens, always terminated by an END token. Characters that
+// are not part of the expression language are reported via setError().
+std::vector<Token> PluralExpressionParser::tokenize(const std::string& expr) {
+    static const struct { char first, second; TokenType type; } two_char_ops[] = {
+        {'=', '=', TokenType::EQUAL}, {'!', '=', TokenType::NOT_EQUAL},
+        {'<', '=', TokenType::LESS_EQUAL}, {'>', '=', TokenType::GREATER_EQUAL},
+        {'&', '&', TokenType::AND}, {'|', '|', TokenType::OR},
+    };
+    std::vector<Token> tokens;
+    size_t i = 0;
+
+    while (i < expr.length()) {
+        // The <cctype> functions are only defined for unsigned char values
+        const unsigned char c = static_cast<unsigned char>(expr[i]);
+
+        // Skip whitespace
+        if (std::isspace(c)) {
+            i++;
+            continue;
+        }
+
+        // Numbers
+        if (std::isdigit(c)) {
+            unsigned long value = 0;
+            while (i < expr.length() && std::isdigit(static_cast<unsigned char>(expr[i]))) {
+                value = value * 10 + (expr[i] - '0');
+                i++;
+            }
+            tokens.emplace_back(TokenType::NUMBER, value);
+            continue;
+        }
+
+        // Variable 'n'
+        if (c == 'n') {
+            tokens.emplace_back(TokenType::VARIABLE);
+            i++;
+            continue;
+        }
+
+        // Two-character operators
+        bool matched = false;
+        if (i + 1 < expr.length()) {
+            for (const auto &op : two_char_ops) {
+                if (expr[i] == op.first && expr[i + 1] == op.second) {
+                    tokens.emplace_back(op.type);
+                    i += 2;
+                    matched = true;
+                    break;
+                }
+            }
+        }
+        if (matched) continue;
+
+        // Single-character operators
+        switch (c) {
+            case '+': tokens.emplace_back(TokenType::PLUS); break;
+            case '-': tokens.emplace_back(TokenType::MINUS); break;
+            case '*': tokens.emplace_back(TokenType::MULTIPLY); break;
+            case '/': tokens.emplace_back(TokenType::DIVIDE); break;
+            case '%': tokens.emplace_back(TokenType::MODULO); break;
+            case '<': tokens.emplace_back(TokenType::LESS); break;
+            case '>': tokens.emplace_back(TokenType::GREATER); break;
+            case '!': tokens.emplace_back(TokenType::NOT); break;
+            case '?': tokens.emplace_back(TokenType::QUESTION); break;
+            case ':': tokens.emplace_back(TokenType::COLON); break;
+            case '(': tokens.emplace_back(TokenType::LPAREN); break;
+            case ')': tokens.emplace_back(TokenType::RPAREN); break;
+            default:
+                // Silently dropping unknown characters would turn a malformed
+                // expression into a different, valid looking one
+                setError("Unexpected character in expression");
+                break;
+        }
+        i++;
+    }
+
+    tokens.emplace_back(TokenType::END);
+    return tokens;
+}
+
+bool PluralExpressionParser::parse(const std::string& expression) {
+    current_ = 0;
+    has_error_ = false;
+    error_message_.clear();
+    root_ = nullptr;
+
+    tokens_ = tokenize(expression);
+    if (!has_error_) {
+        root_ = parseExpression();
+        // The whole input must be consumed, otherwise "n 1" would be accepted as "n"
+        if (root_ && !check(TokenType::END)) setError("Unexpected token after end of expression");
+    }
+    if (has_error_) root_ = nullptr;
+    return root_ != nullptr;
+}
+
+unsigned long PluralExpressionParser::evaluate(unsigned long n) const {
+    if (!root_) {
+        return 0;
+    }
+    return root_->evaluate(n);
+}
+
+Token PluralExpressionParser::peek() const {
+    if (current_ < tokens_.size()) {
+        return tokens_[current_];
+    }
+    return Token(TokenType::END);
+}
+
+Token PluralExpressionParser::consume() {
+    if (current_ < tokens_.size()) {
+        return tokens_[current_++];
+    }
+    return Token(TokenType::END);
+}
+
+bool PluralExpressionParser::match(TokenType type) {
+    if (check(type)) {
+        consume();
+        return true;
+    }
+    return false;
+}
+
+bool PluralExpressionParser::check(TokenType type) const {
+    return peek().type == type;
+}
+
+void PluralExpressionParser::setError(const std::string& message) {
+    has_error_ = true;
+    error_message_ = message;
+}
+
+std::unique_ptr<ASTNode> PluralExpressionParser::parseExpression() {
+    return parseTernary();
+}
+
+std::unique_ptr<ASTNode> PluralExpressionParser::parseTernary() {
+    auto expr = parseLogicalOr();
+    if (!expr) {
+        return nullptr;
+    }
+
+    if (match(TokenType::QUESTION)) {
+        auto true_expr = parseExpression();
+        if (!true_expr) {
+            return nullptr;
+        }
+
+        if (!match(TokenType::COLON)) {
+            setError("Expected ':' in ternary expression");
+            return nullptr;
+        }
+
+        auto false_expr = parseTernary();
+        if (!false_expr) {
+            return nullptr;
+        }
+
+        return std::make_unique<TernaryNode>(std::move(expr),
+                                             std::move(true_expr),
+                                             std::move(false_expr));
+    }
+
+    return expr;
+}
+
+std::unique_ptr<ASTNode> PluralExpressionParser::parseLogicalOr() {
+    return parse_left_assoc(
+        {{TokenType::OR, BinaryOpNode::Op::OR}},
+        [this](TokenType type) { return match(type); },
+        [this] { return parseLogicalAnd(); });
+}
+
+std::unique_ptr<ASTNode> PluralExpressionParser::parseLogicalAnd() {
+    return parse_left_assoc(
+        {{TokenType::AND, BinaryOpNode::Op::AND}},
+        [this](TokenType type) { return match(type); },
+        [this] { return parseEquality(); });
+}
+
+std::unique_ptr<ASTNode> PluralExpressionParser::parseEquality() {
+    return parse_left_assoc(
+        {{TokenType::EQUAL, BinaryOpNode::Op::EQ},
+         {TokenType::NOT_EQUAL, BinaryOpNode::Op::NE}},
+        [this](TokenType type) { return match(type); },
+        [this] { return parseRelational(); });
+}
+
+std::unique_ptr<ASTNode> PluralExpressionParser::parseRelational() {
+    return parse_left_assoc(
+        {{TokenType::LESS, BinaryOpNode::Op::LT},
+         {TokenType::LESS_EQUAL, BinaryOpNode::Op::LE},
+         {TokenType::GREATER, BinaryOpNode::Op::GT},
+         {TokenType::GREATER_EQUAL, BinaryOpNode::Op::GE}},
+        [this](TokenType type) { return match(type); },
+        [this] { return parseAdditive(); });
+}
+
+std::unique_ptr<ASTNode> PluralExpressionParser::parseAdditive() {
+    return parse_left_assoc(
+        {{TokenType::PLUS, BinaryOpNode::Op::ADD},
+         {TokenType::MINUS, BinaryOpNode::Op::SUB}},
+        [this](TokenType type) { return match(type); },
+        [this] { return parseMultiplicative(); });
+}
+
+std::unique_ptr<ASTNode> PluralExpressionParser::parseMultiplicative() {
+    return parse_left_assoc(
+        {{TokenType::MULTIPLY, BinaryOpNode::Op::MUL},
+         {TokenType::DIVIDE, BinaryOpNode::Op::DIV},
+         {TokenType::MODULO, BinaryOpNode::Op::MOD}},
+        [this](TokenType type) { return match(type); },
+        [this] { return parseUnary(); });
+}
+
+std::unique_ptr<ASTNode> PluralExpressionParser::parseUnary() {
+    if (match(TokenType::NOT)) {
+        auto operand = parseUnary();
+        if (!operand) {
+            return nullptr;
+        }
+        return std::make_unique<UnaryOpNode>(UnaryOpNode::Op::NOT, std::move(operand));
+    }
+
+    if (match(TokenType::MINUS)) {
+        auto operand = parseUnary();
+        if (!operand) {
+            return nullptr;
+        }
+        return std::make_unique<UnaryOpNode>(UnaryOpNode::Op::NEG, std::move(operand));
+    }
+
+    return parsePrimary();
+}
+
+std::unique_ptr<ASTNode> PluralExpressionParser::parsePrimary() {
+    // Number
+    if (check(TokenType::NUMBER)) {
+        Token tok = consume();
+        return std::make_unique<NumberNode>(tok.value);
+    }
+
+    // Variable 'n'
+    if (match(TokenType::VARIABLE)) {
+        return std::make_unique<VariableNode>();
+    }
+
+    // Parenthesized expression
+    if (match(TokenType::LPAREN)) {
+        auto expr = parseExpression();
+        if (!expr) {
+            return nullptr;
+        }
+
+        if (!match(TokenType::RPAREN)) {
+            setError("Expected ')' after expression");
+            return nullptr;
+        }
+        return expr;
+    }
+
+    setError("Unexpected token in expression");
+    return nullptr;
+}
diff --git a/src/calibre/utils/translator/plural_expression_parser.h b/src/calibre/utils/translator/plural_expression_parser.h
new file mode 100644
index 0000000000..7e06795e1c
--- /dev/null
+++ b/src/calibre/utils/translator/plural_expression_parser.h
@@ -0,0 +1,94 @@
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+// Token types for the plural expression language
+enum class TokenType {
+    NUMBER,
+    VARIABLE,      // 'n'
+    PLUS,          // +
+    MINUS,         // -
+    MULTIPLY,      // *
+    DIVIDE,        // /
+    MODULO,        // %
+    EQUAL,         // ==
+    NOT_EQUAL,     // !=
+    LESS,          // <
+    LESS_EQUAL,    // <=
+    GREATER,       // >
+    GREATER_EQUAL, // >=
+    AND,           // &&
+    OR,            // ||
+    NOT,           // !
+    QUESTION,      // ?
+    COLON,         // :
+    LPAREN,        // (
+    RPAREN,        // )
+    END
+};
+
+struct Token {
+    TokenType type;
+    unsigned long value;  // For NUMBER tokens
+
+    Token(TokenType t, unsigned long v = 0) : type(t), value(v) {}
+};
+
+// Abstract syntax tree node
+class ASTNode {
+public:
+    virtual ~ASTNode() = default;
+    virtual unsigned long evaluate(unsigned long n) const = 0;
+};
+
+// Parses and evaluates gettext Plural-Forms expressions, a C-like expression
+// language over the single variable n, using unsigned long arithmetic.
+// After a failed parse() evaluate() returns 0 for every n.
+class PluralExpressionParser {
+public:
+    PluralExpressionParser();
+    ~PluralExpressionParser();
+
+    // Parse a plural expression string. Returns false (see getError()) if it is malformed
+    bool parse(const std::string& expression);
+
+    // Evaluate the parsed expression for a given n
+    unsigned long evaluate(unsigned long n) const;
+
+    // Check if expression is valid
+    bool isValid() const { return root_ != nullptr && !has_error_; }
+
+    // Get error message if parsing failed
+    const std::string& getError() const { return error_message_; }
+
+private:
+    // Tokenizer
+    std::vector<Token> tokenize(const std::string& expr);
+
+    // Recursive descent parser (returns nullptr on error)
+    std::unique_ptr<ASTNode> parseExpression();
+    std::unique_ptr<ASTNode> parseTernary();
+    std::unique_ptr<ASTNode> parseLogicalOr();
+    std::unique_ptr<ASTNode> parseLogicalAnd();
+    std::unique_ptr<ASTNode> parseEquality();
+    std::unique_ptr<ASTNode> parseRelational();
+    std::unique_ptr<ASTNode> parseAdditive();
+    std::unique_ptr<ASTNode> parseMultiplicative();
+    std::unique_ptr<ASTNode> parseUnary();
+    std::unique_ptr<ASTNode> parsePrimary();
+
+    // Helper methods
+    Token peek() const;
+    Token consume();
+    bool match(TokenType type);
+    bool check(TokenType type) const;
+    void setError(const std::string& message);
+
+    std::vector<Token> tokens_;
+    size_t current_;
+    std::unique_ptr<ASTNode> root_;
+    bool has_error_;
+    std::string error_message_;
+};
diff --git a/src/calibre/utils/translator/test_translator.py b/src/calibre/utils/translator/test_translator.py
new file mode 100644
index 0000000000..587bd75b6f
--- /dev/null
+++ b/src/calibre/utils/translator/test_translator.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+# License: GPLv3 Copyright: 2026, Kovid Goyal
+
+import gettext
+import io
+import unittest
+import zipfile
+
+from calibre.utils.localization import available_translations, get_lc_messages_path
+from calibre_extensions.translator import Translator
+
+
+class TestTranslator(unittest.TestCase):
+
+    def test_translator(self):
+        with zipfile.ZipFile(P('localization/locales.zip', allow_user_override=False), 'r') as zf:
+            for lang in available_translations():
+                mpath = get_lc_messages_path(lang)
+                if mpath is not None:
+                    data = zf.read(mpath + '/messages.mo')
+                    test_translator(self, lang, data)
+
+
+def test_translator(self: TestTranslator, lang: str, data: bytes) -> None:
+    n = Translator(data)
+    o = gettext.GNUTranslations(io.BytesIO(data))
+    for i in range(100):  # start at 0: languages differ on whether zero is singular or plural
+        self.assertEqual(o.plural(i), n.plural(i), f'plural() not equal for language: {lang}')
+
+
+def find_tests():
+    return unittest.defaultTestLoader.loadTestsFromTestCase(TestTranslator)