Start work on replacing python gettext with native code version

Needed to use translations for both python and Qt without invoking the
GIL when used from Qt.
This commit is contained in:
Kovid Goyal 2026-01-18 10:13:33 +05:30
parent 4eb9d9eca2
commit d55ffcf46a
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
10 changed files with 1052 additions and 1 deletions

View File

@ -176,11 +176,18 @@
},
{
"name": "progress_indicator",
"sources": "calibre/gui2/progress_indicator/QProgressIndicator.cpp calibre/gui2/progress_indicator/CalibreStyle.cpp",
"sources": "calibre/gui2/progress_indicator/QProgressIndicator.cpp calibre/gui2/progress_indicator/CalibreStyle.cpp calibre/gui2/progress_indicator/MoTranslator.cpp calibre/gui2/progress_indicator/PluralExpressionParser.cpp",
"headers": "calibre/gui2/progress_indicator/QProgressIndicator.h",
"sip_files": "calibre/gui2/progress_indicator/QProgressIndicator.sip",
"inc_dirs": "calibre/gui2/progress_indicator"
},
{
"name": "translator",
"sources": "calibre/utils/translator/mo_parser.cpp calibre/utils/translator/plural_expression_parser.cpp calibre/utils/translator/main.cpp",
"headers": "calibre/utils/translator/mo_translator.h calibre/utils/translator/plural_expression_parser.h",
"inc_dirs": "calibre/utils/translator",
"needs_c++": "17"
},
{
"name": "imageops",
"sources": "calibre/utils/imageops/imageops.cpp calibre/utils/imageops/quantize.cpp calibre/utils/imageops/ordered_dither.cpp",

View File

@ -261,6 +261,7 @@ class ExtensionsImporter:
'piper',
'html_as_json',
'fast_css_transform',
'translator',
'fast_html_entities',
'unicode_names',
'html_syntax_highlighter',

View File

@ -322,6 +322,8 @@ def find_tests(which_tests=None, exclude_tests=None):
from calibre.utils.windows.wintest import find_tests
a(find_tests())
a(unittest.defaultTestLoader.loadTestsFromTestCase(TestImports))
from calibre.utils.translator.test_translator import find_tests
a(find_tests())
if ok('dbcli'):
from calibre.db.cli.tests import find_tests
a(find_tests())

View File

View File

@ -0,0 +1,98 @@
/*
* main.cpp
* Copyright (C) 2026 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#define PY_SSIZE_T_CLEAN
#define UNICODE
#define _UNICODE
#include <Python.h>
#include "mo_parser.h"
// Instance layout of the Python-visible Translator type: the standard CPython
// object header followed by the native state owned by the instance.
typedef struct {
PyObject_HEAD
// Python object reference owned by the instance; cleared in dealloc_translator().
PyObject *fallback;
// Native .mo catalog. Constructed with placement new in new_translator() and
// destroyed by an explicit destructor call in dealloc_translator().
MOParser parser;
} Translator;
// Forward declaration: the type object is defined after the method table it uses.
extern PyTypeObject Translator_Type;
// tp_new for Translator: Translator(mo_data: bytes | str | None = None).
//
// Allocates the instance, constructs the embedded MOParser in place and, when
// catalog data is supplied, loads it. Returns a new reference, or NULL with a
// Python exception set:
//   - ValueError   if the parser rejects the data (message comes from load()),
//   - MemoryError  if loading runs out of memory,
//   - RuntimeError for any other C++ exception raised while loading.
static PyObject *
new_translator(PyTypeObject *type, PyObject *args, PyObject *kwds) {
    const char *mo_data = NULL; Py_ssize_t sz = 0;
    if (!PyArg_ParseTuple(args, "|z#", &mo_data, &sz)) return NULL;
    Translator *self = (Translator *)(&Translator_Type)->tp_alloc(&Translator_Type, 0);
    if (self == NULL) return NULL;
    new (&self->parser) MOParser();
    if (mo_data == NULL) return (PyObject*) self;

    // C++ exceptions must never unwind through the interpreter's C frames, so
    // anything thrown while loading is translated into a Python exception here.
    try {
        const std::string err = self->parser.load(mo_data, sz);
        if (err.size()) {
            Py_CLEAR(self);
            PyErr_SetString(PyExc_ValueError, err.c_str());
            return NULL;
        }
    } catch (const std::bad_alloc&) {
        Py_CLEAR(self);
        PyErr_NoMemory();
        return NULL;
    } catch (...) {
        Py_CLEAR(self);
        PyErr_SetString(PyExc_RuntimeError, "unexpected C++ exception while loading .mo data");
        return NULL;
    }
    return (PyObject*) self;
}
// tp_dealloc for Translator: drops the fallback reference, runs the destructor
// of the in-place constructed parser, then hands the memory back to the type's
// tp_free.
static void
dealloc_translator(Translator* self) {
    Py_CLEAR(self->fallback);
    // The parser was created with placement new, so it is destroyed explicitly.
    self->parser.~MOParser();
    PyObject *const as_object = (PyObject*)self;
    Py_TYPE(self)->tp_free(as_object);
}
// Translator.plural(n): evaluate the catalog's plural rule for the count n and
// return the resulting message index as a Python int.
//
// Raises TypeError when n is not an int. Negative values and values that do
// not fit in an unsigned long raise the OverflowError set by
// PyLong_AsUnsignedLong().
static PyObject*
plural(PyObject *self_, PyObject *pn) {
    if (!PyLong_Check(pn)) { PyErr_SetString(PyExc_TypeError, "n must be an integer"); return NULL; }
    const unsigned long n = PyLong_AsUnsignedLong(pn);
    // (unsigned long)-1 is also a legitimate value, so the failure case is
    // only recognisable by the pending exception.
    if (n == (unsigned long)-1 && PyErr_Occurred()) return NULL;
    Translator *self = (Translator*)self_;
    return PyLong_FromUnsignedLong(self->parser.plural(n));
}
// Method table of the Translator type.
static PyMethodDef translator_methods[] = {
    {
        "plural",
        plural,
        METH_O,
        "plural(n: int) -> int:\n\n"
        "Get the message catalog index based on the plural form specification."
    },
    {NULL, NULL, 0, NULL}  /* Sentinel */
};
// Static type object for translator.Translator. Instances are created by
// new_translator() and torn down by dealloc_translator(); the only methods
// exposed are those listed in translator_methods.
PyTypeObject Translator_Type = {
// No trailing comma here: the line relies on the macro expansion to supply it.
.ob_base = PyVarObject_HEAD_INIT(NULL, 0)
.tp_name = "translator.Translator",
.tp_basicsize = sizeof(Translator),
.tp_dealloc = (destructor)dealloc_translator,
.tp_flags = Py_TPFLAGS_DEFAULT,
.tp_doc = "Translator",
.tp_methods = translator_methods,
.tp_new = new_translator,
};
// Module-level function table: the module exposes no free functions, so the
// table holds only the terminating sentinel entry.
static PyMethodDef methods[] = {
{NULL, NULL, 0, NULL}
};
// Py_mod_exec slot: finalises the Translator type and publishes it on the
// module as "Translator". Returns 0 on success and -1 on failure.
static int
exec_module(PyObject *m) {
    if (PyType_Ready(&Translator_Type) < 0) return -1;
    // PyModule_AddObject() takes over one reference on success only, so take
    // that reference up front and give it back if the call fails.
    Py_INCREF(&Translator_Type);
    if (PyModule_AddObject(m, "Translator", (PyObject *)&Translator_Type) != 0) {
        Py_DECREF(&Translator_Type);
        return -1;
    }
    return 0;
}
// Multi-phase initialisation slots: exec_module() populates the module.
static PyModuleDef_Slot slots[] = {
    {Py_mod_exec, (void*)exec_module},
    {0, NULL}
};

// Module definition; the remaining fields are filled in by PyInit_translator().
static struct PyModuleDef module_def = {PyModuleDef_HEAD_INIT};

// Module entry point: completes the module definition and hands it to the
// interpreter for multi-phase initialisation.
CALIBRE_MODINIT_FUNC PyInit_translator(void) {
    module_def.m_slots = slots;
    module_def.m_methods = methods;
    module_def.m_doc = "Support for GNU gettext translations without holding the GIL so that it can be used in Qt as well";
    module_def.m_name = "translator";
    return PyModuleDef_Init(&module_def);
}

View File

@ -0,0 +1,230 @@
#include <algorithm>
#include <cctype>
#include <charconv>
#include <cstdlib>
#include <cstring>
#include "mo_parser.h"
// Magic numbers for .mo files.
// The first 32-bit word of the data is compared against both values:
// MO_MAGIC_LE means the file can be read as-is, MO_MAGIC_BE is the same
// number with its bytes reversed and means every 32-bit field must be
// byte-swapped before use (see needsSwap()).
constexpr uint32_t MO_MAGIC_LE = 0x950412de;
constexpr uint32_t MO_MAGIC_BE = 0xde120495;
// Creates an empty, unloaded parser. Until a catalog says otherwise the
// plural rule is the two-form "n != 1".
MOParser::MOParser()
    : swap_bytes_(false),
      loaded_(false),
      data(nullptr),
      sz(0),
      num_plurals_(2),
      plural_expr_("n != 1") {
}
// Releases the private copy of the catalog made by load().
MOParser::~MOParser() {
    void *const owned = (void*)data;
    data = nullptr;
    std::free(owned);
}
// Returns `value` with the order of its four bytes reversed.
uint32_t MOParser::swap32(uint32_t value) const {
    const uint32_t byte0 = value & 0xFFu;
    const uint32_t byte1 = (value >> 8) & 0xFFu;
    const uint32_t byte2 = (value >> 16) & 0xFFu;
    const uint32_t byte3 = (value >> 24) & 0xFFu;
    return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
// True when `magic` is the byte-reversed magic number, i.e. the 32-bit fields
// of the file have to be byte-swapped before use.
bool MOParser::needsSwap(uint32_t magic) const {
    const bool reversed = (MO_MAGIC_BE == magic);
    return reversed;
}
std::string MOParser::load(const char *data, size_t sz) {
char *copy = (char*)std::malloc(sz);
std::memcpy(copy, data, sz);
this->data = copy;
this->sz = sz;
std::string err = "";
err = parseHeader();
if (err.size()) return err;
err = parseStrings();
if (err.size()) return err;
loaded_ = true;
return err;
}
// Validates and reads the fixed-size header at the start of the buffer.
// Decides from the magic number whether 32-bit fields need byte swapping and
// stores the header with all fields already converted.
// Returns an empty string on success, otherwise an error message.
std::string MOParser::parseHeader() {
    if (sz < sizeof(MOHeader)) return ".mo data too small (" + std::to_string(sz) + ")";

    // The magic number alone tells us the byte order of the file.
    uint32_t magic;
    std::memcpy(&magic, data, sizeof(uint32_t));
    const bool recognised = (magic == MO_MAGIC_LE) || (magic == MO_MAGIC_BE);
    if (!recognised) {
        return ".mo data has unrecognised magic bytes";
    }
    swap_bytes_ = needsSwap(magic);

    std::memcpy(&header_, data, sizeof(MOHeader));
    if (!swap_bytes_) return "";

    // Convert every header field in one pass.
    uint32_t *const fields[] = {
        &header_.magic,
        &header_.revision,
        &header_.num_strings,
        &header_.offset_original,
        &header_.offset_translation,
        &header_.hash_table_size,
        &header_.hash_table_offset,
    };
    for (uint32_t *field : fields) *field = swap32(*field);
    return "";
}
std::string MOParser::parseStrings() {
for (uint32_t i = 0; i < header_.num_strings; ++i) {
// Read original string descriptor
size_t orig_desc_offset = header_.offset_original + i * sizeof(StringDescriptor);
if (orig_desc_offset + sizeof(StringDescriptor) > sz) return ".mo data too small for string descriptor";
StringDescriptor orig_desc;
std::memcpy(&orig_desc, data + orig_desc_offset, sizeof(StringDescriptor));
if (swap_bytes_) {
orig_desc.length = swap32(orig_desc.length);
orig_desc.offset = swap32(orig_desc.offset);
}
// Read translation string descriptor
size_t trans_desc_offset = header_.offset_translation + i * sizeof(StringDescriptor);
if (trans_desc_offset + sizeof(StringDescriptor) > sz) return ".mo data too small for translation string descriptor";
StringDescriptor trans_desc;
std::memcpy(&trans_desc, data + trans_desc_offset, sizeof(StringDescriptor));
if (swap_bytes_) {
trans_desc.length = swap32(trans_desc.length);
trans_desc.offset = swap32(trans_desc.offset);
}
// Read original string
if (orig_desc.offset + orig_desc.length > sz) return ".mo data too small for msgid";
std::string_view msgid(data + orig_desc.offset, orig_desc.length);
// Read translation string
if (trans_desc.offset + trans_desc.length > sz) return ".mo data too small for msg";
std::string_view msgstr(data + trans_desc.offset, trans_desc.length);
// First entry (empty msgid) contains metadata
if (msgid.empty() && i == 0) {
std::string err = parseMetadata(msgstr);
if (err.size()) return err;
} else translations_[msgid] = msgstr;
}
return "";
}
// True when `sv` begins with `prefix`; an empty prefix always matches.
static bool
starts_with(std::string_view sv, std::string_view prefix) {
    if (prefix.size() > sv.size()) return false;
    return sv.compare(0, prefix.size(), prefix) == 0;
}
// Parses a "Plural-Forms:" metadata line such as
//     Plural-Forms: nplurals=2; plural=(n != 1);
// and updates num_plurals_, plural_expr_ and the compiled plural parser.
//
// - nplurals is read strictly within the bounds of the line; a missing,
//   non-numeric or non-positive count leaves num_plurals_ unchanged.
// - The plural expression runs up to the next ';' or, when the terminating
//   ';' is absent, to the end of the line.
// - When the line has no "plural=" part the current default expression is
//   compiled instead.
// Returns an empty string on success, otherwise an error message.
std::string
MOParser::parsePluralForms(std::string_view plural_forms_line) {
    static constexpr std::string_view nplurals_key = "nplurals=";
    static constexpr std::string_view plural_key = "plural=";
    static constexpr std::string_view whitespace = " \t\r\n";

    // Extract nplurals
    const size_t nplurals_pos = plural_forms_line.find(nplurals_key);
    if (nplurals_pos != std::string_view::npos) {
        std::string_view digits = plural_forms_line.substr(nplurals_pos + nplurals_key.size());
        const size_t first_digit = digits.find_first_not_of(" \t");
        if (first_digit != std::string_view::npos) {
            digits.remove_prefix(first_digit);
            int count = 0;
            // from_chars never reads past the end of the view and reports
            // overflow instead of invoking undefined behaviour.
            const auto parsed = std::from_chars(digits.data(), digits.data() + digits.size(), count);
            if (parsed.ec == std::errc() && count > 0) num_plurals_ = count;
        }
    }

    // Extract plural expression
    const size_t plural_pos = plural_forms_line.find(plural_key);
    if (plural_pos == std::string_view::npos) {
        // No plural expression, use default
        plural_parser_.parse(plural_expr_);
        return "";
    }
    std::string_view expr = plural_forms_line.substr(plural_pos + plural_key.size());
    // find() yielding npos makes substr() keep the whole remainder, which is
    // exactly what is wanted when the trailing ';' is missing.
    expr = expr.substr(0, expr.find(';'));

    // Trim whitespace
    const size_t first = expr.find_first_not_of(whitespace);
    if (first != std::string_view::npos) {
        const size_t last = expr.find_last_not_of(whitespace);
        expr = expr.substr(first, last - first + 1);
    }
    plural_expr_ = std::string(expr);

    // Parse the expression
    if (!plural_parser_.parse(plural_expr_)) {
        return std::string("failed to parse plural forms expresion: " + plural_expr_);
    }
    return "";
}
// Returns a copy of `sv` with every character passed through std::tolower.
static std::string
to_ascii_lower(std::string_view sv) {
    std::string lowered;
    lowered.reserve(sv.size());
    for (const char ch : sv) {
        // tolower() requires a value representable as unsigned char.
        const unsigned char as_unsigned = static_cast<unsigned char>(ch);
        lowered.push_back(static_cast<char>(std::tolower(as_unsigned)));
    }
    return lowered;
}
// Parses the catalog metadata (the translation of the empty msgid), one
// "Key: value" line at a time.
//
// - "Plural-Forms:" lines are passed to parsePluralForms().
// - "Content-Type:" lines are checked for a charset; anything other than
//   UTF-8 is rejected. Surrounding whitespace (including the '\r' of CRLF
//   line endings) is ignored when comparing the charset.
// - A final line that is not terminated by '\n' is processed as well.
// When no Plural-Forms line is present the default plural expression is
// compiled. Returns an empty string on success, otherwise an error message.
std::string
MOParser::parseMetadata(std::string_view header) {
    bool found_plural_forms = false;

    // Handles a single metadata line; returns an error message or "".
    const auto process_line = [&](std::string_view line) -> std::string {
        if (starts_with(line, "Plural-Forms:")) {
            std::string err = parsePluralForms(line);
            if (err.size()) return err;
            found_plural_forms = true;
        } else if (starts_with(line, "Content-Type:")) {
            static constexpr std::string_view charset_key = "charset=";
            const size_t ctpos = line.find(charset_key);
            if (ctpos != std::string_view::npos) {
                std::string_view value = line.substr(ctpos + charset_key.size());
                const size_t last = value.find_last_not_of(" \t\r");
                value = (last == std::string_view::npos) ? std::string_view() : value.substr(0, last + 1);
                const size_t first = value.find_first_not_of(" \t");
                if (first != std::string_view::npos) value.remove_prefix(first);
                const std::string charset = to_ascii_lower(value);
                if (charset != "utf8" && charset != "utf-8") {
                    return "unsupported charset in .mo file: " + charset;
                }
            }
        }
        return "";
    };

    size_t start = 0;
    while (start < header.size()) {
        const size_t newline = header.find('\n', start);
        if (newline == std::string_view::npos) break;
        std::string err = process_line(header.substr(start, newline - start));
        if (err.size()) return err;
        start = newline + 1;
    }
    if (start < header.size()) {
        // Unterminated last line. Work on an owned, NUL-terminated copy:
        // unlike the earlier lines, nothing inside the metadata block is
        // guaranteed to follow this one.
        const std::string tail(header.substr(start));
        std::string err = process_line(tail);
        if (err.size()) return err;
    }

    if (!found_plural_forms) plural_parser_.parse(plural_expr_);
    return "";
}
// Looks up the translation of `msgid`.
// Returns the stored translation up to (not including) its first NUL byte,
// or `msgid` itself when there is no entry or the entry is empty.
std::string_view MOParser::gettext(std::string_view msgid) const {
    const auto entry = translations_.find(msgid);
    if (entry == translations_.end() || entry->second.empty()) {
        return msgid;  // Return original if no translation found
    }
    const std::string_view translated = entry->second;
    // find() yields npos when there is no NUL byte, and substr(0, npos) then
    // keeps the whole translation.
    return translated.substr(0, translated.find('\0'));
}
// Looks up the plural-aware translation for the count `n`.
//
// Plural entries are stored under the key "msgid\0msgid_plural" and their
// value holds the individual forms separated by NUL bytes; the final form has
// no trailing separator. The form is selected with the catalog's plural rule,
// clamped to the declared number of forms.
//
// When the catalog has no usable entry (or fewer forms than the selected
// index) the untranslated strings are used with the Germanic rule of the
// gettext API: `msgid` for n == 1, `msgid_plural` for everything else,
// including n == 0.
std::string_view MOParser::ngettext(std::string_view msgid, std::string_view msgid_plural, unsigned long n) const {
    // Create composite key for plural forms (msgid\0msgid_plural)
    std::string key;
    key.reserve(msgid.size() + 1 + msgid_plural.size());
    key.append(msgid);
    key.push_back('\0');
    key.append(msgid_plural);

    const auto it = translations_.find(key);
    if (it != translations_.end() && !it->second.empty()) {
        const std::string_view forms = it->second;
        // Determine which plural form to use
        unsigned long plural_index = plural(n);
        // Ensure index is within bounds; skipped for a non-positive count,
        // where "count - 1" would not be a valid index.
        if (num_plurals_ > 0 && plural_index >= static_cast<unsigned long>(num_plurals_)) {
            plural_index = static_cast<unsigned long>(num_plurals_) - 1;
        }
        // Walk the NUL separated forms up to the selected one
        size_t start = 0;
        while (true) {
            const size_t pos = forms.find('\0', start);
            if (plural_index == 0) {
                // The last form has no terminator, so pos may be npos here.
                return pos == std::string_view::npos ? forms.substr(start)
                                                     : forms.substr(start, pos - start);
            }
            if (pos == std::string_view::npos) break;  // fewer forms than requested
            start = pos + 1;
            --plural_index;
        }
    }
    // Fallback to English-style pluralization
    return n == 1 ? msgid : msgid_plural;
}

View File

@ -0,0 +1,82 @@
/*
* mo_parser.h
* Copyright (C) 2026 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#pragma once
#include <string>
#include <cstdint>
#include <unordered_map>
#include "plural_expression_parser.h"
// Parser for GNU gettext .mo message catalogs held in memory.
//
// load() copies the catalog into a buffer owned by the parser; the
// translation table and every string_view returned by gettext()/ngettext()
// that comes from the catalog point into that buffer, so they are valid only
// as long as the parser lives and load() is not called again.
class MOParser {
public:
    MOParser();
    ~MOParser();

    // The parser owns a raw buffer that the translation table points into, so
    // a member-wise copy would end in a double free. Copying is disabled.
    MOParser(const MOParser&) = delete;
    MOParser& operator=(const MOParser&) = delete;

    // Load a .mo file from memory. Returns an empty string on success,
    // otherwise an error message.
    std::string load(const char *data, size_t sz);

    // Get translation for a simple string
    std::string_view gettext(std::string_view msgid) const;

    // Get translation for plural forms
    std::string_view ngettext(std::string_view msgid, const std::string_view msgid_plural, unsigned long n) const;

    // Check if file is loaded
    bool isLoaded() const { return loaded_; }

    // Get the number of strings in the catalog
    size_t size() const { return translations_.size(); }

    // Get plural expression string (for debugging)
    std::string getPluralExpression() const { return plural_expr_; }

    // Get number of plural forms
    int getNumPlurals() const { return num_plurals_; }

    // Get plural message index. Takes the full unsigned long range so that
    // large counts are not truncated before the plural rule is evaluated.
    unsigned long plural(unsigned long n) const { return plural_parser_.evaluate(n); }

private:
    // Fixed-size header at the start of the file; all fields are 32 bits.
    struct MOHeader {
        uint32_t magic;
        uint32_t revision;
        uint32_t num_strings;
        uint32_t offset_original;
        uint32_t offset_translation;
        uint32_t hash_table_size;
        uint32_t hash_table_offset;
    };

    // One entry of the original/translation tables: where a string lives in
    // the file and how long it is.
    struct StringDescriptor {
        uint32_t length;
        uint32_t offset;
    };

    // Each parse step returns an empty string on success, else an error message.
    std::string parseHeader();
    std::string parseStrings();
    std::string parseMetadata(std::string_view header);
    std::string parsePluralForms(std::string_view line);

    uint32_t swap32(uint32_t value) const;
    bool needsSwap(uint32_t magic) const;

    MOHeader header_;
    bool swap_bytes_;
    bool loaded_;
    const char *data;  // owned copy of the catalog, released in the destructor
    size_t sz;         // size of `data` in bytes

    // Map from msgid to translation(s)
    // For plural forms, translations are separated by null bytes
    std::unordered_map<std::string_view, std::string_view> translations_;

    // Plural forms support
    int num_plurals_;
    std::string plural_expr_;
    PluralExpressionParser plural_parser_;
};

View File

@ -0,0 +1,508 @@
#include "plural_expression_parser.h"
#include <cctype>
// AST Node implementations
// Leaf node for an integer literal: evaluates to the literal, whatever n is.
class NumberNode : public ASTNode {
public:
    explicit NumberNode(unsigned long value) : value_{value} {}

    unsigned long evaluate(unsigned long /*n*/) const override {
        return value_;
    }

private:
    unsigned long value_;
};
// Leaf node for the variable 'n': evaluates to the value it is given.
class VariableNode : public ASTNode {
public:
    unsigned long evaluate(unsigned long n) const override {
        const unsigned long current = n;
        return current;
    }
};
// Inner node for a binary operator. Both operands are always evaluated before
// the operator is applied. Comparison and logical operators yield 1 or 0;
// division and modulo by zero yield 0.
class BinaryOpNode : public ASTNode {
public:
    enum class Op { ADD, SUB, MUL, DIV, MOD, EQ, NE, LT, LE, GT, GE, AND, OR };

    BinaryOpNode(Op op, std::unique_ptr<ASTNode> left, std::unique_ptr<ASTNode> right)
        : op_(op), left_(std::move(left)), right_(std::move(right)) {}

    unsigned long evaluate(unsigned long n) const override {
        const unsigned long lhs = left_->evaluate(n);
        const unsigned long rhs = right_->evaluate(n);
        switch (op_) {
            // Arithmetic
            case Op::ADD: return lhs + rhs;
            case Op::SUB: return lhs - rhs;
            case Op::MUL: return lhs * rhs;
            case Op::DIV:
                if (rhs == 0) return 0;
                return lhs / rhs;
            case Op::MOD:
                if (rhs == 0) return 0;
                return lhs % rhs;
            // Comparisons
            case Op::EQ: return truth(lhs == rhs);
            case Op::NE: return truth(lhs != rhs);
            case Op::LT: return truth(lhs < rhs);
            case Op::LE: return truth(lhs <= rhs);
            case Op::GT: return truth(lhs > rhs);
            case Op::GE: return truth(lhs >= rhs);
            // Logic
            case Op::AND: return truth(lhs != 0 && rhs != 0);
            case Op::OR: return truth(lhs != 0 || rhs != 0);
        }
        return 0;
    }

private:
    // Maps a boolean onto the 1/0 result values.
    static unsigned long truth(bool condition) { return condition ? 1 : 0; }

    Op op_;
    std::unique_ptr<ASTNode> left_;
    std::unique_ptr<ASTNode> right_;
};
// Inner node for a prefix operator: logical NOT (yields 1 or 0) or arithmetic
// negation of the unsigned operand.
class UnaryOpNode : public ASTNode {
public:
    enum class Op { NOT, NEG };

    UnaryOpNode(Op op, std::unique_ptr<ASTNode> operand)
        : op_(op), operand_(std::move(operand)) {}

    unsigned long evaluate(unsigned long n) const override {
        const unsigned long operand_value = operand_->evaluate(n);
        if (op_ == Op::NOT) return operand_value == 0 ? 1 : 0;
        if (op_ == Op::NEG) return -operand_value;
        return 0;
    }

private:
    Op op_;
    std::unique_ptr<ASTNode> operand_;
};
// Inner node for "condition ? a : b". Only the selected branch is evaluated.
class TernaryNode : public ASTNode {
public:
    TernaryNode(std::unique_ptr<ASTNode> condition,
                std::unique_ptr<ASTNode> true_expr,
                std::unique_ptr<ASTNode> false_expr)
        : condition_(std::move(condition)),
          true_expr_(std::move(true_expr)),
          false_expr_(std::move(false_expr)) {}

    unsigned long evaluate(unsigned long n) const override {
        if (condition_->evaluate(n) != 0) {
            return true_expr_->evaluate(n);
        }
        return false_expr_->evaluate(n);
    }

private:
    std::unique_ptr<ASTNode> condition_;
    std::unique_ptr<ASTNode> true_expr_;
    std::unique_ptr<ASTNode> false_expr_;
};
// PluralExpressionParser implementation
PluralExpressionParser:: PluralExpressionParser()
: current_(0)
, has_error_(false) {
}
// Nothing to release by hand: every member cleans up after itself.
PluralExpressionParser::~PluralExpressionParser() = default;
std::vector<Token> PluralExpressionParser::tokenize(const std::string& expr) {
std::vector<Token> tokens;
size_t i = 0;
while (i < expr.length()) {
char c = expr[i];
// Skip whitespace
if (std::isspace(c)) {
i++;
continue;
}
// Numbers
if (std::isdigit(c)) {
unsigned long value = 0;
while (i < expr.length() && std::isdigit(expr[i])) {
value = value * 10 + (expr[i] - '0');
i++;
}
tokens.emplace_back(TokenType::NUMBER, value);
continue;
}
// Variable 'n'
if (c == 'n') {
tokens.emplace_back(TokenType::VARIABLE);
i++;
continue;
}
// Two-character operators
if (i + 1 < expr.length()) {
std::string two_char = expr.substr(i, 2);
if (two_char == "==") {
tokens.emplace_back(TokenType::EQUAL);
i += 2;
continue;
} else if (two_char == "!=") {
tokens.emplace_back(TokenType::NOT_EQUAL);
i += 2;
continue;
} else if (two_char == "<=") {
tokens.emplace_back(TokenType::LESS_EQUAL);
i += 2;
continue;
} else if (two_char == ">=") {
tokens.emplace_back(TokenType::GREATER_EQUAL);
i += 2;
continue;
} else if (two_char == "&&") {
tokens.emplace_back(TokenType::AND);
i += 2;
continue;
} else if (two_char == "||") {
tokens.emplace_back(TokenType::OR);
i += 2;
continue;
}
}
// Single-character operators
switch (c) {
case '+': tokens.emplace_back(TokenType::PLUS); break;
case '-': tokens.emplace_back(TokenType:: MINUS); break;
case '*': tokens.emplace_back(TokenType::MULTIPLY); break;
case '/': tokens.emplace_back(TokenType:: DIVIDE); break;
case '%': tokens.emplace_back(TokenType::MODULO); break;
case '<': tokens.emplace_back(TokenType:: LESS); break;
case '>': tokens.emplace_back(TokenType::GREATER); break;
case '!': tokens.emplace_back(TokenType::NOT); break;
case '?': tokens.emplace_back(TokenType::QUESTION); break;
case ':': tokens.emplace_back(TokenType:: COLON); break;
case '(': tokens.emplace_back(TokenType::LPAREN); break;
case ')': tokens.emplace_back(TokenType::RPAREN); break;
default:
// Unknown character, skip it
break;
}
i++;
}
tokens.emplace_back(TokenType::END);
return tokens;
}
bool PluralExpressionParser::parse(const std::string& expression) {
tokens_ = tokenize(expression);
current_ = 0;
has_error_ = false;
error_message_. clear();
root_ = nullptr;
root_ = parseExpression();
return root_ != nullptr && !has_error_;
}
// Evaluates the compiled expression for `n`; yields 0 when nothing has been
// compiled.
unsigned long PluralExpressionParser::evaluate(unsigned long n) const {
    return root_ ? root_->evaluate(n) : 0;
}
// Returns the current token without advancing; END once the input is used up.
Token PluralExpressionParser::peek() const {
    const bool exhausted = current_ >= tokens_.size();
    return exhausted ? Token(TokenType::END) : tokens_[current_];
}
// Returns the current token and advances past it; END (without advancing)
// once the input is used up.
Token PluralExpressionParser::consume() {
    if (current_ >= tokens_.size()) {
        return Token(TokenType::END);
    }
    const Token taken = tokens_[current_];
    ++current_;
    return taken;
}
// Consumes the current token if it has the given type and reports whether it
// did so.
bool PluralExpressionParser::match(TokenType type) {
    const bool matched = check(type);
    if (matched) {
        consume();
    }
    return matched;
}
// True when the current token has the given type; never advances.
bool PluralExpressionParser::check(TokenType type) const {
    const Token upcoming = peek();
    return upcoming.type == type;
}
// Records a parse failure together with its description.
void PluralExpressionParser::setError(const std::string& message) {
    error_message_ = message;
    has_error_ = true;
}
// Entry point of the grammar: an expression is a ternary expression.
std::unique_ptr<ASTNode> PluralExpressionParser::parseExpression() {
    std::unique_ptr<ASTNode> tree = parseTernary();
    return tree;
}
std::unique_ptr<ASTNode> PluralExpressionParser::parseTernary() {
auto expr = parseLogicalOr();
if (!expr) {
return nullptr;
}
if (match(TokenType::QUESTION)) {
auto true_expr = parseExpression();
if (!true_expr) {
return nullptr;
}
if (!match(TokenType:: COLON)) {
setError("Expected ':' in ternary expression");
return nullptr;
}
auto false_expr = parseTernary();
if (!false_expr) {
return nullptr;
}
return std::make_unique<TernaryNode>(std::move(expr),
std::move(true_expr),
std::move(false_expr));
}
return expr;
}
// logical_or := logical_and { '||' logical_and }   (left associative)
// Returns nullptr on error.
std::unique_ptr<ASTNode> PluralExpressionParser::parseLogicalOr() {
    std::unique_ptr<ASTNode> lhs = parseLogicalAnd();
    while (lhs && match(TokenType::OR)) {
        std::unique_ptr<ASTNode> rhs = parseLogicalAnd();
        if (!rhs) {
            return nullptr;
        }
        lhs = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::OR, std::move(lhs), std::move(rhs));
    }
    return lhs;
}
// logical_and := equality { '&&' equality }   (left associative)
// Returns nullptr on error.
std::unique_ptr<ASTNode> PluralExpressionParser::parseLogicalAnd() {
    std::unique_ptr<ASTNode> lhs = parseEquality();
    while (lhs && match(TokenType::AND)) {
        std::unique_ptr<ASTNode> rhs = parseEquality();
        if (!rhs) {
            return nullptr;
        }
        lhs = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::AND, std::move(lhs), std::move(rhs));
    }
    return lhs;
}
std::unique_ptr<ASTNode> PluralExpressionParser::parseEquality() {
auto left = parseRelational();
if (!left) {
return nullptr;
}
while (true) {
if (match(TokenType::EQUAL)) {
auto right = parseRelational();
if (!right) {
return nullptr;
}
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::EQ,
std::move(left),
std::move(right));
} else if (match(TokenType::NOT_EQUAL)) {
auto right = parseRelational();
if (!right) {
return nullptr;
}
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::NE,
std::move(left),
std::move(right));
} else {
break;
}
}
return left;
}
std::unique_ptr<ASTNode> PluralExpressionParser::parseRelational() {
auto left = parseAdditive();
if (!left) {
return nullptr;
}
while (true) {
if (match(TokenType::LESS)) {
auto right = parseAdditive();
if (!right) {
return nullptr;
}
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::LT,
std::move(left),
std::move(right));
} else if (match(TokenType::LESS_EQUAL)) {
auto right = parseAdditive();
if (!right) {
return nullptr;
}
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::LE,
std:: move(left),
std::move(right));
} else if (match(TokenType::GREATER)) {
auto right = parseAdditive();
if (!right) {
return nullptr;
}
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::GT,
std::move(left),
std:: move(right));
} else if (match(TokenType:: GREATER_EQUAL)) {
auto right = parseAdditive();
if (!right) {
return nullptr;
}
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::GE,
std::move(left),
std::move(right));
} else {
break;
}
}
return left;
}
std:: unique_ptr<ASTNode> PluralExpressionParser:: parseAdditive() {
auto left = parseMultiplicative();
if (!left) {
return nullptr;
}
while (true) {
if (match(TokenType::PLUS)) {
auto right = parseMultiplicative();
if (!right) {
return nullptr;
}
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::ADD,
std::move(left),
std::move(right));
} else if (match(TokenType:: MINUS)) {
auto right = parseMultiplicative();
if (!right) {
return nullptr;
}
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::SUB,
std::move(left),
std::move(right));
} else {
break;
}
}
return left;
}
std:: unique_ptr<ASTNode> PluralExpressionParser:: parseMultiplicative() {
auto left = parseUnary();
if (!left) {
return nullptr;
}
while (true) {
if (match(TokenType::MULTIPLY)) {
auto right = parseUnary();
if (!right) {
return nullptr;
}
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::MUL,
std::move(left),
std::move(right));
} else if (match(TokenType::DIVIDE)) {
auto right = parseUnary();
if (!right) {
return nullptr;
}
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::DIV,
std:: move(left),
std::move(right));
} else if (match(TokenType::MODULO)) {
auto right = parseUnary();
if (!right) {
return nullptr;
}
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::MOD,
std::move(left),
std::move(right));
} else {
break;
}
}
return left;
}
std::unique_ptr<ASTNode> PluralExpressionParser::parseUnary() {
if (match(TokenType::NOT)) {
auto operand = parseUnary();
if (!operand) {
return nullptr;
}
return std::make_unique<UnaryOpNode>(UnaryOpNode::Op::NOT, std::move(operand));
}
if (match(TokenType:: MINUS)) {
auto operand = parseUnary();
if (!operand) {
return nullptr;
}
return std::make_unique<UnaryOpNode>(UnaryOpNode:: Op::NEG, std::move(operand));
}
return parsePrimary();
}
std::unique_ptr<ASTNode> PluralExpressionParser::parsePrimary() {
// Number
if (check(TokenType::NUMBER)) {
Token tok = consume();
return std::make_unique<NumberNode>(tok.value);
}
// Variable 'n'
if (match(TokenType::VARIABLE)) {
return std::make_unique<VariableNode>();
}
// Parenthesized expression
if (match(TokenType::LPAREN)) {
auto expr = parseExpression();
if (!expr) {
return nullptr;
}
if (!match(TokenType:: RPAREN)) {
setError("Expected ')' after expression");
return nullptr;
}
return expr;
}
setError("Unexpected token in expression");
return nullptr;
}

View File

@ -0,0 +1,91 @@
#pragma once
#include <string>
#include <memory>
#include <vector>
// Token types for the plural expression language
// Kinds of token produced when a plural expression is tokenized.
enum class TokenType {
    // Operands
    NUMBER,         // decimal literal
    VARIABLE,       // n

    // Arithmetic operators
    PLUS,           // +
    MINUS,          // -
    MULTIPLY,       // *
    DIVIDE,         // /
    MODULO,         // %

    // Comparison operators
    EQUAL,          // ==
    NOT_EQUAL,      // !=
    LESS,           // <
    LESS_EQUAL,     // <=
    GREATER,        // >
    GREATER_EQUAL,  // >=

    // Logical operators
    AND,            // &&
    OR,             // ||
    NOT,            // !

    // Ternary operator and grouping
    QUESTION,       // ?
    COLON,          // :
    LPAREN,         // (
    RPAREN,         // )

    // End of input marker
    END
};
struct Token {
TokenType type;
unsigned long value; // For NUMBER tokens
Token(TokenType t, unsigned long v = 0) : type(t), value(v) {}
};
// Abstract syntax tree node
// Base class of every node in a compiled plural expression.
class ASTNode {
public:
    // Computes the value of this node for the given n.
    virtual unsigned long evaluate(unsigned long n) const = 0;

    // Nodes are destroyed through base-class pointers.
    virtual ~ASTNode() = default;
};
// Compiles a plural expression (such as "n != 1") into a tree of ASTNodes
// with a recursive descent parser and evaluates it for concrete values of n.
class PluralExpressionParser {
public:
    PluralExpressionParser();
    ~PluralExpressionParser();

    // Compile `expression`; returns true when it was parsed successfully.
    bool parse(const std::string& expression);

    // Value of the compiled expression for the given n.
    unsigned long evaluate(unsigned long n) const;

    // True when an expression is compiled and no error has been recorded.
    bool isValid() const { return root_ != nullptr && !has_error_; }

    // Description of the most recent parse error.
    const std::string& getError() const { return error_message_; }

private:
    // --- Tokenizer -------------------------------------------------------
    std::vector<Token> tokenize(const std::string& expr);

    // --- Token cursor helpers --------------------------------------------
    Token peek() const;
    Token consume();
    bool check(TokenType type) const;
    bool match(TokenType type);
    void setError(const std::string& message);

    // --- Grammar rules, lowest precedence first (nullptr signals an error) --
    std::unique_ptr<ASTNode> parseExpression();
    std::unique_ptr<ASTNode> parseTernary();
    std::unique_ptr<ASTNode> parseLogicalOr();
    std::unique_ptr<ASTNode> parseLogicalAnd();
    std::unique_ptr<ASTNode> parseEquality();
    std::unique_ptr<ASTNode> parseRelational();
    std::unique_ptr<ASTNode> parseAdditive();
    std::unique_ptr<ASTNode> parseMultiplicative();
    std::unique_ptr<ASTNode> parseUnary();
    std::unique_ptr<ASTNode> parsePrimary();

    // --- State -----------------------------------------------------------
    std::vector<Token> tokens_;      // tokens of the expression being parsed
    size_t current_;                 // index of the next token to read
    std::unique_ptr<ASTNode> root_;  // compiled expression, if any
    bool has_error_;
    std::string error_message_;
};

View File

@ -0,0 +1,32 @@
#!/usr/bin/env python
# License: GPLv3 Copyright: 2026, Kovid Goyal <kovid at kovidgoyal.net>
import gettext
import io
import unittest
import zipfile
from calibre.utils.localization import available_translations, get_lc_messages_path
from calibre_extensions.translator import Translator
class TestTranslator(unittest.TestCase):
    '''Compare the native Translator against Python's gettext for every bundled catalog.'''

    def test_translator(self):
        # Each available translation whose messages path can be resolved is
        # read from the bundled locales archive and checked in turn.
        archive_path = P('localization/locales.zip', allow_user_override=False)
        with zipfile.ZipFile(archive_path, 'r') as zf:
            for lang in available_translations():
                mpath = get_lc_messages_path(lang)
                if mpath is None:
                    continue
                data = zf.read(mpath + '/messages.mo')
                test_translator(self, lang, data)
def test_translator(self: TestTranslator, lang: str, data: bytes) -> None:
    '''
    Check that the native Translator and gettext.GNUTranslations agree on the
    plural form index for the catalog in ``data``.

    The range starts at 0 and runs past 200 because plural rules commonly
    special-case zero and depend on ``n % 100``; counts of 100 and above
    exercise branches that 1..99 never reach.
    '''
    n = Translator(data)
    o = gettext.GNUTranslations(io.BytesIO(data))
    for i in range(0, 201):
        self.assertEqual(o.plural(i), n.plural(i), f'plural({i}) not equal for language: {lang}')
def find_tests():
    '''Return the test suite holding every test of TestTranslator.'''
    loader = unittest.defaultTestLoader
    return loader.loadTestsFromTestCase(TestTranslator)