mirror of
https://github.com/kovidgoyal/calibre.git
synced 2026-01-29 15:13:30 -05:00
Start work on replacing python gettext with native code version
Needed to use translations for both python and Qt without invoking the GIL when used from Qt.
This commit is contained in:
parent
4eb9d9eca2
commit
d55ffcf46a
@ -176,11 +176,18 @@
|
||||
},
|
||||
{
|
||||
"name": "progress_indicator",
|
||||
"sources": "calibre/gui2/progress_indicator/QProgressIndicator.cpp calibre/gui2/progress_indicator/CalibreStyle.cpp",
|
||||
"sources": "calibre/gui2/progress_indicator/QProgressIndicator.cpp calibre/gui2/progress_indicator/CalibreStyle.cpp calibre/gui2/progress_indicator/MoTranslator.cpp calibre/gui2/progress_indicator/PluralExpressionParser.cpp",
|
||||
"headers": "calibre/gui2/progress_indicator/QProgressIndicator.h",
|
||||
"sip_files": "calibre/gui2/progress_indicator/QProgressIndicator.sip",
|
||||
"inc_dirs": "calibre/gui2/progress_indicator"
|
||||
},
|
||||
{
|
||||
"name": "translator",
|
||||
"sources": "calibre/utils/translator/mo_parser.cpp calibre/utils/translator/plural_expression_parser.cpp calibre/utils/translator/main.cpp",
|
||||
"headers": "calibre/utils/translator/mo_parser.h calibre/utils/translator/plural_expression_parser.h",
|
||||
"inc_dirs": "calibre/utils/translator",
|
||||
"needs_c++": "17"
|
||||
},
|
||||
{
|
||||
"name": "imageops",
|
||||
"sources": "calibre/utils/imageops/imageops.cpp calibre/utils/imageops/quantize.cpp calibre/utils/imageops/ordered_dither.cpp",
|
||||
|
||||
@ -261,6 +261,7 @@ class ExtensionsImporter:
|
||||
'piper',
|
||||
'html_as_json',
|
||||
'fast_css_transform',
|
||||
'translator',
|
||||
'fast_html_entities',
|
||||
'unicode_names',
|
||||
'html_syntax_highlighter',
|
||||
|
||||
@ -322,6 +322,8 @@ def find_tests(which_tests=None, exclude_tests=None):
|
||||
from calibre.utils.windows.wintest import find_tests
|
||||
a(find_tests())
|
||||
a(unittest.defaultTestLoader.loadTestsFromTestCase(TestImports))
|
||||
from calibre.utils.translator.test_translator import find_tests
|
||||
a(find_tests())
|
||||
if ok('dbcli'):
|
||||
from calibre.db.cli.tests import find_tests
|
||||
a(find_tests())
|
||||
|
||||
0
src/calibre/utils/translator/__init__.py
Normal file
0
src/calibre/utils/translator/__init__.py
Normal file
98
src/calibre/utils/translator/main.cpp
Normal file
98
src/calibre/utils/translator/main.cpp
Normal file
@ -0,0 +1,98 @@
|
||||
/*
|
||||
* main.cpp
|
||||
* Copyright (C) 2026 Kovid Goyal <kovid at kovidgoyal.net>
|
||||
*
|
||||
* Distributed under terms of the GPL3 license.
|
||||
*/
|
||||
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#define UNICODE
|
||||
#define _UNICODE
|
||||
|
||||
#include <Python.h>
|
||||
#include "mo_parser.h"
|
||||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
|
||||
PyObject *fallback;
|
||||
MOParser parser;
|
||||
} Translator;
|
||||
|
||||
extern PyTypeObject Translator_Type;
|
||||
|
||||
// tp_new for Translator.
//
// Accepts one optional positional argument: the raw bytes of a .mo catalogue
// (or None). When data is supplied it is parsed immediately; a parse failure
// is reported as ValueError. Keyword arguments are not used.
//
// Returns a new reference, or NULL with a Python exception set.
static PyObject *
new_translator(PyTypeObject *type, PyObject *args, PyObject *kwds) {
    (void)kwds;
    const char *mo_data = NULL; Py_ssize_t sz = 0;
    if (!PyArg_ParseTuple(args, "|z#", &mo_data, &sz)) return NULL;

    Translator *self = (Translator *)type->tp_alloc(type, 0);
    if (self == NULL) return NULL;
    // tp_alloc only provides raw memory, so the C++ member has to be
    // constructed in place before anything (including dealloc) touches it.
    new (&self->parser) MOParser();
    if (mo_data == NULL) return (PyObject*) self;

    std::string err;
    try {
        err = self->parser.load(mo_data, static_cast<size_t>(sz));
    } catch (const std::bad_alloc &) {
        // A C++ exception must never unwind through the interpreter's C
        // frames, so translate it into a Python exception here.
        Py_DECREF((PyObject*)self);
        return PyErr_NoMemory();
    } catch (const std::exception &e) {
        Py_DECREF((PyObject*)self);
        PyErr_SetString(PyExc_RuntimeError, e.what());
        return NULL;
    }
    if (!err.empty()) {
        Py_DECREF((PyObject*)self);
        PyErr_SetString(PyExc_ValueError, err.c_str());
        return NULL;
    }
    return (PyObject*) self;
}
|
||||
|
||||
static void
|
||||
dealloc_translator(Translator* self) {
|
||||
Py_CLEAR(self->fallback);
|
||||
self->parser.~MOParser();
|
||||
Py_TYPE(self)->tp_free((PyObject*)self);
|
||||
}
|
||||
|
||||
// Translator.plural(n): evaluate the catalogue's plural expression for n.
//
// n must be a non-negative Python int that fits in an unsigned long;
// otherwise the conversion error (OverflowError) raised by
// PyLong_AsUnsignedLong is propagated. Returns a new int reference.
static PyObject*
plural(PyObject *self_, PyObject *pn) {
    if (!PyLong_Check(pn)) { PyErr_SetString(PyExc_TypeError, "n must be an integer"); return NULL; }
    unsigned long n = PyLong_AsUnsignedLong(pn);
    // (unsigned long)-1 is also a legal value, so the pending exception is
    // what distinguishes a failed conversion.
    if (n == (unsigned long)-1 && PyErr_Occurred()) return NULL;
    Translator *self = (Translator*)self_;
    return PyLong_FromUnsignedLong(self->parser.plural(n));
}
|
||||
|
||||
// Method table of the Translator type.
static PyMethodDef translator_methods[] = {
    {
        "plural",
        plural,
        METH_O,
        "plural(n: int) -> int:\n\n"
        "Get the message catalog index based on the plural form specification."
    },
    {NULL, NULL, 0, NULL} /* Sentinel */
};
|
||||
|
||||
PyTypeObject Translator_Type = {
|
||||
.ob_base = PyVarObject_HEAD_INIT(NULL, 0)
|
||||
.tp_name = "translator.Translator",
|
||||
.tp_basicsize = sizeof(Translator),
|
||||
.tp_dealloc = (destructor)dealloc_translator,
|
||||
.tp_flags = Py_TPFLAGS_DEFAULT,
|
||||
.tp_doc = "Translator",
|
||||
.tp_methods = translator_methods,
|
||||
.tp_new = new_translator,
|
||||
};
|
||||
|
||||
|
||||
// Module-level function table: it contains only the terminating sentinel.
static PyMethodDef methods[] = {
    {NULL, NULL, 0, NULL},
};
|
||||
|
||||
static int
|
||||
exec_module(PyObject *m) {
|
||||
if (PyType_Ready(&Translator_Type) < 0) return -1;
|
||||
if (PyModule_AddObject(m, "Translator", (PyObject *)&Translator_Type) != 0) return -1;
|
||||
Py_INCREF(&Translator_Type);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static PyModuleDef_Slot slots[] = { {Py_mod_exec, (void*)exec_module}, {0, NULL} };
|
||||
|
||||
static struct PyModuleDef module_def = {PyModuleDef_HEAD_INIT};
|
||||
|
||||
CALIBRE_MODINIT_FUNC PyInit_translator(void) {
|
||||
module_def.m_name = "translator";
|
||||
module_def.m_doc = "Support for GNU gettext translations without holding the GIL so that it can be used in Qt as well";
|
||||
module_def.m_methods = methods;
|
||||
module_def.m_slots = slots;
|
||||
return PyModuleDef_Init(&module_def);
|
||||
}
|
||||
230
src/calibre/utils/translator/mo_parser.cpp
Normal file
230
src/calibre/utils/translator/mo_parser.cpp
Normal file
@ -0,0 +1,230 @@
|
||||
#include <algorithm>
#include <cctype>
#include <charconv>
#include <cstdlib>
#include <cstring>
#include "mo_parser.h"
|
||||
|
||||
// Magic numbers for .mo files
|
||||
constexpr uint32_t MO_MAGIC_LE = 0x950412de;
|
||||
constexpr uint32_t MO_MAGIC_BE = 0xde120495;
|
||||
|
||||
// Creates an empty parser: nothing loaded, no catalogue buffer, and the
// default two-form plural rule "n != 1". header_ is not set here; it is
// filled in by parseHeader().
MOParser::MOParser()
    : swap_bytes_(false),
      loaded_(false),
      data(nullptr),
      sz(0),
      num_plurals_(2),
      plural_expr_("n != 1") {
}
|
||||
|
||||
// Releases the malloc'd copy of the catalogue bytes made by load().
MOParser::~MOParser() {
    void *buffer = (void*)data;
    data = NULL;
    std::free(buffer);
}
|
||||
|
||||
// Returns value with the order of its four bytes reversed.
uint32_t MOParser::swap32(uint32_t value) const {
    const uint32_t byte0 = value & 0xFFu;
    const uint32_t byte1 = (value >> 8) & 0xFFu;
    const uint32_t byte2 = (value >> 16) & 0xFFu;
    const uint32_t byte3 = (value >> 24) & 0xFFu;
    return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
|
||||
|
||||
bool MOParser::needsSwap(uint32_t magic) const {
|
||||
return magic == MO_MAGIC_BE;
|
||||
}
|
||||
|
||||
std::string MOParser::load(const char *data, size_t sz) {
|
||||
char *copy = (char*)std::malloc(sz);
|
||||
std::memcpy(copy, data, sz);
|
||||
this->data = copy;
|
||||
this->sz = sz;
|
||||
std::string err = "";
|
||||
err = parseHeader();
|
||||
if (err.size()) return err;
|
||||
err = parseStrings();
|
||||
if (err.size()) return err;
|
||||
loaded_ = true;
|
||||
return err;
|
||||
}
|
||||
|
||||
std::string MOParser::parseHeader() {
|
||||
if (sz < sizeof(MOHeader)) return ".mo data too small (" + std::to_string(sz) + ")";
|
||||
|
||||
// Read magic number to determine endianness
|
||||
uint32_t magic; std::memcpy(&magic, data, sizeof(uint32_t));
|
||||
|
||||
if (magic != MO_MAGIC_LE && magic != MO_MAGIC_BE) {
|
||||
return ".mo data has unrecognised magic bytes";
|
||||
}
|
||||
|
||||
swap_bytes_ = needsSwap(magic);
|
||||
|
||||
// Read header
|
||||
std::memcpy(&header_, data, sizeof(MOHeader));
|
||||
|
||||
// Swap bytes if needed
|
||||
if (swap_bytes_) {
|
||||
header_.magic = swap32(header_. magic);
|
||||
header_.revision = swap32(header_.revision);
|
||||
header_.num_strings = swap32(header_.num_strings);
|
||||
header_.offset_original = swap32(header_.offset_original);
|
||||
header_.offset_translation = swap32(header_.offset_translation);
|
||||
header_.hash_table_size = swap32(header_.hash_table_size);
|
||||
header_.hash_table_offset = swap32(header_.hash_table_offset);
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string MOParser::parseStrings() {
|
||||
for (uint32_t i = 0; i < header_.num_strings; ++i) {
|
||||
// Read original string descriptor
|
||||
size_t orig_desc_offset = header_.offset_original + i * sizeof(StringDescriptor);
|
||||
if (orig_desc_offset + sizeof(StringDescriptor) > sz) return ".mo data too small for string descriptor";
|
||||
|
||||
StringDescriptor orig_desc;
|
||||
std::memcpy(&orig_desc, data + orig_desc_offset, sizeof(StringDescriptor));
|
||||
|
||||
if (swap_bytes_) {
|
||||
orig_desc.length = swap32(orig_desc.length);
|
||||
orig_desc.offset = swap32(orig_desc.offset);
|
||||
}
|
||||
|
||||
// Read translation string descriptor
|
||||
size_t trans_desc_offset = header_.offset_translation + i * sizeof(StringDescriptor);
|
||||
if (trans_desc_offset + sizeof(StringDescriptor) > sz) return ".mo data too small for translation string descriptor";
|
||||
StringDescriptor trans_desc;
|
||||
std::memcpy(&trans_desc, data + trans_desc_offset, sizeof(StringDescriptor));
|
||||
|
||||
if (swap_bytes_) {
|
||||
trans_desc.length = swap32(trans_desc.length);
|
||||
trans_desc.offset = swap32(trans_desc.offset);
|
||||
}
|
||||
|
||||
// Read original string
|
||||
if (orig_desc.offset + orig_desc.length > sz) return ".mo data too small for msgid";
|
||||
std::string_view msgid(data + orig_desc.offset, orig_desc.length);
|
||||
|
||||
// Read translation string
|
||||
if (trans_desc.offset + trans_desc.length > sz) return ".mo data too small for msg";
|
||||
std::string_view msgstr(data + trans_desc.offset, trans_desc.length);
|
||||
|
||||
// First entry (empty msgid) contains metadata
|
||||
if (msgid.empty() && i == 0) {
|
||||
std::string err = parseMetadata(msgstr);
|
||||
if (err.size()) return err;
|
||||
} else translations_[msgid] = msgstr;
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
// Returns true when sv begins with prefix (an empty prefix always matches).
static bool
starts_with(std::string_view sv, std::string_view prefix) {
    if (prefix.size() > sv.size()) return false;
    return sv.compare(0, prefix.size(), prefix) == 0;
}
|
||||
|
||||
std::string
|
||||
MOParser::parsePluralForms(std::string_view plural_forms_line) {
|
||||
// Extract nplurals
|
||||
size_t nplurals_pos = plural_forms_line.find("nplurals=");
|
||||
if (nplurals_pos != std::string::npos) {
|
||||
nplurals_pos += 9; // strlen("nplurals=")
|
||||
num_plurals_ = std::atoi(plural_forms_line.data() + nplurals_pos);
|
||||
}
|
||||
|
||||
// Extract plural expression
|
||||
size_t plural_pos = plural_forms_line.find("plural=");
|
||||
if (plural_pos != std:: string::npos) {
|
||||
plural_pos += 7; // strlen("plural=")
|
||||
size_t semicolon = plural_forms_line.find(';', plural_pos);
|
||||
if (semicolon != std::string::npos) {
|
||||
plural_expr_ = plural_forms_line.substr(plural_pos, semicolon - plural_pos);
|
||||
|
||||
// Trim whitespace
|
||||
size_t first = plural_expr_.find_first_not_of(" \t\r\n");
|
||||
size_t last = plural_expr_.find_last_not_of(" \t\r\n");
|
||||
if (first != std::string::npos && last != std::string::npos) {
|
||||
plural_expr_ = plural_expr_.substr(first, last - first + 1);
|
||||
}
|
||||
|
||||
// Parse the expression
|
||||
if (! plural_parser_.parse(plural_expr_)) {
|
||||
return std::string("failed to parse plural forms expresion: " + plural_expr_);
|
||||
// Fall back to default
|
||||
plural_expr_ = "n != 1";
|
||||
plural_parser_.parse(plural_expr_);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// No plural expression, use default
|
||||
plural_parser_.parse(plural_expr_);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
// Returns a copy of sv with the ASCII letters 'A'..'Z' lower-cased.
//
// All other bytes, including bytes >= 0x80, are copied unchanged. The
// conversion is done by hand so the result does not depend on the process
// locale, unlike std::tolower.
static std::string
to_ascii_lower(std::string_view sv) {
    std::string result(sv);
    for (char &c : result) {
        if (c >= 'A' && c <= 'Z') c = static_cast<char>(c - 'A' + 'a');
    }
    return result;
}
|
||||
std::string
|
||||
MOParser::parseMetadata(std::string_view header) {
|
||||
size_t pos = 0, start = 0;
|
||||
bool found_plural_forms = false;
|
||||
while (pos < header.size()) {
|
||||
if (header[pos] == '\n') {
|
||||
std::string_view line = header.substr(start, pos-start);
|
||||
start = pos + 1;
|
||||
if (starts_with(line, "Plural-Forms:")) {
|
||||
std::string err = parsePluralForms(line);
|
||||
if (err.size()) return err;
|
||||
found_plural_forms = true;
|
||||
} else if (starts_with(line, "Content-Type:")) {
|
||||
size_t ctpos = line.find("charset=");
|
||||
if (ctpos != std::string::npos) {
|
||||
std::string charset = to_ascii_lower(line.substr(
|
||||
ctpos + sizeof("charset"), line.size() - ctpos - sizeof("charset")));
|
||||
if (charset != "utf8" && charset != "utf-8") {
|
||||
return "unsupported charset in .mo file: " + std::string(charset);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
if (!found_plural_forms) plural_parser_.parse(plural_expr_);
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string_view MOParser::gettext(std::string_view msgid) const {
|
||||
auto it = translations_.find(msgid);
|
||||
if (it != translations_.end() && ! it->second.empty()) {
|
||||
// Return first translation (before any null byte)
|
||||
size_t null_pos = it->second.find('\0');
|
||||
return (null_pos != std::string::npos) ? it->second.substr(0, null_pos) : it->second;
|
||||
}
|
||||
return msgid; // Return original if no translation found
|
||||
}
|
||||
|
||||
std::string_view MOParser::ngettext(std::string_view msgid, std::string_view msgid_plural, unsigned long n) const {
|
||||
// Create composite key for plural forms (msgid\0msgid_plural)
|
||||
std::string key = std::string(msgid) + '\0' + std::string(msgid_plural);
|
||||
|
||||
auto it = translations_.find(key);
|
||||
if (it != translations_.end() && !it->second.empty()) {
|
||||
// Determine which plural form to use
|
||||
unsigned long plural_index = plural(n);
|
||||
|
||||
// Ensure index is within bounds
|
||||
if (plural_index >= static_cast<unsigned long>(num_plurals_)) plural_index = num_plurals_ - 1;
|
||||
|
||||
// Split translation by null bytes
|
||||
size_t start = 0;
|
||||
size_t pos;
|
||||
|
||||
while ((pos = it->second.find('\0', start)) != std::string::npos) {
|
||||
std::string_view q = it->second.substr(start, pos - start);
|
||||
if (plural_index < 1) return q;
|
||||
start = pos + 1;
|
||||
plural_index--;
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to English-style pluralization
|
||||
return n <= 1 ? msgid : msgid_plural;
|
||||
}
|
||||
82
src/calibre/utils/translator/mo_parser.h
Normal file
82
src/calibre/utils/translator/mo_parser.h
Normal file
@ -0,0 +1,82 @@
|
||||
/*
|
||||
* mo_parser.h
|
||||
* Copyright (C) 2026 Kovid Goyal <kovid at kovidgoyal.net>
|
||||
*
|
||||
* Distributed under terms of the GPL3 license.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <unordered_map>
|
||||
#include "plural_expression_parser.h"
|
||||
|
||||
class MOParser {
|
||||
public:
|
||||
MOParser();
|
||||
~MOParser();
|
||||
|
||||
// Load a . mo file
|
||||
std::string load(const char *data, size_t sz);
|
||||
|
||||
// Get translation for a simple string
|
||||
std::string_view gettext(std::string_view msgid) const;
|
||||
|
||||
// Get translation for plural forms
|
||||
std::string_view ngettext(std::string_view msgid, const std::string_view msgid_plural, unsigned long n) const;
|
||||
|
||||
// Check if file is loaded
|
||||
bool isLoaded() const { return loaded_; }
|
||||
|
||||
// Get the number of strings in the catalog
|
||||
size_t size() const { return translations_.size(); }
|
||||
|
||||
// Get plural expression string (for debugging)
|
||||
std::string getPluralExpression() const { return plural_expr_; }
|
||||
|
||||
// Get number of plural forms
|
||||
int getNumPlurals() const { return num_plurals_; }
|
||||
|
||||
// Get plural message index
|
||||
unsigned long plural(int n) const { return plural_parser_.evaluate(n); }
|
||||
|
||||
private:
|
||||
struct MOHeader {
|
||||
uint32_t magic;
|
||||
uint32_t revision;
|
||||
uint32_t num_strings;
|
||||
uint32_t offset_original;
|
||||
uint32_t offset_translation;
|
||||
uint32_t hash_table_size;
|
||||
uint32_t hash_table_offset;
|
||||
};
|
||||
|
||||
struct StringDescriptor {
|
||||
uint32_t length;
|
||||
uint32_t offset;
|
||||
};
|
||||
|
||||
std::string parseHeader();
|
||||
std::string parseStrings();
|
||||
std::string parseMetadata(std::string_view header);
|
||||
std::string parsePluralForms(std::string_view line);
|
||||
|
||||
uint32_t swap32(uint32_t value) const;
|
||||
bool needsSwap(uint32_t magic) const;
|
||||
|
||||
MOHeader header_;
|
||||
bool swap_bytes_;
|
||||
bool loaded_;
|
||||
const char *data;
|
||||
size_t sz;
|
||||
|
||||
// Map from msgid to translation(s)
|
||||
// For plural forms, translations are separated by null bytes
|
||||
std::unordered_map<std::string_view, std::string_view> translations_;
|
||||
|
||||
// Plural forms support
|
||||
int num_plurals_;
|
||||
std::string plural_expr_;
|
||||
PluralExpressionParser plural_parser_;
|
||||
};
|
||||
508
src/calibre/utils/translator/plural_expression_parser.cpp
Normal file
508
src/calibre/utils/translator/plural_expression_parser.cpp
Normal file
@ -0,0 +1,508 @@
|
||||
#include "plural_expression_parser.h"
|
||||
#include <cctype>
|
||||
|
||||
// AST Node implementations
|
||||
|
||||
class NumberNode : public ASTNode {
|
||||
public:
|
||||
explicit NumberNode(unsigned long value) : value_(value) {}
|
||||
unsigned long evaluate(unsigned long) const override { return value_; }
|
||||
private:
|
||||
unsigned long value_;
|
||||
};
|
||||
|
||||
class VariableNode : public ASTNode {
|
||||
public:
|
||||
unsigned long evaluate(unsigned long n) const override { return n; }
|
||||
};
|
||||
|
||||
class BinaryOpNode : public ASTNode {
|
||||
public:
|
||||
enum class Op { ADD, SUB, MUL, DIV, MOD, EQ, NE, LT, LE, GT, GE, AND, OR };
|
||||
|
||||
BinaryOpNode(Op op, std::unique_ptr<ASTNode> left, std:: unique_ptr<ASTNode> right)
|
||||
: op_(op), left_(std::move(left)), right_(std::move(right)) {}
|
||||
|
||||
unsigned long evaluate(unsigned long n) const override {
|
||||
unsigned long left_val = left_->evaluate(n);
|
||||
unsigned long right_val = right_->evaluate(n);
|
||||
|
||||
switch (op_) {
|
||||
case Op::ADD: return left_val + right_val;
|
||||
case Op::SUB: return left_val - right_val;
|
||||
case Op::MUL: return left_val * right_val;
|
||||
case Op::DIV: return right_val != 0 ? left_val / right_val : 0;
|
||||
case Op::MOD: return right_val != 0 ? left_val % right_val : 0;
|
||||
case Op:: EQ: return left_val == right_val ? 1 : 0;
|
||||
case Op::NE: return left_val != right_val ? 1 : 0;
|
||||
case Op::LT: return left_val < right_val ? 1 : 0;
|
||||
case Op::LE: return left_val <= right_val ? 1 : 0;
|
||||
case Op::GT: return left_val > right_val ? 1 : 0;
|
||||
case Op::GE: return left_val >= right_val ? 1 : 0;
|
||||
case Op:: AND: return (left_val && right_val) ? 1 : 0;
|
||||
case Op::OR: return (left_val || right_val) ? 1 : 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
Op op_;
|
||||
std::unique_ptr<ASTNode> left_;
|
||||
std::unique_ptr<ASTNode> right_;
|
||||
};
|
||||
|
||||
class UnaryOpNode : public ASTNode {
|
||||
public:
|
||||
enum class Op { NOT, NEG };
|
||||
|
||||
UnaryOpNode(Op op, std:: unique_ptr<ASTNode> operand)
|
||||
: op_(op), operand_(std::move(operand)) {}
|
||||
|
||||
unsigned long evaluate(unsigned long n) const override {
|
||||
unsigned long val = operand_->evaluate(n);
|
||||
switch (op_) {
|
||||
case Op::NOT: return !val ? 1 : 0;
|
||||
case Op::NEG: return -val;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
Op op_;
|
||||
std::unique_ptr<ASTNode> operand_;
|
||||
};
|
||||
|
||||
class TernaryNode : public ASTNode {
|
||||
public:
|
||||
TernaryNode(std::unique_ptr<ASTNode> condition,
|
||||
std::unique_ptr<ASTNode> true_expr,
|
||||
std::unique_ptr<ASTNode> false_expr)
|
||||
: condition_(std::move(condition))
|
||||
, true_expr_(std::move(true_expr))
|
||||
, false_expr_(std:: move(false_expr)) {}
|
||||
|
||||
unsigned long evaluate(unsigned long n) const override {
|
||||
unsigned long cond = condition_->evaluate(n);
|
||||
return cond ? true_expr_->evaluate(n) : false_expr_->evaluate(n);
|
||||
}
|
||||
|
||||
private:
|
||||
std::unique_ptr<ASTNode> condition_;
|
||||
std:: unique_ptr<ASTNode> true_expr_;
|
||||
std:: unique_ptr<ASTNode> false_expr_;
|
||||
};
|
||||
|
||||
// PluralExpressionParser implementation
|
||||
|
||||
// Creates a parser with no expression: the token cursor is at 0 and no error
// is recorded.
PluralExpressionParser::PluralExpressionParser() : current_(0), has_error_(false) {}
|
||||
|
||||
// Nothing to release by hand; the members clean up after themselves.
PluralExpressionParser::~PluralExpressionParser() = default;
|
||||
|
||||
std::vector<Token> PluralExpressionParser::tokenize(const std::string& expr) {
|
||||
std::vector<Token> tokens;
|
||||
size_t i = 0;
|
||||
|
||||
while (i < expr.length()) {
|
||||
char c = expr[i];
|
||||
|
||||
// Skip whitespace
|
||||
if (std::isspace(c)) {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Numbers
|
||||
if (std::isdigit(c)) {
|
||||
unsigned long value = 0;
|
||||
while (i < expr.length() && std::isdigit(expr[i])) {
|
||||
value = value * 10 + (expr[i] - '0');
|
||||
i++;
|
||||
}
|
||||
tokens.emplace_back(TokenType::NUMBER, value);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Variable 'n'
|
||||
if (c == 'n') {
|
||||
tokens.emplace_back(TokenType::VARIABLE);
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Two-character operators
|
||||
if (i + 1 < expr.length()) {
|
||||
std::string two_char = expr.substr(i, 2);
|
||||
if (two_char == "==") {
|
||||
tokens.emplace_back(TokenType::EQUAL);
|
||||
i += 2;
|
||||
continue;
|
||||
} else if (two_char == "!=") {
|
||||
tokens.emplace_back(TokenType::NOT_EQUAL);
|
||||
i += 2;
|
||||
continue;
|
||||
} else if (two_char == "<=") {
|
||||
tokens.emplace_back(TokenType::LESS_EQUAL);
|
||||
i += 2;
|
||||
continue;
|
||||
} else if (two_char == ">=") {
|
||||
tokens.emplace_back(TokenType::GREATER_EQUAL);
|
||||
i += 2;
|
||||
continue;
|
||||
} else if (two_char == "&&") {
|
||||
tokens.emplace_back(TokenType::AND);
|
||||
i += 2;
|
||||
continue;
|
||||
} else if (two_char == "||") {
|
||||
tokens.emplace_back(TokenType::OR);
|
||||
i += 2;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Single-character operators
|
||||
switch (c) {
|
||||
case '+': tokens.emplace_back(TokenType::PLUS); break;
|
||||
case '-': tokens.emplace_back(TokenType:: MINUS); break;
|
||||
case '*': tokens.emplace_back(TokenType::MULTIPLY); break;
|
||||
case '/': tokens.emplace_back(TokenType:: DIVIDE); break;
|
||||
case '%': tokens.emplace_back(TokenType::MODULO); break;
|
||||
case '<': tokens.emplace_back(TokenType:: LESS); break;
|
||||
case '>': tokens.emplace_back(TokenType::GREATER); break;
|
||||
case '!': tokens.emplace_back(TokenType::NOT); break;
|
||||
case '?': tokens.emplace_back(TokenType::QUESTION); break;
|
||||
case ':': tokens.emplace_back(TokenType:: COLON); break;
|
||||
case '(': tokens.emplace_back(TokenType::LPAREN); break;
|
||||
case ')': tokens.emplace_back(TokenType::RPAREN); break;
|
||||
default:
|
||||
// Unknown character, skip it
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
tokens.emplace_back(TokenType::END);
|
||||
return tokens;
|
||||
}
|
||||
|
||||
bool PluralExpressionParser::parse(const std::string& expression) {
|
||||
tokens_ = tokenize(expression);
|
||||
current_ = 0;
|
||||
has_error_ = false;
|
||||
error_message_. clear();
|
||||
root_ = nullptr;
|
||||
|
||||
root_ = parseExpression();
|
||||
return root_ != nullptr && !has_error_;
|
||||
}
|
||||
|
||||
unsigned long PluralExpressionParser::evaluate(unsigned long n) const {
|
||||
if (! root_) {
|
||||
return 0;
|
||||
}
|
||||
return root_->evaluate(n);
|
||||
}
|
||||
|
||||
// Returns the token at the cursor without advancing, or an END token once
// the cursor is past the last token.
Token PluralExpressionParser::peek() const {
    const bool exhausted = current_ >= tokens_.size();
    return exhausted ? Token(TokenType::END) : tokens_[current_];
}
|
||||
|
||||
// Returns the token at the cursor and advances past it; past the last token
// it returns END and leaves the cursor where it is.
Token PluralExpressionParser::consume() {
    if (current_ >= tokens_.size()) return Token(TokenType::END);
    const Token taken = tokens_[current_];
    ++current_;
    return taken;
}
|
||||
|
||||
// Consumes the next token if it has the given type and reports whether it did.
bool PluralExpressionParser::match(TokenType type) {
    const bool hit = check(type);
    if (hit) consume();
    return hit;
}
|
||||
|
||||
// Reports whether the next token has the given type, without consuming it.
bool PluralExpressionParser::check(TokenType type) const {
    const Token next = peek();
    return type == next.type;
}
|
||||
|
||||
// Records a parse error and its message.
void PluralExpressionParser::setError(const std::string& message) {
    error_message_ = message;
    has_error_ = true;
}
|
||||
|
||||
std::unique_ptr<ASTNode> PluralExpressionParser::parseExpression() {
|
||||
return parseTernary();
|
||||
}
|
||||
|
||||
std::unique_ptr<ASTNode> PluralExpressionParser::parseTernary() {
|
||||
auto expr = parseLogicalOr();
|
||||
if (!expr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (match(TokenType::QUESTION)) {
|
||||
auto true_expr = parseExpression();
|
||||
if (!true_expr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (!match(TokenType:: COLON)) {
|
||||
setError("Expected ':' in ternary expression");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto false_expr = parseTernary();
|
||||
if (!false_expr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return std::make_unique<TernaryNode>(std::move(expr),
|
||||
std::move(true_expr),
|
||||
std::move(false_expr));
|
||||
}
|
||||
|
||||
return expr;
|
||||
}
|
||||
|
||||
std::unique_ptr<ASTNode> PluralExpressionParser::parseLogicalOr() {
|
||||
auto left = parseLogicalAnd();
|
||||
if (!left) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
while (match(TokenType::OR)) {
|
||||
auto right = parseLogicalAnd();
|
||||
if (!right) {
|
||||
return nullptr;
|
||||
}
|
||||
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::OR,
|
||||
std::move(left),
|
||||
std::move(right));
|
||||
}
|
||||
|
||||
return left;
|
||||
}
|
||||
|
||||
std::unique_ptr<ASTNode> PluralExpressionParser::parseLogicalAnd() {
|
||||
auto left = parseEquality();
|
||||
if (!left) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
while (match(TokenType::AND)) {
|
||||
auto right = parseEquality();
|
||||
if (!right) {
|
||||
return nullptr;
|
||||
}
|
||||
left = std::make_unique<BinaryOpNode>(BinaryOpNode:: Op::AND,
|
||||
std::move(left),
|
||||
std::move(right));
|
||||
}
|
||||
|
||||
return left;
|
||||
}
|
||||
|
||||
std::unique_ptr<ASTNode> PluralExpressionParser::parseEquality() {
|
||||
auto left = parseRelational();
|
||||
if (!left) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
if (match(TokenType::EQUAL)) {
|
||||
auto right = parseRelational();
|
||||
if (!right) {
|
||||
return nullptr;
|
||||
}
|
||||
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::EQ,
|
||||
std::move(left),
|
||||
std::move(right));
|
||||
} else if (match(TokenType::NOT_EQUAL)) {
|
||||
auto right = parseRelational();
|
||||
if (!right) {
|
||||
return nullptr;
|
||||
}
|
||||
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::NE,
|
||||
std::move(left),
|
||||
std::move(right));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return left;
|
||||
}
|
||||
|
||||
std::unique_ptr<ASTNode> PluralExpressionParser::parseRelational() {
|
||||
auto left = parseAdditive();
|
||||
if (!left) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
if (match(TokenType::LESS)) {
|
||||
auto right = parseAdditive();
|
||||
if (!right) {
|
||||
return nullptr;
|
||||
}
|
||||
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::LT,
|
||||
std::move(left),
|
||||
std::move(right));
|
||||
} else if (match(TokenType::LESS_EQUAL)) {
|
||||
auto right = parseAdditive();
|
||||
if (!right) {
|
||||
return nullptr;
|
||||
}
|
||||
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::LE,
|
||||
std:: move(left),
|
||||
std::move(right));
|
||||
} else if (match(TokenType::GREATER)) {
|
||||
auto right = parseAdditive();
|
||||
if (!right) {
|
||||
return nullptr;
|
||||
}
|
||||
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::GT,
|
||||
std::move(left),
|
||||
std:: move(right));
|
||||
} else if (match(TokenType:: GREATER_EQUAL)) {
|
||||
auto right = parseAdditive();
|
||||
if (!right) {
|
||||
return nullptr;
|
||||
}
|
||||
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::GE,
|
||||
std::move(left),
|
||||
std::move(right));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return left;
|
||||
}
|
||||
|
||||
std:: unique_ptr<ASTNode> PluralExpressionParser:: parseAdditive() {
|
||||
auto left = parseMultiplicative();
|
||||
if (!left) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
if (match(TokenType::PLUS)) {
|
||||
auto right = parseMultiplicative();
|
||||
if (!right) {
|
||||
return nullptr;
|
||||
}
|
||||
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::ADD,
|
||||
std::move(left),
|
||||
std::move(right));
|
||||
} else if (match(TokenType:: MINUS)) {
|
||||
auto right = parseMultiplicative();
|
||||
if (!right) {
|
||||
return nullptr;
|
||||
}
|
||||
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::SUB,
|
||||
std::move(left),
|
||||
std::move(right));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return left;
|
||||
}
|
||||
|
||||
std:: unique_ptr<ASTNode> PluralExpressionParser:: parseMultiplicative() {
|
||||
auto left = parseUnary();
|
||||
if (!left) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
if (match(TokenType::MULTIPLY)) {
|
||||
auto right = parseUnary();
|
||||
if (!right) {
|
||||
return nullptr;
|
||||
}
|
||||
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::MUL,
|
||||
std::move(left),
|
||||
std::move(right));
|
||||
} else if (match(TokenType::DIVIDE)) {
|
||||
auto right = parseUnary();
|
||||
if (!right) {
|
||||
return nullptr;
|
||||
}
|
||||
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::DIV,
|
||||
std:: move(left),
|
||||
std::move(right));
|
||||
} else if (match(TokenType::MODULO)) {
|
||||
auto right = parseUnary();
|
||||
if (!right) {
|
||||
return nullptr;
|
||||
}
|
||||
left = std::make_unique<BinaryOpNode>(BinaryOpNode::Op::MOD,
|
||||
std::move(left),
|
||||
std::move(right));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return left;
|
||||
}
|
||||
|
||||
std::unique_ptr<ASTNode> PluralExpressionParser::parseUnary() {
|
||||
if (match(TokenType::NOT)) {
|
||||
auto operand = parseUnary();
|
||||
if (!operand) {
|
||||
return nullptr;
|
||||
}
|
||||
return std::make_unique<UnaryOpNode>(UnaryOpNode::Op::NOT, std::move(operand));
|
||||
}
|
||||
|
||||
if (match(TokenType:: MINUS)) {
|
||||
auto operand = parseUnary();
|
||||
if (!operand) {
|
||||
return nullptr;
|
||||
}
|
||||
return std::make_unique<UnaryOpNode>(UnaryOpNode:: Op::NEG, std::move(operand));
|
||||
}
|
||||
|
||||
return parsePrimary();
|
||||
}
|
||||
|
||||
std::unique_ptr<ASTNode> PluralExpressionParser::parsePrimary() {
|
||||
// Number
|
||||
if (check(TokenType::NUMBER)) {
|
||||
Token tok = consume();
|
||||
return std::make_unique<NumberNode>(tok.value);
|
||||
}
|
||||
|
||||
// Variable 'n'
|
||||
if (match(TokenType::VARIABLE)) {
|
||||
return std::make_unique<VariableNode>();
|
||||
}
|
||||
|
||||
// Parenthesized expression
|
||||
if (match(TokenType::LPAREN)) {
|
||||
auto expr = parseExpression();
|
||||
if (!expr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (!match(TokenType:: RPAREN)) {
|
||||
setError("Expected ')' after expression");
|
||||
return nullptr;
|
||||
}
|
||||
return expr;
|
||||
}
|
||||
|
||||
setError("Unexpected token in expression");
|
||||
return nullptr;
|
||||
}
|
||||
91
src/calibre/utils/translator/plural_expression_parser.h
Normal file
91
src/calibre/utils/translator/plural_expression_parser.h
Normal file
@ -0,0 +1,91 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
// Kinds of token that make up the plural expression language.
// Enumerator order is kept stable: NUMBER is 0 and END is the last value.
enum class TokenType {
    // Operands
    NUMBER,         // numeric literal (value is carried in Token::value)
    VARIABLE,       // 'n'

    // Arithmetic operators
    PLUS,           // +
    MINUS,          // -
    MULTIPLY,       // *
    DIVIDE,         // /
    MODULO,         // %

    // Comparison operators
    EQUAL,          // ==
    NOT_EQUAL,      // !=
    LESS,           // <
    LESS_EQUAL,     // <=
    GREATER,        // >
    GREATER_EQUAL,  // >=

    // Logical operators
    AND,            // &&
    OR,             // ||
    NOT,            // !

    // Ternary conditional
    QUESTION,       // ?
    COLON,          // :

    // Grouping
    LPAREN,         // (
    RPAREN,         // )

    END             // last enumerator; presumably the end-of-input marker
};
|
||||
|
||||
struct Token {
|
||||
TokenType type;
|
||||
unsigned long value; // For NUMBER tokens
|
||||
|
||||
Token(TokenType t, unsigned long v = 0) : type(t), value(v) {}
|
||||
};
|
||||
|
||||
// Base class for abstract syntax tree nodes of a parsed plural expression.
class ASTNode {
public:
    // Virtual so that nodes can be destroyed through a pointer to the base.
    virtual ~ASTNode() = default;

    // Computes this node's value for the given n. Every concrete node type
    // must provide an implementation.
    virtual unsigned long evaluate(unsigned long n) const = 0;
};
|
||||
|
||||
class PluralExpressionParser {
|
||||
public:
|
||||
PluralExpressionParser();
|
||||
~PluralExpressionParser();
|
||||
|
||||
// Parse a plural expression string
|
||||
bool parse(const std::string& expression);
|
||||
|
||||
// Evaluate the parsed expression for a given n
|
||||
unsigned long evaluate(unsigned long n) const;
|
||||
|
||||
// Check if expression is valid
|
||||
bool isValid() const { return root_ != nullptr && !has_error_; }
|
||||
|
||||
// Get error message if parsing failed
|
||||
const std::string& getError() const { return error_message_; }
|
||||
|
||||
private:
|
||||
// Tokenizer
|
||||
std::vector<Token> tokenize(const std::string& expr);
|
||||
|
||||
// Recursive descent parser (returns nullptr on error)
|
||||
std::unique_ptr<ASTNode> parseExpression();
|
||||
std::unique_ptr<ASTNode> parseTernary();
|
||||
std::unique_ptr<ASTNode> parseLogicalOr();
|
||||
std::unique_ptr<ASTNode> parseLogicalAnd();
|
||||
std::unique_ptr<ASTNode> parseEquality();
|
||||
std::unique_ptr<ASTNode> parseRelational();
|
||||
std::unique_ptr<ASTNode> parseAdditive();
|
||||
std::unique_ptr<ASTNode> parseMultiplicative();
|
||||
std::unique_ptr<ASTNode> parseUnary();
|
||||
std::unique_ptr<ASTNode> parsePrimary();
|
||||
|
||||
// Helper methods
|
||||
Token peek() const;
|
||||
Token consume();
|
||||
bool match(TokenType type);
|
||||
bool check(TokenType type) const;
|
||||
void setError(const std::string& message);
|
||||
|
||||
std::vector<Token> tokens_;
|
||||
size_t current_;
|
||||
std::unique_ptr<ASTNode> root_;
|
||||
bool has_error_;
|
||||
std:: string error_message_;
|
||||
};
|
||||
32
src/calibre/utils/translator/test_translator.py
Normal file
32
src/calibre/utils/translator/test_translator.py
Normal file
@ -0,0 +1,32 @@
|
||||
#!/usr/bin/env python
|
||||
# License: GPLv3 Copyright: 2026, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
import gettext
|
||||
import io
|
||||
import unittest
|
||||
import zipfile
|
||||
|
||||
from calibre.utils.localization import available_translations, get_lc_messages_path
|
||||
from calibre_extensions.translator import Translator
|
||||
|
||||
|
||||
class TestTranslator(unittest.TestCase):
    # Runs the module-level test_translator() check against the messages.mo
    # catalog of every available translation that has a messages path.

    def test_translator(self):
        locales_zip = P('localization/locales.zip', allow_user_override=False)
        with zipfile.ZipFile(locales_zip, 'r') as zf:
            for lang in available_translations():
                mpath = get_lc_messages_path(lang)
                if mpath is None:
                    # No messages path for this language, nothing to compare
                    continue
                # Inside this method the bare name resolves to the module-level
                # helper function, not to this method.
                test_translator(self, lang, zf.read(mpath + '/messages.mo'))
|
||||
|
||||
|
||||
def test_translator(self: TestTranslator, lang: str, data: bytes) -> None:
    '''
    Check that the native Translator and Python's gettext.GNUTranslations
    agree on plural() for every count from 1 to 99, for the catalog in data.

    self is the running test case (used for its assertions), lang is only
    used to label failures and data is the raw bytes of a .mo file.
    '''
    native = Translator(data)
    reference = gettext.GNUTranslations(io.BytesIO(data))
    failure_message = f'plural() not equal for language: {lang}'
    for count in range(1, 100):
        self.assertEqual(reference.plural(count), native.plural(count), failure_message)
|
||||
|
||||
|
||||
def find_tests():
    # Collect every test method of TestTranslator using the default loader
    loader = unittest.defaultTestLoader
    return loader.loadTestsFromTestCase(TestTranslator)
|
||||
Loading…
x
Reference in New Issue
Block a user