From e6a1a191f600609b7f02bd9575b014afa0695ce7 Mon Sep 17 00:00:00 2001 From: Charles Haley Date: Wed, 5 Feb 2025 14:57:59 +0000 Subject: [PATCH] Add the East Asian language transliteration tweak we discussed. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I tested it with the Gutenberg book "苦悶の欄" by Earl Derr Biggers (https://www.gutenberg.org/ebooks/39287). With the tweak empty the title was transliterated to "Ku Men noLan". With the tweak set to 'ja' it was transliterated to "Kumon no Ran". Is this transliteration correct? I don't know. --- resources/default_tweaks.py | 10 ++++++++++ src/calibre/utils/localization.py | 5 ++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index 8c5a50e6d2..1c123f214b 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -580,3 +580,13 @@ openers_by_scheme = {} # It must be one of the values Default, Sunday, Monday, Tuesday, Wednesday, # Thursday, Friday, or Saturday, all in English, spelled exactly as shown. calendar_start_day_of_week = 'Default' + +#: East Asian language to use for transliteration +# Setting this tweak will make calibre use the specified language as the "base" +# language when transliterating East Asian languages to ASCII. This might be +# useful if you run calibre in English but want text transliterated as +# Japanese. The valid values are 'ja', 'kr', 'vn', 'zh', and '' (empty string). +# The empty string means use the user interface language as the base language. +# Any value not in the above list will be treated as the empty string. 
+# Example: east_asian_base_language = 'ja' +east_asian_base_language = '' diff --git a/src/calibre/utils/localization.py b/src/calibre/utils/localization.py index 1f5a195305..5878060a9b 100644 --- a/src/calibre/utils/localization.py +++ b/src/calibre/utils/localization.py @@ -548,7 +548,10 @@ def get_udc(): global _udc if _udc is None: from calibre.ebooks.unihandecode import Unihandecoder - _udc = Unihandecoder(lang=get_lang()) + from calibre.utils.config_base import tweaks + lang = tweaks.get('east_asian_base_language') + lang = lang if lang in ('ja', 'kr', 'vn', 'zh') else get_lang() + _udc = Unihandecoder(lang=lang) return _udc