Merge branch 'online-dictionaries' of https://github.com/un-pogaz/calibre

This commit is contained in:
Kovid Goyal 2023-10-01 19:59:57 +05:30
commit a44bb42eda
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 217 additions and 45 deletions

View File

@ -0,0 +1,69 @@
{
"af-ZA": "af_ZA",
"an-ES": "an_ES",
"ar": "ar",
"be-BY": "be_BY",
"bg-BG": "bg_BG",
"bn-BD": "bn_BD",
"bo": "bo",
"br-FR": "br_FR",
"bs-BA": "bs_BA",
"ca": "ca",
"ckb": "ckb",
"cs-CZ": "cs_CZ",
"da-DK": "da_DK",
"de-AT": "de",
"de-CH": "de",
"de-DE": "de",
"el-GR": "el_GR",
"en-AU": "en",
"en-CA": "en",
"en-GB": "en",
"en-US": "en",
"en-ZA": "en",
"eo": "eo",
"es-ES": "es",
"et-EE": "et_EE",
"fa-IR": "fa_IR",
"fr-FR": "fr_FR",
"gd-GB": "gd_GB",
"gl-ES": "gl",
"gug-PY": "gug",
"gu-IN": "gu_IN",
"he-IL": "he_IL",
"hi-IN": "hi_IN",
"hr-HR": "hr_HR",
"hu-HU": "hu_HU",
"id-ID": "id",
"is-IS": "is",
"it-IT": "it_IT",
"kmr": "kmr_Latn",
"ko-KR": "ko_KR",
"lo-LA": "lo_LA",
"lt-LT": "lt_LT",
"lv-LV": "lv_LV",
"mn-MN": "mn_MN",
"ne-NP": "ne_NP",
"nl-NL": "nl_NL",
"nb-NO": "no",
"nn-NO": "no",
"oc-FR": "oc_FR",
"pl-PL": "pl_PL",
"pt-BR": "pt_BR",
"pt-PT": "pt_PT",
"ro-RO": "ro",
"ru-RU": "ru_RU",
"si-LK": "si_LK",
"sk-SK": "sk_SK",
"sl-SI": "sl_SI",
"sq-AL": "sq_AL",
"sr-RS": "sr",
"sv-FI": "sv_SE",
"sv-SE": "sv_SE",
"sw-TZ": "sw_TZ",
"te-IN": "te_IN",
"th-TH": "th_TH",
"tr-TR": "tr_TR",
"uk-UA": "uk_UA",
"vi-VN": "vi"
}

View File

@ -12,10 +12,10 @@ from itertools import chain
from qt.core import ( from qt.core import (
QT_VERSION_STR, QAbstractItemView, QAbstractTableModel, QAction, QApplication, QT_VERSION_STR, QAbstractItemView, QAbstractTableModel, QAction, QApplication,
QCheckBox, QComboBox, QDialog, QDialogButtonBox, QFont, QFormLayout, QGridLayout, QCheckBox, QComboBox, QDialog, QDialogButtonBox, QFont, QFormLayout, QGridLayout,
QHBoxLayout, QIcon, QInputDialog, QKeySequence, QLabel, QLineEdit, QListWidget, QHBoxLayout, QIcon, QInputDialog, QKeySequence, QLabel, QLineEdit,
QListWidgetItem, QMenu, QModelIndex, QPlainTextEdit, QPushButton, QSize, QListWidget,QListWidgetItem, QMenu, QModelIndex, QPlainTextEdit, QPushButton,
QStackedLayout, Qt, QTableView, QTimer, QToolButton, QTreeWidget, QTreeWidgetItem, QSize, QStackedLayout, Qt, QTableView, QTimer, QToolButton, QTreeWidget,
QVBoxLayout, QWidget, pyqtSignal, QTabWidget, QTreeWidgetItem, QVBoxLayout, QWidget, pyqtSignal,
) )
from threading import Thread from threading import Thread
@ -37,10 +37,10 @@ from calibre.gui2.widgets2 import FlowLayout
from calibre.spell import DictionaryLocale from calibre.spell import DictionaryLocale
from calibre.spell.break_iterator import split_into_words from calibre.spell.break_iterator import split_into_words
from calibre.spell.dictionary import ( from calibre.spell.dictionary import (
best_locale_for_language, builtin_dictionaries, custom_dictionaries, dprefs, best_locale_for_language, builtin_dictionaries, catalog_online_dictionaries,
get_dictionary, remove_dictionary, rename_dictionary, custom_dictionaries, dprefs, get_dictionary, remove_dictionary, rename_dictionary,
) )
from calibre.spell.import_from import import_from_oxt from calibre.spell.import_from import import_from_oxt, import_from_online
from calibre.startup import connect_lambda from calibre.startup import connect_lambda
from calibre.utils.icu import contains, primary_contains, primary_sort_key, sort_key from calibre.utils.icu import contains, primary_contains, primary_sort_key, sort_key
from calibre.utils.localization import ( from calibre.utils.localization import (
@ -64,17 +64,74 @@ def country_map():
_country_map = msgpack_loads(P('localization/iso3166.calibre_msgpack', data=True, allow_user_override=False)) _country_map = msgpack_loads(P('localization/iso3166.calibre_msgpack', data=True, allow_user_override=False))
return _country_map return _country_map
def current_languages_dictionaries(reread=False):
    """Group every known dictionary by language code and country code.

    Both the builtin dictionaries and the custom ones are considered;
    ``reread`` is forwarded to ``custom_dictionaries()``.

    Returns a nested mapping ``langcode -> countrycode -> set of dictionaries``.
    A dictionary is filed under each of its locales as well as under its
    primary locale. Both levels are ``defaultdict`` instances, so plain
    indexing of a missing key creates an empty entry.
    """
    by_language = defaultdict(lambda: defaultdict(set))
    known = builtin_dictionaries() | custom_dictionaries(reread=reread)
    for dictionary in known:
        every_locale = dictionary.locales | {dictionary.primary_locale}
        for loc in every_locale:
            by_language[loc.langcode][loc.countrycode].add(dictionary)
    return by_language
class AddDictionary(QDialog): # {{{ class AddDictionary(QDialog): # {{{
def __init__(self, parent=None): def __init__(self, parent=None):
QDialog.__init__(self, parent) QDialog.__init__(self, parent)
self.setWindowTitle(_('Add a dictionary')) self.setWindowTitle(_('Add a dictionary'))
self.l = l = QFormLayout(self) l = QVBoxLayout(self)
l.setFieldGrowthPolicy(QFormLayout.FieldGrowthPolicy.AllNonFixedFieldsGrow)
self.setLayout(l) self.setLayout(l)
self.la = la = QLabel('<p>' + _( self.tabs = tabs = QTabWidget(self)
l.addWidget(self.tabs)
self.bb = bb = QDialogButtonBox(QDialogButtonBox.StandardButton.Ok|QDialogButtonBox.StandardButton.Cancel)
bb.accepted.connect(self.accept)
bb.rejected.connect(self.reject)
l.addWidget(bb)
self.web_download = QWidget(self)
self.oxt_import = QWidget(self)
tabs.addTab(self.web_download, _('Download online'))
tabs.addTab(self.oxt_import, _('Import from OXT file'))
tabs.currentChanged.connect(self.tab_changed)
# Download online tab
l = QFormLayout(self.web_download)
l.setFieldGrowthPolicy(QFormLayout.FieldGrowthPolicy.AllNonFixedFieldsGrow)
self.web_download.setLayout(l)
la = QLabel('<p>' + _(
'''{0} supports the use of LibreOffice dictionaries for spell checking. You can
download some of them from <a href="{1}">the LibreOffice dictionaries repository</a>.'''
).format(__appname__, 'https://github.com/LibreOffice/dictionaries')+'<p>') # noqa
la.setWordWrap(True)
la.setOpenExternalLinks(True)
la.setMinimumWidth(450)
l.addRow(la)
self.combobox_online = c = QComboBox(self)
l.addRow(_('Langue to download:'), c)
c.addItem('', None)
languages = current_languages_dictionaries(reread=False)
def k(dictionary):
return sort_key(calibre_langcode_to_name(dictionary['primary_locale'].langcode))
for data in sorted(catalog_online_dictionaries(), key=lambda x:k(x)):
if languages.get(data['primary_locale'].langcode, {}).get(data['primary_locale'].countrycode, None):
continue
local = calibre_langcode_to_name(data['primary_locale'].langcode)
country = country_map()['names'].get(data['primary_locale'].countrycode, None)
text = f'{local} ({country})' if country else local
data['text'] = text
c.addItem(text, data)
# Oxt import tab
l = QFormLayout(self.oxt_import)
l.setFieldGrowthPolicy(QFormLayout.FieldGrowthPolicy.AllNonFixedFieldsGrow)
self.oxt_import.setLayout(l)
la = QLabel('<p>' + _(
'''{0} supports the use of LibreOffice dictionaries for spell checking. You can '''{0} supports the use of LibreOffice dictionaries for spell checking. You can
download more dictionaries from <a href="{1}">the LibreOffice extensions repository</a>. download more dictionaries from <a href="{1}">the LibreOffice extensions repository</a>.
The dictionary will download as an .oxt file. Simply specify the path to the The dictionary will download as an .oxt file. Simply specify the path to the
@ -85,12 +142,12 @@ class AddDictionary(QDialog): # {{{
la.setMinimumWidth(450) la.setMinimumWidth(450)
l.addRow(la) l.addRow(la)
self.h = h = QHBoxLayout() h = QHBoxLayout()
self.path = p = QLineEdit(self) self.path = p = QLineEdit(self)
p.setPlaceholderText(_('Path to OXT file')) p.setPlaceholderText(_('Path to OXT file'))
h.addWidget(p) h.addWidget(p)
self.b = b = QToolButton(self) self.button_open_oxt = b = QToolButton(self)
b.setIcon(QIcon.ic('document_open.png')) b.setIcon(QIcon.ic('document_open.png'))
b.setToolTip(_('Browse for an OXT file')) b.setToolTip(_('Browse for an OXT file'))
b.clicked.connect(self.choose_file) b.clicked.connect(self.choose_file)
@ -102,11 +159,11 @@ class AddDictionary(QDialog): # {{{
n.setPlaceholderText(_('Choose a nickname for this dictionary')) n.setPlaceholderText(_('Choose a nickname for this dictionary'))
l.addRow(_('&Nickname:'), n) l.addRow(_('&Nickname:'), n)
def tab_changed(self, idx):
    """Move keyboard focus to the main input widget of the newly shown tab.

    idx: index of the tab that became current (0 is the online download
    tab, 1 is the OXT import tab). Any other index is ignored.
    """
    if idx == 0:
        target = self.combobox_online
    elif idx == 1:
        target = self.button_open_oxt
    else:
        return
    target.setFocus(Qt.FocusReason.OtherFocusReason)
def choose_file(self): def choose_file(self):
path = choose_files(self, 'choose-dict-for-import', _('Choose OXT Dictionary'), filters=[ path = choose_files(self, 'choose-dict-for-import', _('Choose OXT Dictionary'), filters=[
@ -121,7 +178,7 @@ class AddDictionary(QDialog): # {{{
def nickname(self): def nickname(self):
return str(self.nick.text()).strip() return str(self.nick.text()).strip()
def accept(self): def _process_oxt_import(self):
nick = self.nickname nick = self.nickname
if not nick: if not nick:
return error_dialog(self, _('Must specify nickname'), _( return error_dialog(self, _('Must specify nickname'), _(
@ -140,6 +197,31 @@ class AddDictionary(QDialog): # {{{
if num == 0: if num == 0:
return error_dialog(self, _('No dictionaries'), _( return error_dialog(self, _('No dictionaries'), _(
'No dictionaries were found in %s') % oxt, show=True) 'No dictionaries were found in %s') % oxt, show=True)
def _process_online_download(self):
    """Download and install the dictionary selected in the online tab.

    Returns ``None`` when at least one dictionary was installed. On any
    failure an error dialog is shown and the value returned by
    ``error_dialog`` is passed back to the caller.
    """
    data = self.combobox_online.currentData()
    if data is None:
        # The blank placeholder entry carries no data, so there is nothing
        # to download; indexing it would raise a TypeError.
        return error_dialog(self, _('No language selected'), _(
            'You must choose a language to download.'), show=True)
    nick = 'online-' + data['name']
    directory = data['directory']
    if nick in {d.name for d in custom_dictionaries()}:
        return error_dialog(self, _('Nickname already used'), _(
            'A dictionary with the nick name "%s" already exists.') % nick, show=True)
    try:
        num = import_from_online(directory, nick)
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt and
        # SystemExit are not reported as download failures.
        import traceback
        return error_dialog(self, _('Failed to download dictionaries'), _(
            'Failed to download dictionaries for "{:s}". Click "Show details" for more information').format(data['text']),
            det_msg=traceback.format_exc(), show=True)
    if num == 0:
        return error_dialog(self, _('No dictionaries'), _(
            'No dictionary was found for "{:s}"').format(data['text']), show=True)
def accept(self):
    """Run the import for the active tab and close the dialog on success.

    The per-tab helpers return ``None`` when they succeed and the result of
    the error dialog they displayed otherwise. The dialog is only accepted
    (closed) in the former case, so the user can correct the input and try
    again after an error.
    """
    idx = self.tabs.currentIndex()
    result = None
    if idx == 0:
        result = self._process_online_download()
    elif idx == 1:
        result = self._process_oxt_import()
    # NOTE(review): relies on error_dialog(..., show=True) never returning
    # None -- confirm against its implementation.
    if result is None:
        QDialog.accept(self)
# }}} # }}}
@ -481,11 +563,7 @@ class ManageDictionaries(Dialog): # {{{
rename_dictionary(d, str(item.text(0))) rename_dictionary(d, str(item.text(0)))
def build_dictionaries(self, reread=False): def build_dictionaries(self, reread=False):
all_dictionaries = builtin_dictionaries() | custom_dictionaries(reread=reread) languages = current_languages_dictionaries(reread=reread)
languages = defaultdict(lambda : defaultdict(set))
for d in all_dictionaries:
for locale in d.locales | {d.primary_locale}:
languages[locale.langcode][locale.countrycode].add(d)
bf = QFont(self.dictionaries.font()) bf = QFont(self.dictionaries.font())
bf.setBold(True) bf.setBold(True)
itf = QFont(self.dictionaries.font()) itf = QFont(self.dictionaries.font())
@ -1527,5 +1605,5 @@ def find_next_error(current_editor, current_editor_name, gui_parent, show_editor
if __name__ == '__main__': if __name__ == '__main__':
app = QApplication([]) app = QApplication([])
dictionaries.initialize() dictionaries.initialize()
ManageUserDictionaries.test() ManageDictionaries.test()
del app del app

View File

@ -7,6 +7,7 @@ import os
import re import re
import shutil import shutil
import sys import sys
import json
from collections import defaultdict, namedtuple from collections import defaultdict, namedtuple
from functools import partial from functools import partial
from itertools import chain from itertools import chain
@ -62,6 +63,18 @@ def builtin_dictionaries():
return _builtins return _builtins
def catalog_online_dictionaries():
    """Return the catalog of dictionaries that can be downloaded online.

    The catalog shipped as the resource 'dictionaries/online-catalog.json'
    is loaded first, then the same resource is loaded again with user
    overrides allowed and merged on top of it.

    Returns a list of dicts with the keys 'primary_locale' (the parsed
    language code), 'name' (the catalog key) and 'directory' (the catalog
    value).
    """
    resource = 'dictionaries/online-catalog.json'
    loaded = json.loads(P(resource, allow_user_override=False, data=True))
    try:
        loaded.update(json.loads(P(resource, data=True)))
    except Exception:
        # Best effort: an unreadable or malformed override must not hide the
        # shipped catalog. Exception (not a bare except) is used so that
        # KeyboardInterrupt and SystemExit still propagate.
        pass
    return [
        {'primary_locale': parse_lang_code(lang), 'name': lang, 'directory': directory}
        for lang, directory in loaded.items()
    ]
def custom_dictionaries(reread=False): def custom_dictionaries(reread=False):
global _custom global _custom
if _custom is None or reread: if _custom is None or reread:

View File

@ -8,6 +8,7 @@ import sys, glob, os, tempfile, re, codecs
from lxml import etree from lxml import etree
from calibre import browser
from calibre.constants import config_dir from calibre.constants import config_dir
from calibre.utils.xml_parse import safe_xml_fromstring from calibre.utils.xml_parse import safe_xml_fromstring
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
@ -26,6 +27,7 @@ def XPath(x):
BUILTIN_LOCALES = {'en-US', 'en-GB', 'es-ES'} BUILTIN_LOCALES = {'en-US', 'en-GB', 'es-ES'}
ONLINE_DICTIONARY_BASE_URL = 'https://raw.githubusercontent.com/LibreOffice/dictionaries/master/'
def parse_xcu(raw, origin='%origin%'): def parse_xcu(raw, origin='%origin%'):
@ -108,14 +110,35 @@ def uniq(vals, kmap=lambda x:x):
return tuple(x for x, k in zip(vals, lvals) if k not in seen and not seen_add(k)) return tuple(x for x, k in zip(vals, lvals) if k not in seen and not seen_add(k))
def _import_from_virtual_directory(read_file_func, name, dest_dir=None, prefix='dic-'):
    """Install the dictionaries described by an OXT-style file tree.

    read_file_func: callable that takes a path relative to the root of the
        tree (for example 'META-INF/manifest.xml') and returns the raw
        contents of that file.
    name: written as the first line of every installed dictionary's
        'locales' file.
    dest_dir: directory in which one sub-directory per dictionary is
        created. Defaults to the 'dictionaries' folder inside config_dir.
    prefix: prefix for the names of the created sub-directories.

    Returns the number of dictionaries that were installed.
    """
    from calibre.spell.dictionary import parse_lang_code
    dest_dir = dest_dir or os.path.join(config_dir, 'dictionaries')
    os.makedirs(dest_dir, exist_ok=True)
    num = 0
    root = safe_xml_fromstring(read_file_func('META-INF/manifest.xml'))
    xcu = XPath('//manifest:file-entry[@manifest:media-type="application/vnd.sun.star.configuration-data"]')(root)[0].get(
        '{%s}full-path' % NS_MAP['manifest'])
    for (dic, aff), locales in iteritems(parse_xcu(read_file_func(xcu), origin='')):
        dic, aff = dic.lstrip('/'), aff.lstrip('/')
        locales = uniq([x for x in map(fill_country_code, locales) if parse_lang_code(x).countrycode])
        if not locales:
            # Checked before anything is created on disk, so skipped entries
            # do not leave empty directories behind.
            continue
        # Read and convert both files before touching the filesystem: if a
        # read fails (e.g. a download error) no half-populated dictionary
        # directory is left in dest_dir.
        dd, ad = convert_to_utf8(read_file_func(dic), read_file_func(aff))
        d = tempfile.mkdtemp(prefix=prefix, dir=dest_dir)
        metadata = [name] + list(locales)
        with open(os.path.join(d, 'locales'), 'wb') as f:
            f.write(('\n'.join(metadata)).encode('utf-8'))
        with open(os.path.join(d, '%s.dic' % locales[0]), 'wb') as f:
            f.write(dd)
        with open(os.path.join(d, '%s.aff' % locales[0]), 'wb') as f:
            f.write(ad)
        num += 1
    return num
def import_from_oxt(source_path, name, dest_dir=None, prefix='dic-'):
with ZipFile(source_path) as zf:
def read_file(key): def read_file(key):
try: try:
return zf.open(key).read() return zf.open(key).read()
@ -128,26 +151,15 @@ def import_from_oxt(source_path, name, dest_dir=None, prefix='dic-'):
key = key[3:] key = key[3:]
return zf.open(key.lstrip('/')).read() return zf.open(key.lstrip('/')).read()
root = safe_xml_fromstring(zf.open('META-INF/manifest.xml').read()) return _import_from_virtual_directory(read_file, name, dest_dir=dest_dir, prefix=prefix)
xcu = XPath('//manifest:file-entry[@manifest:media-type="application/vnd.sun.star.configuration-data"]')(root)[0].get(
'{%s}full-path' % NS_MAP['manifest'])
for (dic, aff), locales in iteritems(parse_xcu(zf.open(xcu).read(), origin='')):
dic, aff = dic.lstrip('/'), aff.lstrip('/')
d = tempfile.mkdtemp(prefix=prefix, dir=dest_dir)
locales = uniq([x for x in map(fill_country_code, locales) if parse_lang_code(x).countrycode])
if not locales:
continue
metadata = [name] + list(locales)
with open(os.path.join(d, 'locales'), 'wb') as f:
f.write(('\n'.join(metadata)).encode('utf-8'))
dd, ad = convert_to_utf8(read_file(dic), read_file(aff))
with open(os.path.join(d, '%s.dic' % locales[0]), 'wb') as f:
f.write(dd)
with open(os.path.join(d, '%s.aff' % locales[0]), 'wb') as f:
f.write(ad)
num += 1
return num
def import_from_online(directory, name, dest_dir=None, prefix='dic-'):
    """Download and install the dictionaries found in an online directory.

    directory: path of the dictionary's directory, relative to
        ONLINE_DICTIONARY_BASE_URL.
    name, dest_dir, prefix: passed through unchanged to
        _import_from_virtual_directory().

    Returns whatever _import_from_virtual_directory() returns.
    """
    br = browser()
    # The base URL already ends with a slash; normalise every component so
    # that joining them never produces '//' inside the path.
    base_url = ONLINE_DICTIONARY_BASE_URL.rstrip('/')
    remote_dir = directory.strip('/')

    def read_file(key):
        url = '/'.join((base_url, remote_dir, key.lstrip('/')))
        rp = br.open(url)
        return rp.read()

    return _import_from_virtual_directory(read_file, name, dest_dir=dest_dir, prefix=prefix)
if __name__ == '__main__': if __name__ == '__main__':
import_from_libreoffice_source_tree(sys.argv[-1]) import_from_libreoffice_source_tree(sys.argv[-1])