mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-05-24 02:02:32 -04:00
116 lines
4.0 KiB
Python
116 lines
4.0 KiB
Python
#!/usr/bin/env python
|
|
# License: GPLv3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>
|
|
|
|
import glob
|
|
import hashlib
|
|
import json
|
|
import os
|
|
import shutil
|
|
import tarfile
|
|
from io import BytesIO
|
|
|
|
from setup.revendor import ReVendor
|
|
|
|
|
|
def locales_from_dicts(dicts):
    '''
    Map locale names to dictionary file names using only the file names.

    Each entry of ``dicts`` is a path whose base name is expected to look
    like ``hyph_<locale>.dic``. The ``hyph_`` prefix and ``.dic`` suffix are
    removed by length (not by matching), and ``-`` in the remaining text is
    replaced by ``_`` to form the key. The value is the unmodified base name.
    If two paths yield the same key, the later one wins.
    '''
    prefix_len = len('hyph_')
    suffix_len = len('.dic')
    basenames = map(os.path.basename, dicts)
    return {
        fname[prefix_len:-suffix_len].replace('-', '_'): fname
        for fname in basenames
    }
|
|
|
|
|
|
def locales_from_xcu(xcu, dicts):
    '''
    Map locale names to dictionary file names using a dictionaries.xcu file.

    ``xcu`` is the path of the XML file to read and ``dicts`` is an iterable
    of dictionary paths; only their base names are used. For every element
    whose text contains ``DICT_HYPH``, the element two levels up is scanned
    for children named ``Locales`` and ``Locations`` (via the OpenOffice
    registry ``name`` attribute). Every listed locale, with ``-`` replaced by
    ``_``, is mapped to the last path component of the location, provided
    that file name is one of ``dicts``.
    '''
    from lxml import etree

    parser = etree.XMLParser(recover=True, no_network=True, resolve_entities=False)
    with open(xcu, 'rb') as f:
        raw = f.read()
    root = etree.fromstring(raw, parser=parser)

    name_attr = '{http://openoffice.org/2001/registry}name'
    available = {os.path.basename(x) for x in dicts}
    result = {}
    for match in root.xpath('//*[contains(text(),"DICT_HYPH")]'):
        entry = match.getparent().getparent()
        locales = dic_name = None
        for prop in entry:
            kind = prop.get(name_attr)
            if kind == 'Locales':
                # Whitespace separated list of locale codes
                locales = [loc.replace('-', '_') for loc in prop[0].text.split()]
            elif kind == 'Locations':
                # Keep only the file name part of the location
                dic_name = prop[0].text.strip().split('/')[-1]
        if not locales or dic_name not in available:
            continue
        result.update(dict.fromkeys(locales, dic_name))
    return result
|
|
|
|
|
|
def process_dictionaries(src, output_dir):
    '''
    Collect the hyphenation dictionaries found under ``src`` into ``output_dir``.

    Every immediate sub-directory of ``src`` that contains ``hyph_*.dic``
    files is examined. Its locale mapping is read from ``dictionaries.xcu``
    when that file exists, otherwise it is derived from the dictionary file
    names. When a mapping is found, the directory's dictionaries are copied
    (flat, by base name) into ``output_dir``. Finally the combined
    locale -> file name mapping is written to ``output_dir/locales.json`` as
    UTF-8 encoded JSON.

    Directory entries and glob results are processed in sorted order.
    os.listdir() and glob.glob() return names in arbitrary order, so without
    sorting, both the key order of locales.json and the dictionary chosen
    for a locale claimed by more than one directory would vary between
    machines.
    '''
    locale_data = {}
    for entry in sorted(os.listdir(src)):
        subdir = os.path.join(src, entry)
        if not os.path.isdir(subdir):
            continue
        dicts = tuple(sorted(glob.glob(os.path.join(subdir, 'hyph_*.dic'))))
        if not dicts:
            continue
        xcu = os.path.join(subdir, 'dictionaries.xcu')
        if os.path.exists(xcu):
            locales = locales_from_xcu(xcu, dicts)
        else:
            locales = locales_from_dicts(dicts)
        if not locales:
            continue
        locale_data.update(locales)
        for d in dicts:
            shutil.copyfile(d, os.path.join(output_dir, os.path.basename(d)))
    # json.dumps() always returns str on Python 3, so encode unconditionally
    data = json.dumps(locale_data, indent=2).encode('utf-8')
    with open(os.path.join(output_dir, 'locales.json'), 'wb') as f:
        f.write(data)
|
|
|
|
|
|
def compress_tar(buf, outf):
    '''
    Compress the full contents of ``buf`` and write the result to ``outf``.

    ``buf`` is rewound to its start first. If ``calibre_lzma.xz`` can be
    imported its ``compress(buf, outf)`` function does the work, otherwise
    the standard library ``lzma`` module is used with preset 9 and the
    extreme flag. ``buf`` must provide ``seek()`` and ``getvalue()``;
    ``outf`` must provide ``write()``.
    '''
    buf.seek(0)
    try:
        from calibre_lzma.xz import compress as xz_compress
    except ImportError:
        xz_compress = None

    if xz_compress is not None:
        xz_compress(buf, outf)
        return

    # Fallback: standard library implementation
    import lzma
    compressed = lzma.compress(buf.getvalue(), preset=9 | lzma.PRESET_EXTREME)
    outf.write(compressed)
|
|
|
|
|
|
class Hyphenation(ReVendor):
    '''
    Command that rebuilds the vendored hyphenation dictionaries.

    The dictionaries come either from a local path given in the options or
    from a downloaded release of the LibreOffice dictionaries repository.
    '''

    description = 'Download the hyphenation dictionaries'
    NAME = 'hyphenation'
    TAR_NAME = 'hyphenation dictionaries'
    VERSION = 'master'
    DOWNLOAD_URL = f'https://github.com/LibreOffice/dictionaries/archive/{VERSION}.tar.gz'
    CAN_USE_SYSTEM_VERSION = False

    def run(self, opts):
        '''
        Regenerate the contents of ``self.vendored_dir``.

        Three files are produced: ``dictionaries.tar.xz`` (a compressed tar
        of all ``.dic`` files), ``sha1sum`` (hex SHA-1 of the concatenated
        ``.dic`` contents, taken in sorted name order) and ``locales.json``.
        Uses ``opts.path_to_hyphenation`` as the source when it is set,
        otherwise downloads from ``opts.hyphenation_url``.
        '''
        self.clean()
        os.makedirs(self.vendored_dir)
        with self.temp_dir() as dl_src, self.temp_dir() as output_dir:
            src = opts.path_to_hyphenation
            if not src:
                src = self.download_vendor_release(dl_src, opts.hyphenation_url)
            process_dictionaries(src, output_dir)

            # Sorted so that both the checksum and the archive member order
            # are independent of directory listing order
            dic_names = [x for x in os.listdir(output_dir) if x.endswith('.dic')]
            dic_names.sort()

            digest = hashlib.sha1()
            for dic_name in dic_names:
                with open(os.path.join(output_dir, dic_name), 'rb') as f:
                    digest.update(f.read())
            hsh = str(digest.hexdigest())

            tar_buf = BytesIO()
            with tarfile.TarFile(fileobj=tar_buf, mode='w') as tf:
                for dic_name in dic_names:
                    with open(os.path.join(output_dir, dic_name), 'rb') as df:
                        member = tf.gettarinfo(arcname=dic_name, fileobj=df)
                        # Fixed metadata, so the archive does not depend on
                        # when or by whom the files were created
                        member.mtime = 0
                        member.uid = member.gid = 1000
                        member.uname = member.gname = 'kovid'
                        tf.addfile(member, df)

            tar_path = os.path.join(self.vendored_dir, 'dictionaries.tar.xz')
            with open(tar_path, 'wb') as f:
                compress_tar(tar_buf, f)

            sha_path = os.path.join(self.vendored_dir, 'sha1sum')
            with open(sha_path, 'w') as f:
                f.write(hsh)

            shutil.copy(os.path.join(output_dir, 'locales.json'), self.vendored_dir)
|