#!/usr/bin/env python2 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai # License: GPLv3 Copyright: 2019, Kovid Goyal from __future__ import absolute_import, division, print_function, unicode_literals import glob import hashlib import json import os import shutil import subprocess from io import BytesIO from zipfile import ZipFile from setup import Command, download_securely URL = 'https://github.com/LibreOffice/dictionaries/archive/master.zip' def locales_from_dicts(dicts): ans = {} for path in dicts: name = bname = os.path.basename(path) name = name[len('hyph_'):-len('.dic')] ans[name.replace('-', '_')] = bname return ans def locales_from_xcu(xcu, dicts): from lxml import etree with open(xcu, 'rb') as f: root = etree.fromstring(f.read()) ans = {} dicts = {os.path.basename(x) for x in dicts} for value in root.xpath('//*[contains(text(),"DICT_HYPH")]'): node = value.getparent().getparent() locales = path = None for prop in node: name = prop.get('{http://openoffice.org/2001/registry}name') if name == 'Locales': locales = [x.replace('-', '_') for x in prop[0].text.split()] elif name == 'Locations': path = prop[0].text.strip().split('/')[-1] if locales and path in dicts: for locale in locales: ans[locale] = path return ans def process_dictionaries(src, output_dir): locale_data = {} for x in os.listdir(src): q = os.path.join(src, x) if not os.path.isdir(q): continue dicts = tuple(glob.glob(os.path.join(q, 'hyph_*.dic'))) if not dicts: continue xcu = os.path.join(q, 'dictionaries.xcu') locales = ( locales_from_xcu(xcu, dicts) if os.path.exists(xcu) else locales_from_dicts(dicts)) if locales: locale_data.update(locales) for d in dicts: shutil.copyfile( d, os.path.join(output_dir, os.path.basename(d))) data = json.dumps(locale_data, indent=2) if not isinstance(data, bytes): data = data.encode('utf-8') with open(os.path.join(output_dir, 'locales.json'), 'wb') as f: f.write(data) class Hyphenation(Command): description = 'Download the hyphenation dictionaries' def add_options(self, parser): pass # parser.add_option('--path-to-mathjax', help='Path to the MathJax source code') @property def hyphenation_dir(self): return self.j(self.RESOURCES, 'hyphenation') def clean(self): if os.path.exists(self.hyphenation_dir): shutil.rmtree(self.hyphenation_dir) def run(self, opts): self.clean() os.makedirs(self.hyphenation_dir) self.info('Downloading hyphenation dictionaries...') with self.temp_dir() as src, ZipFile(BytesIO(download_securely(URL))) as zf, self.temp_dir() as output_dir: zf.extractall(src) if len(os.listdir(src)) == 1: src = os.path.join(src, os.listdir(src)[0]) process_dictionaries(src, output_dir) dics = [x for x in os.listdir(output_dir) if x.endswith('.dic')] m = hashlib.sha1() for dic in sorted(dics): with open(os.path.join(output_dir, dic), 'rb') as f: m.update(f.read()) hsh = type('')(m.hexdigest()) subprocess.check_call([ 'tar', '-cJf', os.path.join(self.hyphenation_dir, 'dictionaries.tar.xz')] + dics , env={'XZ_OPT': '-9e -T0'}, cwd=output_dir) shutil.copy(self.j(output_dir, 'locales.json'), self.hyphenation_dir) with open(os.path.join(self.hyphenation_dir, 'sha1sum'), 'w') as f: f.write(hsh)