mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Code to fetch hyphenation dictionaries
This commit is contained in:
parent
f57d45de8d
commit
1c54eb1648
1
.gitignore
vendored
1
.gitignore
vendored
@ -15,6 +15,7 @@ build
|
|||||||
dist
|
dist
|
||||||
docs
|
docs
|
||||||
resources/localization
|
resources/localization
|
||||||
|
resources/hyphenation
|
||||||
resources/scripts.calibre_msgpack
|
resources/scripts.calibre_msgpack
|
||||||
resources/ebook-convert-complete.calibre_msgpack
|
resources/ebook-convert-complete.calibre_msgpack
|
||||||
resources/builtin_recipes.xml
|
resources/builtin_recipes.xml
|
||||||
|
@ -6,7 +6,8 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import sys, re, os, platform, subprocess, time, errno
|
import sys, re, os, platform, subprocess, time, errno, tempfile, shutil
|
||||||
|
from contextlib import contextmanager
|
||||||
|
|
||||||
is64bit = platform.architecture()[0] == '64bit'
|
is64bit = platform.architecture()[0] == '64bit'
|
||||||
iswindows = re.search('win(32|64)', sys.platform)
|
iswindows = re.search('win(32|64)', sys.platform)
|
||||||
@ -289,6 +290,14 @@ class Command(object):
|
|||||||
warnings.append((args, kwargs))
|
warnings.append((args, kwargs))
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
|
||||||
|
@contextmanager
def temp_dir(self, **kw):
    """Yield the path of a newly created temporary directory.

    All keyword arguments are passed straight through to
    ``tempfile.mkdtemp()``. The directory and everything inside it is removed
    with ``shutil.rmtree()`` when the ``with`` block is left, whether it
    finished normally or raised.
    """
    scratch = tempfile.mkdtemp(**kw)
    try:
        yield scratch
    finally:
        # Runs on both normal exit and exceptions raised inside the with block
        shutil.rmtree(scratch)
|
||||||
|
|
||||||
|
|
||||||
def installer_name(ext, is64bit=False):
|
def installer_name(ext, is64bit=False):
|
||||||
if is64bit and ext == 'msi':
|
if is64bit and ext == 'msi':
|
||||||
|
@ -21,7 +21,7 @@ __all__ = [
|
|||||||
'upload_user_manual', 'upload_demo', 'reupload',
|
'upload_user_manual', 'upload_demo', 'reupload',
|
||||||
'stage1', 'stage2', 'stage3', 'stage4', 'stage5', 'publish', 'publish_betas',
|
'stage1', 'stage2', 'stage3', 'stage4', 'stage5', 'publish', 'publish_betas',
|
||||||
'linux', 'linux32', 'linux64', 'win', 'win32', 'win64', 'osx', 'build_dep',
|
'linux', 'linux32', 'linux64', 'win', 'win32', 'win64', 'osx', 'build_dep',
|
||||||
'export_packages',
|
'export_packages', 'hyphenation'
|
||||||
]
|
]
|
||||||
|
|
||||||
from setup.installers import Linux, Win, OSX, Linux32, Linux64, Win32, Win64, ExtDev, BuildDep, ExportPackages
|
from setup.installers import Linux, Win, OSX, Linux32, Linux64, Win32, Win64, ExtDev, BuildDep, ExportPackages
|
||||||
@ -45,6 +45,9 @@ build = Build()
|
|||||||
from setup.mathjax import MathJax
|
from setup.mathjax import MathJax
|
||||||
mathjax = MathJax()
|
mathjax = MathJax()
|
||||||
|
|
||||||
|
from setup.hyphenation import Hyphenation
|
||||||
|
hyphenation = Hyphenation()
|
||||||
|
|
||||||
from setup.git_version import GitVersion
|
from setup.git_version import GitVersion
|
||||||
git_version = GitVersion()
|
git_version = GitVersion()
|
||||||
|
|
||||||
|
103
setup/hyphenation.py
Normal file
103
setup/hyphenation.py
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||||
|
# License: GPLv3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
import glob
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
from io import BytesIO
|
||||||
|
from zipfile import ZipFile
|
||||||
|
|
||||||
|
from setup import Command, download_securely
|
||||||
|
|
||||||
|
# Zip archive of the master branch of the LibreOffice dictionaries repository
# on GitHub; Hyphenation.run() downloads and unpacks it.
URL = 'https://github.com/LibreOffice/dictionaries/archive/master.zip'
|
||||||
|
|
||||||
|
|
||||||
|
def locales_from_dicts(dicts):
    """Derive a locale -> dictionary file name map from the file names alone.

    Every path in ``dicts`` is reduced to its base name; the leading
    ``hyph_`` (5 characters) and trailing ``.dic`` (4 characters) are sliced
    off and any ``-`` in what remains is replaced by ``_`` to form the locale
    key. The value is the unmodified base name. If two paths produce the same
    key, the later one wins.
    """
    skip_front = len('hyph_')
    skip_back = len('.dic')
    file_names = (os.path.basename(p) for p in dicts)
    return {
        fname[skip_front:-skip_back].replace('-', '_'): fname
        for fname in file_names
    }
|
||||||
|
|
||||||
|
|
||||||
|
def locales_from_xcu(xcu, dicts):
    """Build a locale -> dictionary file name map from a dictionaries.xcu file.

    :param xcu: Path to the XCU (XML) file to read.
    :param dicts: Iterable of paths to the hyphenation dictionaries that are
        actually present; only their base names are compared.
    :return: dict mapping each locale (with ``-`` replaced by ``_``) to the
        base name of its dictionary file. Entries whose file is not among
        ``dicts`` are left out.

    For every element whose text contains ``DICT_HYPH`` the grandparent node
    is inspected: its child named ``Locales`` supplies a whitespace separated
    list of locales and its child named ``Locations`` supplies the file
    location, of which only the last ``/`` separated component is kept.
    """
    from lxml import etree

    # The XCU file comes out of a downloaded archive, so never expand entities
    # or fetch anything over the network while parsing it.
    parser = etree.XMLParser(resolve_entities=False, no_network=True)
    with open(xcu, 'rb') as f:
        root = etree.fromstring(f.read(), parser=parser)

    ans = {}
    available = {os.path.basename(x) for x in dicts}
    for value in root.xpath('//*[contains(text(),"DICT_HYPH")]'):
        parent = value.getparent()
        node = parent.getparent() if parent is not None else None
        if node is None:
            # Match sits too close to the document root to have a grandparent
            continue
        locales = path = None
        for prop in node:
            name = prop.get('{http://openoffice.org/2001/registry}name')
            if name not in ('Locales', 'Locations') or not len(prop):
                continue
            # .text is None for an empty element; treat that as no content
            text = prop[0].text or ''
            if name == 'Locales':
                locales = [x.replace('-', '_') for x in text.split()]
            else:
                path = text.strip().split('/')[-1]
        if locales and path in available:
            for locale in locales:
                ans[locale] = path
    return ans
|
||||||
|
|
||||||
|
|
||||||
|
def process_dictionaries(src, output_dir):
    """Collect the hyphenation dictionaries found under ``src``.

    :param src: Directory whose immediate sub-directories are searched for
        ``hyph_*.dic`` files.
    :param output_dir: Existing directory that receives a copy of every
        dictionary plus a ``locales.json`` file mapping locale names to
        dictionary file names.

    For each sub-directory the locale map is taken from its
    ``dictionaries.xcu`` when that file exists, otherwise it is derived from
    the dictionary file names. Sub-directories that yield no locales are
    skipped entirely.
    """
    locale_data = {}
    # os.listdir() and glob.glob() return entries in arbitrary order. Sorting
    # makes it deterministic which directory wins when two of them claim the
    # same locale.
    for x in sorted(os.listdir(src)):
        q = os.path.join(src, x)
        if not os.path.isdir(q):
            continue
        dicts = tuple(sorted(glob.glob(os.path.join(q, 'hyph_*.dic'))))
        if not dicts:
            continue
        xcu = os.path.join(q, 'dictionaries.xcu')
        locales = (
            locales_from_xcu(xcu, dicts) if os.path.exists(xcu) else
            locales_from_dicts(dicts))
        if locales:
            locale_data.update(locales)
            for d in dicts:
                shutil.copyfile(
                    d, os.path.join(output_dir, os.path.basename(d)))
    # sort_keys keeps locales.json byte-for-byte stable between runs
    data = json.dumps(locale_data, indent=2, sort_keys=True)
    if not isinstance(data, bytes):
        # json.dumps() gives text on Python 3; the file is written in binary mode
        data = data.encode('utf-8')
    with open(os.path.join(output_dir, 'locales.json'), 'wb') as f:
        f.write(data)
|
||||||
|
|
||||||
|
|
||||||
|
class Hyphenation(Command):
    """Command that downloads and packages the hyphenation dictionaries.

    The result is written to the ``hyphenation`` directory under
    ``self.RESOURCES``: a ``dictionaries.tar.xz`` archive holding the
    ``.dic`` files and a ``locales.json`` file.
    """

    description = 'Download the hyphenation dictionaries'

    def add_options(self, parser):
        # This command does not define any command line options
        pass

    @property
    def hyphenation_dir(self):
        """Directory that holds the generated hyphenation resources."""
        return self.j(self.RESOURCES, 'hyphenation')

    def clean(self):
        """Remove the generated hyphenation directory if it exists."""
        if os.path.exists(self.hyphenation_dir):
            shutil.rmtree(self.hyphenation_dir)

    def run(self, opts):
        """Download the dictionary archive and rebuild the resources.

        Any previous output is removed first. Requires a ``tar`` executable
        that supports ``-J`` (xz compression); raises
        ``subprocess.CalledProcessError`` if ``tar`` exits with an error.
        """
        self.clean()
        os.makedirs(self.hyphenation_dir)
        self.info('Downloading hyphenation dictionaries...')
        with self.temp_dir() as src, ZipFile(BytesIO(download_securely(URL))) as zf, self.temp_dir() as output_dir:
            zf.extractall(src)
            # When the archive unpacks to a single top-level entry, use that
            # entry as the source tree.
            entries = os.listdir(src)
            tree = os.path.join(src, entries[0]) if len(entries) == 1 else src
            process_dictionaries(tree, output_dir)
            # Sorted so the archive member order does not depend on
            # directory listing order.
            dics = sorted(x for x in os.listdir(output_dir) if x.endswith('.dic'))
            # Extend the current environment instead of replacing it, so that
            # PATH and friends are still available to tar and xz.
            subprocess.check_call(
                ['tar', '-cJf', os.path.join(self.hyphenation_dir, 'dictionaries.tar.xz')] + dics,
                env=dict(os.environ, XZ_OPT='-9e -T0'), cwd=output_dir)
            shutil.copy(self.j(output_dir, 'locales.json'), self.hyphenation_dir)
|
@ -199,7 +199,7 @@ class RapydScript(Command): # {{{
|
|||||||
class Resources(Command): # {{{
|
class Resources(Command): # {{{
|
||||||
|
|
||||||
description = 'Compile various needed calibre resources'
|
description = 'Compile various needed calibre resources'
|
||||||
sub_commands = ['kakasi', 'mathjax', 'rapydscript']
|
sub_commands = ['kakasi', 'mathjax', 'rapydscript', 'hyphenation']
|
||||||
|
|
||||||
def run(self, opts):
|
def run(self, opts):
|
||||||
from calibre.utils.serialize import msgpack_dumps
|
from calibre.utils.serialize import msgpack_dumps
|
||||||
|
Loading…
x
Reference in New Issue
Block a user