mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More work on de-vendoring pykakasi
This commit is contained in:
parent
ec8c06caa9
commit
04d5728ef9
@ -323,13 +323,6 @@ License: GPL-3
|
||||
The full text of the GPL is distributed as in
|
||||
/usr/share/common-licenses/GPL-3 on Debian systems.
|
||||
|
||||
Files: src/calibre/ebooks/unihandecode/pykakasi/*
|
||||
Copyright: 2011, Hiroshi Miura <miurahr@linux.com>
|
||||
Copyright: 1992, Hironobu Takahashi
|
||||
License: GPL-2+
|
||||
The full text of the GPL is distributed as in
|
||||
/usr/share/common-licenses/GPL on Debian systems.
|
||||
|
||||
Files: src/calibre/ebooks/unihandecode/*
|
||||
Copyright: 2010-2011, Hiroshi Miura <miurahr@linux.com>
|
||||
Copyright: 2009, John Schember
|
||||
|
@ -85,11 +85,12 @@ def ignore_in_lib(base, items, ignored_dirs=None):
|
||||
ignored_dirs = {'.svn', '.bzr', '.git', 'test', 'tests', 'testing'}
|
||||
for name in items:
|
||||
path = j(base, name)
|
||||
is_kakasi = 'pykakasi' in path
|
||||
if os.path.isdir(path):
|
||||
if name != 'plugins' and (name in ignored_dirs or not is_package_dir(path)):
|
||||
if name != 'plugins' and (name in ignored_dirs or not is_package_dir(path)) and not (is_kakasi and name == 'data'):
|
||||
ans.append(name)
|
||||
else:
|
||||
if name.rpartition('.')[-1] not in ('so', 'py'):
|
||||
if name.rpartition('.')[-1] not in ('so', 'py') and not (is_kakasi and name.endswith('.db')):
|
||||
ans.append(name)
|
||||
return ans
|
||||
|
||||
|
@ -614,12 +614,15 @@ class Freeze:
|
||||
|
||||
@flush
|
||||
def add_package_dir(self, x, dest=None):
|
||||
is_kakasi = 'pykakasi' in x
|
||||
allowed_exts = ('', '.py', '.so')
|
||||
if is_kakasi:
|
||||
allowed_exts += ('.db',)
|
||||
def ignore(root, files):
|
||||
ans = []
|
||||
for y in files:
|
||||
ext = os.path.splitext(y)[1]
|
||||
if ext not in ('', '.py', '.so') or \
|
||||
(not ext and not os.path.isdir(join(root, y))):
|
||||
if ext not in allowed_exts or (not ext and not os.path.isdir(join(root, y))):
|
||||
ans.append(y)
|
||||
|
||||
return ans
|
||||
|
@ -1043,6 +1043,24 @@
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"name": "wrapt",
|
||||
"unix": {
|
||||
"filename": "wrapt-1.16.0-py3-none-any.whl",
|
||||
"hash": "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1",
|
||||
"urls": ["pypi"]
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"name": "deprecated",
|
||||
"unix": {
|
||||
"filename": "Deprecated-1.2.14-py2.py3-none-any.whl",
|
||||
"hash": "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c",
|
||||
"urls": ["pypi"]
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"name": "pykakasi",
|
||||
"unix": {
|
||||
|
@ -11,7 +11,7 @@ __all__ = [
|
||||
'gui',
|
||||
'git_version',
|
||||
'develop', 'install',
|
||||
'kakasi', 'rapydscript', 'cacerts', 'recent_uas', 'resources',
|
||||
'rapydscript', 'cacerts', 'recent_uas', 'resources',
|
||||
'check', 'test', 'test_rs', 'upgrade_source_code',
|
||||
'sdist', 'bootstrap', 'extdev',
|
||||
'manual', 'tag_release',
|
||||
@ -90,10 +90,9 @@ from setup.test import Test, TestRS
|
||||
test = Test()
|
||||
test_rs = TestRS()
|
||||
|
||||
from setup.resources import CACerts, Kakasi, RapydScript, RecentUAs, Resources
|
||||
from setup.resources import CACerts, RapydScript, RecentUAs, Resources
|
||||
|
||||
resources = Resources()
|
||||
kakasi = Kakasi()
|
||||
cacerts = CACerts()
|
||||
recent_uas = RecentUAs()
|
||||
rapydscript = RapydScript()
|
||||
|
@ -9,13 +9,11 @@ import errno
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import zipfile
|
||||
from zlib import compress
|
||||
|
||||
from polyglot.builtins import codepoint_to_chr, iteritems, itervalues, only_unicode_recursive
|
||||
from setup import Command, __appname__, basenames, download_securely, dump_json
|
||||
from polyglot.builtins import iteritems, itervalues, only_unicode_recursive
|
||||
from setup import Command, basenames, download_securely, dump_json
|
||||
|
||||
|
||||
def get_opts_from_parser(parser):
|
||||
@ -29,113 +27,6 @@ def get_opts_from_parser(parser):
|
||||
yield from do_opt(o)
|
||||
|
||||
|
||||
class Kakasi(Command): # {{{
|
||||
|
||||
description = 'Compile resources for unihandecode'
|
||||
|
||||
KAKASI_PATH = os.path.join(Command.SRC, __appname__,
|
||||
'ebooks', 'unihandecode', 'pykakasi')
|
||||
|
||||
def run(self, opts):
|
||||
self.records = {}
|
||||
src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
|
||||
dest = self.j(self.RESOURCES, 'localization',
|
||||
'pykakasi','kanwadict2.calibre_msgpack')
|
||||
base = os.path.dirname(dest)
|
||||
if not os.path.exists(base):
|
||||
os.makedirs(base)
|
||||
|
||||
if self.newer(dest, src):
|
||||
self.info('\tGenerating Kanwadict')
|
||||
|
||||
for line in open(src, "rb"):
|
||||
self.parsekdict(line)
|
||||
self.kanwaout(dest)
|
||||
|
||||
src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
|
||||
dest = self.j(self.RESOURCES, 'localization',
|
||||
'pykakasi','itaijidict2.calibre_msgpack')
|
||||
|
||||
if self.newer(dest, src):
|
||||
self.info('\tGenerating Itaijidict')
|
||||
self.mkitaiji(src, dest)
|
||||
|
||||
src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
|
||||
dest = self.j(self.RESOURCES, 'localization',
|
||||
'pykakasi','kanadict2.calibre_msgpack')
|
||||
|
||||
if self.newer(dest, src):
|
||||
self.info('\tGenerating kanadict')
|
||||
self.mkkanadict(src, dest)
|
||||
|
||||
def mkitaiji(self, src, dst):
|
||||
dic = {}
|
||||
for line in open(src, "rb"):
|
||||
line = line.decode('utf-8').strip()
|
||||
if line.startswith(';;'): # skip comment
|
||||
continue
|
||||
if re.match(r"^$",line):
|
||||
continue
|
||||
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:codepoint_to_chr(int(x.group(1),16)), line)
|
||||
dic[pair[0]] = pair[1]
|
||||
from calibre.utils.serialize import msgpack_dumps
|
||||
with open(dst, 'wb') as f:
|
||||
f.write(msgpack_dumps(dic))
|
||||
|
||||
def mkkanadict(self, src, dst):
|
||||
dic = {}
|
||||
for line in open(src, "rb"):
|
||||
line = line.decode('utf-8').strip()
|
||||
if line.startswith(';;'): # skip comment
|
||||
continue
|
||||
if re.match(r"^$",line):
|
||||
continue
|
||||
(alpha, kana) = line.split(' ')
|
||||
dic[kana] = alpha
|
||||
from calibre.utils.serialize import msgpack_dumps
|
||||
with open(dst, 'wb') as f:
|
||||
f.write(msgpack_dumps(dic))
|
||||
|
||||
def parsekdict(self, line):
|
||||
line = line.decode('utf-8').strip()
|
||||
if line.startswith(';;'): # skip comment
|
||||
return
|
||||
(yomi, kanji) = line.split(' ')
|
||||
if ord(yomi[-1:]) <= ord('z'):
|
||||
tail = yomi[-1:]
|
||||
yomi = yomi[:-1]
|
||||
else:
|
||||
tail = ''
|
||||
self.updaterec(kanji, yomi, tail)
|
||||
|
||||
def updaterec(self, kanji, yomi, tail):
|
||||
key = "%04x"%ord(kanji[0])
|
||||
if key in self.records:
|
||||
if kanji in self.records[key]:
|
||||
rec = self.records[key][kanji]
|
||||
rec.append((yomi,tail))
|
||||
self.records[key].update({kanji: rec})
|
||||
else:
|
||||
self.records[key][kanji]=[(yomi, tail)]
|
||||
else:
|
||||
self.records[key] = {}
|
||||
self.records[key][kanji]=[(yomi, tail)]
|
||||
|
||||
def kanwaout(self, out):
|
||||
from calibre.utils.serialize import msgpack_dumps
|
||||
with open(out, 'wb') as f:
|
||||
dic = {}
|
||||
for k, v in iteritems(self.records):
|
||||
dic[k] = compress(msgpack_dumps(v))
|
||||
f.write(msgpack_dumps(dic))
|
||||
|
||||
def clean(self):
|
||||
kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
|
||||
if os.path.exists(kakasi):
|
||||
shutil.rmtree(kakasi)
|
||||
# }}}
|
||||
|
||||
|
||||
class CACerts(Command): # {{{
|
||||
|
||||
description = 'Get updated mozilla CA certificate bundle'
|
||||
@ -213,7 +104,7 @@ class RapydScript(Command): # {{{
|
||||
class Resources(Command): # {{{
|
||||
|
||||
description = 'Compile various needed calibre resources'
|
||||
sub_commands = ['kakasi', 'liberation_fonts', 'mathjax', 'rapydscript', 'hyphenation', 'piper_voices']
|
||||
sub_commands = ['liberation_fonts', 'mathjax', 'rapydscript', 'hyphenation', 'piper_voices']
|
||||
|
||||
def run(self, opts):
|
||||
from calibre.utils.serialize import msgpack_dumps
|
||||
@ -337,8 +228,6 @@ class Resources(Command): # {{{
|
||||
x = self.j(self.RESOURCES, x+'.pickle')
|
||||
if os.path.exists(x):
|
||||
os.remove(x)
|
||||
from setup.commands import kakasi
|
||||
kakasi.clean()
|
||||
for x in ('builtin_recipes.xml', 'builtin_recipes.zip',
|
||||
'template-functions.json', 'user-manual-translation-stats.json'):
|
||||
x = self.j(self.RESOURCES, x)
|
||||
|
@ -16,15 +16,48 @@ This functionality is owned by Kakasi Japanese processing engine.
|
||||
Copyright (c) 2010 Hiroshi Miura
|
||||
'''
|
||||
|
||||
import pickle
|
||||
import re
|
||||
from importlib.resources import files
|
||||
|
||||
from pykakasi import kakasi
|
||||
from pykakasi.kanji import Itaiji, Kanwa
|
||||
from pykakasi.properties import Configurations
|
||||
from pykakasi.scripts import Jisyo
|
||||
|
||||
from calibre.ebooks.unihandecode.jacodepoints import CODEPOINTS as JACODES
|
||||
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
|
||||
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
|
||||
|
||||
|
||||
# pykakasi uses paths for its dictionaries rather than using the
|
||||
# Traversable API of importlib.resources so we have to hack around it, sigh.
|
||||
def dictdata(dbfile: str):
|
||||
t = files('pykakasi')
|
||||
q = t.joinpath('data').joinpath(dbfile)
|
||||
return q.read_bytes()
|
||||
|
||||
|
||||
def jisyo_init(self, dbname):
|
||||
self._dict = pickle.loads(dictdata(dbname))
|
||||
|
||||
|
||||
def itaiji_init(self):
|
||||
if self._itaijidict is None:
|
||||
with self._lock:
|
||||
if self._itaijidict is None:
|
||||
self._itaijidict = pickle.loads(dictdata(Configurations.jisyo_itaiji))
|
||||
|
||||
def kanwa_init(self):
|
||||
if self._jisyo_table is None:
|
||||
with self._lock:
|
||||
if self._jisyo_table is None:
|
||||
self._jisyo_table = pickle.loads(dictdata(Configurations.jisyo_kanwa))
|
||||
|
||||
Jisyo.__init__ = jisyo_init
|
||||
Itaiji.__init__ = itaiji_init
|
||||
Kanwa.__init__ = kanwa_init
|
||||
|
||||
class Jadecoder(Unidecoder):
|
||||
|
||||
def __init__(self):
|
||||
|
Loading…
x
Reference in New Issue
Block a user