mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More work on de-vendoring pykakasi
This commit is contained in:
parent
ec8c06caa9
commit
04d5728ef9
@ -323,13 +323,6 @@ License: GPL-3
|
|||||||
The full text of the GPL is distributed as in
|
The full text of the GPL is distributed as in
|
||||||
/usr/share/common-licenses/GPL-3 on Debian systems.
|
/usr/share/common-licenses/GPL-3 on Debian systems.
|
||||||
|
|
||||||
Files: src/calibre/ebooks/unihandecode/pykakasi/*
|
|
||||||
Copyright: 2011, Hiroshi Miura <miurahr@linux.com>
|
|
||||||
Copyright: 1992, Hironobu Takahashi
|
|
||||||
License: GPL-2+
|
|
||||||
The full text of the GPL is distributed as in
|
|
||||||
/usr/share/common-licenses/GPL on Debian systems.
|
|
||||||
|
|
||||||
Files: src/calibre/ebooks/unihandecode/*
|
Files: src/calibre/ebooks/unihandecode/*
|
||||||
Copyright: 2010-2011, Hiroshi Miura <miurahr@linux.com>
|
Copyright: 2010-2011, Hiroshi Miura <miurahr@linux.com>
|
||||||
Copyright: 2009, John Schember
|
Copyright: 2009, John Schember
|
||||||
|
@ -85,11 +85,12 @@ def ignore_in_lib(base, items, ignored_dirs=None):
|
|||||||
ignored_dirs = {'.svn', '.bzr', '.git', 'test', 'tests', 'testing'}
|
ignored_dirs = {'.svn', '.bzr', '.git', 'test', 'tests', 'testing'}
|
||||||
for name in items:
|
for name in items:
|
||||||
path = j(base, name)
|
path = j(base, name)
|
||||||
|
is_kakasi = 'pykakasi' in path
|
||||||
if os.path.isdir(path):
|
if os.path.isdir(path):
|
||||||
if name != 'plugins' and (name in ignored_dirs or not is_package_dir(path)):
|
if name != 'plugins' and (name in ignored_dirs or not is_package_dir(path)) and not (is_kakasi and name == 'data'):
|
||||||
ans.append(name)
|
ans.append(name)
|
||||||
else:
|
else:
|
||||||
if name.rpartition('.')[-1] not in ('so', 'py'):
|
if name.rpartition('.')[-1] not in ('so', 'py') and not (is_kakasi and name.endswith('.db')):
|
||||||
ans.append(name)
|
ans.append(name)
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
@ -614,12 +614,15 @@ class Freeze:
|
|||||||
|
|
||||||
@flush
|
@flush
|
||||||
def add_package_dir(self, x, dest=None):
|
def add_package_dir(self, x, dest=None):
|
||||||
|
is_kakasi = 'pykakasi' in x
|
||||||
|
allowed_exts = ('', '.py', '.so')
|
||||||
|
if is_kakasi:
|
||||||
|
allowed_exts += ('.db',)
|
||||||
def ignore(root, files):
|
def ignore(root, files):
|
||||||
ans = []
|
ans = []
|
||||||
for y in files:
|
for y in files:
|
||||||
ext = os.path.splitext(y)[1]
|
ext = os.path.splitext(y)[1]
|
||||||
if ext not in ('', '.py', '.so') or \
|
if ext not in allowed_exts or (not ext and not os.path.isdir(join(root, y))):
|
||||||
(not ext and not os.path.isdir(join(root, y))):
|
|
||||||
ans.append(y)
|
ans.append(y)
|
||||||
|
|
||||||
return ans
|
return ans
|
||||||
|
@ -1043,6 +1043,24 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "wrapt",
|
||||||
|
"unix": {
|
||||||
|
"filename": "wrapt-1.16.0-py3-none-any.whl",
|
||||||
|
"hash": "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1",
|
||||||
|
"urls": ["pypi"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "deprecated",
|
||||||
|
"unix": {
|
||||||
|
"filename": "Deprecated-1.2.14-py2.py3-none-any.whl",
|
||||||
|
"hash": "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c",
|
||||||
|
"urls": ["pypi"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"name": "pykakasi",
|
"name": "pykakasi",
|
||||||
"unix": {
|
"unix": {
|
||||||
|
@ -11,7 +11,7 @@ __all__ = [
|
|||||||
'gui',
|
'gui',
|
||||||
'git_version',
|
'git_version',
|
||||||
'develop', 'install',
|
'develop', 'install',
|
||||||
'kakasi', 'rapydscript', 'cacerts', 'recent_uas', 'resources',
|
'rapydscript', 'cacerts', 'recent_uas', 'resources',
|
||||||
'check', 'test', 'test_rs', 'upgrade_source_code',
|
'check', 'test', 'test_rs', 'upgrade_source_code',
|
||||||
'sdist', 'bootstrap', 'extdev',
|
'sdist', 'bootstrap', 'extdev',
|
||||||
'manual', 'tag_release',
|
'manual', 'tag_release',
|
||||||
@ -90,10 +90,9 @@ from setup.test import Test, TestRS
|
|||||||
test = Test()
|
test = Test()
|
||||||
test_rs = TestRS()
|
test_rs = TestRS()
|
||||||
|
|
||||||
from setup.resources import CACerts, Kakasi, RapydScript, RecentUAs, Resources
|
from setup.resources import CACerts, RapydScript, RecentUAs, Resources
|
||||||
|
|
||||||
resources = Resources()
|
resources = Resources()
|
||||||
kakasi = Kakasi()
|
|
||||||
cacerts = CACerts()
|
cacerts = CACerts()
|
||||||
recent_uas = RecentUAs()
|
recent_uas = RecentUAs()
|
||||||
rapydscript = RapydScript()
|
rapydscript = RapydScript()
|
||||||
|
@ -9,13 +9,11 @@ import errno
|
|||||||
import glob
|
import glob
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
import shutil
|
import shutil
|
||||||
import zipfile
|
import zipfile
|
||||||
from zlib import compress
|
|
||||||
|
|
||||||
from polyglot.builtins import codepoint_to_chr, iteritems, itervalues, only_unicode_recursive
|
from polyglot.builtins import iteritems, itervalues, only_unicode_recursive
|
||||||
from setup import Command, __appname__, basenames, download_securely, dump_json
|
from setup import Command, basenames, download_securely, dump_json
|
||||||
|
|
||||||
|
|
||||||
def get_opts_from_parser(parser):
|
def get_opts_from_parser(parser):
|
||||||
@ -29,113 +27,6 @@ def get_opts_from_parser(parser):
|
|||||||
yield from do_opt(o)
|
yield from do_opt(o)
|
||||||
|
|
||||||
|
|
||||||
class Kakasi(Command):  # {{{
    '''
    Command that compiles the pykakasi ``*.utf8`` dictionary sources found in
    ``KAKASI_PATH`` into ``*.calibre_msgpack`` files under
    ``RESOURCES/localization/pykakasi``.
    '''

    description = 'Compile resources for unihandecode'

    KAKASI_PATH = os.path.join(Command.SRC, __appname__,
                               'ebooks', 'unihandecode', 'pykakasi')

    def run(self, opts):
        '''
        Regenerate every dictionary for which ``self.newer(dest, src)`` is
        true. ``opts`` is accepted for the command interface but not used.
        '''
        self.records = {}
        src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
        dest = self.j(self.RESOURCES, 'localization',
                      'pykakasi', 'kanwadict2.calibre_msgpack')
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(os.path.dirname(dest), exist_ok=True)

        if self.newer(dest, src):
            self.info('\tGenerating Kanwadict')
            # Use a context manager so the source file is closed deterministically.
            with open(src, 'rb') as f:
                for line in f:
                    self.parsekdict(line)
            self.kanwaout(dest)

        src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
        dest = self.j(self.RESOURCES, 'localization',
                      'pykakasi', 'itaijidict2.calibre_msgpack')

        if self.newer(dest, src):
            self.info('\tGenerating Itaijidict')
            self.mkitaiji(src, dest)

        src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
        dest = self.j(self.RESOURCES, 'localization',
                      'pykakasi', 'kanadict2.calibre_msgpack')

        if self.newer(dest, src):
            self.info('\tGenerating kanadict')
            self.mkkanadict(src, dest)

    def _entries(self, src):
        '''Yield the decoded, stripped lines of ``src`` that are neither blank nor ``;;`` comments.'''
        with open(src, 'rb') as f:
            for raw in f:
                line = raw.decode('utf-8').strip()
                if line and not line.startswith(';;'):
                    yield line

    def mkitaiji(self, src, dst):
        '''
        Write to ``dst`` a msgpack-encoded mapping built from ``src``.

        In each entry, ``\\uXXXX`` escapes are replaced by the characters they
        denote; the first resulting character is then mapped to the second.
        '''
        from calibre.utils.serialize import msgpack_dumps
        dic = {}
        for line in self._entries(src):
            pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x: codepoint_to_chr(int(x.group(1), 16)), line)
            dic[pair[0]] = pair[1]
        with open(dst, 'wb') as f:
            f.write(msgpack_dumps(dic))

    def mkkanadict(self, src, dst):
        '''
        Write to ``dst`` a msgpack-encoded mapping built from ``src``.

        Each entry is two fields separated by a single space; the second field
        becomes the key and the first the value.
        '''
        from calibre.utils.serialize import msgpack_dumps
        dic = {}
        for line in self._entries(src):
            (alpha, kana) = line.split(' ')
            dic[kana] = alpha
        with open(dst, 'wb') as f:
            f.write(msgpack_dumps(dic))

    def parsekdict(self, line):
        '''
        Parse one raw (bytes) line of the kanwa dictionary into ``self.records``.

        The line holds two space-separated fields, ``yomi`` and ``kanji``. When
        the last character of ``yomi`` has a code point no greater than that of
        ``'z'`` it is split off and stored as the ``tail``.
        '''
        line = line.decode('utf-8').strip()
        # Skip comments, and also blank lines (as the other dictionary parsers
        # do) instead of failing on the tuple unpacking below.
        if not line or line.startswith(';;'):
            return
        (yomi, kanji) = line.split(' ')
        if ord(yomi[-1:]) <= ord('z'):
            tail = yomi[-1:]
            yomi = yomi[:-1]
        else:
            tail = ''
        self.updaterec(kanji, yomi, tail)

    def updaterec(self, kanji, yomi, tail):
        '''
        Append ``(yomi, tail)`` to the record list of ``kanji``.

        Records are grouped under the four-digit hex code point of the first
        character of ``kanji``.
        '''
        key = '%04x' % ord(kanji[0])
        self.records.setdefault(key, {}).setdefault(kanji, []).append((yomi, tail))

    def kanwaout(self, out):
        '''
        Write ``self.records`` to ``out`` as msgpack, where each top-level
        value is the zlib-compressed msgpack encoding of that key's records.
        '''
        from calibre.utils.serialize import msgpack_dumps
        dic = {k: compress(msgpack_dumps(v)) for k, v in iteritems(self.records)}
        with open(out, 'wb') as f:
            f.write(msgpack_dumps(dic))

    def clean(self):
        '''Remove the generated ``localization/pykakasi`` resource directory if it exists.'''
        kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
        if os.path.exists(kakasi):
            shutil.rmtree(kakasi)
# }}}
|
|
||||||
|
|
||||||
|
|
||||||
class CACerts(Command): # {{{
|
class CACerts(Command): # {{{
|
||||||
|
|
||||||
description = 'Get updated mozilla CA certificate bundle'
|
description = 'Get updated mozilla CA certificate bundle'
|
||||||
@ -213,7 +104,7 @@ class RapydScript(Command): # {{{
|
|||||||
class Resources(Command): # {{{
|
class Resources(Command): # {{{
|
||||||
|
|
||||||
description = 'Compile various needed calibre resources'
|
description = 'Compile various needed calibre resources'
|
||||||
sub_commands = ['kakasi', 'liberation_fonts', 'mathjax', 'rapydscript', 'hyphenation', 'piper_voices']
|
sub_commands = ['liberation_fonts', 'mathjax', 'rapydscript', 'hyphenation', 'piper_voices']
|
||||||
|
|
||||||
def run(self, opts):
|
def run(self, opts):
|
||||||
from calibre.utils.serialize import msgpack_dumps
|
from calibre.utils.serialize import msgpack_dumps
|
||||||
@ -337,8 +228,6 @@ class Resources(Command): # {{{
|
|||||||
x = self.j(self.RESOURCES, x+'.pickle')
|
x = self.j(self.RESOURCES, x+'.pickle')
|
||||||
if os.path.exists(x):
|
if os.path.exists(x):
|
||||||
os.remove(x)
|
os.remove(x)
|
||||||
from setup.commands import kakasi
|
|
||||||
kakasi.clean()
|
|
||||||
for x in ('builtin_recipes.xml', 'builtin_recipes.zip',
|
for x in ('builtin_recipes.xml', 'builtin_recipes.zip',
|
||||||
'template-functions.json', 'user-manual-translation-stats.json'):
|
'template-functions.json', 'user-manual-translation-stats.json'):
|
||||||
x = self.j(self.RESOURCES, x)
|
x = self.j(self.RESOURCES, x)
|
||||||
|
@ -16,15 +16,48 @@ This functionality is owned by Kakasi Japanese processing engine.
|
|||||||
Copyright (c) 2010 Hiroshi Miura
|
Copyright (c) 2010 Hiroshi Miura
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
import pickle
|
||||||
import re
|
import re
|
||||||
|
from importlib.resources import files
|
||||||
|
|
||||||
from pykakasi import kakasi
|
from pykakasi import kakasi
|
||||||
|
from pykakasi.kanji import Itaiji, Kanwa
|
||||||
|
from pykakasi.properties import Configurations
|
||||||
|
from pykakasi.scripts import Jisyo
|
||||||
|
|
||||||
from calibre.ebooks.unihandecode.jacodepoints import CODEPOINTS as JACODES
|
from calibre.ebooks.unihandecode.jacodepoints import CODEPOINTS as JACODES
|
||||||
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
|
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
|
||||||
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
|
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
|
||||||
|
|
||||||
|
|
||||||
|
# pykakasi uses paths for its dictionaries rather than using the
|
||||||
|
# Traversable API of importlib.resources so we have to hack around it, sigh.
|
||||||
|
def dictdata(dbfile: str):
    '''
    Return the raw bytes of ``dbfile`` from the ``data`` directory of the
    ``pykakasi`` package.

    The file is looked up with importlib.resources.files() and read in full.
    '''
    resource = files('pykakasi') / 'data' / dbfile
    return resource.read_bytes()
|
||||||
|
|
||||||
|
|
||||||
|
def jisyo_init(self, dbname):
    '''
    Initialise ``self._dict`` by unpickling the bytes that dictdata() returns
    for ``dbname``.
    '''
    raw = dictdata(dbname)
    self._dict = pickle.loads(raw)
|
||||||
|
|
||||||
|
|
||||||
|
def itaiji_init(self):
    '''
    Populate ``self._itaijidict`` if it is still ``None``.

    The value is unpickled from the bytes dictdata() returns for
    ``Configurations.jisyo_itaiji``. ``self._itaijidict`` is checked again
    after ``self._lock`` is acquired, before anything is loaded.
    '''
    if self._itaijidict is not None:
        return
    with self._lock:
        if self._itaijidict is not None:
            return
        payload = dictdata(Configurations.jisyo_itaiji)
        self._itaijidict = pickle.loads(payload)
|
||||||
|
|
||||||
|
def kanwa_init(self):
    '''
    Populate ``self._jisyo_table`` if it is still ``None``.

    The value is unpickled from the bytes dictdata() returns for
    ``Configurations.jisyo_kanwa``. ``self._jisyo_table`` is checked again
    after ``self._lock`` is acquired, before anything is loaded.
    '''
    if self._jisyo_table is not None:
        return
    with self._lock:
        if self._jisyo_table is not None:
            return
        payload = dictdata(Configurations.jisyo_kanwa)
        self._jisyo_table = pickle.loads(payload)
|
||||||
|
|
||||||
|
# Replace the constructors of the pykakasi classes with the functions defined
# above, which read the dictionaries through dictdata().
Jisyo.__init__ = jisyo_init
|
||||||
|
Itaiji.__init__ = itaiji_init
|
||||||
|
Kanwa.__init__ = kanwa_init
|
||||||
|
|
||||||
class Jadecoder(Unidecoder):
|
class Jadecoder(Unidecoder):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user