mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
When converting non-English text to English, use the user's current calibre interface language. This allows Japanese/Korean/Vietnamese characters to be converted correctly. Previously they were assumed to be Chinese. Fixes #7622 (Calibre need to switch logic when converting Unicode filename into ASCII)
This commit is contained in:
commit
c4f06e39af
27
COPYRIGHT
27
COPYRIGHT
@ -193,6 +193,33 @@ License: GPL-3
|
|||||||
The full text of the GPL is distributed as in
|
The full text of the GPL is distributed as in
|
||||||
/usr/share/common-licenses/GPL-3 on Debian systems.
|
/usr/share/common-licenses/GPL-3 on Debian systems.
|
||||||
|
|
||||||
|
Files: src/calibre/ebooks/unihandecode/pykakasi/*
|
||||||
|
Copyright: 2011, Hiroshi Miura <miurahr@linux.com>
|
||||||
|
Copyright: 1992, Hironobu Takahashi
|
||||||
|
License: GPL-2+
|
||||||
|
The full text of the GPL is distributed as in
|
||||||
|
/usr/share/common-licenses/GPL on Debian systems.
|
||||||
|
|
||||||
|
Files: resources/kanwadict2.db
|
||||||
|
Files: resources/itaijidict2.pickle
|
||||||
|
Copyright: 2011, Hiroshi Miura <miurahr@linux.com>
|
||||||
|
Copyright: 1992 1993 1994, Hironobu Takahashi (takahasi@tiny.or.jp),
|
||||||
|
Copyright: 1992 1993 1994, Masahiko Sato (masahiko@sato.riec.tohoku.ac.jp),
|
||||||
|
Copyright: 1992 1993 1994, Yukiyoshi Kameyama, Miki Inooka, Akihiko Sasaki, Dai Ando, Junichi Okukawa,
|
||||||
|
Copyright: 1992 1993 1994, Katsushi Sato and Nobuhiro Yamagishi
|
||||||
|
License: GPL-2+
|
||||||
|
The full text of the GPL is distributed as in
|
||||||
|
/usr/share/common-licenses/GPL on Debian systems.
|
||||||
|
|
||||||
|
Files: src/calibre/ebooks/unihandecode/*
|
||||||
|
Copyright: 2010-2011, Hiroshi Miura <miurahr@linux.com>
|
||||||
|
Copyright: 2009, John Schember
|
||||||
|
Copyright: 2007, Russell Norris
|
||||||
|
Copyright: 2001, Sean M. Burke
|
||||||
|
License: GPL-3, Perl
|
||||||
|
The full text of the GPL is distributed as in
|
||||||
|
/usr/share/common-licenses/GPL-3 on Debian systems.
|
||||||
|
|
||||||
Files: src/encutils/__init__.py
|
Files: src/encutils/__init__.py
|
||||||
Copyright: 2005-2008: Christof Hoeke
|
Copyright: 2005-2008: Christof Hoeke
|
||||||
License: LGPL-3+, CC-BY-3.0
|
License: LGPL-3+, CC-BY-3.0
|
||||||
|
@ -6,9 +6,10 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, cPickle
|
import os, cPickle, re, anydbm, shutil
|
||||||
|
from zlib import compress
|
||||||
|
|
||||||
from setup import Command, basenames
|
from setup import Command, basenames, __appname__
|
||||||
|
|
||||||
def get_opts_from_parser(parser):
|
def get_opts_from_parser(parser):
|
||||||
def do_opt(opt):
|
def do_opt(opt):
|
||||||
@ -26,6 +27,9 @@ class Resources(Command):
|
|||||||
|
|
||||||
description = 'Compile various needed calibre resources'
|
description = 'Compile various needed calibre resources'
|
||||||
|
|
||||||
|
KAKASI_PATH = os.path.join(Command.SRC, __appname__,
|
||||||
|
'ebooks', 'unihandecode', 'pykakasi')
|
||||||
|
|
||||||
def run(self, opts):
|
def run(self, opts):
|
||||||
scripts = {}
|
scripts = {}
|
||||||
for x in ('console', 'gui'):
|
for x in ('console', 'gui'):
|
||||||
@ -101,11 +105,113 @@ class Resources(Command):
|
|||||||
import json
|
import json
|
||||||
json.dump(function_dict, open(dest, 'wb'), indent=4)
|
json.dump(function_dict, open(dest, 'wb'), indent=4)
|
||||||
|
|
||||||
|
self.run_kakasi(opts)
|
||||||
|
|
||||||
|
def run_kakasi(self, opts):
    '''
    Generate the pykakasi data files under RESOURCES/localization/pykakasi.

    Three outputs are produced from the UTF-8 dictionaries in KAKASI_PATH:
    kanwadict2.db, itaijidict2.pickle and kanadict2.pickle. Each output is
    regenerated only when ``self.newer(dest, src)`` is true; otherwise an
    "up to date" message is logged.

    :param opts: Parsed command options (not used by this method).
    '''
    self.records = {}
    src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
    dest = self.j(self.RESOURCES, 'localization',
            'pykakasi','kanwadict2.db')
    base = os.path.dirname(dest)
    if not os.path.exists(base):
        os.makedirs(base)

    if not self.newer(dest, src):
        self.info('\tKanwadict is up to date')
    else:
        self.info('\tGenerating Kanwadict')

        # Use a context manager so the dictionary source is closed even if
        # parsing a line raises.
        with open(src, "r") as f:
            for line in f:
                self.parsekdict(line)
        self.kanwaout(dest)

    src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
    dest = self.j(self.RESOURCES, 'localization',
            'pykakasi','itaijidict2.pickle')

    if not self.newer(dest, src):
        self.info('\tItaijidict is up to date')
    else:
        self.info('\tGenerating Itaijidict')
        self.mkitaiji(src, dest)

    src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
    dest = self.j(self.RESOURCES, 'localization',
            'pykakasi','kanadict2.pickle')

    if not self.newer(dest, src):
        self.info('\tKanadict is up to date')
    else:
        self.info('\tGenerating kanadict')
        self.mkkanadict(src, dest)

    return
|
||||||
|
|
||||||
|
|
||||||
|
def mkitaiji(self, src, dst):
    '''
    Build the itaiji (variant character) pickle from its text source.

    Each non-blank line of ``src`` that does not start with ``;;`` is
    decoded as UTF-8, literal ``\\uXXXX`` escapes in it are expanded, and
    the first character of the result is mapped to the second one.

    :param src: Path of the UTF-8 source dictionary.
    :param dst: Path of the pickle file to write.
    '''
    dic = {}
    with open(src, "r") as f:
        for line in f:
            line = line.decode("utf-8").strip()
            if not line or line.startswith(';;'): # skip blanks and comments
                continue
            pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
            dic[pair[0]] = pair[1]
    # protocol=-1 selects a binary pickle format, so the file must be opened
    # in binary mode; text mode corrupts the data on platforms that
    # translate line endings.
    with open(dst, 'wb') as out:
        cPickle.dump(dic, out, protocol=-1) #pickle
|
||||||
|
|
||||||
|
def mkkanadict(self, src, dst):
    '''
    Build the kana pickle from its text source.

    Each non-blank line of ``src`` that does not start with ``;;`` is
    decoded as UTF-8 and split on a single space into ``alpha kana``; the
    resulting dictionary maps ``kana`` to ``alpha``.

    :param src: Path of the UTF-8 source dictionary.
    :param dst: Path of the pickle file to write.
    '''
    dic = {}
    with open(src, "r") as f:
        for line in f:
            line = line.decode("utf-8").strip()
            if not line or line.startswith(';;'): # skip blanks and comments
                continue
            (alpha, kana) = line.split(' ')
            dic[kana] = alpha
    # protocol=-1 selects a binary pickle format, so the file must be opened
    # in binary mode; text mode corrupts the data on platforms that
    # translate line endings.
    with open(dst, 'wb') as out:
        cPickle.dump(dic, out, protocol=-1) #pickle
|
||||||
|
|
||||||
|
def parsekdict(self, line):
    '''
    Parse one line of the kakasi dictionary and record it.

    The line is decoded as UTF-8 and stripped. Blank lines and lines
    starting with ``;;`` are ignored. Any other line is split on a single
    space into ``yomi kanji``. When the last character of ``yomi`` has a
    code point no greater than that of ``'z'`` it is split off as ``tail``;
    otherwise ``tail`` is the empty string. The entry is then passed to
    ``self.updaterec(kanji, yomi, tail)``.

    :param line: One raw (byte string) line of the dictionary file.
    '''
    line = line.decode("utf-8").strip()
    # Blank lines carry no entry; skip them like the sibling parsers do
    # instead of failing on the tuple unpacking below.
    if not line or line.startswith(';;'): # skip blanks and comments
        return
    (yomi, kanji) = line.split(' ')
    if ord(yomi[-1:]) <= ord('z'):
        tail = yomi[-1:]
        yomi = yomi[:-1]
    else:
        tail = ''
    self.updaterec(kanji, yomi, tail)
|
||||||
|
|
||||||
|
def updaterec(self, kanji, yomi, tail):
    '''
    Append a ``(yomi, tail)`` reading for ``kanji`` to ``self.records``.

    ``self.records`` is a two-level dictionary: the outer key is the
    four-digit lowercase hex code point of the first character of
    ``kanji``, the inner key is ``kanji`` itself and the value is the list
    of ``(yomi, tail)`` tuples collected so far.
    '''
    bucket_id = "%04x" % ord(kanji[0])
    bucket = self.records.setdefault(bucket_id, {})
    bucket.setdefault(kanji, []).append((yomi, tail))
|
||||||
|
|
||||||
|
def kanwaout(self, out):
    '''
    Write ``self.records`` to the dbm database at ``out``.

    Every value is pickled with the highest protocol and zlib-compressed
    before being stored under its key. The database is opened with flag
    ``'c'``.

    :param out: Path of the dbm database file.
    '''
    dic = anydbm.open(out, 'c')
    try:
        for (k, v) in self.records.iteritems():
            dic[k] = compress(cPickle.dumps(v, -1))
    finally:
        # Always release the database handle, even if pickling or writing
        # one of the records fails.
        dic.close()
|
||||||
|
|
||||||
|
|
||||||
def clean(self):
    '''
    Remove the generated resources: the scripts, recipes and
    ebook-convert-complete pickles in RESOURCES, and the whole
    localization/pykakasi directory.
    '''
    stems = ('scripts', 'recipes', 'ebook-convert-complete')
    for stem in stems:
        pickled = self.j(self.RESOURCES, stem+'.pickle')
        if os.path.exists(pickled):
            os.remove(pickled)
    kakasi_dir = self.j(self.RESOURCES, 'localization', 'pykakasi')
    if os.path.exists(kakasi_dir):
        shutil.rmtree(kakasi_dir)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -402,8 +402,8 @@ OptionRecommendation(name='asciiize',
|
|||||||
'with "Mikhail Gorbachiov". Also, note that in '
|
'with "Mikhail Gorbachiov". Also, note that in '
|
||||||
'cases where there are multiple representations of a character '
|
'cases where there are multiple representations of a character '
|
||||||
'(characters shared by Chinese and Japanese for instance) the '
|
'(characters shared by Chinese and Japanese for instance) the '
|
||||||
'representation used by the largest number of people will be '
|
'representation based on the current calibre interface language will be '
|
||||||
'used (Chinese in the previous example).')%\
|
'used.')%\
|
||||||
u'\u041c\u0438\u0445\u0430\u0438\u043b '
|
u'\u041c\u0438\u0445\u0430\u0438\u043b '
|
||||||
u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432'
|
u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432'
|
||||||
)
|
)
|
||||||
|
@ -543,9 +543,9 @@ class HTMLPreProcessor(object):
|
|||||||
html = XMLDECL_RE.sub('', html)
|
html = XMLDECL_RE.sub('', html)
|
||||||
|
|
||||||
if getattr(self.extra_opts, 'asciiize', False):
|
if getattr(self.extra_opts, 'asciiize', False):
|
||||||
from calibre.ebooks.unidecode.unidecoder import Unidecoder
|
from calibre.utils.localization import get_udc
|
||||||
unidecoder = Unidecoder()
|
unihandecoder = get_udc()
|
||||||
html = unidecoder.decode(html)
|
html = unihandecoder.decode(html)
|
||||||
|
|
||||||
if getattr(self.extra_opts, 'enable_heuristics', False):
|
if getattr(self.extra_opts, 'enable_heuristics', False):
|
||||||
from calibre.ebooks.conversion.utils import HeuristicProcessor
|
from calibre.ebooks.conversion.utils import HeuristicProcessor
|
||||||
@ -557,10 +557,10 @@ class HTMLPreProcessor(object):
|
|||||||
|
|
||||||
unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
|
unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
|
||||||
if unsupported_unicode_chars:
|
if unsupported_unicode_chars:
|
||||||
from calibre.ebooks.unidecode.unidecoder import Unidecoder
|
from calibre.utils.localization import get_udc
|
||||||
unidecoder = Unidecoder()
|
unihandecoder = get_udc()
|
||||||
for char in unsupported_unicode_chars:
|
for char in unsupported_unicode_chars:
|
||||||
asciichar = unidecoder.decode(char)
|
asciichar = unihandecoder.decode(char)
|
||||||
html = html.replace(char, asciichar)
|
html = html.replace(char, asciichar)
|
||||||
|
|
||||||
return html
|
return html
|
||||||
|
File diff suppressed because it is too large
Load Diff
55
src/calibre/ebooks/unihandecode/__init__.py
Normal file
55
src/calibre/ebooks/unihandecode/__init__.py
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
__all__ = ["Unihandecoder"]
|
||||||
|
|
||||||
|
'''
|
||||||
|
Decode unicode text to an ASCII representation of the text.
|
||||||
|
Translate unicode characters to ASCII.
|
||||||
|
|
||||||
|
inspired from John's unidecode library.
|
||||||
|
Copyright(c) 2009, John Schember
|
||||||
|
|
||||||
|
Tranliterate the string from unicode characters to ASCII in Chinese and others.
|
||||||
|
|
||||||
|
'''
|
||||||
|
import unicodedata
|
||||||
|
|
||||||
|
class Unihandecoder(object):
    '''
    Front end that picks a language-specific decoder and uses it to turn
    unicode text into an ASCII representation.
    '''
    preferred_encoding = None
    decoder = None

    def __init__(self, lang="zh", encoding='utf-8'):
        '''
        :param lang: Language name or code used to choose the decoder.
                     Japanese, Korean and Vietnamese get their own decoder;
                     everything else uses the default Unidecoder.
        :param encoding: Encoding tried when decode() is given a byte
                         string.
        '''
        self.preferred_encoding = encoding
        family = self._language_family(lang)
        if family == u'ja':
            from calibre.ebooks.unihandecode.jadecoder import Jadecoder
            self.decoder = Jadecoder()
        elif family == u'ko':
            from calibre.ebooks.unihandecode.krdecoder import Krdecoder
            self.decoder = Krdecoder()
        elif family == u'vi':
            from calibre.ebooks.unihandecode.vndecoder import Vndecoder
            self.decoder = Vndecoder()
        else: #zh and others
            from calibre.ebooks.unihandecode.unidecoder import Unidecoder
            self.decoder = Unidecoder()

    @staticmethod
    def _language_family(lang):
        '''
        Map a language name/code to u'ja', u'ko', u'vi' or u'zh' (default).

        The historical spellings (prefix ``kr``, ``korean``, prefix ``vn``,
        ``vietnum``) are still accepted. In addition the ISO 639 language
        codes ``ko``/``kor`` and ``vi``/``vie`` are recognised: ``kr`` and
        ``vn`` are country codes, so a locale such as ``ko`` or ``ko_KR``
        previously fell through to the Chinese default.
        '''
        lang = lang.lower()
        # Primary subtag of locale style names such as ko_KR or vi-VN.
        primary = lang.replace('-', '_').split('_')[0]
        prefix = lang[:2]
        if prefix == u'ja':
            return u'ja'
        if prefix == u'kr' or primary in (u'ko', u'kor', u'korean'):
            return u'ko'
        if prefix == u'vn' or primary in (u'vi', u'vie', u'vietnamese', u'vietnum'):
            return u'vi'
        return u'zh'

    def _decode_bytes(self, raw):
        '''
        Decode a byte string: first with the interpreter default encoding,
        then with preferred_encoding, finally as UTF-8 with replacement
        characters so that decoding never fails.
        '''
        try:
            return raw.decode()
        except UnicodeError:
            pass
        try:
            return raw.decode(self.preferred_encoding)
        except (UnicodeError, LookupError, TypeError):
            return raw.decode('utf-8', 'replace')

    def decode(self, text):
        '''
        Return the ASCII representation of ``text``.

        Byte strings are decoded first (see _decode_bytes); other
        non-string objects are converted with the text type. The text is
        then NFKC-normalized and handed to the selected decoder.
        '''
        try:
            text_type = unicode # python2
        except NameError: # python3, str is unicode
            text_type = str
        if isinstance(text, bytes):
            text = self._decode_bytes(text)
        elif not isinstance(text, text_type):
            text = text_type(text)
        #at first unicode normalize it. (see Unicode standards)
        ntext = unicodedata.normalize('NFKC', text)
        return self.decoder.decode(ntext)
|
5251
src/calibre/ebooks/unihandecode/jacodepoints.py
Normal file
5251
src/calibre/ebooks/unihandecode/jacodepoints.py
Normal file
File diff suppressed because it is too large
Load Diff
41
src/calibre/ebooks/unihandecode/jadecoder.py
Normal file
41
src/calibre/ebooks/unihandecode/jadecoder.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
# coding:utf8
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Decode unicode text to an ASCII representation of the text for Japanese.
|
||||||
|
Translate unicode string to ASCII roman string.
|
||||||
|
|
||||||
|
API is based on the python unidecode,
|
||||||
|
which is based on Ruby gem (http://rubyforge.org/projects/unidecode/)
|
||||||
|
and perl module Text::Unidecode
|
||||||
|
(http://search.cpan.org/~sburke/Text-Unidecode-0.04/).
|
||||||
|
|
||||||
|
This functionality is owned by Kakasi Japanese processing engine.
|
||||||
|
|
||||||
|
Copyright (c) 2010 Hiroshi Miura
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
|
||||||
|
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
|
||||||
|
from calibre.ebooks.unihandecode.jacodepoints import CODEPOINTS as JACODES
|
||||||
|
from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi
|
||||||
|
|
||||||
|
class Jadecoder(Unidecoder):
    '''
    Decoder for Japanese: text is first run through kakasi, then every
    remaining non-ASCII character is replaced via replace_point().
    '''
    kakasi = None
    codepoints = {}

    def __init__(self):
        # Work on a copy: updating the imported CODEPOINTS dict in place
        # would leak the Japanese overrides into every other user of that
        # module-level table.
        self.codepoints = dict(CODEPOINTS)
        self.codepoints.update(JACODES)
        self.kakasi = kakasi()

    def decode(self, text):
        '''
        Return the ASCII representation of ``text``.

        If the kakasi conversion (or the replacement of its output) fails,
        fall back to replacing the non-ASCII characters of the original
        text directly.
        '''
        try:
            result=self.kakasi.do(text)
            return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),result)
        except Exception:
            # Deliberate best effort: a kakasi failure must not abort the
            # conversion. Only Exception is caught so that
            # KeyboardInterrupt/SystemExit still propagate.
            return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),text)
|
||||||
|
|
5251
src/calibre/ebooks/unihandecode/krcodepoints.py
Normal file
5251
src/calibre/ebooks/unihandecode/krcodepoints.py
Normal file
File diff suppressed because it is too large
Load Diff
24
src/calibre/ebooks/unihandecode/krdecoder.py
Normal file
24
src/calibre/ebooks/unihandecode/krdecoder.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Decode unicode text to an ASCII representation of the text in Korean.
|
||||||
|
Based on unidecoder.
|
||||||
|
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
|
||||||
|
from calibre.ebooks.unihandecode.krcodepoints import CODEPOINTS as HANCODES
|
||||||
|
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
|
||||||
|
|
||||||
|
class Krdecoder(Unidecoder):
    '''
    Decoder for Korean: the generic code point table with the Korean
    specific entries from krcodepoints layered on top.
    '''

    codepoints = {}

    def __init__(self):
        # Work on a copy: updating the imported CODEPOINTS dict in place
        # would leak the Korean overrides into every other user of that
        # module-level table.
        self.codepoints = dict(CODEPOINTS)
        self.codepoints.update(HANCODES)
|
||||||
|
|
5
src/calibre/ebooks/unihandecode/pykakasi/__init__.py
Normal file
5
src/calibre/ebooks/unihandecode/pykakasi/__init__.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi
kakasi

# __all__ must list names that exist in this module; the previous value
# "pykakasi" is not defined here, which makes `from ... import *` fail.
__all__ = ["kakasi"]
|
||||||
|
|
185
src/calibre/ebooks/unihandecode/pykakasi/h2a.py
Normal file
185
src/calibre/ebooks/unihandecode/pykakasi/h2a.py
Normal file
@ -0,0 +1,185 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# h2a.py
|
||||||
|
#
|
||||||
|
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
|
||||||
|
#
|
||||||
|
# Original copyright:
|
||||||
|
# * KAKASI (Kanji Kana Simple inversion program)
|
||||||
|
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
|
||||||
|
# * Copyright (C) 1992
|
||||||
|
# * Hironobu Takahashi (takahasi@tiny.or.jp)
|
||||||
|
# *
|
||||||
|
# * This program is free software; you can redistribute it and/or modify
|
||||||
|
# * it under the terms of the GNU General Public License as published by
|
||||||
|
# * the Free Software Foundation; either versions 2, or (at your option)
|
||||||
|
# * any later version.
|
||||||
|
# *
|
||||||
|
# * This program is distributed in the hope that it will be useful
|
||||||
|
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# * GNU General Public License for more details.
|
||||||
|
# *
|
||||||
|
# * You should have received a copy of the GNU General Public License
|
||||||
|
# * along with KAKASI, see the file COPYING. If not, write to the Free
|
||||||
|
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
|
||||||
|
# * 02111-1307, USA.
|
||||||
|
# */
|
||||||
|
|
||||||
|
class H2a (object):
    '''
    Hiragana to ASCII (romaji) converter.

    H2a_table maps hiragana sequences of one to four characters to their
    romanization; convert() looks up the longest sequence at the start of
    a string.
    '''

    H2a_table = {
        u"\u3041":"a", u"\u3042":"a",
        u"\u3043":"i", u"\u3044":"i",
        u"\u3045":"u", u"\u3046":"u",
        u"\u3046\u309b":"vu", u"\u3046\u309b\u3041":"va",
        u"\u3046\u309b\u3043":"vi", u"\u3046\u309b\u3047":"ve",
        u"\u3046\u309b\u3049":"vo",
        u"\u3047":"e", u"\u3048":"e",
        u"\u3049":"o", u"\u304a":"o",

        # The ky*/gy* digraphs use the small ya/yu/yo kana
        # (\u3083, \u3085, \u3087), like every other y-row digraph below.
        u"\u304b":"ka", u"\u304c":"ga",
        u"\u304d":"ki", u"\u304d\u3083":"kya",
        u"\u304d\u3085":"kyu", u"\u304d\u3087":"kyo",
        u"\u304e":"gi", u"\u304e\u3083":"gya",
        u"\u304e\u3085":"gyu", u"\u304e\u3087":"gyo",
        u"\u304f":"ku", u"\u3050":"gu",
        u"\u3051":"ke", u"\u3052":"ge",
        u"\u3053":"ko", u"\u3054":"go",

        u"\u3055":"sa", u"\u3056":"za",
        u"\u3057":"shi", u"\u3057\u3083":"sha",
        u"\u3057\u3085":"shu", u"\u3057\u3087":"sho",
        u"\u3058":"ji", u"\u3058\u3083":"ja",
        u"\u3058\u3085":"ju", u"\u3058\u3087":"jo",
        u"\u3059":"su", u"\u305a":"zu",
        u"\u305b":"se", u"\u305c":"ze",
        u"\u305d":"so", u"\u305e":"zo",

        u"\u305f":"ta", u"\u3060":"da",
        u"\u3061":"chi", u"\u3061\u3047":"che", u"\u3061\u3083":"cha",
        u"\u3061\u3085":"chu", u"\u3061\u3087":"cho",
        u"\u3062":"ji", u"\u3062\u3083":"ja",
        u"\u3062\u3085":"ju", u"\u3062\u3087":"jo",

        u"\u3063":"tsu",
        u"\u3063\u3046\u309b":"vvu",
        u"\u3063\u3046\u309b\u3041":"vva",
        u"\u3063\u3046\u309b\u3043":"vvi",
        u"\u3063\u3046\u309b\u3047":"vve",
        u"\u3063\u3046\u309b\u3049":"vvo",
        u"\u3063\u304b":"kka", u"\u3063\u304c":"gga",
        u"\u3063\u304d":"kki", u"\u3063\u304d\u3083":"kkya",
        u"\u3063\u304d\u3085":"kkyu", u"\u3063\u304d\u3087":"kkyo",
        u"\u3063\u304e":"ggi", u"\u3063\u304e\u3083":"ggya",
        u"\u3063\u304e\u3085":"ggyu", u"\u3063\u304e\u3087":"ggyo",
        u"\u3063\u304f":"kku", u"\u3063\u3050":"ggu",
        u"\u3063\u3051":"kke", u"\u3063\u3052":"gge",
        u"\u3063\u3053":"kko", u"\u3063\u3054":"ggo",
        u"\u3063\u3055":"ssa", u"\u3063\u3056":"zza",
        u"\u3063\u3057":"sshi", u"\u3063\u3057\u3083":"ssha",
        u"\u3063\u3057\u3085":"sshu", u"\u3063\u3057\u3087":"ssho",
        u"\u3063\u3058":"jji", u"\u3063\u3058\u3083":"jja",
        u"\u3063\u3058\u3085":"jju", u"\u3063\u3058\u3087":"jjo",
        u"\u3063\u3059":"ssu", u"\u3063\u305a":"zzu",
        # "zze" is keyed on \u305c (ze); it used to duplicate the \u305e
        # (zo) key of the following entry and was silently overwritten.
        u"\u3063\u305b":"sse", u"\u3063\u305c":"zze",
        u"\u3063\u305d":"sso", u"\u3063\u305e":"zzo",
        u"\u3063\u305f":"tta", u"\u3063\u3060":"dda",
        u"\u3063\u3061":"tchi", u"\u3063\u3061\u3083":"tcha",
        u"\u3063\u3061\u3085":"tchu", u"\u3063\u3061\u3087":"tcho",
        u"\u3063\u3062":"jji", u"\u3063\u3062\u3083":"jjya",
        u"\u3063\u3062\u3085":"jjyu", u"\u3063\u3062\u3087":"jjyo",
        u"\u3063\u3064":"ttsu", u"\u3063\u3065":"zzu",
        u"\u3063\u3066":"tte", u"\u3063\u3067":"dde",
        u"\u3063\u3068":"tto", u"\u3063\u3069":"ddo",
        u"\u3063\u306f":"hha", u"\u3063\u3070":"bba",
        u"\u3063\u3071":"ppa",
        u"\u3063\u3072":"hhi", u"\u3063\u3072\u3083":"hhya",
        u"\u3063\u3072\u3085":"hhyu", u"\u3063\u3072\u3087":"hhyo",
        u"\u3063\u3073":"bbi", u"\u3063\u3073\u3083":"bbya",
        u"\u3063\u3073\u3085":"bbyu", u"\u3063\u3073\u3087":"bbyo",
        u"\u3063\u3074":"ppi", u"\u3063\u3074\u3083":"ppya",
        u"\u3063\u3074\u3085":"ppyu", u"\u3063\u3074\u3087":"ppyo",
        u"\u3063\u3075":"ffu", u"\u3063\u3075\u3041":"ffa",
        u"\u3063\u3075\u3043":"ffi", u"\u3063\u3075\u3047":"ffe",
        u"\u3063\u3075\u3049":"ffo",
        u"\u3063\u3076":"bbu", u"\u3063\u3077":"ppu",
        u"\u3063\u3078":"hhe", u"\u3063\u3079":"bbe",
        u"\u3063\u307a":"ppe",
        u"\u3063\u307b":"hho", u"\u3063\u307c":"bbo",
        u"\u3063\u307d":"ppo",
        u"\u3063\u3084":"yya", u"\u3063\u3086":"yyu",
        u"\u3063\u3088":"yyo",
        u"\u3063\u3089":"rra", u"\u3063\u308a":"rri",
        u"\u3063\u308a\u3083":"rrya", u"\u3063\u308a\u3085":"rryu",
        u"\u3063\u308a\u3087":"rryo",
        u"\u3063\u308b":"rru", u"\u3063\u308c":"rre",
        u"\u3063\u308d":"rro",

        u"\u3064":"tsu", u"\u3065":"zu",
        u"\u3066":"te", u"\u3067":"de", u"\u3067\u3043":"di",
        u"\u3068":"to", u"\u3069":"do",

        u"\u306a":"na",
        u"\u306b":"ni", u"\u306b\u3083":"nya",
        u"\u306b\u3085":"nyu", u"\u306b\u3087":"nyo",
        u"\u306c":"nu", u"\u306d":"ne", u"\u306e":"no",

        u"\u306f":"ha", u"\u3070":"ba", u"\u3071":"pa",
        u"\u3072":"hi", u"\u3072\u3083":"hya",
        u"\u3072\u3085":"hyu", u"\u3072\u3087":"hyo",
        u"\u3073":"bi", u"\u3073\u3083":"bya",
        u"\u3073\u3085":"byu", u"\u3073\u3087":"byo",
        u"\u3074":"pi", u"\u3074\u3083":"pya",
        u"\u3074\u3085":"pyu", u"\u3074\u3087":"pyo",
        u"\u3075":"fu", u"\u3075\u3041":"fa",
        u"\u3075\u3043":"fi", u"\u3075\u3047":"fe",
        u"\u3075\u3049":"fo",
        u"\u3076":"bu", u"\u3077":"pu",
        u"\u3078":"he", u"\u3079":"be", u"\u307a":"pe",
        u"\u307b":"ho", u"\u307c":"bo", u"\u307d":"po",

        u"\u307e":"ma",
        u"\u307f":"mi", u"\u307f\u3083":"mya",
        u"\u307f\u3085":"myu", u"\u307f\u3087":"myo",
        u"\u3080":"mu", u"\u3081":"me", u"\u3082":"mo",

        u"\u3083":"ya", u"\u3084":"ya",
        u"\u3085":"yu", u"\u3086":"yu",
        u"\u3087":"yo", u"\u3088":"yo",

        u"\u3089":"ra",
        u"\u308a":"ri", u"\u308a\u3083":"rya",
        u"\u308a\u3085":"ryu", u"\u308a\u3087":"ryo",
        u"\u308b":"ru", u"\u308c":"re", u"\u308d":"ro",

        u"\u308e":"wa", u"\u308f":"wa",
        u"\u3090":"i", u"\u3091":"e",
        u"\u3092":"wo", u"\u3093":"n",

        u"\u3093\u3042":"n'a", u"\u3093\u3044":"n'i",
        u"\u3093\u3046":"n'u", u"\u3093\u3048":"n'e",
        u"\u3093\u304a":"n'o",
        }

    # this class is Borg
    _shared_state = {}

    def __new__(cls, *p, **k):
        # object.__new__ takes no extra arguments; forwarding *p/**k to it
        # raises TypeError on newer Python versions.
        self = object.__new__(cls)
        self.__dict__ = cls._shared_state
        return self

    def isHiragana(self, char):
        '''Return True if the code point of ``char`` lies strictly between
        0x3040 and 0x3094.'''
        return ( 0x3040 < ord(char) and ord(char) < 0x3094)

    def convert(self, text):
        '''
        Romanize the longest table entry found at the start of ``text``.

        :return: ``(romaji, length)`` where ``length`` is the number of
                 characters of ``text`` that were consumed, or ``("", -1)``
                 when no prefix of ``text`` is in the table.
        '''
        Hstr = ""
        max_len = -1
        # The longest keys in H2a_table are four characters long, so
        # prefixes of length 1 to 4 must be tried (the old bound stopped at
        # three and could never match the four character entries).
        for x in range(1, min(4, len(text)) + 1):
            if text[:x] in self.H2a_table:
                max_len = x
                Hstr = self.H2a_table[text[:x]]
        return (Hstr, max_len)
|
||||||
|
|
564
src/calibre/ebooks/unihandecode/pykakasi/itaijidict.utf8
Normal file
564
src/calibre/ebooks/unihandecode/pykakasi/itaijidict.utf8
Normal file
@ -0,0 +1,564 @@
|
|||||||
|
芦蘆
|
||||||
|
壱一
|
||||||
|
苅刈
|
||||||
|
舘館
|
||||||
|
曽曾
|
||||||
|
菟兎
|
||||||
|
島嶋
|
||||||
|
盃杯
|
||||||
|
冨富
|
||||||
|
峯峰
|
||||||
|
亘亙
|
||||||
|
弌一
|
||||||
|
乘乗
|
||||||
|
亂乱
|
||||||
|
豫予
|
||||||
|
亊事
|
||||||
|
弍二
|
||||||
|
亞亜
|
||||||
|
亰京
|
||||||
|
从従
|
||||||
|
仭仞
|
||||||
|
佛仏
|
||||||
|
來来
|
||||||
|
儘侭
|
||||||
|
伜倅
|
||||||
|
假仮
|
||||||
|
會会
|
||||||
|
做作
|
||||||
|
傳伝
|
||||||
|
僞偽
|
||||||
|
價価
|
||||||
|
儉倹
|
||||||
|
兒児
|
||||||
|
兔兎
|
||||||
|
竸競
|
||||||
|
兩両
|
||||||
|
囘回
|
||||||
|
册冊
|
||||||
|
冢塚
|
||||||
|
冩写
|
||||||
|
决決
|
||||||
|
冱冴
|
||||||
|
冰氷
|
||||||
|
况況
|
||||||
|
凉涼
|
||||||
|
處処
|
||||||
|
凾函
|
||||||
|
刄刃
|
||||||
|
刔抉
|
||||||
|
刧劫
|
||||||
|
剩剰
|
||||||
|
劍剣
|
||||||
|
劔剣
|
||||||
|
劒剣
|
||||||
|
剱剣
|
||||||
|
劑剤
|
||||||
|
辨弁
|
||||||
|
勞労
|
||||||
|
勳勲
|
||||||
|
勵励
|
||||||
|
勸勧
|
||||||
|
區区
|
||||||
|
卆卒
|
||||||
|
丗世
|
||||||
|
凖準
|
||||||
|
夘卯
|
||||||
|
卻却
|
||||||
|
卷巻
|
||||||
|
厠廁
|
||||||
|
厦廈
|
||||||
|
厮廝
|
||||||
|
厰廠
|
||||||
|
參参
|
||||||
|
雙双
|
||||||
|
咒呪
|
||||||
|
單単
|
||||||
|
噐器
|
||||||
|
營営
|
||||||
|
嚏嚔
|
||||||
|
嚴厳
|
||||||
|
囑嘱
|
||||||
|
囓齧
|
||||||
|
圀国
|
||||||
|
圈圏
|
||||||
|
國国
|
||||||
|
圍囲
|
||||||
|
圓円
|
||||||
|
團団
|
||||||
|
圖図
|
||||||
|
埀垂
|
||||||
|
埓埒
|
||||||
|
塲場
|
||||||
|
壞壊
|
||||||
|
墮堕
|
||||||
|
壓圧
|
||||||
|
壘塁
|
||||||
|
壥廛
|
||||||
|
壤壌
|
||||||
|
壯壮
|
||||||
|
壺壷
|
||||||
|
壹一
|
||||||
|
壻婿
|
||||||
|
壽寿
|
||||||
|
夂夊
|
||||||
|
夛多
|
||||||
|
梦夢
|
||||||
|
竒奇
|
||||||
|
奧奥
|
||||||
|
奬奨
|
||||||
|
侫佞
|
||||||
|
姙妊
|
||||||
|
嫻嫺
|
||||||
|
孃嬢
|
||||||
|
學学
|
||||||
|
斈学
|
||||||
|
寃冤
|
||||||
|
寇冦
|
||||||
|
寢寝
|
||||||
|
寫写
|
||||||
|
寶宝
|
||||||
|
寳宝
|
||||||
|
尅剋
|
||||||
|
將将
|
||||||
|
專専
|
||||||
|
對対
|
||||||
|
尓爾
|
||||||
|
尢尤
|
||||||
|
屆届
|
||||||
|
屬属
|
||||||
|
峽峡
|
||||||
|
嶌嶋
|
||||||
|
嵜崎
|
||||||
|
崙崘
|
||||||
|
嵳嵯
|
||||||
|
嶽岳
|
||||||
|
巛川
|
||||||
|
巵卮
|
||||||
|
帋紙
|
||||||
|
帶帯
|
||||||
|
幤幣
|
||||||
|
廐厩
|
||||||
|
廏厩
|
||||||
|
廣広
|
||||||
|
廚厨
|
||||||
|
廢廃
|
||||||
|
廳庁
|
||||||
|
廰庁
|
||||||
|
廸迪
|
||||||
|
弃棄
|
||||||
|
弉奘
|
||||||
|
彜彝
|
||||||
|
彈弾
|
||||||
|
彌弥
|
||||||
|
弯彎
|
||||||
|
徃往
|
||||||
|
徑径
|
||||||
|
從従
|
||||||
|
徠来
|
||||||
|
悳徳
|
||||||
|
恠怪
|
||||||
|
恆恒
|
||||||
|
悧俐
|
||||||
|
惡悪
|
||||||
|
惠恵
|
||||||
|
忰悴
|
||||||
|
惱悩
|
||||||
|
愼慎
|
||||||
|
愽博
|
||||||
|
慘惨
|
||||||
|
慚慙
|
||||||
|
憇憩
|
||||||
|
應応
|
||||||
|
懷懐
|
||||||
|
懴懺
|
||||||
|
戀恋
|
||||||
|
戞戛
|
||||||
|
戰戦
|
||||||
|
戲戯
|
||||||
|
拔抜
|
||||||
|
拏拿
|
||||||
|
擔担
|
||||||
|
拜拝
|
||||||
|
拂払
|
||||||
|
挾挟
|
||||||
|
搜捜
|
||||||
|
插挿
|
||||||
|
搖揺
|
||||||
|
攝摂
|
||||||
|
攪撹
|
||||||
|
據拠
|
||||||
|
擇択
|
||||||
|
擧拳
|
||||||
|
舉拳
|
||||||
|
抬擡
|
||||||
|
擴拡
|
||||||
|
攜携
|
||||||
|
攵攴
|
||||||
|
攷考
|
||||||
|
收収
|
||||||
|
效効
|
||||||
|
敕勅
|
||||||
|
敍叙
|
||||||
|
敘叙
|
||||||
|
數数
|
||||||
|
變変
|
||||||
|
斷断
|
||||||
|
旙旛
|
||||||
|
昜陽
|
||||||
|
晄晃
|
||||||
|
晉晋
|
||||||
|
晝昼
|
||||||
|
晰晢
|
||||||
|
暎映
|
||||||
|
曉暁
|
||||||
|
暸瞭
|
||||||
|
昿曠
|
||||||
|
曵曳
|
||||||
|
朖朗
|
||||||
|
朞期
|
||||||
|
霸覇
|
||||||
|
杤栃
|
||||||
|
杰傑
|
||||||
|
枩松
|
||||||
|
檜桧
|
||||||
|
條条
|
||||||
|
檮梼
|
||||||
|
梹檳
|
||||||
|
棊棋
|
||||||
|
棧桟
|
||||||
|
棕椶
|
||||||
|
楙茂
|
||||||
|
榮栄
|
||||||
|
槨椁
|
||||||
|
樂楽
|
||||||
|
權権
|
||||||
|
樞枢
|
||||||
|
樣様
|
||||||
|
樓楼
|
||||||
|
橢楕
|
||||||
|
檢検
|
||||||
|
櫻桜
|
||||||
|
鬱欝
|
||||||
|
盜盗
|
||||||
|
飮飲
|
||||||
|
歐嘔
|
||||||
|
歡歓
|
||||||
|
歸帰
|
||||||
|
殘残
|
||||||
|
殱殲
|
||||||
|
殼殻
|
||||||
|
毆殴
|
||||||
|
毓育
|
||||||
|
氣気
|
||||||
|
沒没
|
||||||
|
泪涙
|
||||||
|
濤涛
|
||||||
|
渕淵
|
||||||
|
渊淵
|
||||||
|
淨浄
|
||||||
|
淺浅
|
||||||
|
滿満
|
||||||
|
溂剌
|
||||||
|
溪渓
|
||||||
|
灌潅
|
||||||
|
滯滞
|
||||||
|
澁渋
|
||||||
|
澀渋
|
||||||
|
潛潜
|
||||||
|
濳潜
|
||||||
|
澂澄
|
||||||
|
澑溜
|
||||||
|
澤沢
|
||||||
|
濟済
|
||||||
|
濕湿
|
||||||
|
濱浜
|
||||||
|
濾滬
|
||||||
|
灣湾
|
||||||
|
烱炯
|
||||||
|
烟煙
|
||||||
|
熈煕
|
||||||
|
熏燻
|
||||||
|
燒焼
|
||||||
|
爐炉
|
||||||
|
爭争
|
||||||
|
爲為
|
||||||
|
爼俎
|
||||||
|
犁犂
|
||||||
|
犹猶
|
||||||
|
犲豺
|
||||||
|
狹狭
|
||||||
|
獎奨
|
||||||
|
默黙
|
||||||
|
獨独
|
||||||
|
獸獣
|
||||||
|
獵猟
|
||||||
|
獻献
|
||||||
|
珎珍
|
||||||
|
璢瑠
|
||||||
|
瑯琅
|
||||||
|
珱瓔
|
||||||
|
瓣弁
|
||||||
|
甞嘗
|
||||||
|
甼町
|
||||||
|
畄留
|
||||||
|
畍界
|
||||||
|
畊耕
|
||||||
|
畆畝
|
||||||
|
畧略
|
||||||
|
畫画
|
||||||
|
當当
|
||||||
|
畴疇
|
||||||
|
疊畳
|
||||||
|
疉畳
|
||||||
|
疂畳
|
||||||
|
癡痴
|
||||||
|
發発
|
||||||
|
皃猊
|
||||||
|
皈帰
|
||||||
|
皹皸
|
||||||
|
盖蓋
|
||||||
|
盡尽
|
||||||
|
蘯盪
|
||||||
|
眞真
|
||||||
|
眦眥
|
||||||
|
礦鉱
|
||||||
|
礪砺
|
||||||
|
碎砕
|
||||||
|
碯瑙
|
||||||
|
祕秘
|
||||||
|
祿禄
|
||||||
|
齋斎
|
||||||
|
禪禅
|
||||||
|
禮礼
|
||||||
|
禀稟
|
||||||
|
稱称
|
||||||
|
稻稲
|
||||||
|
稾稿
|
||||||
|
穗穂
|
||||||
|
穩穏
|
||||||
|
龝穐
|
||||||
|
穰穣
|
||||||
|
窗窓
|
||||||
|
竈竃
|
||||||
|
窰窯
|
||||||
|
竊窃
|
||||||
|
竝並
|
||||||
|
筺筐
|
||||||
|
笋筍
|
||||||
|
箟箘
|
||||||
|
筝箏
|
||||||
|
簔蓑
|
||||||
|
籠篭
|
||||||
|
籘籐
|
||||||
|
籖籤
|
||||||
|
粹粋
|
||||||
|
糺糾
|
||||||
|
絲糸
|
||||||
|
經経
|
||||||
|
總総
|
||||||
|
緜綿
|
||||||
|
縣県
|
||||||
|
縱縦
|
||||||
|
繪絵
|
||||||
|
繩縄
|
||||||
|
繼継
|
||||||
|
緕纃
|
||||||
|
續続
|
||||||
|
纖繊
|
||||||
|
纎繊
|
||||||
|
纜繿
|
||||||
|
缺欠
|
||||||
|
罐缶
|
||||||
|
罸罰
|
||||||
|
羃冪
|
||||||
|
羣群
|
||||||
|
羮羹
|
||||||
|
譱善
|
||||||
|
翆翠
|
||||||
|
翦剪
|
||||||
|
耻恥
|
||||||
|
聟婿
|
||||||
|
聨聯
|
||||||
|
聲声
|
||||||
|
聰聡
|
||||||
|
聽聴
|
||||||
|
肅粛
|
||||||
|
冐冒
|
||||||
|
脉脈
|
||||||
|
腦脳
|
||||||
|
腟膣
|
||||||
|
膓腸
|
||||||
|
膸髄
|
||||||
|
膽胆
|
||||||
|
臈臘
|
||||||
|
臟臓
|
||||||
|
臺台
|
||||||
|
與与
|
||||||
|
舊旧
|
||||||
|
舍舎
|
||||||
|
舖舗
|
||||||
|
舩船
|
||||||
|
艢檣
|
||||||
|
舮艫
|
||||||
|
艷艶
|
||||||
|
莖茎
|
||||||
|
莊荘
|
||||||
|
莵兎
|
||||||
|
菷帚
|
||||||
|
萠萌
|
||||||
|
蕚萼
|
||||||
|
蒂蔕
|
||||||
|
萬万
|
||||||
|
葢蓋
|
||||||
|
蘂蕊
|
||||||
|
蕋蕊
|
||||||
|
藪薮
|
||||||
|
藏蔵
|
||||||
|
藝芸
|
||||||
|
藥薬
|
||||||
|
蘓蘇
|
||||||
|
乕虎
|
||||||
|
號号
|
||||||
|
蠣蛎
|
||||||
|
蝨虱
|
||||||
|
蠅蝿
|
||||||
|
螢蛍
|
||||||
|
蟆蟇
|
||||||
|
蟲虫
|
||||||
|
蠏蟹
|
||||||
|
蟷螳
|
||||||
|
蟒蠎
|
||||||
|
蠶蚕
|
||||||
|
蠧蠹
|
||||||
|
蠻蛮
|
||||||
|
衂衄
|
||||||
|
衞衛
|
||||||
|
袵衽
|
||||||
|
裝装
|
||||||
|
襃褒
|
||||||
|
褝襌
|
||||||
|
覩睹
|
||||||
|
覺覚
|
||||||
|
覽覧
|
||||||
|
觀観
|
||||||
|
觧解
|
||||||
|
觸触
|
||||||
|
誡戒
|
||||||
|
謌歌
|
||||||
|
諡謚
|
||||||
|
謠謡
|
||||||
|
證証
|
||||||
|
譛譖
|
||||||
|
譯訳
|
||||||
|
譽誉
|
||||||
|
讀読
|
||||||
|
讓譲
|
||||||
|
讚賛
|
||||||
|
豐豊
|
||||||
|
貉狢
|
||||||
|
貍狸
|
||||||
|
貎猊
|
||||||
|
豼貔
|
||||||
|
貘獏
|
||||||
|
戝財
|
||||||
|
貭質
|
||||||
|
貳弐
|
||||||
|
貮弐
|
||||||
|
賤賎
|
||||||
|
賣売
|
||||||
|
贊賛
|
||||||
|
賍贓
|
||||||
|
赱走
|
||||||
|
踈疎
|
||||||
|
踴踊
|
||||||
|
躰体
|
||||||
|
軆体
|
||||||
|
軈軅
|
||||||
|
軣轟
|
||||||
|
輕軽
|
||||||
|
輙輒
|
||||||
|
輌輛
|
||||||
|
轉転
|
||||||
|
辭辞
|
||||||
|
辯弁
|
||||||
|
迯逃
|
||||||
|
逹達
|
||||||
|
逎遒
|
||||||
|
遞逓
|
||||||
|
遲遅
|
||||||
|
邊辺
|
||||||
|
邉辺
|
||||||
|
邨村
|
||||||
|
鄰隣
|
||||||
|
醉酔
|
||||||
|
醫医
|
||||||
|
釀醸
|
||||||
|
釋釈
|
||||||
|
釡釜
|
||||||
|
釼剣
|
||||||
|
銕鉄
|
||||||
|
錢銭
|
||||||
|
鎭鎮
|
||||||
|
鐵鉄
|
||||||
|
鐡鉄
|
||||||
|
鑒鑑
|
||||||
|
鑄鋳
|
||||||
|
鑛鉱
|
||||||
|
鈩鑪
|
||||||
|
鑚鑽
|
||||||
|
閇閉
|
||||||
|
濶闊
|
||||||
|
關関
|
||||||
|
阯址
|
||||||
|
陷陥
|
||||||
|
險険
|
||||||
|
隱隠
|
||||||
|
隸隷
|
||||||
|
襍雑
|
||||||
|
雜雑
|
||||||
|
靈霊
|
||||||
|
靜静
|
||||||
|
靱靭
|
||||||
|
韭韮
|
||||||
|
韲齏
|
||||||
|
韵韻
|
||||||
|
顏顔
|
||||||
|
顯顕
|
||||||
|
飃飄
|
||||||
|
餘余
|
||||||
|
餝飾
|
||||||
|
餠餅
|
||||||
|
騷騒
|
||||||
|
驅駆
|
||||||
|
驛駅
|
||||||
|
驗験
|
||||||
|
髓髄
|
||||||
|
體体
|
||||||
|
髮髪
|
||||||
|
鬪闘
|
||||||
|
鰺鯵
|
||||||
|
鰛鰮
|
||||||
|
鳬鳧
|
||||||
|
鳫鴈
|
||||||
|
鵄鴟
|
||||||
|
鵞鵝
|
||||||
|
鷄鶏
|
||||||
|
鷏鷆
|
||||||
|
鹽塩
|
||||||
|
麥麦
|
||||||
|
麸麩
|
||||||
|
麪麺
|
||||||
|
點点
|
||||||
|
黨党
|
||||||
|
皷鼓
|
||||||
|
鼡鼠
|
||||||
|
齊斉
|
||||||
|
齒歯
|
||||||
|
齡齢
|
||||||
|
龜亀
|
||||||
|
槇槙
|
||||||
|
遙遥
|
||||||
|
瑤瑶
|
||||||
|
凜凛
|
||||||
|
熙煕
|
83
src/calibre/ebooks/unihandecode/pykakasi/j2h.py
Normal file
83
src/calibre/ebooks/unihandecode/pykakasi/j2h.py
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# j2h.py
|
||||||
|
#
|
||||||
|
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
|
||||||
|
#
|
||||||
|
# Original Copyright:
|
||||||
|
# * KAKASI (Kanji Kana Simple inversion program)
|
||||||
|
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
|
||||||
|
# * Copyright (C) 1992
|
||||||
|
# * Hironobu Takahashi (takahasi@tiny.or.jp)
|
||||||
|
# *
|
||||||
|
# * This program is free software; you can redistribute it and/or modify
|
||||||
|
# * it under the terms of the GNU General Public License as published by
|
||||||
|
# * the Free Software Foundation; either versions 2, or (at your option)
|
||||||
|
# * any later version.
|
||||||
|
# *
|
||||||
|
# * This program is distributed in the hope that it will be useful
|
||||||
|
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# * GNU General Public License for more details.
|
||||||
|
# *
|
||||||
|
# * You should have received a copy of the GNU General Public License
|
||||||
|
# * along with KAKASI, see the file COPYING. If not, write to the Free
|
||||||
|
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
|
||||||
|
# * 02111-1307, USA.
|
||||||
|
# */
|
||||||
|
|
||||||
|
from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo
|
||||||
|
import re
|
||||||
|
|
||||||
|
class J2H (object):
    '''
    Kanji-to-hiragana converter.

    Looks up the longest dictionary word that starts the given text and
    returns its reading (yomi). Dictionary entries are ``(yomi, tail)``
    pairs: an empty ``tail`` means the word stands alone, otherwise
    ``tail`` is a romaji letter that the hiragana following the word must
    be compatible with (see `isCletter`).
    '''

    # Shared dictionary holder; replaced by a jisyo instance in __init__.
    kanwa = None

    # Romaji letters each hiragana may start with, indexed by
    # (code point - 0x3040). Index 0 is unused padding, index 1 is U+3041
    # and the last entry (index 95) is U+309F.
    cl_table = [
        "","aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow",
        "aiueow", "aiueow", "aiueow", "k", "g", "k", "g", "k", "g", "k", "g", "k",
        "g", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "t", "d", "tc",
        "d", "aiueokstchgzjfdbpw", "t", "d", "t", "d", "t", "d", "n", "n", "n", "n",
        "n", "h", "b", "p", "h", "b", "p", "hf", "b", "p", "h", "b", "p", "h", "b",
        "p", "m", "m", "m", "m", "m", "y", "y", "y", "y", "y", "y", "rl", "rl",
        "rl", "rl", "rl", "wiueo", "wiueo", "wiueo", "wiueo", "w", "n", "v", "k",
        "k", "", "", "", "", "", "", "", "", ""]

    def __init__(self):
        self.kanwa = jisyo()

    def isKanji(self, c):
        '''Return True if the code point of `c` is in [0x3400, 0xfa2e).'''
        return 0x3400 <= ord(c) < 0xfa2e

    def isCletter(self, l, c):
        '''
        Return True if `c` is a hiragana (U+3041..U+309F) whose entry in
        `cl_table` contains the romaji letter `l`.
        '''
        code = ord(c)
        if 0x3041 <= code <= 0x309f:
            # The table is aligned so that U+3041 maps to index 1; the
            # previous "- ord(u'ぁ') - 1" offset was shifted by two entries
            # (and wrapped to index -1 for U+3041).
            return l in self.cl_table[code - 0x3040]
        return False

    def itaiji_conv(self, text):
        '''
        Replace every character of `text` that has an entry in the itaiji
        dictionary with its mapped value and return the result.
        '''
        itaiji = self.kanwa.itaijidict
        # Collect first, then substitute in order of appearance, so the
        # result matches a sequence of whole-text substitutions.
        found = [c for c in text if c in itaiji]
        for c in found:
            # Plain replacement: `c` is a literal character, not a regex.
            text = text.replace(c, itaiji[c])
        return text

    def convert(self, text):
        '''
        Find the longest dictionary match at the start of `text`.

        Returns a ``(reading, length)`` tuple where `length` is the number
        of characters of `text` consumed. ``("", 0)`` is returned when the
        text is empty, no table exists for the first character, or nothing
        matches.
        '''
        if not text:
            return ("", 0)
        table = self.kanwa.load_jisyo(text[0])
        if table is None:
            return ("", 0)

        max_len = 0
        Hstr = ""
        text_len = len(text)
        # Iterating the mapping directly works on both Python 2 and 3.
        for k in table:
            length = len(k)
            if text_len < length or not text.startswith(k):
                continue
            for (yomi, tail) in table[k]:
                if not tail:
                    # Value comparison: "tail is ''" depended on string
                    # interning and failed for unicode empty strings.
                    if max_len < length:
                        Hstr = yomi
                        max_len = length
                elif (max_len < length + 1 and text_len > length
                        and self.isCletter(tail, text[length])):
                    # The word carries one trailing kana; keep that kana
                    # in the reading and consume it as well.
                    Hstr = ''.join([yomi, text[length]])
                    max_len = length + 1
        return (Hstr, max_len)
|
53
src/calibre/ebooks/unihandecode/pykakasi/jisyo.py
Normal file
53
src/calibre/ebooks/unihandecode/pykakasi/jisyo.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# jisyo.py
|
||||||
|
#
|
||||||
|
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
|
||||||
|
from cPickle import load
|
||||||
|
import anydbm,marshal
|
||||||
|
from zlib import decompress
|
||||||
|
import os
|
||||||
|
|
||||||
|
import calibre.utils.resources as resources
|
||||||
|
|
||||||
|
class jisyo (object):
    '''
    Shared holder for the pykakasi dictionaries.

    Every instance shares one ``__dict__`` (Borg pattern), so the
    dictionary files are opened and loaded only once per process no matter
    how many converters create a ``jisyo()``.
    '''

    kanwadict = None   # dbm handle: "%04x" code point key -> compressed table
    itaijidict = None  # unpickled itaiji mapping
    kanadict = None    # unpickled kana mapping
    jisyo_table = {}   # cache of tables already decoded by load_jisyo

    # this class is Borg
    _shared_state = {}

    def __new__(cls, *p, **k):
        # object.__new__ takes no extra arguments; passing them on is
        # deprecated on Python 2.6+ and an error on Python 3.
        self = object.__new__(cls)
        self.__dict__ = cls._shared_state
        return self

    def __init__(self):
        if self.kanwadict is None:
            dictpath = resources.get_path(os.path.join('localization','pykakasi','kanwadict2.db'))
            # Kept open on purpose: load_jisyo reads from it lazily.
            self.kanwadict = anydbm.open(dictpath,'r')
        if self.itaijidict is None:
            itaijipath = resources.get_path(os.path.join('localization','pykakasi','itaijidict2.pickle'))
            # Close the file as soon as the pickle has been read.
            with open(itaijipath, 'rb') as itaiji_pkl:
                self.itaijidict = load(itaiji_pkl)
        if self.kanadict is None:
            kanadictpath = resources.get_path(os.path.join('localization','pykakasi','kanadict2.pickle'))
            with open(kanadictpath, 'rb') as kanadict_pkl:
                self.kanadict = load(kanadict_pkl)

    def load_jisyo(self, char):
        '''
        Return the word table stored for `char`, or None if it cannot be
        loaded.

        The table is looked up under the four-digit hex code point of
        `char`, decompressed, unmarshalled and cached in `jisyo_table`.
        '''
        try:#python2
            key = "%04x"%ord(unicode(char))
        except (NameError, UnicodeError):#python3, or undecodable byte string
            key = "%04x"%ord(char)

        try: #already exist?
            return self.jisyo_table[key]
        except KeyError:
            pass

        try:
            table = marshal.loads(decompress(self.kanwadict[key]))
        except Exception:
            # Best effort: a missing key or an unreadable entry simply
            # means "no table for this character".
            return None
        self.jisyo_table[key] = table
        return table
|
||||||
|
|
50
src/calibre/ebooks/unihandecode/pykakasi/k2a.py
Normal file
50
src/calibre/ebooks/unihandecode/pykakasi/k2a.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# k2a.py
|
||||||
|
#
|
||||||
|
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
|
||||||
|
#
|
||||||
|
# Original copyright:
|
||||||
|
# * KAKASI (Kanji Kana Simple inversion program)
|
||||||
|
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
|
||||||
|
# * Copyright (C) 1992
|
||||||
|
# * Hironobu Takahashi (takahasi@tiny.or.jp)
|
||||||
|
# *
|
||||||
|
# * This program is free software; you can redistribute it and/or modify
|
||||||
|
# * it under the terms of the GNU General Public License as published by
|
||||||
|
# * the Free Software Foundation; either versions 2, or (at your option)
|
||||||
|
# * any later version.
|
||||||
|
# *
|
||||||
|
# * This program is distributed in the hope that it will be useful
|
||||||
|
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# * GNU General Public License for more details.
|
||||||
|
# *
|
||||||
|
# * You should have received a copy of the GNU General Public License
|
||||||
|
# * along with KAKASI, see the file COPYING. If not, write to the Free
|
||||||
|
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
|
||||||
|
# * 02111-1307, USA.
|
||||||
|
# */
|
||||||
|
|
||||||
|
from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo
|
||||||
|
|
||||||
|
class K2a (object):
    '''Katakana-to-ASCII converter backed by the shared kana dictionary.'''

    # Shared dictionary holder; replaced by a jisyo instance in __init__.
    kanwa = None

    def __init__(self):
        self.kanwa = jisyo()

    def isKatakana(self, char):
        '''Return True if the code point of `char` lies strictly between
        0x30a0 and 0x30f7.'''
        return 0x30a0 < ord(char) < 0x30f7

    def convert(self, text):
        '''
        Look up the longest prefix of `text` (at most 9 characters) that is
        a key of the kana dictionary.

        Returns ``(value, prefix_length)``; when no prefix matches the
        result is ``("", -1)``.
        '''
        kanadict = self.kanwa.kanadict
        Hstr = ""
        max_len = -1
        # range() instead of the Python-2-only xrange(); at most 10 steps.
        for x in range(min(10, len(text) + 1)):
            head = text[:x]
            if head in kanadict:
                # Prefix lengths are visited in ascending order, so every
                # hit is longer than the previous one.
                max_len = x
                Hstr = kanadict[head]
        return (Hstr, max_len)
|
||||||
|
|
101
src/calibre/ebooks/unihandecode/pykakasi/kakasi.py
Normal file
101
src/calibre/ebooks/unihandecode/pykakasi/kakasi.py
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# kakasi.py
|
||||||
|
#
|
||||||
|
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
|
||||||
|
#
|
||||||
|
# Original Copyright:
|
||||||
|
# * KAKASI (Kanji Kana Simple inversion program)
|
||||||
|
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
|
||||||
|
# * Copyright (C) 1992
|
||||||
|
# * Hironobu Takahashi (takahasi@tiny.or.jp)
|
||||||
|
# *
|
||||||
|
# * This program is free software; you can redistribute it and/or modify
|
||||||
|
# * it under the terms of the GNU General Public License as published by
|
||||||
|
# * the Free Software Foundation; either versions 2, or (at your option)
|
||||||
|
# * any later version.
|
||||||
|
# *
|
||||||
|
# * This program is distributed in the hope that it will be useful
|
||||||
|
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# * GNU General Public License for more details.
|
||||||
|
# *
|
||||||
|
# * You should have received a copy of the GNU General Public License
|
||||||
|
# * along with KAKASI, see the file COPYING. If not, write to the Free
|
||||||
|
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
|
||||||
|
# * 02111-1307, USA.
|
||||||
|
# */
|
||||||
|
|
||||||
|
from calibre.ebooks.unihandecode.pykakasi.j2h import J2H
|
||||||
|
from calibre.ebooks.unihandecode.pykakasi.h2a import H2a
|
||||||
|
from calibre.ebooks.unihandecode.pykakasi.k2a import K2a
|
||||||
|
|
||||||
|
class kakasi(object):
    '''
    Transliterate Japanese text to ASCII by chaining the kanji->hiragana,
    hiragana->ASCII and katakana->ASCII converters.
    '''

    j2h = None
    h2a = None
    k2a = None

    def __init__(self):
        self.j2h = J2H()
        self.h2a = H2a()
        self.k2a = K2a()

    def _reading_to_ascii(self, reading):
        '''Romanize the hiragana reading of a kanji word; stops at the
        first part the hiragana converter cannot handle.'''
        pieces = []
        m = 0
        while m < len(reading):
            (s, n) = self.h2a.convert(reading[m:])
            if n <= 0:
                break
            m = m + n
            pieces.append(s)
        return ''.join(pieces)

    def _kana_run(self, converter, is_kana, text, i):
        '''
        Convert the run of kana starting at text[i].

        Returns ``(converted, next_index)``. A trailing space is appended
        when the run is followed by more (non-kana) text.
        '''
        pieces = []
        while True:
            (t, l) = converter.convert(text[i:])
            if l <= 0:
                # No dictionary match: pass the character through and step
                # over it. Adding a non-positive length to the index would
                # stall or move backwards and never terminate.
                pieces.append(text[i])
                i = i + 1
            else:
                pieces.append(t)
                i = i + l
            if i >= len(text):
                return (''.join(pieces), i)
            if not is_kana(text[i]):
                pieces.append(' ')
                return (''.join(pieces), i)

    def do(self, text):
        '''
        Return `text` with kanji, hiragana and katakana replaced by their
        romanization. Kanji words are capitalized; converted runs are
        separated from following text by a space; any other character is
        copied unchanged.
        '''
        out = []
        i = 0
        size = len(text)
        while i < size:
            ch = text[i]
            if self.j2h.isKanji(ch):
                (t, l) = self.j2h.convert(text[i:])
                if l <= 0:
                    # Unknown kanji: keep it as is.
                    out.append(ch)
                    i = i + 1
                    continue
                i = i + l
                word = self._reading_to_ascii(t).capitalize()
                out.append(word if i >= size else word + ' ')
            elif self.h2a.isHiragana(ch):
                (chunk, i) = self._kana_run(self.h2a, self.h2a.isHiragana, text, i)
                out.append(chunk)
            elif self.k2a.isKatakana(ch):
                (chunk, i) = self._kana_run(self.k2a, self.k2a.isKatakana, text, i)
                out.append(chunk)
            else:
                out.append(ch)
                i += 1

        return ''.join(out)
|
||||||
|
|
121826
src/calibre/ebooks/unihandecode/pykakasi/kakasidict.utf8
Normal file
121826
src/calibre/ebooks/unihandecode/pykakasi/kakasidict.utf8
Normal file
File diff suppressed because it is too large
Load Diff
317
src/calibre/ebooks/unihandecode/pykakasi/kanadict.utf8
Normal file
317
src/calibre/ebooks/unihandecode/pykakasi/kanadict.utf8
Normal file
@ -0,0 +1,317 @@
|
|||||||
|
;; Kana-Alphabet mapping dictionary
|
||||||
|
;;
|
||||||
|
;; To use this mapping table,
|
||||||
|
;; you should first normalize the text to Unicode NFKC form.
|
||||||
|
;;
|
||||||
|
;; basic mapping
|
||||||
|
;;
|
||||||
|
a ァ
|
||||||
|
a ア
|
||||||
|
ba バ
|
||||||
|
bba ッバ
|
||||||
|
bbe ッベ
|
||||||
|
bbi ッビ
|
||||||
|
bbo ッボ
|
||||||
|
bbu ッブ
|
||||||
|
bbya ッビャ
|
||||||
|
bbyo ッビョ
|
||||||
|
bbyu ッビュ
|
||||||
|
be ベ
|
||||||
|
bi ビ
|
||||||
|
bo ボ
|
||||||
|
bu ブ
|
||||||
|
bya ビャ
|
||||||
|
byo ビョ
|
||||||
|
byu ビュ
|
||||||
|
cha チャ
|
||||||
|
che チェ
|
||||||
|
chi チ
|
||||||
|
cho チョ
|
||||||
|
chu チュ
|
||||||
|
da ダ
|
||||||
|
dda ッダ
|
||||||
|
dde ッデ
|
||||||
|
ddo ッド
|
||||||
|
de デ
|
||||||
|
di ディ
|
||||||
|
do ド
|
||||||
|
e ェ
|
||||||
|
e エ
|
||||||
|
e ヱ
|
||||||
|
fa ファ
|
||||||
|
fe フェ
|
||||||
|
ffa ッファ
|
||||||
|
ffe ッフェ
|
||||||
|
ffi ッフィ
|
||||||
|
ffo ッフォ
|
||||||
|
ffu ッフ
|
||||||
|
fi フィ
|
||||||
|
fo フォ
|
||||||
|
fu フ
|
||||||
|
ga ガ
|
||||||
|
ge ゲ
|
||||||
|
gga ッガ
|
||||||
|
gge ッゲ
|
||||||
|
ggi ッギ
|
||||||
|
ggo ッゴ
|
||||||
|
ggu ッグ
|
||||||
|
ggya ッギャ
|
||||||
|
ggyo ッギョ
|
||||||
|
ggyu ッギュ
|
||||||
|
gi ギ
|
||||||
|
go ゴ
|
||||||
|
gu グ
|
||||||
|
gya グャ
|
||||||
|
gyo ギョ
|
||||||
|
gyu ギゥ
|
||||||
|
ha ハ
|
||||||
|
he ヘ
|
||||||
|
hha ッハ
|
||||||
|
hhe ッヘ
|
||||||
|
hhi ッヒ
|
||||||
|
hho ッホ
|
||||||
|
hhya ッヒャ
|
||||||
|
hhyo ッヒョ
|
||||||
|
hhyu ッヒュ
|
||||||
|
hi ヒ
|
||||||
|
ho ホ
|
||||||
|
hya ヒャ
|
||||||
|
hyo ヒョ
|
||||||
|
hyu ヒュ
|
||||||
|
i ィ
|
||||||
|
i イ
|
||||||
|
i ヰ
|
||||||
|
ja ジャ
|
||||||
|
ja ヂャ
|
||||||
|
ji ジ
|
||||||
|
ji ヂ
|
||||||
|
jja ッジャ
|
||||||
|
jji ッジ
|
||||||
|
jji ッヂ
|
||||||
|
jjo ッジョ
|
||||||
|
jju ッジュ
|
||||||
|
jjya ッヂャ
|
||||||
|
jjyo ッヂョ
|
||||||
|
jjyu ッヂュ
|
||||||
|
jo ジョ
|
||||||
|
jo ヂョ
|
||||||
|
ju ジュ
|
||||||
|
ju ヂュ
|
||||||
|
ka カ
|
||||||
|
ka ヵ
|
||||||
|
ke ケ
|
||||||
|
ke ヶ
|
||||||
|
ki キ
|
||||||
|
kka ッカ
|
||||||
|
kke ッケ
|
||||||
|
kki ッキ
|
||||||
|
kko ッコ
|
||||||
|
kku ック
|
||||||
|
kkya ッキャ
|
||||||
|
kkyo ッキョ
|
||||||
|
kkyu ッキュ
|
||||||
|
ko コ
|
||||||
|
ku ク
|
||||||
|
kya キァ
|
||||||
|
kyo キォ
|
||||||
|
kyu キゥ
|
||||||
|
ma マ
|
||||||
|
me メ
|
||||||
|
mi ミ
|
||||||
|
mo モ
|
||||||
|
mu ム
|
||||||
|
mya ミャ
|
||||||
|
myo ミョ
|
||||||
|
myu ミュ
|
||||||
|
n ン
|
||||||
|
n'a ンア
|
||||||
|
n'e ンエ
|
||||||
|
n'i ンイ
|
||||||
|
n'o ンオ
|
||||||
|
n'u ンウ
|
||||||
|
na ナ
|
||||||
|
ne ネ
|
||||||
|
ni ニ
|
||||||
|
no ノ
|
||||||
|
nu ヌ
|
||||||
|
nya ニャ
|
||||||
|
nyo ニョ
|
||||||
|
nyu ニュ
|
||||||
|
o ォ
|
||||||
|
o オ
|
||||||
|
pa パ
|
||||||
|
pe ペ
|
||||||
|
pi ピ
|
||||||
|
po ポ
|
||||||
|
ppa ッパ
|
||||||
|
ppe ッペ
|
||||||
|
ppi ッピ
|
||||||
|
ppo ッポ
|
||||||
|
ppu ップ
|
||||||
|
ppya ッピャ
|
||||||
|
ppyo ッピョ
|
||||||
|
ppyu ッピュ
|
||||||
|
pu プ
|
||||||
|
pya ピャ
|
||||||
|
pyo ピョ
|
||||||
|
pyu ピュ
|
||||||
|
ra ラ
|
||||||
|
re レ
|
||||||
|
ri リ
|
||||||
|
ro ロ
|
||||||
|
rra ッラ
|
||||||
|
rre ッレ
|
||||||
|
rri ッリ
|
||||||
|
rro ッロ
|
||||||
|
rru ッル
|
||||||
|
rrya ッリャ
|
||||||
|
rryo ッリョ
|
||||||
|
rryu ッリュ
|
||||||
|
ru ル
|
||||||
|
rya リャ
|
||||||
|
ryo リョ
|
||||||
|
ryu リュ
|
||||||
|
sa サ
|
||||||
|
se セ
|
||||||
|
sha シャ
|
||||||
|
shi シ
|
||||||
|
sho ショ
|
||||||
|
shu シュ
|
||||||
|
so ソ
|
||||||
|
ssa ッサ
|
||||||
|
sse ッセ
|
||||||
|
ssha ッシャ
|
||||||
|
sshi ッシ
|
||||||
|
ssho ッショ
|
||||||
|
sshu ッシュ
|
||||||
|
sso ッソ
|
||||||
|
ssu ッス
|
||||||
|
su ス
|
||||||
|
ta タ
|
||||||
|
tcha ッチャ
|
||||||
|
tchi ッチ
|
||||||
|
tcho ッチョ
|
||||||
|
tchu ッチュ
|
||||||
|
te テ
|
||||||
|
to ト
|
||||||
|
tsu ッ
|
||||||
|
tsu ツ
|
||||||
|
tta ッタ
|
||||||
|
tte ッテ
|
||||||
|
tto ット
|
||||||
|
ttsu ッツ
|
||||||
|
u ゥ
|
||||||
|
u ウ
|
||||||
|
va ヴァ
|
||||||
|
ve ヴェ
|
||||||
|
vi ヴィ
|
||||||
|
vo ヴォ
|
||||||
|
vu ヴ
|
||||||
|
vva ッヴァ
|
||||||
|
vve ッヴェ
|
||||||
|
vvi ッヴィ
|
||||||
|
vvo ッヴォ
|
||||||
|
vvu ッヴ
|
||||||
|
wa ヮ
|
||||||
|
wa ワ
|
||||||
|
wo ヲ
|
||||||
|
ya ャ
|
||||||
|
ya ヤ
|
||||||
|
yo ョ
|
||||||
|
yo ヨ
|
||||||
|
yu ュ
|
||||||
|
yu ユ
|
||||||
|
yya ッヤ
|
||||||
|
yyo ッヨ
|
||||||
|
yyu ッユ
|
||||||
|
za ザ
|
||||||
|
ze ゼ
|
||||||
|
zo ゾ
|
||||||
|
zu ズ
|
||||||
|
zu ヅ
|
||||||
|
zza ッザ
|
||||||
|
zzo ッゾ
|
||||||
|
zzu ッズ
|
||||||
|
zzu ッヅ
|
||||||
|
;;
|
||||||
|
;; extended characters
|
||||||
|
;;
|
||||||
|
;;
|
||||||
|
;; gairai terms
|
||||||
|
;;
|
||||||
|
all オール
|
||||||
|
algrism アルゴリズム
|
||||||
|
answer アンサー
|
||||||
|
base ベース
|
||||||
|
begineer ビギナー
|
||||||
|
connection コネクション
|
||||||
|
contents コンテンツ
|
||||||
|
creator クリエーター
|
||||||
|
comic コミック
|
||||||
|
comics コミックス
|
||||||
|
culture カルチャー
|
||||||
|
debug デバッグ
|
||||||
|
debugging デバッギング
|
||||||
|
design デザイン
|
||||||
|
digital デジタル
|
||||||
|
dillenma ジレンマ
|
||||||
|
directory ディレクトリ
|
||||||
|
disk ディスク
|
||||||
|
document ドキュメント
|
||||||
|
download ダウンロード
|
||||||
|
electric エレクトリック
|
||||||
|
facebook フェイスブック
|
||||||
|
firefox ファイアーフォックス
|
||||||
|
folder フォルダ
|
||||||
|
format フォーマット
|
||||||
|
forum フォーラム
|
||||||
|
fox フォックス
|
||||||
|
free フリー
|
||||||
|
gnome ノーム
|
||||||
|
gnu グヌー
|
||||||
|
gozilla ゴジラ
|
||||||
|
guide ガイド
|
||||||
|
harvard ハーバード
|
||||||
|
help ヘルプ
|
||||||
|
highlight ハイライト
|
||||||
|
japan ジャパン
|
||||||
|
journal ジャーナル
|
||||||
|
library ライブラリ
|
||||||
|
line ライン
|
||||||
|
love ラヴ
|
||||||
|
love ラブ
|
||||||
|
mail メール
|
||||||
|
main メイン
|
||||||
|
mystery ミステリ
|
||||||
|
mozilla モジラ
|
||||||
|
network ネットワーク
|
||||||
|
next ネクスト
|
||||||
|
new ニュー
|
||||||
|
news ニュース
|
||||||
|
native ネイティブ
|
||||||
|
online オンライン
|
||||||
|
open オープン
|
||||||
|
professional プロフェッショナル
|
||||||
|
profile プロファイル
|
||||||
|
programmer プログラマ
|
||||||
|
sample サンプル
|
||||||
|
series シリーズ
|
||||||
|
share シェア
|
||||||
|
social ソーシャル
|
||||||
|
society ソサエティ
|
||||||
|
software ソフトウエア
|
||||||
|
source ソース
|
||||||
|
street ストリート
|
||||||
|
system システム
|
||||||
|
tag タグ
|
||||||
|
text テキスト
|
||||||
|
thunderbird サンダーバード
|
||||||
|
training トレーニング
|
||||||
|
twitter ツイッター
|
||||||
|
unicode ユニコード
|
||||||
|
wall ウオール
|
||||||
|
wall ウォール
|
||||||
|
welcome ウェルカム
|
||||||
|
welcome ウエルカム
|
||||||
|
wikinomics ウィキノミクス
|
||||||
|
york ヨーク
|
1798
src/calibre/ebooks/unihandecode/unicodepoints.py
Normal file
1798
src/calibre/ebooks/unihandecode/unicodepoints.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,12 +1,16 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Decode unicode text to an ASCII representation of the text. Transliterate
|
Decode unicode text to an ASCII representation of the text in Chinese.
|
||||||
unicode characters to ASCII.
|
Transliterate unicode characters to ASCII based on Chinese pronunciation.
|
||||||
|
|
||||||
|
derived from John's unidecode library.
|
||||||
|
|
||||||
|
Copyright(c) 2009, John Schember
|
||||||
|
|
||||||
Based on the ruby unidecode gem (http://rubyforge.org/projects/unidecode/) which
|
Based on the ruby unidecode gem (http://rubyforge.org/projects/unidecode/) which
|
||||||
is based on the perl module Text::Unidecode
|
is based on the perl module Text::Unidecode
|
||||||
@ -55,29 +59,20 @@ it under the same terms as Perl itself.
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
|
||||||
from calibre.ebooks.unidecode.unicodepoints import CODEPOINTS
|
from calibre.ebooks.unihandecode.zhcodepoints import CODEPOINTS as HANCODES
|
||||||
from calibre.constants import preferred_encoding
|
|
||||||
|
|
||||||
class Unidecoder(object):
|
class Unidecoder(object):
|
||||||
|
|
||||||
|
codepoints = {}
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.codepoints = CODEPOINTS
|
||||||
|
self.codepoints.update(HANCODES)
|
||||||
|
|
||||||
def decode(self, text):
|
def decode(self, text):
|
||||||
'''
|
|
||||||
Tranliterate the string from unicode characters to ASCII.
|
|
||||||
'''
|
|
||||||
# The keys for CODEPOINTS is unicode characters, we want to be sure the
|
|
||||||
# input text is unicode.
|
|
||||||
if not isinstance(text, unicode):
|
|
||||||
try:
|
|
||||||
text = unicode(text)
|
|
||||||
except:
|
|
||||||
try:
|
|
||||||
text = text.decode(preferred_encoding)
|
|
||||||
except:
|
|
||||||
text = text.decode('utf-8', 'replace')
|
|
||||||
# Replace characters larger than 127 with their ASCII equivelent.
|
# Replace characters larger than 127 with their ASCII equivelent.
|
||||||
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),
|
return re.sub('[^\x00-\x7f]',lambda x: self.replace_point(x.group()), text)
|
||||||
text)
|
|
||||||
|
|
||||||
def replace_point(self, codepoint):
|
def replace_point(self, codepoint):
|
||||||
'''
|
'''
|
||||||
@ -87,7 +82,7 @@ class Unidecoder(object):
|
|||||||
# Split the unicode character xABCD into parts 0xAB and 0xCD.
|
# Split the unicode character xABCD into parts 0xAB and 0xCD.
|
||||||
# 0xAB represents the group within CODEPOINTS to query and 0xCD
|
# 0xAB represents the group within CODEPOINTS to query and 0xCD
|
||||||
# represents the position in the list of characters for the group.
|
# represents the position in the list of characters for the group.
|
||||||
return CODEPOINTS[self.code_group(codepoint)][self.grouped_point(
|
return self.codepoints[self.code_group(codepoint)][self.grouped_point(
|
||||||
codepoint)]
|
codepoint)]
|
||||||
except:
|
except:
|
||||||
return '?'
|
return '?'
|
||||||
@ -97,12 +92,18 @@ class Unidecoder(object):
|
|||||||
Find what group character is a part of.
|
Find what group character is a part of.
|
||||||
'''
|
'''
|
||||||
# Code groups withing CODEPOINTS take the form 'xAB'
|
# Code groups withing CODEPOINTS take the form 'xAB'
|
||||||
return u'x%02x' % (ord(unicode(character)) >> 8)
|
try:#python2
|
||||||
|
return 'x%02x' % (ord(unicode(character)) >> 8)
|
||||||
|
except:
|
||||||
|
return 'x%02x' % (ord(character) >> 8)
|
||||||
|
|
||||||
def grouped_point(self, character):
|
def grouped_point(self, character):
|
||||||
'''
|
'''
|
||||||
Return the location the replacement character is in the list for a
|
Return the location the replacement character is in the list for a
|
||||||
the group character is a part of.
|
the group character is a part of.
|
||||||
'''
|
'''
|
||||||
return ord(unicode(character)) & 255
|
try:#python2
|
||||||
|
return ord(unicode(character)) & 255
|
||||||
|
except:
|
||||||
|
return ord(character) & 255
|
||||||
|
|
5251
src/calibre/ebooks/unihandecode/vncodepoints.py
Normal file
5251
src/calibre/ebooks/unihandecode/vncodepoints.py
Normal file
File diff suppressed because it is too large
Load Diff
23
src/calibre/ebooks/unihandecode/vndecoder.py
Normal file
23
src/calibre/ebooks/unihandecode/vndecoder.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Decode unicode text to an ASCII representation of the text in Vietnamese.
|
||||||
|
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
|
||||||
|
from calibre.ebooks.unihandecode.vncodepoints import CODEPOINTS as HANCODES
|
||||||
|
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
|
||||||
|
|
||||||
|
class Vndecoder(Unidecoder):
    '''
    Unidecoder whose lookup table is the generic CODEPOINTS table with the
    Vietnamese HANCODES entries laid over it.
    '''

    codepoints = {}

    def __init__(self):
        # Work on a copy: CODEPOINTS is a module-level dict shared with the
        # other decoders, and updating it in place would leak this
        # decoder's overrides into every one of them.
        self.codepoints = dict(CODEPOINTS)
        self.codepoints.update(HANCODES)
|
||||||
|
|
5251
src/calibre/ebooks/unihandecode/zhcodepoints.py
Normal file
5251
src/calibre/ebooks/unihandecode/zhcodepoints.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -6,12 +6,12 @@ meaning as possible.
|
|||||||
import os
|
import os
|
||||||
from math import ceil
|
from math import ceil
|
||||||
|
|
||||||
from calibre.ebooks.unidecode.unidecoder import Unidecoder
|
|
||||||
from calibre import sanitize_file_name
|
from calibre import sanitize_file_name
|
||||||
from calibre.constants import preferred_encoding, iswindows
|
from calibre.constants import preferred_encoding, iswindows
|
||||||
udc = Unidecoder()
|
from calibre.utils.localization import get_udc
|
||||||
|
|
||||||
def ascii_text(orig):
|
def ascii_text(orig):
|
||||||
|
udc = get_udc()
|
||||||
try:
|
try:
|
||||||
ascii = udc.decode(orig)
|
ascii = udc.decode(orig)
|
||||||
except:
|
except:
|
||||||
|
@ -169,3 +169,13 @@ def set_qt_translator(translator):
|
|||||||
return translator.load(p)
|
return translator.load(p)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
_udc = None
|
||||||
|
|
||||||
|
def get_udc():
    '''
    Return the process-wide Unihandecoder, creating it on first use for
    the language reported by get_lang().
    '''
    global _udc
    if _udc is not None:
        return _udc
    # Imported lazily, only when the decoder actually has to be built.
    from calibre.ebooks.unihandecode import Unihandecoder
    _udc = Unihandecoder(lang=get_lang())
    return _udc
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user