Fix a regression that caused incorrect English transliteration of Japanese text even when the UI language is set to Japanese. Fixes #2087850 [[Enhancement] PLease use kakasi for Japanese](https://bugs.launchpad.net/calibre/+bug/2087850)

De-vendor pykakasi: remove the bundled copy and depend on the upstream package instead, since upstream appears to be in working order.
This commit is contained in:
Kovid Goyal 2024-11-12 15:13:58 +05:30
parent 7a3571cf56
commit ec8c06caa9
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
12 changed files with 36 additions and 123173 deletions

View File

@ -1034,6 +1034,24 @@
}
},
{
"name": "jaconv",
"unix": {
"filename": "jaconv-0.40.tar.gz",
"hash": "sha256:32da74b247f276e09a52d6b35c153df2387965cb85a6f034cc8af21d446f8161",
"urls": ["pypi"]
}
},
{
"name": "pykakasi",
"unix": {
"filename": "pykakasi-2.3.0-py3-none-any.whl",
"hash": "sha256:26d21b090048ff45c6a4d8e962426b7951767216008ec30358e8a9d74af77f29",
"urls": ["pypi"]
}
},
{
"name": "poetry_core",
"comment": "Needed for building zeroconf",

View File

@ -18,24 +18,30 @@ Copyright (c) 2010 Hiroshi Miura
import re
from pykakasi import kakasi
from calibre.ebooks.unihandecode.jacodepoints import CODEPOINTS as JACODES
from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
class Jadecoder(Unidecoder):
    """Unidecoder variant that romanizes Japanese text with pykakasi.

    Text is first passed through a pykakasi converter; every character
    that is still non-ASCII afterwards is replaced through
    ``self.replace_point`` (not defined in this class; presumably
    inherited from ``Unidecoder`` -- confirm).
    """

    kakasi = None
    codepoints = {}

    def __init__(self):
        # Work on a copy: update() below must not mutate the module-level
        # CODEPOINTS table that other users of that module share.
        self.codepoints = CODEPOINTS.copy()
        self.codepoints.update(JACODES)
        self.kakasi = kakasi()
        self.kakasi.setMode("H", "a")  # Hiragana to ascii, default: no conversion
        self.kakasi.setMode("K", "a")  # Katakana to ascii, default: no conversion
        self.kakasi.setMode("J", "a")  # Japanese to ascii, default: no conversion
        self.kakasi.setMode("r", "Hepburn")  # default: use Hepburn Roman table
        self.kakasi.setMode("s", True)  # add space, default: no separator
        self.kakasi.setMode("C", True)  # capitalize, default: no capitalize
        self.conv = self.kakasi.getConverter()

    def decode(self, text):
        """Return *text* with all non-ASCII characters transliterated.

        The pykakasi conversion is best effort: if it raises, the
        untouched text is still run through the per-character fallback.
        """
        try:
            text = self.conv.do(text)
        except Exception:
            # Deliberate fallback, not an error path: continue with the
            # original text and let the code point table handle it.
            pass
        return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()), text)

View File

@ -1,5 +0,0 @@
from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi

# Bare reference so the re-exported name is not reported as an unused import.
kakasi
# __all__ must list names that exist in this module; the only public name
# defined here is the re-exported ``kakasi`` class.
__all__ = ["kakasi"]

View File

@ -1,180 +0,0 @@
# h2a.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# */
class H2a:
    """Hiragana to ASCII (romaji) converter.

    ``H2a_table`` maps hiragana sequences of one to four characters to
    their romanization. ``convert`` performs a longest-prefix lookup in
    that table.
    """

    H2a_table = {
        "\u3041":"a", "\u3042":"a",
        "\u3043":"i", "\u3044":"i",
        "\u3045":"u", "\u3046":"u",
        "\u3046\u309b":"vu", "\u3046\u309b\u3041":"va",
        "\u3046\u309b\u3043":"vi", "\u3046\u309b\u3047":"ve",
        "\u3046\u309b\u3049":"vo",
        "\u3047":"e", "\u3048":"e",
        "\u3049":"o", "\u304a":"o",
        "\u304b":"ka", "\u304c":"ga",
        # ki/gi + small ya/yu/yo (U+3083/U+3085/U+3087)
        "\u304d":"ki", "\u304d\u3083":"kya",
        "\u304d\u3085":"kyu", "\u304d\u3087":"kyo",
        "\u304e":"gi", "\u304e\u3083":"gya",
        "\u304e\u3085":"gyu", "\u304e\u3087":"gyo",
        "\u304f":"ku", "\u3050":"gu",
        "\u3051":"ke", "\u3052":"ge",
        "\u3053":"ko", "\u3054":"go",
        "\u3055":"sa", "\u3056":"za",
        "\u3057":"shi", "\u3057\u3083":"sha",
        "\u3057\u3085":"shu", "\u3057\u3087":"sho",
        "\u3058":"ji", "\u3058\u3083":"ja",
        "\u3058\u3085":"ju", "\u3058\u3087":"jo",
        "\u3059":"su", "\u305a":"zu",
        "\u305b":"se", "\u305c":"ze",
        "\u305d":"so", "\u305e":"zo",
        "\u305f":"ta", "\u3060":"da",
        "\u3061":"chi", "\u3061\u3047":"che", "\u3061\u3083":"cha",
        "\u3061\u3085":"chu", "\u3061\u3087":"cho",
        "\u3062":"ji", "\u3062\u3083":"ja",
        "\u3062\u3085":"ju", "\u3062\u3087":"jo",
        "\u3063":"tsu",
        "\u3063\u3046\u309b":"vvu",
        "\u3063\u3046\u309b\u3041":"vva",
        "\u3063\u3046\u309b\u3043":"vvi",
        "\u3063\u3046\u309b\u3047":"vve",
        "\u3063\u3046\u309b\u3049":"vvo",
        "\u3063\u304b":"kka", "\u3063\u304c":"gga",
        "\u3063\u304d":"kki", "\u3063\u304d\u3083":"kkya",
        "\u3063\u304d\u3085":"kkyu", "\u3063\u304d\u3087":"kkyo",
        "\u3063\u304e":"ggi", "\u3063\u304e\u3083":"ggya",
        "\u3063\u304e\u3085":"ggyu", "\u3063\u304e\u3087":"ggyo",
        "\u3063\u304f":"kku", "\u3063\u3050":"ggu",
        "\u3063\u3051":"kke", "\u3063\u3052":"gge",
        "\u3063\u3053":"kko", "\u3063\u3054":"ggo",
        "\u3063\u3055":"ssa", "\u3063\u3056":"zza",
        "\u3063\u3057":"sshi", "\u3063\u3057\u3083":"ssha",
        "\u3063\u3057\u3085":"sshu", "\u3063\u3057\u3087":"ssho",
        "\u3063\u3058":"jji", "\u3063\u3058\u3083":"jja",
        "\u3063\u3058\u3085":"jju", "\u3063\u3058\u3087":"jjo",
        "\u3063\u3059":"ssu", "\u3063\u305a":"zzu",
        # U+305C is "ze" and U+305E is "zo" (see the single-kana entries)
        "\u3063\u305b":"sse", "\u3063\u305c":"zze",
        "\u3063\u305d":"sso", "\u3063\u305e":"zzo",
        "\u3063\u305f":"tta", "\u3063\u3060":"dda",
        "\u3063\u3061":"tchi", "\u3063\u3061\u3083":"tcha",
        "\u3063\u3061\u3085":"tchu", "\u3063\u3061\u3087":"tcho",
        "\u3063\u3062":"jji", "\u3063\u3062\u3083":"jjya",
        "\u3063\u3062\u3085":"jjyu", "\u3063\u3062\u3087":"jjyo",
        "\u3063\u3064":"ttsu", "\u3063\u3065":"zzu",
        "\u3063\u3066":"tte", "\u3063\u3067":"dde",
        "\u3063\u3068":"tto", "\u3063\u3069":"ddo",
        "\u3063\u306f":"hha", "\u3063\u3070":"bba",
        "\u3063\u3071":"ppa",
        "\u3063\u3072":"hhi", "\u3063\u3072\u3083":"hhya",
        "\u3063\u3072\u3085":"hhyu", "\u3063\u3072\u3087":"hhyo",
        "\u3063\u3073":"bbi", "\u3063\u3073\u3083":"bbya",
        "\u3063\u3073\u3085":"bbyu", "\u3063\u3073\u3087":"bbyo",
        "\u3063\u3074":"ppi", "\u3063\u3074\u3083":"ppya",
        "\u3063\u3074\u3085":"ppyu", "\u3063\u3074\u3087":"ppyo",
        "\u3063\u3075":"ffu", "\u3063\u3075\u3041":"ffa",
        "\u3063\u3075\u3043":"ffi", "\u3063\u3075\u3047":"ffe",
        "\u3063\u3075\u3049":"ffo",
        "\u3063\u3076":"bbu", "\u3063\u3077":"ppu",
        "\u3063\u3078":"hhe", "\u3063\u3079":"bbe",
        "\u3063\u307a":"ppe",
        "\u3063\u307b":"hho", "\u3063\u307c":"bbo",
        "\u3063\u307d":"ppo",
        "\u3063\u3084":"yya", "\u3063\u3086":"yyu",
        "\u3063\u3088":"yyo",
        "\u3063\u3089":"rra", "\u3063\u308a":"rri",
        "\u3063\u308a\u3083":"rrya", "\u3063\u308a\u3085":"rryu",
        "\u3063\u308a\u3087":"rryo",
        "\u3063\u308b":"rru", "\u3063\u308c":"rre",
        "\u3063\u308d":"rro",
        "\u3064":"tsu", "\u3065":"zu",
        "\u3066":"te", "\u3067":"de", "\u3067\u3043":"di",
        "\u3068":"to", "\u3069":"do",
        "\u306a":"na",
        "\u306b":"ni", "\u306b\u3083":"nya",
        "\u306b\u3085":"nyu", "\u306b\u3087":"nyo",
        "\u306c":"nu", "\u306d":"ne", "\u306e":"no",
        "\u306f":"ha", "\u3070":"ba", "\u3071":"pa",
        "\u3072":"hi", "\u3072\u3083":"hya",
        "\u3072\u3085":"hyu", "\u3072\u3087":"hyo",
        "\u3073":"bi", "\u3073\u3083":"bya",
        "\u3073\u3085":"byu", "\u3073\u3087":"byo",
        "\u3074":"pi", "\u3074\u3083":"pya",
        "\u3074\u3085":"pyu", "\u3074\u3087":"pyo",
        "\u3075":"fu", "\u3075\u3041":"fa",
        "\u3075\u3043":"fi", "\u3075\u3047":"fe",
        "\u3075\u3049":"fo",
        "\u3076":"bu", "\u3077":"pu",
        "\u3078":"he", "\u3079":"be", "\u307a":"pe",
        "\u307b":"ho", "\u307c":"bo", "\u307d":"po",
        "\u307e":"ma",
        "\u307f":"mi", "\u307f\u3083":"mya",
        "\u307f\u3085":"myu", "\u307f\u3087":"myo",
        "\u3080":"mu", "\u3081":"me", "\u3082":"mo",
        "\u3083":"ya", "\u3084":"ya",
        "\u3085":"yu", "\u3086":"yu",
        "\u3087":"yo", "\u3088":"yo",
        "\u3089":"ra",
        "\u308a":"ri", "\u308a\u3083":"rya",
        "\u308a\u3085":"ryu", "\u308a\u3087":"ryo",
        "\u308b":"ru", "\u308c":"re", "\u308d":"ro",
        "\u308e":"wa", "\u308f":"wa",
        "\u3090":"i", "\u3091":"e",
        "\u3092":"wo", "\u3093":"n",
        "\u3093\u3042":"n'a", "\u3093\u3044":"n'i",
        "\u3093\u3046":"n'u", "\u3093\u3048":"n'e",
        "\u3093\u304a":"n'o",
    }

    # Length of the longest key, so convert() tries every prefix that could
    # possibly match (the table contains four-character keys).
    _max_key_len = max(map(len, H2a_table))

    # this class is Borg: every instance shares one attribute dictionary
    _shared_state = {}

    def __new__(cls, *p, **k):
        self = object.__new__(cls, *p, **k)
        self.__dict__ = cls._shared_state
        return self

    def isHiragana(self, char):
        """Return True if *char* lies strictly between U+3040 and U+3094."""
        return 0x3040 < ord(char) < 0x3094

    def convert(self, text):
        """Romanize the longest prefix of *text* found in ``H2a_table``.

        Returns a ``(romaji, consumed)`` tuple where *consumed* is the
        number of characters of *text* that were matched. When no prefix
        matches (including empty *text*) the result is ``("", -1)``.
        """
        Hstr = ""
        max_len = -1
        limit = min(self._max_key_len, len(text))
        # Prefixes are tried shortest first, so the last hit is the longest.
        for x in range(1, limit + 1):
            prefix = text[:x]
            if prefix in self.H2a_table:
                max_len = x
                Hstr = self.H2a_table[prefix]
        return (Hstr, max_len)

View File

@ -1,564 +0,0 @@
芦蘆
壱一
苅刈
舘館
曽曾
菟兎
島嶋
盃杯
冨富
峯峰
亘亙
弌一
乘乗
亂乱
豫予
亊事
弍二
亞亜
亰京
从従
仭仞
佛仏
來来
儘侭
伜倅
假仮
會会
做作
傳伝
僞偽
價価
儉倹
兒児
兔兎
竸競
兩両
囘回
册冊
冢塚
冩写
决決
冱冴
冰氷
况況
凉涼
處処
凾函
刄刃
刔抉
刧劫
剩剰
劍剣
劔剣
劒剣
剱剣
劑剤
辨弁
勞労
勳勲
勵励
勸勧
區区
卆卒
丗世
凖準
夘卯
卻却
卷巻
厠廁
厦廈
厮廝
厰廠
參参
雙双
咒呪
單単
噐器
營営
嚏嚔
嚴厳
囑嘱
囓齧
圀国
圈圏
國国
圍囲
圓円
團団
圖図
埀垂
埓埒
塲場
壞壊
墮堕
壓圧
壘塁
壥廛
壤壌
壯壮
壺壷
壹一
壻婿
壽寿
夂夊
夛多
梦夢
竒奇
奧奥
奬奨
侫佞
姙妊
嫻嫺
孃嬢
學学
斈学
寃冤
寇冦
寢寝
寫写
寶宝
寳宝
尅剋
將将
專専
對対
尓爾
尢尤
屆届
屬属
峽峡
嶌嶋
嵜崎
崙崘
嵳嵯
嶽岳
巛川
巵卮
帋紙
帶帯
幤幣
廐厩
廏厩
廣広
廚厨
廢廃
廳庁
廰庁
廸迪
弃棄
弉奘
彜彝
彈弾
彌弥
弯彎
徃往
徑径
從従
徠来
悳徳
恠怪
恆恒
悧俐
惡悪
惠恵
忰悴
惱悩
愼慎
愽博
慘惨
慚慙
憇憩
應応
懷懐
懴懺
戀恋
戞戛
戰戦
戲戯
拔抜
拏拿
擔担
拜拝
拂払
挾挟
搜捜
插挿
搖揺
攝摂
攪撹
據拠
擇択
擧拳
舉拳
抬擡
擴拡
攜携
攵攴
攷考
收収
效効
敕勅
敍叙
敘叙
數数
變変
斷断
旙旛
昜陽
晄晃
晉晋
晝昼
晰晢
暎映
曉暁
暸瞭
昿曠
曵曳
朖朗
朞期
霸覇
杤栃
杰傑
枩松
檜桧
條条
檮梼
梹檳
棊棋
棧桟
棕椶
楙茂
榮栄
槨椁
樂楽
權権
樞枢
樣様
樓楼
橢楕
檢検
櫻桜
鬱欝
盜盗
飮飲
歐嘔
歡歓
歸帰
殘残
殱殲
殼殻
毆殴
毓育
氣気
沒没
泪涙
濤涛
渕淵
渊淵
淨浄
淺浅
滿満
溂剌
溪渓
灌潅
滯滞
澁渋
澀渋
潛潜
濳潜
澂澄
澑溜
澤沢
濟済
濕湿
濱浜
濾滬
灣湾
烱炯
烟煙
熈煕
熏燻
燒焼
爐炉
爭争
爲為
爼俎
犁犂
犹猶
犲豺
狹狭
獎奨
默黙
獨独
獸獣
獵猟
獻献
珎珍
璢瑠
瑯琅
珱瓔
瓣弁
甞嘗
甼町
畄留
畍界
畊耕
畆畝
畧略
畫画
當当
畴疇
疊畳
疉畳
疂畳
癡痴
發発
皃猊
皈帰
皹皸
盖蓋
盡尽
蘯盪
眞真
眦眥
礦鉱
礪砺
碎砕
碯瑙
祕秘
祿禄
齋斎
禪禅
禮礼
禀稟
稱称
稻稲
稾稿
穗穂
穩穏
龝穐
穰穣
窗窓
竈竃
窰窯
竊窃
竝並
筺筐
笋筍
箟箘
筝箏
簔蓑
籠篭
籘籐
籖籤
粹粋
糺糾
絲糸
經経
總総
緜綿
縣県
縱縦
繪絵
繩縄
繼継
緕纃
續続
纖繊
纎繊
纜繿
缺欠
罐缶
罸罰
羃冪
羣群
羮羹
譱善
翆翠
翦剪
耻恥
聟婿
聨聯
聲声
聰聡
聽聴
肅粛
冐冒
脉脈
腦脳
腟膣
膓腸
膸髄
膽胆
臈臘
臟臓
臺台
與与
舊旧
舍舎
舖舗
舩船
艢檣
舮艫
艷艶
莖茎
莊荘
莵兎
菷帚
萠萌
蕚萼
蒂蔕
萬万
葢蓋
蘂蕊
蕋蕊
藪薮
藏蔵
藝芸
藥薬
蘓蘇
乕虎
號号
蠣蛎
蝨虱
蠅蝿
螢蛍
蟆蟇
蟲虫
蠏蟹
蟷螳
蟒蠎
蠶蚕
蠧蠹
蠻蛮
衂衄
衞衛
袵衽
裝装
襃褒
褝襌
覩睹
覺覚
覽覧
觀観
觧解
觸触
誡戒
謌歌
諡謚
謠謡
證証
譛譖
譯訳
譽誉
讀読
讓譲
讚賛
豐豊
貉狢
貍狸
貎猊
豼貔
貘獏
戝財
貭質
貳弐
貮弐
賤賎
賣売
贊賛
賍贓
赱走
踈疎
踴踊
躰体
軆体
軈軅
軣轟
輕軽
輙輒
輌輛
轉転
辭辞
辯弁
迯逃
逹達
逎遒
遞逓
遲遅
邊辺
邉辺
邨村
鄰隣
醉酔
醫医
釀醸
釋釈
釡釜
釼剣
銕鉄
錢銭
鎭鎮
鐵鉄
鐡鉄
鑒鑑
鑄鋳
鑛鉱
鈩鑪
鑚鑽
閇閉
濶闊
關関
阯址
陷陥
險険
隱隠
隸隷
襍雑
雜雑
靈霊
靜静
靱靭
韭韮
韲齏
韵韻
顏顔
顯顕
飃飄
餘余
餝飾
餠餅
騷騒
驅駆
驛駅
驗験
髓髄
體体
髮髪
鬪闘
鰺鯵
鰛鰮
鳬鳧
鳫鴈
鵄鴟
鵞鵝
鷄鶏
鷏鷆
鹽塩
麥麦
麸麩
麪麺
點点
黨党
皷鼓
鼡鼠
齊斉
齒歯
齡齢
龜亀
槇槙
遙遥
瑤瑶
凜凛
熙煕

View File

@ -1,82 +0,0 @@
# j2h.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original Copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# */
import re
from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo
from polyglot.builtins import iteritems
class J2H:
    """Kanji to hiragana converter backed by the ``jisyo`` dictionaries."""

    kanwa = None

    # Consonant/vowel letters that may follow a reading, one entry per code
    # point starting at U+3040: cl_table[ord(c) - 0x3040] lists the romaji
    # letters with which the kana c can start (96 entries, U+3040..U+309F).
    cl_table = [
        "","aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow",
        "aiueow", "aiueow", "aiueow", "k", "g", "k", "g", "k", "g", "k", "g", "k",
        "g", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "t", "d", "tc",
        "d", "aiueokstchgzjfdbpw", "t", "d", "t", "d", "t", "d", "n", "n", "n", "n",
        "n", "h", "b", "p", "h", "b", "p", "hf", "b", "p", "h", "b", "p", "h", "b",
        "p", "m", "m", "m", "m", "m", "y", "y", "y", "y", "y", "y", "rl", "rl",
        "rl", "rl", "rl", "wiueo", "wiueo", "wiueo", "wiueo", "w", "n", "v", "k",
        "k", "", "", "", "", "", "", "", "", ""]

    def __init__(self):
        self.kanwa = jisyo()

    def isKanji(self, c):
        """Return True if *c* is in the range U+3400 (inclusive) to U+FA2E."""
        return 0x3400 <= ord(c) < 0xfa2e

    def isCletter(self, l, c):
        """Return True if kana *c* can be spelled starting with letter *l*.

        *c* must lie in U+3041..U+309F; anything else yields False.
        """
        code = ord(c)
        # Index relative to U+3040 so that U+3041 (small a) maps to entry 1.
        return 0x3041 <= code <= 0x309f and l in self.cl_table[code - 0x3040]

    def itaiji_conv(self, text):
        """Replace every character of *text* that is a key of the itaiji
        dictionary with its mapped value and return the result."""
        itaiji = self.kanwa.itaijidict
        # Snapshot the hits first: the replacements below rewrite ``text``.
        variants = [c for c in text if c in itaiji]
        for c in variants:
            # Literal replacement; no regular expression semantics wanted.
            text = text.replace(c, itaiji[c])
        return text

    def convert(self, text):
        """Look up the longest dictionary entry that prefixes *text*.

        Returns ``(reading, consumed)``. ``("", 0)`` is returned when
        *text* is empty, when no table exists for its first character, or
        when no entry matches.
        """
        if not text:
            return ("", 0)
        max_len = 0
        Hstr = ""
        table = self.kanwa.load_jisyo(text[0])
        if table is None:
            return ("", 0)
        for k, v in table.items():
            length = len(k)
            if not text.startswith(k):
                continue
            for yomi, tail in v:
                if tail == '':
                    if max_len < length:
                        Hstr = yomi
                        max_len = length
                elif max_len < length+1 and len(text) > length and self.isCletter(tail, text[length]):
                    # Entry requires a trailing kana starting with ``tail``;
                    # that kana is consumed and appended to the reading.
                    Hstr = ''.join([yomi, text[length]])
                    max_len = length+1
        return (Hstr, max_len)

View File

@ -1,49 +0,0 @@
# jisyo.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
from zlib import decompress
class jisyo:
    """Shared holder for the kakasi dictionaries.

    The three dictionaries are loaded from msgpack resources on first
    construction. ``load_jisyo`` additionally decompresses and caches the
    per-character kanji tables on demand.
    """

    kanwadict = None
    itaijidict = None
    kanadict = None
    # Cache of decoded per-character tables, keyed by 4-digit hex code point.
    jisyo_table = {}

    # this class is Borg: every instance shares one attribute dictionary
    _shared_state = {}

    def __new__(cls, *p, **k):
        self = object.__new__(cls, *p, **k)
        self.__dict__ = cls._shared_state
        return self

    def __init__(self):
        from calibre.utils.resources import get_path as P
        from calibre.utils.serialize import msgpack_loads
        # Each dictionary is loaded only while it is still unset.
        if self.kanwadict is None:
            self.kanwadict = msgpack_loads(
                P('localization/pykakasi/kanwadict2.calibre_msgpack', data=True))
        if self.itaijidict is None:
            self.itaijidict = msgpack_loads(
                P('localization/pykakasi/itaijidict2.calibre_msgpack', data=True))
        if self.kanadict is None:
            self.kanadict = msgpack_loads(
                P('localization/pykakasi/kanadict2.calibre_msgpack', data=True))

    def load_jisyo(self, char):
        """Return the dictionary table for *char*, or None if unavailable.

        *char* may be a ``str`` or UTF-8 encoded ``bytes``. Decoded tables
        are cached in ``jisyo_table``.
        """
        if not isinstance(char, str):
            char = str(char, 'utf-8')
        key = "%04x" % ord(char)
        try:  # already decoded?
            return self.jisyo_table[key]
        except KeyError:
            pass
        from calibre.utils.serialize import msgpack_loads
        try:
            table = self.jisyo_table[key] = msgpack_loads(decompress(self.kanwadict[key]))
        except Exception:
            # Missing key or undecodable entry: report "no table".
            return None
        return table

View File

@ -1,46 +0,0 @@
# k2a.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# */
from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo
class K2a:
    """Katakana to ASCII converter using the ``kanadict`` dictionary."""

    kanwa = None

    # Cache of the longest key length, tied to the dictionary object it was
    # computed from so a replaced dictionary is measured again.
    _window_for = None
    _window_len = 0

    def __init__(self):
        self.kanwa = jisyo()

    def isKatakana(self, char):
        """Return True if *char* lies strictly between U+30A0 and U+30F7."""
        return 0x30a0 < ord(char) < 0x30f7

    def _max_key_len(self):
        """Return the length of the longest key in the kana dictionary."""
        kanadict = self.kanwa.kanadict
        if self._window_for is not kanadict:
            self._window_for = kanadict
            self._window_len = max(map(len, kanadict), default=0)
        return self._window_len

    def convert(self, text):
        """Convert the longest prefix of *text* found in the kana dictionary.

        Returns ``(ascii, consumed)``; ``("", -1)`` when nothing matches.
        Every prefix up to the longest dictionary key is tried, so long
        entries are reachable.
        """
        kanadict = self.kanwa.kanadict
        Hstr = ""
        max_len = -1
        limit = min(self._max_key_len(), len(text))
        # Prefixes are tried shortest first, so the last hit is the longest.
        for x in range(1, limit + 1):
            prefix = text[:x]
            if prefix in kanadict:
                max_len = x
                Hstr = kanadict[prefix]
        return (Hstr, max_len)

View File

@ -1,96 +0,0 @@
# kakasi.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original Copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# */
from calibre.ebooks.unihandecode.pykakasi.h2a import H2a
from calibre.ebooks.unihandecode.pykakasi.j2h import J2H
from calibre.ebooks.unihandecode.pykakasi.k2a import K2a
class kakasi:
    """Romanize Japanese text by combining the J2H, H2a and K2a converters."""

    j2h = None
    h2a = None
    k2a = None

    def __init__(self):
        self.j2h = J2H()
        self.h2a = H2a()
        self.k2a = K2a()

    def _romanize_reading(self, reading):
        """Romanize a hiragana *reading*, stopping at the first piece that
        the hiragana converter cannot handle."""
        pieces = []
        pos = 0
        while pos < len(reading):
            (s, n) = self.h2a.convert(reading[pos:])
            if n <= 0:
                break
            pos += n
            pieces.append(s)
        return ''.join(pieces)

    def _convert_run(self, text, i, convert, in_script):
        """Convert the run of same-script characters starting at ``text[i]``.

        Returns ``(converted, next_index)``. A character that *convert*
        cannot handle (non-positive length) is copied through unchanged so
        the index always advances.
        """
        pieces = []
        while True:
            (t, l) = convert(text[i:])
            if l <= 0:
                pieces.append(text[i])
                i += 1
            else:
                pieces.append(t)
                i += l
            if i >= len(text) or not in_script(text[i]):
                return ''.join(pieces), i

    def do(self, text):
        """Return *text* with kanji, hiragana and katakana romanized.

        Each converted kanji word is capitalized. Every converted segment
        is followed by a space unless it ends the text. All other
        characters are copied through unchanged.
        """
        out = []
        i = 0
        size = len(text)
        while i < size:
            ch = text[i]
            if self.j2h.isKanji(ch):
                (t, l) = self.j2h.convert(text[i:])
                if l <= 0:
                    # No dictionary entry: keep the kanji as is.
                    out.append(ch)
                    i += 1
                    continue
                i += l
                out.append(self._romanize_reading(t).capitalize())
            elif self.h2a.isHiragana(ch):
                run, i = self._convert_run(text, i, self.h2a.convert, self.h2a.isHiragana)
                out.append(run)
            elif self.k2a.isKatakana(ch):
                run, i = self._convert_run(text, i, self.k2a.convert, self.k2a.isKatakana)
                out.append(run)
            else:
                out.append(ch)
                i += 1
                continue
            # A converted segment is separated from whatever follows it.
            if i < size:
                out.append(' ')
        return ''.join(out)

File diff suppressed because it is too large Load Diff

View File

@ -1,317 +0,0 @@
;; Kana-Alphabet mapping dictionary
;;
;; To use this mapping table, first normalize the input text
;; to Unicode NFKC form.
;;
;; basic mapping
;;
a ァ
a ア
ba バ
bba ッバ
bbe ッベ
bbi ッビ
bbo ッボ
bbu ッブ
bbya ッビャ
bbyo ッビョ
bbyu ッビュ
be ベ
bi ビ
bo ボ
bu ブ
bya ビャ
byo ビョ
byu ビュ
cha チャ
che チェ
chi チ
cho チョ
chu チュ
da ダ
dda ッダ
dde ッデ
ddo ッド
de デ
di ディ
do ド
e ェ
e エ
e ヱ
fa ファ
fe フェ
ffa ッファ
ffe ッフェ
ffi ッフィ
ffo ッフォ
ffu ッフ
fi フィ
fo フォ
fu フ
ga ガ
ge ゲ
gga ッガ
gge ッゲ
ggi ッギ
ggo ッゴ
ggu ッグ
ggya ッギャ
ggyo ッギョ
ggyu ッギュ
gi ギ
go ゴ
gu グ
gya グャ
gyo ギョ
gyu ギゥ
ha ハ
he ヘ
hha ッハ
hhe ッヘ
hhi ッヒ
hho ッホ
hhya ッヒャ
hhyo ッヒョ
hhyu ッヒュ
hi ヒ
ho ホ
hya ヒャ
hyo ヒョ
hyu ヒュ
i ィ
i イ
i ヰ
ja ジャ
ja ヂャ
ji ジ
ji ヂ
jja ッジャ
jji ッジ
jji ッヂ
jjo ッジョ
jju ッジュ
jjya ッヂャ
jjyo ッヂョ
jjyu ッヂュ
jo ジョ
jo ヂョ
ju ジュ
ju ヂュ
ka カ
ka ヵ
ke ケ
ke ヶ
ki キ
kka ッカ
kke ッケ
kki ッキ
kko ッコ
kku ック
kkya ッキャ
kkyo ッキョ
kkyu ッキュ
ko コ
ku ク
kya キァ
kyo キォ
kyu キゥ
ma マ
me メ
mi ミ
mo モ
mu ム
mya ミャ
myo ミョ
myu ミュ
n ン
n'a ンア
n'e ンエ
n'i ンイ
n'o ンオ
n'u ンウ
na ナ
ne ネ
ni ニ
no ノ
nu ヌ
nya ニャ
nyo ニョ
nyu ニュ
o ォ
o オ
pa パ
pe ペ
pi ピ
po ポ
ppa ッパ
ppe ッペ
ppi ッピ
ppo ッポ
ppu ップ
ppya ッピャ
ppyo ッピョ
ppyu ッピュ
pu プ
pya ピャ
pyo ピョ
pyu ピュ
ra ラ
re レ
ri リ
ro ロ
rra ッラ
rre ッレ
rri ッリ
rro ッロ
rru ッル
rrya ッリャ
rryo ッリョ
rryu ッリュ
ru ル
rya リャ
ryo リョ
ryu リュ
sa サ
se セ
sha シャ
shi シ
sho ショ
shu シュ
so ソ
ssa ッサ
sse ッセ
ssha ッシャ
sshi ッシ
ssho ッショ
sshu ッシュ
sso ッソ
ssu ッス
su ス
ta タ
tcha ッチャ
tchi ッチ
tcho ッチョ
tchu ッチュ
te テ
to ト
tsu ッ
tsu ツ
tta ッタ
tte ッテ
tto ット
ttsu ッツ
u ゥ
u ウ
va ヴァ
ve ヴェ
vi ヴィ
vo ヴォ
vu ヴ
vva ッヴァ
vve ッヴェ
vvi ッヴィ
vvo ッヴォ
vvu ッヴ
wa ヮ
wa ワ
wo ヲ
ya ャ
ya ヤ
yo ョ
yo ヨ
yu ュ
yu ユ
yya ッヤ
yyo ッヨ
yyu ッユ
za ザ
ze ゼ
zo ゾ
zu ズ
zu ヅ
zza ッザ
zzo ッゾ
zzu ッズ
zzu ッヅ
;;
;; extended characters
;;
;;
;; gairai terms
;;
all オール
algrism アルゴリズム
answer アンサー
base ベース
begineer ビギナー
connection コネクション
contents コンテンツ
creator クリエーター
comic コミック
comics コミックス
culture カルチャー
debug デバッグ
debugging デバッギング
design デザイン
digital デジタル
dillenma ジレンマ
directory ディレクトリ
disk ディスク
document ドキュメント
download ダウンロード
electric エレクトリック
facebook フェイスブック
firefox ファイアーフォックス
folder フォルダ
format フォーマット
forum フォーラム
fox フォックス
free フリー
gnome ノーム
gnu グヌー
gozilla ゴジラ
guide ガイド
harvard ハーバード
help ヘルプ
highlight ハイライト
japan ジャパン
journal ジャーナル
library ライブラリ
line ライン
love ラヴ
love ラブ
mail メール
main メイン
mystery ミステリ
mozilla モジラ
network ネットワーク
next ネクスト
new ニュー
news ニュース
native ネイティブ
online オンライン
open オープン
professional プロフェッショナル
profile プロファイル
programmer プログラマ
sample サンプル
series シリーズ
share シェア
social ソーシャル
society ソサエティ
software ソフトウエア
source ソース
street ストリート
system システム
tag タグ
text テキスト
thunderbird サンダーバード
training トレーニング
twitter ツイッター
unicode ユニコード
wall ウオール
wall ウォール
welcome ウェルカム
welcome ウエルカム
wikinomics ウィキノミクス
york ヨーク

View File

@ -420,6 +420,10 @@ class BuildTest(unittest.TestCase):
if display_env_var is not None:
os.environ['DISPLAY'] = display_env_var
def test_pykakasi(self):
    # Smoke test for the pykakasi dependency: a known Japanese phrase must
    # come back from Jadecoder with the expected romanization.
    from calibre.ebooks.unihandecode.jadecoder import Jadecoder
    decoder = Jadecoder()
    romanized = decoder.decode("自転車生活の愉しみ")
    self.assertEqual(romanized, 'Jitensha Seikatsu no Tanoshi mi')
def test_imaging(self):
from PIL import Image
try: