From 411adb488c4a7650eb0e9cbfa3194eee88e43c3d Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sat, 5 Feb 2011 16:16:15 +0900 Subject: [PATCH] jadecoder: remove charset check; now kakasi.py handles unknown chars better --- src/calibre/ebooks/unihandecode/jadecoder.py | 1 - src/calibre/ebooks/unihandecode/pykakasi/kakasi.py | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/unihandecode/jadecoder.py b/src/calibre/ebooks/unihandecode/jadecoder.py index a7ed06e725..70624b7c6b 100644 --- a/src/calibre/ebooks/unihandecode/jadecoder.py +++ b/src/calibre/ebooks/unihandecode/jadecoder.py @@ -34,7 +34,6 @@ class Jadecoder(Unidecoder): def decode(self, text): try: - dummy = text.encode("eucjp") # test if text contains only Japanese and ASCII characters. result=self.kakasi.do(text) return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),result) except: diff --git a/src/calibre/ebooks/unihandecode/pykakasi/kakasi.py b/src/calibre/ebooks/unihandecode/pykakasi/kakasi.py index 94eba36e59..dd44d84439 100644 --- a/src/calibre/ebooks/unihandecode/pykakasi/kakasi.py +++ b/src/calibre/ebooks/unihandecode/pykakasi/kakasi.py @@ -53,7 +53,9 @@ class kakasi(object): if self.j2h.isKanji(text[i]): (t, l) = self.j2h.convert(text[i:]) if l <= 0: - break + otext = otext + text[i] + i = i + 1 + continue i = i + l m = 0 tmptext = ""