Merge from trunk

Charles Haley 2011-02-15 14:28:06 +00:00
commit 4dbf65009c
36 changed files with 146,380 additions and 3,331 deletions


@@ -193,6 +193,33 @@ License: GPL-3
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL-3 on Debian systems.
Files: src/calibre/ebooks/unihandecode/pykakasi/*
Copyright: 2011, Hiroshi Miura <miurahr@linux.com>
Copyright: 1992, Hironobu Takahashi
License: GPL-2+
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL on Debian systems.
Files: resources/kanwadict2.db
Files: resources/itaijidict2.pickle
Copyright: 2011, Hiroshi Miura <miurahr@linux.com>
Copyright: 1992 1993 1994, Hironobu Takahashi (takahasi@tiny.or.jp),
Copyright: 1992 1993 1994, Masahiko Sato (masahiko@sato.riec.tohoku.ac.jp),
Copyright: 1992 1993 1994, Yukiyoshi Kameyama, Miki Inooka, Akihiko Sasaki, Dai Ando, Junichi Okukawa,
Copyright: 1992 1993 1994, Katsushi Sato and Nobuhiro Yamagishi
License: GPL-2+
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL on Debian systems.
Files: src/calibre/ebooks/unihandecode/*
Copyright: 2010-2011, Hiroshi Miura <miurahr@linux.com>
Copyright: 2009, John Schember
Copyright: 2007, Russell Norris
Copyright: 2001, Sean M. Burke
License: GPL-3, Perl
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL-3 on Debian systems.
Files: src/encutils/__init__.py
Copyright: 2005-2008, Christof Hoeke
License: LGPL-3+, CC-BY-3.0


@@ -13,12 +13,12 @@
id="Layer_1"
x="0px"
y="0px"
width="134.77701"
height="199.99901"
viewBox="0 0 134.777 199.999"
width="200"
height="200"
viewBox="0 0 199.99999 199.99999"
enable-background="new 0 0 595.28 841.89"
xml:space="preserve"
inkscape:version="0.47 r22583"
inkscape:version="0.48.0 r9654"
sodipodi:docname="news.svg"><metadata
id="metadata26"><rdf:RDF><cc:Work
rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type
@@ -38,22 +38,22 @@
guidetolerance="10"
inkscape:pageopacity="0"
inkscape:pageshadow="2"
inkscape:window-width="640"
inkscape:window-height="489"
inkscape:window-width="1680"
inkscape:window-height="997"
id="namedview22"
showgrid="false"
inkscape:zoom="0.28032165"
inkscape:cx="67.389001"
inkscape:cy="99.722002"
inkscape:window-x="0"
inkscape:window-y="41"
inkscape:window-maximized="0"
inkscape:window-x="-4"
inkscape:window-y="30"
inkscape:window-maximized="1"
inkscape:current-layer="Layer_1" />
<g
id="g3"
transform="translate(-230.25101,-320.668)">
transform="translate(-194.57771,-320.66701)">
<polygon
points="360.241,366.109 345.29,359.678 345.29,343.405 329.945,343.405 324.265,329.15 309.147,335.175 297.64,323.667 286.79,334.517 272.693,328.454 266.263,343.405 249.988,343.405 249.988,358.749 235.734,364.429 241.759,379.548 230.251,391.056 241.101,401.906 235.039,416.002 249.988,422.432 249.988,438.706 265.333,438.706 271.013,452.961 277.817,450.25 277.817,475.111 252.085,475.111 297.64,520.667 343.193,475.111 317.463,475.111 317.463,451.453 322.585,453.656 329.016,438.706 345.29,438.706 345.29,423.362 359.546,417.682 353.521,402.563 365.028,391.056 354.178,380.205 "
points="286.79,334.517 272.693,328.454 266.263,343.405 249.988,343.405 249.988,358.749 235.734,364.429 241.759,379.548 230.251,391.056 241.101,401.906 235.039,416.002 249.988,422.432 249.988,438.706 265.333,438.706 271.013,452.961 277.817,450.25 277.817,475.111 252.085,475.111 297.64,520.667 343.193,475.111 317.463,475.111 317.463,451.453 322.585,453.656 329.016,438.706 345.29,438.706 345.29,423.362 359.546,417.682 353.521,402.563 365.028,391.056 354.178,380.205 360.241,366.109 345.29,359.678 345.29,343.405 329.945,343.405 324.265,329.15 309.147,335.175 297.64,323.667 "
id="polygon5"
style="fill:#ffffff" />
<linearGradient
@@ -73,7 +73,7 @@
id="stop10" />
</linearGradient>
<polygon
points="360.241,363.11 345.29,356.679 345.29,340.406 329.945,340.406 324.265,326.151 309.147,332.176 297.64,320.668 286.79,331.518 272.693,325.455 266.263,340.406 249.988,340.406 249.988,355.75 235.734,361.43 241.759,376.549 230.251,388.057 241.101,398.907 235.039,413.003 249.988,419.433 249.988,435.707 265.333,435.707 271.013,449.962 277.817,447.251 277.817,472.112 252.085,472.112 297.64,517.668 343.193,472.112 317.463,472.112 317.463,448.454 322.585,450.657 329.016,435.707 345.29,435.707 345.29,420.363 359.546,414.683 353.521,399.564 365.028,388.057 354.178,377.206 "
points="286.79,331.518 272.693,325.455 266.263,340.406 249.988,340.406 249.988,355.75 235.734,361.43 241.759,376.549 230.251,388.057 241.101,398.907 235.039,413.003 249.988,419.433 249.988,435.707 265.333,435.707 271.013,449.962 277.817,447.251 277.817,472.112 252.085,472.112 297.64,517.668 343.193,472.112 317.463,472.112 317.463,448.454 322.585,450.657 329.016,435.707 345.29,435.707 345.29,420.363 359.546,414.683 353.521,399.564 365.028,388.057 354.178,377.206 360.241,363.11 345.29,356.679 345.29,340.406 329.945,340.406 324.265,326.151 309.147,332.176 297.64,320.668 "
id="polygon12"
style="fill:url(#SVGID_1_)" />
<g
@@ -81,14 +81,16 @@
<path
d="m 273.311,419.168 v -56.752 h 17.935 l 9.01,17.43 c 3.115,5.641 6.399,13.22 8.926,19.873 h 0.252 c -0.842,-7.494 -1.178,-15.41 -1.178,-23.83 v -13.472 h 13.893 v 56.752 H 306.15 l -9.684,-18.861 c -3.116,-5.978 -6.82,-13.641 -9.515,-20.461 h -0.336 c 0.42,7.663 0.589,16.167 0.589,25.345 v 13.978 h -13.893 z"
id="path16"
style="fill:#993720" />
style="fill:#993720"
inkscape:connector-curvature="0" />
</g>
<g
id="g18">
<path
d="m 273.311,416.873 v -56.752 h 17.935 l 9.01,17.43 c 3.115,5.641 6.399,13.22 8.926,19.873 h 0.252 c -0.842,-7.494 -1.178,-15.41 -1.178,-23.83 v -13.472 h 13.893 v 56.752 H 306.15 l -9.684,-18.861 c -3.116,-5.978 -6.82,-13.641 -9.515,-20.461 h -0.336 c 0.42,7.663 0.589,16.167 0.589,25.345 v 13.978 h -13.893 z"
id="path20"
style="fill:#f0efef" />
style="fill:#f0efef"
inkscape:connector-curvature="0" />
</g>
</g>
</svg>

Before: 4.1 KiB -> After: 4.2 KiB

Binary file not shown. (Before: 5.6 KiB -> After: 6.2 KiB)

Binary file not shown. (After: 834 B)


@@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
standaard.be
'''
@@ -9,15 +9,16 @@ from calibre.web.feeds.news import BasicNewsRecipe
class DeStandaard(BasicNewsRecipe):
title = u'De Standaard'
__author__ = u'Darko Miletic'
language = 'nl_BE'
language = 'nl_BE'
description = u'News from Belgium in Dutch'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
masthead_url = 'http://www.standaard.be/extra/css/images/masthead/logo_340x45.png'
publication_type = 'newspaper'
keep_only_tags = [dict(name='div' , attrs={'id':['intro','continued']})]
feeds = [(u'De Standaard Online', u'http://feeds.feedburner.com/dso-front')]
@@ -27,4 +28,4 @@ class DeStandaard(BasicNewsRecipe):
return article.get('guid', None)
def print_version(self, url):
return url.replace('/Detail.aspx?','/PrintArtikel.aspx?')
return url.replace('/artikel/detail.aspx?','/Artikel/PrintArtikel.aspx?')
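For illustration, the effect of the new print_version() rewrite on a made-up article URL (the article id below is hypothetical):

# illustration only; the article id is made up
url = 'http://www.standaard.be/artikel/detail.aspx?artikelid=G0L2QGAG'
print(url.replace('/artikel/detail.aspx?', '/Artikel/PrintArtikel.aspx?'))
# -> http://www.standaard.be/Artikel/PrintArtikel.aspx?artikelid=G0L2QGAG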


@@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
smh.com.au
'''
@@ -22,7 +22,11 @@ class Smh_au(BasicNewsRecipe):
remove_empty_feeds = True
masthead_url = 'http://images.smh.com.au/2010/02/02/1087188/smh-620.jpg'
publication_type = 'newspaper'
extra_css = ' h1{font-family: Georgia,"Times New Roman",Times,serif } body{font-family: Arial,Helvetica,sans-serif} .cT-imageLandscape{font-size: x-small} '
extra_css = """
h1{font-family: Georgia,"Times New Roman",Times,serif }
body{font-family: Arial,Helvetica,sans-serif}
.cT-imageLandscape,.cT-imagePortrait{font-size: x-small}
"""
conversion_options = {
'comment' : description
@@ -38,7 +42,11 @@ class Smh_au(BasicNewsRecipe):
]
remove_tags_after = [dict(name='div',attrs={'class':'articleBody'})]
keep_only_tags = [dict(name='div',attrs={'id':'content'})]
remove_attributes = ['width','height']
remove_tags = [
dict(attrs={'class':'hidden'}),
dict(name=['link','meta','base','embed','object','iframe'])
]
remove_attributes = ['width','height','lang']
def parse_index(self):
articles = []
@@ -66,3 +74,14 @@
,'description':description
})
return [(self.tag_to_string(soup.find('title')), articles)]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('body'):
item.name = 'div'
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup


@@ -6,9 +6,10 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, cPickle
import os, cPickle, re, anydbm, shutil
from zlib import compress
from setup import Command, basenames
from setup import Command, basenames, __appname__
def get_opts_from_parser(parser):
def do_opt(opt):
@@ -26,6 +27,9 @@ class Resources(Command):
description = 'Compile various needed calibre resources'
KAKASI_PATH = os.path.join(Command.SRC, __appname__,
'ebooks', 'unihandecode', 'pykakasi')
def run(self, opts):
scripts = {}
for x in ('console', 'gui'):
@@ -101,11 +105,107 @@
import json
json.dump(function_dict, open(dest, 'wb'), indent=4)
self.run_kakasi(opts)
def run_kakasi(self, opts):
self.records = {}
src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanwadict2.db')
base = os.path.dirname(dest)
if not os.path.exists(base):
os.makedirs(base)
if self.newer(dest, src):
self.info('\tGenerating Kanwadict')
for line in open(src, "r"):
self.parsekdict(line)
self.kanwaout(dest)
src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','itaijidict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating Itaijidict')
self.mkitaiji(src, dest)
src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanadict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating kanadict')
self.mkkanadict(src, dest)
return
def mkitaiji(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
dic[pair[0]] = pair[1]
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def mkkanadict(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
(alpha, kana) = line.split(' ')
dic[kana] = alpha
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def parsekdict(self, line):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
return
(yomi, kanji) = line.split(' ')
if ord(yomi[-1:]) <= ord('z'):
tail = yomi[-1:]
yomi = yomi[:-1]
else:
tail = ''
self.updaterec(kanji, yomi, tail)
def updaterec(self, kanji, yomi, tail):
key = "%04x"%ord(kanji[0])
if key in self.records:
if kanji in self.records[key]:
rec = self.records[key][kanji]
rec.append((yomi,tail))
self.records[key].update( {kanji: rec} )
else:
self.records[key][kanji]=[(yomi, tail)]
else:
self.records[key] = {}
self.records[key][kanji]=[(yomi, tail)]
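# Illustration only (not part of the original file): one made-up kakasidict
# line, u"\u3084\u3059i \u5b89" (yomi "yasu" + okurigana tail 'i' + kanji),
# flows through parsekdict/updaterec above as:
#   yomi = u"\u3084\u3059", tail = 'i', kanji = u"\u5b89"
#   self.records['5b89'] == {u"\u5b89": [(u"\u3084\u3059", 'i')]}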
def kanwaout(self, out):
dic = anydbm.open(out, 'c')
for (k, v) in self.records.iteritems():
dic[k] = compress(cPickle.dumps(v, -1))
dic.close()
def clean(self):
for x in ('scripts', 'recipes', 'ebook-convert-complete'):
x = self.j(self.RESOURCES, x+'.pickle')
if os.path.exists(x):
os.remove(x)
kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
if os.path.exists(kakasi):
shutil.rmtree(kakasi)
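A hedged sketch of reading one record back from the database generated above (key and value layout as produced by updaterec and kanwaout; the path assumes the default resources tree):

import anydbm, cPickle
from zlib import decompress
db = anydbm.open('resources/localization/pykakasi/kanwadict2.db', 'r')
key = '%04x' % ord(u'\u6f22')                 # group key: codepoint of the first kanji (here 漢)
entries = cPickle.loads(decompress(db[key]))  # {kanji: [(yomi, tail), ...]}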


@@ -152,8 +152,17 @@ def check_ebook_format(stream, current_guess):
stream.seek(0)
return ans
def normalize(x):
if isinstance(x, unicode):
import unicodedata
x = unicodedata.normalize('NFKC', x)
return x
def calibre_cover(title, author_string, series_string=None,
output_format='jpg', title_size=46, author_size=36):
title = normalize(title)
author_string = normalize(author_string)
series_string = normalize(series_string)
from calibre.utils.magick.draw import create_cover_page, TextLine
lines = [TextLine(title, title_size), TextLine(author_string, author_size)]
if series_string:
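A standalone example of what the normalize() helper added in this hunk does: NFKC folds compatibility characters into their plain equivalents before the cover text is drawn.

import unicodedata
print(unicodedata.normalize('NFKC', u'\ufb01re \u2166'))  # fi-ligature and Roman numeral sign expand: -> u'fire VII'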


@@ -402,8 +402,8 @@ OptionRecommendation(name='asciiize',
'with "Mikhail Gorbachiov". Also, note that in '
'cases where there are multiple representations of a character '
'(characters shared by Chinese and Japanese for instance) the '
'representation used by the largest number of people will be '
'used (Chinese in the previous example).')%\
'representation based on the current calibre interface language will be '
'used.')%\
u'\u041c\u0438\u0445\u0430\u0438\u043b '
u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432'
)
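A hedged sketch of the behavior this help text describes (exact romanizations depend on the bundled dictionaries):

from calibre.ebooks.unihandecode import Unihandecoder
han = u'\u6f22\u5b57'                        # the same two characters, read differently per language
print(Unihandecoder(lang='ja').decode(han))  # Japanese reading via kakasi
print(Unihandecoder(lang='zh').decode(han))  # default Chinese-derived reading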


@@ -543,9 +543,9 @@ class HTMLPreProcessor(object):
html = XMLDECL_RE.sub('', html)
if getattr(self.extra_opts, 'asciiize', False):
from calibre.ebooks.unidecode.unidecoder import Unidecoder
unidecoder = Unidecoder()
html = unidecoder.decode(html)
from calibre.utils.localization import get_udc
unihandecoder = get_udc()
html = unihandecoder.decode(html)
if getattr(self.extra_opts, 'enable_heuristics', False):
from calibre.ebooks.conversion.utils import HeuristicProcessor
@@ -557,10 +557,10 @@ class HTMLPreProcessor(object):
unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
if unsupported_unicode_chars:
from calibre.ebooks.unidecode.unidecoder import Unidecoder
unidecoder = Unidecoder()
from calibre.utils.localization import get_udc
unihandecoder = get_udc()
for char in unsupported_unicode_chars:
asciichar = unidecoder.decode(char)
asciichar = unihandecoder.decode(char)
html = html.replace(char, asciichar)
return html


@@ -12,6 +12,7 @@ __docformat__ = 'restructuredtext en'
from struct import pack, unpack
from cStringIO import StringIO
from calibre.ebooks import normalize
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
from calibre.ebooks.mobi.langcodes import iana2mobi
@@ -311,6 +312,7 @@ class MetadataUpdater(object):
return StreamSlicer(self.stream, start, stop)
def update(self, mi):
mi.title = normalize(mi.title)
def update_exth_record(rec):
recs.append(rec)
if rec[0] in self.original_exth_records:
@@ -331,12 +333,12 @@
kindle_pdoc = None
if mi.author_sort and pas:
authors = mi.author_sort
update_exth_record((100, authors.encode(self.codec, 'replace')))
update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
elif mi.authors:
authors = ';'.join(mi.authors)
update_exth_record((100, authors.encode(self.codec, 'replace')))
update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
if mi.publisher:
update_exth_record((101, mi.publisher.encode(self.codec, 'replace')))
update_exth_record((101, normalize(mi.publisher).encode(self.codec, 'replace')))
if mi.comments:
# Strip user annotations
a_offset = mi.comments.find('<div class="user_annotations">')
@@ -345,12 +347,12 @@
mi.comments = mi.comments[:a_offset]
if ad_offset >= 0:
mi.comments = mi.comments[:ad_offset]
update_exth_record((103, mi.comments.encode(self.codec, 'replace')))
update_exth_record((103, normalize(mi.comments).encode(self.codec, 'replace')))
if mi.isbn:
update_exth_record((104, mi.isbn.encode(self.codec, 'replace')))
if mi.tags:
subjects = '; '.join(mi.tags)
update_exth_record((105, subjects.encode(self.codec, 'replace')))
update_exth_record((105, normalize(subjects).encode(self.codec, 'replace')))
if kindle_pdoc and kindle_pdoc in mi.tags:
update_exth_record((501, str('PDOC')))


@@ -14,8 +14,9 @@ import re
from struct import pack
import time
from urlparse import urldefrag
from cStringIO import StringIO
from calibre.ebooks import normalize
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.ebooks.mobi.mobiml import MBP_NS
from calibre.ebooks.oeb.base import OEB_DOCS
@@ -1365,7 +1366,7 @@ class MobiWriter(object):
self._text_length,
self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
uid = random.randint(0, 0xffffffff)
title = unicode(metadata.title[0]).encode('utf-8')
title = normalize(unicode(metadata.title[0])).encode('utf-8')
# The MOBI Header
# 0x0 - 0x3
@@ -1523,12 +1524,12 @@
items = oeb.metadata[term]
if term == 'creator':
if self._prefer_author_sort:
creators = [unicode(c.file_as or c) for c in items]
creators = [normalize(unicode(c.file_as or c)) for c in items]
else:
creators = [unicode(c) for c in items]
creators = [normalize(unicode(c)) for c in items]
items = ['; '.join(creators)]
for item in items:
data = self.COLLAPSE_RE.sub(' ', unicode(item))
data = self.COLLAPSE_RE.sub(' ', normalize(unicode(item)))
if term == 'identifier':
if data.lower().startswith('urn:isbn:'):
data = data[9:]
@@ -1542,7 +1543,7 @@
nrecs += 1
if term == 'rights' :
try:
rights = unicode(oeb.metadata.rights[0]).encode('utf-8')
rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8')
except:
rights = 'Unknown'
exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))

File diff suppressed because it is too large.


@@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
__all__ = ["Unihandecoder"]
'''
Decode unicode text to an ASCII representation of the text.
Translate unicode characters to ASCII.
Inspired by John Schember's unidecode library, which was created as part
of calibre.
Copyright(c) 2009, John Schember
Transliterate the string from unicode characters to ASCII for Chinese and other languages.
'''
import unicodedata
class Unihandecoder(object):
preferred_encoding = None
decoder = None
def __init__(self, lang="zh", encoding='utf-8'):
self.preferred_encoding = encoding
lang = lang.lower()
if lang[:2] == u'ja':
from calibre.ebooks.unihandecode.jadecoder import Jadecoder
self.decoder = Jadecoder()
elif lang[:2] == u'kr' or lang == u'korean':
from calibre.ebooks.unihandecode.krdecoder import Krdecoder
self.decoder = Krdecoder()
elif lang[:2] == u'vn' or lang == u'vietnum':
from calibre.ebooks.unihandecode.vndecoder import Vndecoder
self.decoder = Vndecoder()
else: #zh and others
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
self.decoder = Unidecoder()
def decode(self, text):
try:
unicode # python2
if not isinstance(text, unicode):
try:
text = unicode(text)
except:
try:
text = text.decode(self.preferred_encoding)
except:
text = text.decode('utf-8', 'replace')
except: # python3, str is unicode
pass
# first, Unicode-normalize the text to NFKC form (see the Unicode standard)
ntext = unicodedata.normalize('NFKC', text)
return self.decoder.decode(ntext)
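A minimal usage sketch, assuming the pykakasi dictionaries have been generated by setup/resources.py:

from calibre.ebooks.unihandecode import Unihandecoder
ud = Unihandecoder(lang='ja')
print(ud.decode(u'\u65e5\u672c\u8a9e'))  # Japanese text romanized via kakasi, e.g. 'Nihongo'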

File diff suppressed because it is too large.


@@ -0,0 +1,41 @@
# coding:utf8
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text for Japanese.
Translates a unicode string to an ASCII roman string.
The API is based on the python unidecode module,
which is based on the Ruby gem (http://rubyforge.org/projects/unidecode/)
and the perl module Text::Unidecode
(http://search.cpan.org/~sburke/Text-Unidecode-0.04/).
The transliteration itself is provided by the KAKASI Japanese processing engine.
Copyright (c) 2010 Hiroshi Miura
'''
import re
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
from calibre.ebooks.unihandecode.jacodepoints import CODEPOINTS as JACODES
from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi
class Jadecoder(Unidecoder):
kakasi = None
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(JACODES)
self.kakasi = kakasi()
def decode(self, text):
try:
result=self.kakasi.do(text)
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),result)
except:
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),text)

File diff suppressed because it is too large.


@@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text in Korean.
Based on unidecoder.
'''
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
from calibre.ebooks.unihandecode.krcodepoints import CODEPOINTS as HANCODES
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
class Krdecoder(Unidecoder):
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(HANCODES)


@@ -0,0 +1,5 @@
from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi
kakasi # referenced so the import is not flagged as unused
__all__ = ["pykakasi"]


@@ -0,0 +1,185 @@
# -*- coding: utf-8 -*-
# h2a.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
class H2a (object):
H2a_table = {
u"\u3041":"a", u"\u3042":"a",
u"\u3043":"i", u"\u3044":"i",
u"\u3045":"u", u"\u3046":"u",
u"\u3046\u309b":"vu", u"\u3046\u309b\u3041":"va",
u"\u3046\u309b\u3043":"vi", u"\u3046\u309b\u3047":"ve",
u"\u3046\u309b\u3049":"vo",
u"\u3047":"e", u"\u3048":"e",
u"\u3049":"o", u"\u304a":"o",
u"\u304b":"ka", u"\u304c":"ga",
u"\u304d":"ki", u"\u304d\u3041":"kya",
u"\u304d\u3045":"kyu", u"\u304d\u3049":"kyo",
u"\u304e":"gi", u"\u3050\u3083":"gya",
u"\u304e\u3045":"gyu", u"\u304e\u3087":"gyo",
u"\u304f":"ku", u"\u3050":"gu",
u"\u3051":"ke", u"\u3052":"ge",
u"\u3053":"ko", u"\u3054":"go",
u"\u3055":"sa", u"\u3056":"za",
u"\u3057":"shi", u"\u3057\u3083":"sha",
u"\u3057\u3085":"shu", u"\u3057\u3087":"sho",
u"\u3058":"ji", u"\u3058\u3083":"ja",
u"\u3058\u3085":"ju", u"\u3058\u3087":"jo",
u"\u3059":"su", u"\u305a":"zu",
u"\u305b":"se", u"\u305c":"ze",
u"\u305d":"so", u"\u305e":"zo",
u"\u305f":"ta", u"\u3060":"da",
u"\u3061":"chi", u"\u3061\u3047":"che", u"\u3061\u3083":"cha",
u"\u3061\u3085":"chu", u"\u3061\u3087":"cho",
u"\u3062":"ji", u"\u3062\u3083":"ja",
u"\u3062\u3085":"ju", u"\u3062\u3087":"jo",
u"\u3063":"tsu",
u"\u3063\u3046\u309b":"vvu",
u"\u3063\u3046\u309b\u3041":"vva",
u"\u3063\u3046\u309b\u3043":"vvi",
u"\u3063\u3046\u309b\u3047":"vve",
u"\u3063\u3046\u309b\u3049":"vvo",
u"\u3063\u304b":"kka", u"\u3063\u304c":"gga",
u"\u3063\u304d":"kki", u"\u3063\u304d\u3083":"kkya",
u"\u3063\u304d\u3085":"kkyu", u"\u3063\u304d\u3087":"kkyo",
u"\u3063\u304e":"ggi", u"\u3063\u304e\u3083":"ggya",
u"\u3063\u304e\u3085":"ggyu", u"\u3063\u304e\u3087":"ggyo",
u"\u3063\u304f":"kku", u"\u3063\u3050":"ggu",
u"\u3063\u3051":"kke", u"\u3063\u3052":"gge",
u"\u3063\u3053":"kko", u"\u3063\u3054":"ggo",
u"\u3063\u3055":"ssa", u"\u3063\u3056":"zza",
u"\u3063\u3057":"sshi", u"\u3063\u3057\u3083":"ssha",
u"\u3063\u3057\u3085":"sshu", u"\u3063\u3057\u3087":"ssho",
u"\u3063\u3058":"jji", u"\u3063\u3058\u3083":"jja",
u"\u3063\u3058\u3085":"jju", u"\u3063\u3058\u3087":"jjo",
u"\u3063\u3059":"ssu", u"\u3063\u305a":"zzu",
u"\u3063\u305b":"sse", u"\u3063\u305e":"zze",
u"\u3063\u305d":"sso", u"\u3063\u305e":"zzo",
u"\u3063\u305f":"tta", u"\u3063\u3060":"dda",
u"\u3063\u3061":"tchi", u"\u3063\u3061\u3083":"tcha",
u"\u3063\u3061\u3085":"tchu", u"\u3063\u3061\u3087":"tcho",
u"\u3063\u3062":"jji", u"\u3063\u3062\u3083":"jjya",
u"\u3063\u3062\u3085":"jjyu", u"\u3063\u3062\u3087":"jjyo",
u"\u3063\u3064":"ttsu", u"\u3063\u3065":"zzu",
u"\u3063\u3066":"tte", u"\u3063\u3067":"dde",
u"\u3063\u3068":"tto", u"\u3063\u3069":"ddo",
u"\u3063\u306f":"hha", u"\u3063\u3070":"bba",
u"\u3063\u3071":"ppa",
u"\u3063\u3072":"hhi", u"\u3063\u3072\u3083":"hhya",
u"\u3063\u3072\u3085":"hhyu", u"\u3063\u3072\u3087":"hhyo",
u"\u3063\u3073":"bbi", u"\u3063\u3073\u3083":"bbya",
u"\u3063\u3073\u3085":"bbyu", u"\u3063\u3073\u3087":"bbyo",
u"\u3063\u3074":"ppi", u"\u3063\u3074\u3083":"ppya",
u"\u3063\u3074\u3085":"ppyu", u"\u3063\u3074\u3087":"ppyo",
u"\u3063\u3075":"ffu", u"\u3063\u3075\u3041":"ffa",
u"\u3063\u3075\u3043":"ffi", u"\u3063\u3075\u3047":"ffe",
u"\u3063\u3075\u3049":"ffo",
u"\u3063\u3076":"bbu", u"\u3063\u3077":"ppu",
u"\u3063\u3078":"hhe", u"\u3063\u3079":"bbe",
u"\u3063\u307a":"ppe",
u"\u3063\u307b":"hho", u"\u3063\u307c":"bbo",
u"\u3063\u307d":"ppo",
u"\u3063\u3084":"yya", u"\u3063\u3086":"yyu",
u"\u3063\u3088":"yyo",
u"\u3063\u3089":"rra", u"\u3063\u308a":"rri",
u"\u3063\u308a\u3083":"rrya", u"\u3063\u308a\u3085":"rryu",
u"\u3063\u308a\u3087":"rryo",
u"\u3063\u308b":"rru", u"\u3063\u308c":"rre",
u"\u3063\u308d":"rro",
u"\u3064":"tsu", u"\u3065":"zu",
u"\u3066":"te", u"\u3067":"de", u"\u3067\u3043":"di",
u"\u3068":"to", u"\u3069":"do",
u"\u306a":"na",
u"\u306b":"ni", u"\u306b\u3083":"nya",
u"\u306b\u3085":"nyu", u"\u306b\u3087":"nyo",
u"\u306c":"nu", u"\u306d":"ne", u"\u306e":"no",
u"\u306f":"ha", u"\u3070":"ba", u"\u3071":"pa",
u"\u3072":"hi", u"\u3072\u3083":"hya",
u"\u3072\u3085":"hyu", u"\u3072\u3087":"hyo",
u"\u3073":"bi", u"\u3073\u3083":"bya",
u"\u3073\u3085":"byu", u"\u3073\u3087":"byo",
u"\u3074":"pi", u"\u3074\u3083":"pya",
u"\u3074\u3085":"pyu", u"\u3074\u3087":"pyo",
u"\u3075":"fu", u"\u3075\u3041":"fa",
u"\u3075\u3043":"fi", u"\u3075\u3047":"fe",
u"\u3075\u3049":"fo",
u"\u3076":"bu", u"\u3077":"pu",
u"\u3078":"he", u"\u3079":"be", u"\u307a":"pe",
u"\u307b":"ho", u"\u307c":"bo", u"\u307d":"po",
u"\u307e":"ma",
u"\u307f":"mi", u"\u307f\u3083":"mya",
u"\u307f\u3085":"myu", u"\u307f\u3087":"myo",
u"\u3080":"mu", u"\u3081":"me", u"\u3082":"mo",
u"\u3083":"ya", u"\u3084":"ya",
u"\u3085":"yu", u"\u3086":"yu",
u"\u3087":"yo", u"\u3088":"yo",
u"\u3089":"ra",
u"\u308a":"ri", u"\u308a\u3083":"rya",
u"\u308a\u3085":"ryu", u"\u308a\u3087":"ryo",
u"\u308b":"ru", u"\u308c":"re", u"\u308d":"ro",
u"\u308e":"wa", u"\u308f":"wa",
u"\u3090":"i", u"\u3091":"e",
u"\u3092":"wo", u"\u3093":"n",
u"\u3093\u3042":"n'a", u"\u3093\u3044":"n'i",
u"\u3093\u3046":"n'u", u"\u3093\u3048":"n'e",
u"\u3093\u304a":"n'o",
}
# this class is Borg
_shared_state = {}
def __new__(cls, *p, **k):
self = object.__new__(cls, *p, **k)
self.__dict__ = cls._shared_state
return self
def isHiragana(self, char):
return ( 0x3040 < ord(char) and ord(char) < 0x3094)
def convert(self, text):
Hstr = ""
max_len = -1
r = min(4, len(text)+1)
for x in xrange(r):
if text[:x] in self.H2a_table:
if max_len < x:
max_len = x
Hstr = self.H2a_table[text[:x]]
return (Hstr, max_len)
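A worked example of the longest-match lookup in H2a.convert() above (standalone sketch using the class as defined):

h2a = H2a()
print(h2a.convert(u'\u3057\u3083\u304b'))
# -> ('sha', 2): u'\u3057\u3083' matches as one digraph, so the caller advances two characters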


@@ -0,0 +1,564 @@
芦蘆
壱一
苅刈
舘館
曽曾
菟兎
島嶋
盃杯
冨富
峯峰
亘亙
弌一
乘乗
亂乱
豫予
亊事
弍二
亞亜
亰京
从従
仭仞
佛仏
來来
儘侭
伜倅
假仮
會会
做作
傳伝
僞偽
價価
儉倹
兒児
兔兎
竸競
兩両
囘回
册冊
冢塚
冩写
决決
冱冴
冰氷
况況
凉涼
處処
凾函
刄刃
刔抉
刧劫
剩剰
劍剣
劔剣
劒剣
剱剣
劑剤
辨弁
勞労
勳勲
勵励
勸勧
區区
卆卒
丗世
凖準
夘卯
卻却
卷巻
厠廁
厦廈
厮廝
厰廠
參参
雙双
咒呪
單単
噐器
營営
嚏嚔
嚴厳
囑嘱
囓齧
圀国
圈圏
國国
圍囲
圓円
團団
圖図
埀垂
埓埒
塲場
壞壊
墮堕
壓圧
壘塁
壥廛
壤壌
壯壮
壺壷
壹一
壻婿
壽寿
夂夊
夛多
梦夢
竒奇
奧奥
奬奨
侫佞
姙妊
嫻嫺
孃嬢
學学
斈学
寃冤
寇冦
寢寝
寫写
寶宝
寳宝
尅剋
將将
專専
對対
尓爾
尢尤
屆届
屬属
峽峡
嶌嶋
嵜崎
崙崘
嵳嵯
嶽岳
巛川
巵卮
帋紙
帶帯
幤幣
廐厩
廏厩
廣広
廚厨
廢廃
廳庁
廰庁
廸迪
弃棄
弉奘
彜彝
彈弾
彌弥
弯彎
徃往
徑径
從従
徠来
悳徳
恠怪
恆恒
悧俐
惡悪
惠恵
忰悴
惱悩
愼慎
愽博
慘惨
慚慙
憇憩
應応
懷懐
懴懺
戀恋
戞戛
戰戦
戲戯
拔抜
拏拿
擔担
拜拝
拂払
挾挟
搜捜
插挿
搖揺
攝摂
攪撹
據拠
擇択
擧拳
舉拳
抬擡
擴拡
攜携
攵攴
攷考
收収
效効
敕勅
敍叙
敘叙
數数
變変
斷断
旙旛
昜陽
晄晃
晉晋
晝昼
晰晢
暎映
曉暁
暸瞭
昿曠
曵曳
朖朗
朞期
霸覇
杤栃
杰傑
枩松
檜桧
條条
檮梼
梹檳
棊棋
棧桟
棕椶
楙茂
榮栄
槨椁
樂楽
權権
樞枢
樣様
樓楼
橢楕
檢検
櫻桜
鬱欝
盜盗
飮飲
歐嘔
歡歓
歸帰
殘残
殱殲
殼殻
毆殴
毓育
氣気
沒没
泪涙
濤涛
渕淵
渊淵
淨浄
淺浅
滿満
溂剌
溪渓
灌潅
滯滞
澁渋
澀渋
潛潜
濳潜
澂澄
澑溜
澤沢
濟済
濕湿
濱浜
濾滬
灣湾
烱炯
烟煙
熈煕
熏燻
燒焼
爐炉
爭争
爲為
爼俎
犁犂
犹猶
犲豺
狹狭
獎奨
默黙
獨独
獸獣
獵猟
獻献
珎珍
璢瑠
瑯琅
珱瓔
瓣弁
甞嘗
甼町
畄留
畍界
畊耕
畆畝
畧略
畫画
當当
畴疇
疊畳
疉畳
疂畳
癡痴
發発
皃猊
皈帰
皹皸
盖蓋
盡尽
蘯盪
眞真
眦眥
礦鉱
礪砺
碎砕
碯瑙
祕秘
祿禄
齋斎
禪禅
禮礼
禀稟
稱称
稻稲
稾稿
穗穂
穩穏
龝穐
穰穣
窗窓
竈竃
窰窯
竊窃
竝並
筺筐
笋筍
箟箘
筝箏
簔蓑
籠篭
籘籐
籖籤
粹粋
糺糾
絲糸
經経
總総
緜綿
縣県
縱縦
繪絵
繩縄
繼継
緕纃
續続
纖繊
纎繊
纜繿
缺欠
罐缶
罸罰
羃冪
羣群
羮羹
譱善
翆翠
翦剪
耻恥
聟婿
聨聯
聲声
聰聡
聽聴
肅粛
冐冒
脉脈
腦脳
腟膣
膓腸
膸髄
膽胆
臈臘
臟臓
臺台
與与
舊旧
舍舎
舖舗
舩船
艢檣
舮艫
艷艶
莖茎
莊荘
莵兎
菷帚
萠萌
蕚萼
蒂蔕
萬万
葢蓋
蘂蕊
蕋蕊
藪薮
藏蔵
藝芸
藥薬
蘓蘇
乕虎
號号
蠣蛎
蝨虱
蠅蝿
螢蛍
蟆蟇
蟲虫
蠏蟹
蟷螳
蟒蠎
蠶蚕
蠧蠹
蠻蛮
衂衄
衞衛
袵衽
裝装
襃褒
褝襌
覩睹
覺覚
覽覧
觀観
觧解
觸触
誡戒
謌歌
諡謚
謠謡
證証
譛譖
譯訳
譽誉
讀読
讓譲
讚賛
豐豊
貉狢
貍狸
貎猊
豼貔
貘獏
戝財
貭質
貳弐
貮弐
賤賎
賣売
贊賛
賍贓
赱走
踈疎
踴踊
躰体
軆体
軈軅
軣轟
輕軽
輙輒
輌輛
轉転
辭辞
辯弁
迯逃
逹達
逎遒
遞逓
遲遅
邊辺
邉辺
邨村
鄰隣
醉酔
醫医
釀醸
釋釈
釡釜
釼剣
銕鉄
錢銭
鎭鎮
鐵鉄
鐡鉄
鑒鑑
鑄鋳
鑛鉱
鈩鑪
鑚鑽
閇閉
濶闊
關関
阯址
陷陥
險険
隱隠
隸隷
襍雑
雜雑
靈霊
靜静
靱靭
韭韮
韲齏
韵韻
顏顔
顯顕
飃飄
餘余
餝飾
餠餅
騷騒
驅駆
驛駅
驗験
髓髄
體体
髮髪
鬪闘
鰺鯵
鰛鰮
鳬鳧
鳫鴈
鵄鴟
鵞鵝
鷄鶏
鷏鷆
鹽塩
麥麦
麸麩
麪麺
點点
黨党
皷鼓
鼡鼠
齊斉
齒歯
齡齢
龜亀
槇槙
遙遥
瑤瑶
凜凛
熙煕
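Each line of this table pairs a variant (itaiji) glyph with its standard form; mkitaiji in setup/resources.py pickles it into itaijidict2.pickle. A hedged sketch of how one pair from the table (the 國国 line) is applied:

itaiji = {u'\u570b': u'\u56fd'}  # one entry: variant 國 -> standard 国
print(u'\u4e2d\u570b'.replace(u'\u570b', itaiji[u'\u570b']))  # 中國 -> 中国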


@@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
# j2h.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original Copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo
import re
class J2H (object):
kanwa = None
cl_table = [
"","aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow",
"aiueow", "aiueow", "aiueow", "k", "g", "k", "g", "k", "g", "k", "g", "k",
"g", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "t", "d", "tc",
"d", "aiueokstchgzjfdbpw", "t", "d", "t", "d", "t", "d", "n", "n", "n", "n",
"n", "h", "b", "p", "h", "b", "p", "hf", "b", "p", "h", "b", "p", "h", "b",
"p", "m", "m", "m", "m", "m", "y", "y", "y", "y", "y", "y", "rl", "rl",
"rl", "rl", "rl", "wiueo", "wiueo", "wiueo", "wiueo", "w", "n", "v", "k",
"k", "", "", "", "", "", "", "", "", ""]
def __init__(self):
self.kanwa = jisyo()
def isKanji(self, c):
return ( 0x3400 <= ord(c) and ord(c) < 0xfa2e)
def isCletter(self, l, c):
if (ord(u"") <= ord(c) and ord(c) <= 0x309f) and ( l in self.cl_table[ord(c) - ord(u"")-1]):
return True
return False
def itaiji_conv(self, text):
r = []
for c in text:
if c in self.kanwa.itaijidict:
r.append(c)
for c in r:
text = re.sub(c, self.kanwa.itaijidict[c], text)
return text
def convert(self, text):
max_len = 0
Hstr = ""
table = self.kanwa.load_jisyo(text[0])
if table is None:
return ("", 0)
for (k,v) in table.iteritems():
length = len(k)
if len(text) >= length:
if text.startswith(k):
for (yomi, tail) in v:
if tail == '':
if max_len < length:
Hstr = yomi
max_len = length
elif max_len < length+1 and len(text) > length and self.isCletter(tail, text[length]):
Hstr=''.join([yomi,text[length]])
max_len = length+1
return (Hstr, max_len)


@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
# jisyo.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
from cPickle import load, loads
import anydbm
from zlib import decompress
import os
import calibre.utils.resources as resources
class jisyo (object):
kanwadict = None
itaijidict = None
kanadict = None
jisyo_table = {}
# this class is Borg
_shared_state = {}
def __new__(cls, *p, **k):
self = object.__new__(cls, *p, **k)
self.__dict__ = cls._shared_state
return self
def __init__(self):
if self.kanwadict is None:
dictpath = resources.get_path(os.path.join('localization','pykakasi','kanwadict2.db'))
self.kanwadict = anydbm.open(dictpath,'r')
if self.itaijidict is None:
itaijipath = resources.get_path(os.path.join('localization','pykakasi','itaijidict2.pickle'))
itaiji_pkl = open(itaijipath, 'rb')
self.itaijidict = load(itaiji_pkl)
if self.kanadict is None:
kanadictpath = resources.get_path(os.path.join('localization','pykakasi','kanadict2.pickle'))
kanadict_pkl = open(kanadictpath, 'rb')
self.kanadict = load(kanadict_pkl)
def load_jisyo(self, char):
try:#python2
key = "%04x"%ord(unicode(char))
except:#python3
key = "%04x"%ord(char)
try: #already exist?
table = self.jisyo_table[key]
except:
try:
table = self.jisyo_table[key] = loads(decompress(self.kanwadict[key])) # records are cPickle-dumped by kanwaout in setup/resources.py
except:
return None
return table


@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
# k2a.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo
class K2a (object):
kanwa = None
def __init__(self):
self.kanwa = jisyo()
def isKatakana(self, char):
return ( 0x30a0 < ord(char) and ord(char) < 0x30f7)
def convert(self, text):
Hstr = ""
max_len = -1
r = min(10, len(text)+1)
for x in xrange(r):
if text[:x] in self.kanwa.kanadict:
if max_len < x:
max_len = x
Hstr = self.kanwa.kanadict[text[:x]]
return (Hstr, max_len)


@@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
# kakasi.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original Copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
from calibre.ebooks.unihandecode.pykakasi.j2h import J2H
from calibre.ebooks.unihandecode.pykakasi.h2a import H2a
from calibre.ebooks.unihandecode.pykakasi.k2a import K2a
class kakasi(object):
j2h = None
h2a = None
k2a = None
def __init__(self):
self.j2h = J2H()
self.h2a = H2a()
self.k2a = K2a()
def do(self, text):
otext = ''
i = 0
while True:
if i >= len(text):
break
if self.j2h.isKanji(text[i]):
(t, l) = self.j2h.convert(text[i:])
if l <= 0:
otext = otext + text[i]
i = i + 1
continue
i = i + l
m = 0
tmptext = ""
while True:
if m >= len(t):
break
(s, n) = self.h2a.convert(t[m:])
if n <= 0:
break
m = m + n
tmptext = tmptext+s
if i >= len(text):
otext = otext + tmptext.capitalize()
else:
otext = otext + tmptext.capitalize() +' '
elif self.h2a.isHiragana(text[i]):
tmptext = ''
while True:
(t, l) = self.h2a.convert(text[i:])
tmptext = tmptext+t
i = i + l
if i >= len(text):
otext = otext + tmptext
break
elif not self.h2a.isHiragana(text[i]):
otext = otext + tmptext + ' '
break
elif self.k2a.isKatakana(text[i]):
tmptext = ''
while True:
(t, l) = self.k2a.convert(text[i:])
tmptext = tmptext+t
i = i + l
if i >= len(text):
otext = otext + tmptext
break
elif not self.k2a.isKatakana(text[i]):
otext = otext + tmptext + ' '
break
else:
otext = otext + text[i]
i += 1
return otext

File diff suppressed because it is too large.


@@ -0,0 +1,317 @@
;; Kana-Alphabet mapping dictionary
;;
;; To use this mapping table,
;; you should first Unicode-normalize input to NFKC form.
;;
;; basic mapping
;;
a ァ
a ア
ba バ
bba ッバ
bbe ッベ
bbi ッビ
bbo ッボ
bbu ッブ
bbya ッビャ
bbyo ッビョ
bbyu ッビュ
be ベ
bi ビ
bo ボ
bu ブ
bya ビャ
byo ビョ
byu ビュ
cha チャ
che チェ
chi チ
cho チョ
chu チュ
da ダ
dda ッダ
dde ッデ
ddo ッド
de デ
di ディ
do ド
e ェ
e エ
e ヱ
fa ファ
fe フェ
ffa ッファ
ffe ッフェ
ffi ッフィ
ffo ッフォ
ffu ッフ
fi フィ
fo フォ
fu フ
ga ガ
ge ゲ
gga ッガ
gge ッゲ
ggi ッギ
ggo ッゴ
ggu ッグ
ggya ッギャ
ggyo ッギョ
ggyu ッギュ
gi ギ
go ゴ
gu グ
gya グャ
gyo ギョ
gyu ギゥ
ha ハ
he ヘ
hha ッハ
hhe ッヘ
hhi ッヒ
hho ッホ
hhya ッヒャ
hhyo ッヒョ
hhyu ッヒュ
hi ヒ
ho ホ
hya ヒャ
hyo ヒョ
hyu ヒュ
i ィ
i イ
i ヰ
ja ジャ
ja ヂャ
ji ジ
ji ヂ
jja ッジャ
jji ッジ
jji ッヂ
jjo ッジョ
jju ッジュ
jjya ッヂャ
jjyo ッヂョ
jjyu ッヂュ
jo ジョ
jo ヂョ
ju ジュ
ju ヂュ
ka カ
ka ヵ
ke ケ
ke ヶ
ki キ
kka ッカ
kke ッケ
kki ッキ
kko ッコ
kku ック
kkya ッキャ
kkyo ッキョ
kkyu ッキュ
ko コ
ku ク
kya キァ
kyo キォ
kyu キゥ
ma マ
me メ
mi ミ
mo モ
mu ム
mya ミャ
myo ミョ
myu ミュ
n ン
n'a ンア
n'e ンエ
n'i ンイ
n'o ンオ
n'u ンウ
na ナ
ne ネ
ni ニ
no ノ
nu ヌ
nya ニャ
nyo ニョ
nyu ニュ
o ォ
o オ
pa パ
pe ペ
pi ピ
po ポ
ppa ッパ
ppe ッペ
ppi ッピ
ppo ッポ
ppu ップ
ppya ッピャ
ppyo ッピョ
ppyu ッピュ
pu プ
pya ピャ
pyo ピョ
pyu ピュ
ra ラ
re レ
ri リ
ro ロ
rra ッラ
rre ッレ
rri ッリ
rro ッロ
rru ッル
rrya ッリャ
rryo ッリョ
rryu ッリュ
ru ル
rya リャ
ryo リョ
ryu リュ
sa サ
se セ
sha シャ
shi シ
sho ショ
shu シュ
so ソ
ssa ッサ
sse ッセ
ssha ッシャ
sshi ッシ
ssho ッショ
sshu ッシュ
sso ッソ
ssu ッス
su ス
ta タ
tcha ッチャ
tchi ッチ
tcho ッチョ
tchu ッチュ
te テ
to ト
tsu ッ
tsu ツ
tta ッタ
tte ッテ
tto ット
ttsu ッツ
u ゥ
u ウ
va ヴァ
ve ヴェ
vi ヴィ
vo ヴォ
vu ヴ
vva ッヴァ
vve ッヴェ
vvi ッヴィ
vvo ッヴォ
vvu ッヴ
wa ヮ
wa ワ
wo ヲ
ya ャ
ya ヤ
yo ョ
yo ヨ
yu ュ
yu ユ
yya ッヤ
yyo ッヨ
yyu ッユ
za ザ
ze ゼ
zo ゾ
zu ズ
zu ヅ
zza ッザ
zzo ッゾ
zzu ッズ
zzu ッヅ
;;
;; extended characters
;;
;;
;; gairai terms
;;
all オール
algrism アルゴリズム
answer アンサー
base ベース
begineer ビギナー
connection コネクション
contents コンテンツ
creator クリエーター
comic コミック
comics コミックス
culture カルチャー
debug デバッグ
debugging デバッギング
design デザイン
digital デジタル
dillenma ジレンマ
directory ディレクトリ
disk ディスク
document ドキュメント
download ダウンロード
electric エレクトリック
facebook フェイスブック
firefox ファイアーフォックス
folder フォルダ
format フォーマット
forum フォーラム
fox フォックス
free フリー
gnome ノーム
gnu グヌー
gozilla ゴジラ
guide ガイド
harvard ハーバード
help ヘルプ
highlight ハイライト
japan ジャパン
journal ジャーナル
library ライブラリ
line ライン
love ラヴ
love ラブ
mail メール
main メイン
mystery ミステリ
mozilla モジラ
network ネットワーク
next ネクスト
new ニュー
news ニュース
native ネイティブ
online オンライン
open オープン
professional プロフェッショナル
profile プロファイル
programmer プログラマ
sample サンプル
series シリーズ
share シェア
social ソーシャル
society ソサエティ
software ソフトウエア
source ソース
street ストリート
system システム
tag タグ
text テキスト
thunderbird サンダーバード
training トレーニング
twitter ツイッター
unicode ユニコード
wall ウオール
wall ウォール
welcome ウェルカム
welcome ウエルカム
wikinomics ウィキノミクス
york ヨーク
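As the header of this table notes, lookups assume NFKC-normalized input; NFKC also folds half-width katakana into the full-width forms used as values here. A standalone check:

import unicodedata
print(unicodedata.normalize('NFKC', u'\uff76\uff9e') == u'\u30ac')  # halfwidth ka + voicing mark -> ガ (True)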

File diff suppressed because it is too large.


@@ -1,12 +1,17 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text. Transliterate
unicode characters to ASCII.
Decode unicode text to an ASCII representation of the text in Chinese.
Transliterate unicode characters to ASCII based on Chinese pronunciation.
Derived from John Schember's unidecode library, which was created
as part of calibre.
Copyright(c) 2009, John Schember <john@nachtimwald.com>
Based on the ruby unidecode gem (http://rubyforge.org/projects/unidecode/) which
is based on the perl module Text::Unidecode
@@ -55,29 +60,20 @@ it under the same terms as Perl itself.
'''
import re
from calibre.ebooks.unidecode.unicodepoints import CODEPOINTS
from calibre.constants import preferred_encoding
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
from calibre.ebooks.unihandecode.zhcodepoints import CODEPOINTS as HANCODES
class Unidecoder(object):
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(HANCODES)
def decode(self, text):
'''
Transliterate the string from unicode characters to ASCII.
'''
# The keys for CODEPOINTS is unicode characters, we want to be sure the
# input text is unicode.
if not isinstance(text, unicode):
try:
text = unicode(text)
except:
try:
text = text.decode(preferred_encoding)
except:
text = text.decode('utf-8', 'replace')
# Replace characters larger than 127 with their ASCII equivalent.
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),
text)
return re.sub('[^\x00-\x7f]',lambda x: self.replace_point(x.group()), text)
def replace_point(self, codepoint):
'''
@@ -87,7 +83,7 @@ class Unidecoder(object):
# Split the unicode character xABCD into parts 0xAB and 0xCD.
# 0xAB represents the group within CODEPOINTS to query and 0xCD
# represents the position in the list of characters for the group.
return CODEPOINTS[self.code_group(codepoint)][self.grouped_point(
return self.codepoints[self.code_group(codepoint)][self.grouped_point(
codepoint)]
except:
return '?'
@@ -97,12 +93,18 @@
Find what group character is a part of.
'''
# Code groups within CODEPOINTS take the form 'xAB'
return u'x%02x' % (ord(unicode(character)) >> 8)
try:#python2
return 'x%02x' % (ord(unicode(character)) >> 8)
except:
return 'x%02x' % (ord(character) >> 8)
def grouped_point(self, character):
'''
Return the location of the replacement character within the list for
the group the character is a part of.
'''
return ord(unicode(character)) & 255
try:#python2
return ord(unicode(character)) & 255
except:
return ord(character) & 255

File diff suppressed because it is too large.


@@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text in Vietnamese.
'''
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
from calibre.ebooks.unihandecode.vncodepoints import CODEPOINTS as HANCODES
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
class Vndecoder(Unidecoder):
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(HANCODES)

File diff suppressed because it is too large.


@@ -271,11 +271,6 @@ class DeleteAction(InterfaceAction):
partial(self.library_ids_deleted, current_row=row))
# Device view is visible.
else:
if not confirm('<p>'+_('The selected books will be '
'<b>permanently deleted</b> '
'from your device. Are you sure?')
+'</p>', 'device_delete_books', self.gui):
return
if self.gui.stack.currentIndex() == 1:
view = self.gui.memory_view
elif self.gui.stack.currentIndex() == 2:
@@ -283,8 +278,14 @@
else:
view = self.gui.card_b_view
paths = view.model().paths(rows)
ids = view.model().indices(rows)
if not confirm('<p>'+_('The selected books will be '
'<b>permanently deleted</b> '
'from your device. Are you sure?')
+'</p>', 'device_delete_books', self.gui):
return
job = self.gui.remove_paths(paths)
self.delete_memory[job] = (paths, view.model())
view.model().mark_for_deletion(job, rows)
view.model().mark_for_deletion(job, ids, rows_are_ids=True)
self.gui.status_bar.show_message(_('Deleting books from device.'), 1000)


@@ -6,12 +6,12 @@ meaning as possible.
import os
from math import ceil
from calibre.ebooks.unidecode.unidecoder import Unidecoder
from calibre import sanitize_file_name
from calibre.constants import preferred_encoding, iswindows
udc = Unidecoder()
from calibre.utils.localization import get_udc
def ascii_text(orig):
udc = get_udc()
try:
ascii = udc.decode(orig)
except:


@@ -169,3 +169,13 @@ def set_qt_translator(translator):
return translator.load(p)
return False
_udc = None
def get_udc():
global _udc
if _udc is None:
from calibre.ebooks.unihandecode import Unihandecoder
_udc = Unihandecoder(lang=get_lang())
return _udc
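Usage sketch for the new helper: get_udc() builds a single Unihandecoder for the current interface language on first use and caches it.

from calibre.utils.localization import get_udc
print(get_udc().decode(u'caf\xe9'))  # -> 'cafe' (default decoder; language-specific ones romanize CJK)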