KG updates

GRiker 2011-02-15 09:43:17 -07:00
commit 0bf3e3db12
92 changed files with 147810 additions and 3530 deletions


@ -193,6 +193,33 @@ License: GPL-3
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL-3 on Debian systems.
Files: src/calibre/ebooks/unihandecode/pykakasi/*
Copyright: 2011, Hiroshi Miura <miurahr@linux.com>
Copyright: 1992, Hironobu Takahashi
License: GPL-2+
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL on Debian systems.
Files: resources/kanwadict2.db
Files: resources/itaijidict2.pickle
Copyright: 2011, Hiroshi Miura <miurahr@linux.com>
Copyright: 1992 1993 1994, Hironobu Takahashi (takahasi@tiny.or.jp),
Copyright: 1992 1993 1994, Masahiko Sato (masahiko@sato.riec.tohoku.ac.jp),
Copyright: 1992 1993 1994, Yukiyoshi Kameyama, Miki Inooka, Akihiko Sasaki, Dai Ando, Junichi Okukawa,
Copyright: 1992 1993 1994, Katsushi Sato and Nobuhiro Yamagishi
License: GPL-2+
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL on Debian systems.
Files: src/calibre/ebooks/unihandecode/*
Copyright: 2010-2011, Hiroshi Miura <miurahr@linux.com>
Copyright: 2009, John Schember
Copyright: 2007, Russell Norris
Copyright: 2001, Sean M. Burke
License: GPL-3, Perl
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL-3 on Debian systems.
Files: src/encutils/__init__.py
Copyright: 2005-2008: Christof Hoeke
License: LGPL-3+, CC-BY-3.0


@ -13,12 +13,12 @@
id="Layer_1"
x="0px"
y="0px"
width="134.77701"
height="199.99901"
viewBox="0 0 134.777 199.999"
width="200"
height="200"
viewBox="0 0 199.99999 199.99999"
enable-background="new 0 0 595.28 841.89"
xml:space="preserve"
inkscape:version="0.47 r22583"
inkscape:version="0.48.0 r9654"
sodipodi:docname="news.svg"><metadata
id="metadata26"><rdf:RDF><cc:Work
rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type
@ -38,22 +38,22 @@
guidetolerance="10"
inkscape:pageopacity="0"
inkscape:pageshadow="2"
inkscape:window-width="640"
inkscape:window-height="489"
inkscape:window-width="1680"
inkscape:window-height="997"
id="namedview22"
showgrid="false"
inkscape:zoom="0.28032165"
inkscape:cx="67.389001"
inkscape:cy="99.722002"
inkscape:window-x="0"
inkscape:window-y="41"
inkscape:window-maximized="0"
inkscape:window-x="-4"
inkscape:window-y="30"
inkscape:window-maximized="1"
inkscape:current-layer="Layer_1" />
<g
id="g3"
transform="translate(-230.25101,-320.668)">
transform="translate(-194.57771,-320.66701)">
<polygon
points="360.241,366.109 345.29,359.678 345.29,343.405 329.945,343.405 324.265,329.15 309.147,335.175 297.64,323.667 286.79,334.517 272.693,328.454 266.263,343.405 249.988,343.405 249.988,358.749 235.734,364.429 241.759,379.548 230.251,391.056 241.101,401.906 235.039,416.002 249.988,422.432 249.988,438.706 265.333,438.706 271.013,452.961 277.817,450.25 277.817,475.111 252.085,475.111 297.64,520.667 343.193,475.111 317.463,475.111 317.463,451.453 322.585,453.656 329.016,438.706 345.29,438.706 345.29,423.362 359.546,417.682 353.521,402.563 365.028,391.056 354.178,380.205 "
points="286.79,334.517 272.693,328.454 266.263,343.405 249.988,343.405 249.988,358.749 235.734,364.429 241.759,379.548 230.251,391.056 241.101,401.906 235.039,416.002 249.988,422.432 249.988,438.706 265.333,438.706 271.013,452.961 277.817,450.25 277.817,475.111 252.085,475.111 297.64,520.667 343.193,475.111 317.463,475.111 317.463,451.453 322.585,453.656 329.016,438.706 345.29,438.706 345.29,423.362 359.546,417.682 353.521,402.563 365.028,391.056 354.178,380.205 360.241,366.109 345.29,359.678 345.29,343.405 329.945,343.405 324.265,329.15 309.147,335.175 297.64,323.667 "
id="polygon5"
style="fill:#ffffff" />
<linearGradient
@ -73,7 +73,7 @@
id="stop10" />
</linearGradient>
<polygon
points="360.241,363.11 345.29,356.679 345.29,340.406 329.945,340.406 324.265,326.151 309.147,332.176 297.64,320.668 286.79,331.518 272.693,325.455 266.263,340.406 249.988,340.406 249.988,355.75 235.734,361.43 241.759,376.549 230.251,388.057 241.101,398.907 235.039,413.003 249.988,419.433 249.988,435.707 265.333,435.707 271.013,449.962 277.817,447.251 277.817,472.112 252.085,472.112 297.64,517.668 343.193,472.112 317.463,472.112 317.463,448.454 322.585,450.657 329.016,435.707 345.29,435.707 345.29,420.363 359.546,414.683 353.521,399.564 365.028,388.057 354.178,377.206 "
points="286.79,331.518 272.693,325.455 266.263,340.406 249.988,340.406 249.988,355.75 235.734,361.43 241.759,376.549 230.251,388.057 241.101,398.907 235.039,413.003 249.988,419.433 249.988,435.707 265.333,435.707 271.013,449.962 277.817,447.251 277.817,472.112 252.085,472.112 297.64,517.668 343.193,472.112 317.463,472.112 317.463,448.454 322.585,450.657 329.016,435.707 345.29,435.707 345.29,420.363 359.546,414.683 353.521,399.564 365.028,388.057 354.178,377.206 360.241,363.11 345.29,356.679 345.29,340.406 329.945,340.406 324.265,326.151 309.147,332.176 297.64,320.668 "
id="polygon12"
style="fill:url(#SVGID_1_)" />
<g
@ -81,14 +81,16 @@
<path
d="m 273.311,419.168 v -56.752 h 17.935 l 9.01,17.43 c 3.115,5.641 6.399,13.22 8.926,19.873 h 0.252 c -0.842,-7.494 -1.178,-15.41 -1.178,-23.83 v -13.472 h 13.893 v 56.752 H 306.15 l -9.684,-18.861 c -3.116,-5.978 -6.82,-13.641 -9.515,-20.461 h -0.336 c 0.42,7.663 0.589,16.167 0.589,25.345 v 13.978 h -13.893 z"
id="path16"
style="fill:#993720" />
style="fill:#993720"
inkscape:connector-curvature="0" />
</g>
<g
id="g18">
<path
d="m 273.311,416.873 v -56.752 h 17.935 l 9.01,17.43 c 3.115,5.641 6.399,13.22 8.926,19.873 h 0.252 c -0.842,-7.494 -1.178,-15.41 -1.178,-23.83 v -13.472 h 13.893 v 56.752 H 306.15 l -9.684,-18.861 c -3.116,-5.978 -6.82,-13.641 -9.515,-20.461 h -0.336 c 0.42,7.663 0.589,16.167 0.589,25.345 v 13.978 h -13.893 z"
id="path20"
style="fill:#f0efef" />
style="fill:#f0efef"
inkscape:connector-curvature="0" />
</g>
</g>
</svg>

Before: 4.1 KiB | After: 4.2 KiB

@ -105,7 +105,7 @@ bool_custom_columns_are_tristate = 'yes'
# title within authors.
sort_columns_at_startup = None
#; Control how dates are displayed
#: Control how dates are displayed
# Format to be used for publication date and the timestamp (date).
# A string controlling how the publication date is displayed in the GUI
# d the day as number without a leading zero (1 to 31)
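
For context, a hedged illustration of the tweak these comment lines document (the name gui_pubdate_display_format matches calibre's tweaks.py of this period; the value is illustrative, not a recommendation):

# Show the publication date as e.g. "5 Feb 2011"; 'd' is the day
# without a leading zero, as described above.
gui_pubdate_display_format = 'd MMM yyyy'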

Binary file not shown. (Before: 5.6 KiB | After: 6.2 KiB)

Binary file not shown. (After: 834 B)


@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
adevarul.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Adevarul(BasicNewsRecipe):
title = u'Adev\u0103rul'
language = 'ro'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'Adevarul'
category = 'Ziare,Stiri,Romania'
oldest_article = 5
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
remove_javascript = True
cover_url = 'http://upload.wikimedia.org/wikipedia/en/d/d6/Logo_noul_adevarul.png'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [ dict(name='div', attrs={'class':'article_header'})
,dict(name='div', attrs={'class':'bd'})
]
remove_tags = [ dict(name='div', attrs={'class':'bb-wg-article_related_attachements'})
,dict(name='div', attrs={'class':'bb-md bb-md-article_comments'})
,dict(name='form', attrs={'id':'bb-comment-create-form'})
]
remove_tags_after = [ dict(name='form', attrs={'id':'bb-comment-create-form'}) ]
feeds = [ (u'\u0218tiri', u'http://www.adevarul.ro/rss/latest') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -0,0 +1,161 @@
# -*- coding: utf-8 -*-
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class AppleDaily(BasicNewsRecipe):
title = u'蘋果日報'
__author__ = u'蘋果日報'
__publisher__ = u'蘋果日報'
description = u'蘋果日報'
masthead_url = 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
language = 'zh_TW'
encoding = 'UTF-8'
timefmt = ' [%a, %d %b, %Y]'
needs_subscription = False
remove_javascript = True
remove_tags_before = dict(name=['ul', 'h1'])
remove_tags_after = dict(name='form')
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
dict(name=['script', 'noscript', 'style', 'form'])]
no_stylesheets = True
extra_css = '''
@font-face {font-family: "uming", serif, sans-serif; src: url(res:///usr/share/fonts/truetype/arphic/uming.ttc); }\n
body {margin-right: 8pt; font-family: 'uming', serif;}
h1 {font-family: 'uming', serif, sans-serif}
'''
#extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
preprocess_regexps = [
(re.compile(r'img.php?server=(?P<server>[^&]+)&path=(?P<path>[^&]+).*', re.DOTALL|re.IGNORECASE),
lambda match: 'http://' + match.group('server') + '/' + match.group('path')),
]
def get_cover_url(self):
return 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
#def get_browser(self):
#br = BasicNewsRecipe.get_browser()
#if self.username is not None and self.password is not None:
# br.open('http://www.nytimes.com/auth/login')
# br.select_form(name='login')
# br['USERID'] = self.username
# br['PASSWORD'] = self.password
# br.submit()
#return br
def preprocess_html(self, soup):
#process all the images
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
#print 'checking image: ' + iurl
#img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)
m = p.search(iurl)
if m is not None:
iurl = 'http://' + m.group('server') + '/' + m.group('path')
#print 'working! new url: ' + iurl
tag['src'] = iurl
#else:
#print 'not good'
for tag in soup.findAll(lambda tag: tag.name.lower()=='a' and tag.has_key('href')):
iurl = tag['href']
#print 'checking image: ' + iurl
#img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)
m = p.search(iurl)
if m is not None:
iurl = 'http://' + m.group('server') + '/' + m.group('path')
#print 'working! new url: ' + iurl
tag['href'] = iurl
#else:
#print 'not good'
return soup
def parse_index(self):
base = 'http://news.hotpot.hk/fruit'
soup = self.index_to_soup('http://news.hotpot.hk/fruit/index.php')
#def feed_title(div):
# return ''.join(div.findAll(text=True, recursive=False)).strip()
articles = {}
key = None
ans = []
for div in soup.findAll('li'):
key = div.find(text=True, recursive=True);
#if key == u'豪情':
# continue;
print 'section=' + key
articles[key] = []
ans.append(key)
a = div.find('a', href=True)
if not a:
continue
url = base + '/' + a['href']
print 'url=' + url
if not articles.has_key(key):
articles[key] = []
else:
# sub page
subSoup = self.index_to_soup(url)
for subDiv in subSoup.findAll('li'):
subA = subDiv.find('a', href=True)
subTitle = subDiv.find(text=True, recursive=True)
subUrl = base + '/' + subA['href']
print 'subUrl=' + subUrl
articles[key].append(
dict(title=subTitle,
url=subUrl,
date='',
description='',
content=''))
# elif div['class'] in ['story', 'story headline']:
# a = div.find('a', href=True)
# if not a:
# continue
# url = re.sub(r'\?.*', '', a['href'])
# url += '?pagewanted=all'
# title = self.tag_to_string(a, use_alt=True).strip()
# description = ''
# pubdate = strftime('%a, %d %b')
# summary = div.find(True, attrs={'class':'summary'})
# if summary:
# description = self.tag_to_string(summary, use_alt=False)
#
# feed = key if key is not None else 'Uncategorized'
# if not articles.has_key(feed):
# articles[feed] = []
# if not 'podcasts' in url:
# articles[feed].append(
# dict(title=title, url=url, date=pubdate,
# description=description,
# content=''))
# ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
ans = [(unicode(key), articles[key]) for key in ans if articles.has_key(key)]
return ans


@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
capital.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Capital(BasicNewsRecipe):
title = 'Capital'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
remove_javascript = True
publisher = 'Capital'
cover_url = 'http://www.mediapress.ro/imagini/sigla-capital-s16.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [ dict(name='div', attrs={'class':'single one_article'})
]
remove_tags = [ dict(name='div', attrs={'class':'single_details'})
, dict(name='div', attrs={'class':'tx-addoceansbanners-pi1'})
]
feeds = [(u'\u0218tiri', u'http://www.capital.ro/rss.html') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
catavencu.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Catavencu(BasicNewsRecipe):
title = u'Academia Ca\u0163avencu'
__author__ = u'Silviu Cotoar\u0103'
description = 'Tagma cum laude'
publisher = 'Catavencu'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare'
encoding = 'utf-8'
cover_url = 'http://upload.wikimedia.org/wikipedia/en/1/1e/Academia_Catavencu.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='ul', attrs={'class':'articles'})
]
remove_tags = [
dict(name='div', attrs={'class':['tools']})
, dict(name='div', attrs={'class':['share']})
, dict(name='div', attrs={'class':['category']})
, dict(name='div', attrs={'id':['comments']})
]
remove_tags_after = [
dict(name='div', attrs={'id':'comments'})
]
feeds = [
(u'Feeds', u'http://catavencu.ro/feed/rss')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
standaard.be
'''
@ -9,15 +9,16 @@ from calibre.web.feeds.news import BasicNewsRecipe
class DeStandaard(BasicNewsRecipe):
title = u'De Standaard'
__author__ = u'Darko Miletic'
language = 'nl_BE'
language = 'nl_BE'
description = u'News from Belgium in Dutch'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
masthead_url = 'http://www.standaard.be/extra/css/images/masthead/logo_340x45.png'
publication_type = 'newspaper'
keep_only_tags = [dict(name='div' , attrs={'id':['intro','continued']})]
feeds = [(u'De Standaard Online', u'http://feeds.feedburner.com/dso-front')]
@ -27,4 +28,4 @@ class DeStandaard(BasicNewsRecipe):
return article.get('guid', None)
def print_version(self, url):
return url.replace('/Detail.aspx?','/PrintArtikel.aspx?')
return url.replace('/artikel/detail.aspx?','/Artikel/PrintArtikel.aspx?')


@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
gandul.info
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Gandul(BasicNewsRecipe):
title = u'G\u00E2ndul'
__author__ = u'Silviu Cotoar\u0103'
publisher = 'Gandul'
description = 'Cotidian Online'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/1064063/1/logo.jpg?width=400'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'article'})
]
remove_tags = [
dict(name='a', attrs={'class':'photo'})
, dict(name='div', attrs={'class':'ad'})
]
feeds = [
(u'\u0218tiri', u'http://www.gandul.info/rss-stiri-prima-pagina.xml')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
hotnews.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Hotnews(BasicNewsRecipe):
title = 'Hotnews'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'Hotnews'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://www.hotnews.ro/images/new/logo.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='h1', attrs={'class':'title'})
,dict(name='div', attrs={'id':'articleContent'})
]
feeds = [ (u'\u0218tiri', u'http://www.hotnews.ro/rss/actualitate')
,(u'English', u'http://www.hotnews.ro/rss/english')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
jurnalul.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class JurnalulNational(BasicNewsRecipe):
title = u'Jurnalul Na\u0163ional'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'Jurnalul National'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://www.jurnalul.ro/images/sigla.png'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='h1', attrs={'class':'h3 art_title'})
,dict(name='div', attrs={'class':'only_text'})
]
feeds = [
(u'\u0218tiri', u'http://www.jurnalul.ro/rss/stiri-3028.html')
,(u'Special', u'http://www.jurnalul.ro/rss/special-3001.html')
,(u'Sport', u'http://www.jurnalul.ro/rss/sport-3035.html')
,(u'Bani Afaceri', u'http://www.jurnalul.ro/rss/bani-afaceri-3006.html')
,(u'Viata Sanatoasa', u'http://www.jurnalul.ro/rss/viata-sanatoasa-3010.html')
,(u'Stiinta Tehnica', u'http://www.jurnalul.ro/rss/stiinta-tehnica-3019.html')
,(u'Timp Liber', u'http://www.jurnalul.ro/rss/timp-liber-3022.html')
,(u'Fun', u'http://www.jurnalul.ro/rss/fun-3038.html')
,(u'Acum 20 de ani', u'http://www.jurnalul.ro/rss/acum-20-de-ani-3073.html')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -8,7 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Lifehacker(BasicNewsRecipe):
title = 'Lifehacker'
__author__ = 'NA'
__author__ = 'Kovid Goyal'
description = "Computers make us more productive. Yeah, right. Lifehacker recommends the software downloads and web sites that actually save time. Don't live to geek; geek to live."
publisher = 'lifehacker.com'
category = 'news, IT, Internet, gadgets, tips and tricks, howto, diy'
@ -32,14 +32,20 @@ class Lifehacker(BasicNewsRecipe):
, 'language' : language
}
remove_attributes = ['width','height']
keep_only_tags = [dict(attrs={'class':'content permalink'})]
remove_attributes = ['width', 'height', 'style']
remove_tags_before = dict(name='h1')
remove_tags = [dict(attrs={'class':'contactinfo'})]
remove_tags_after = dict(attrs={'class':'contactinfo'})
keep_only_tags = [dict(id='container')]
remove_tags_after = dict(attrs={'class':'post-body'})
remove_tags = [
dict(id="sharemenu"),
{'class': 'related'},
]
feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/full')]
def preprocess_html(self, soup):
return self.adeify_images(soup)
def print_version(self, url):
return url.replace('#!', '?_escaped_fragment_=')
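
The new print_version maps Gawker's hash-bang article URLs onto Google's _escaped_fragment_ (AJAX-crawling) form, which serves a crawlable static page; a sketch with an illustrative URL:

url = 'http://lifehacker.com/#!5753547/some-post'  # made-up article URL
print url.replace('#!', '?_escaped_fragment_=')
# -> http://lifehacker.com/?_escaped_fragment_=5753547/some-post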


@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
mediafax.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Mediafax(BasicNewsRecipe):
title = 'Mediafax'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'Mediafax'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/4134575/2/logo-mediafax-mass-media-news.jpg?width=400'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'news tabs-container'})
]
remove_tags = [
dict(name='ul', attrs={'class':['CategoryNews']})
,dict(name='div', attrs={'class':['read']})
]
remove_tags_after = [ dict(name='div', attrs={'class':'cmsItemViews'}) ]
feeds = [
(u'Actualitate', u'http://www.mediafax.ro/rss/')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
money.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MoneyRo(BasicNewsRecipe):
title = 'Money Ro'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'MoneyRo'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
remove_javascript = True
cover_url = 'http://assets.moneyweb.ro/images/logo_money.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [ dict(name='div', attrs={'id':'titluArticol'})
, dict(name='img', attrs={'id':'objImage'})
, dict(name='div', attrs={'class':'leftColumnArticle'})
]
remove_tags_after = [ dict(name='div', attrs={'id':'articleTags'}) ]
remove_tags = [ dict(name='div', attrs={'id':'ads'})
, dict(name='div', attrs={'id':'aus'})
, dict(name='div', attrs={'id':'bb-comment-create-form'})
, dict(name='div', attrs={'id':'articleTags'})
, dict(name='div', attrs={'class':'breadcrumb'})
]
feeds = [(u'\u0218tiri', u'http://moneyro.feedsportal.com/c/32533/fe.ed/rss.money.ro/stiri.xml') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -668,7 +668,7 @@ class NYTimes(BasicNewsRecipe):
try:
#remove "Related content" bar
runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ']})
runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ','articleInline runaroundLeft lastArticleInline']})
if runAroundsFound:
for runAround in runAroundsFound:
#find all section headers


@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
prosport.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Prosport(BasicNewsRecipe):
title = 'Prosport'
__author__ = u'Silviu Cotoar\u0103'
publisher = 'Prosport'
description = u'\u0218tiri Sportive din Rom\u00e2nia'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania,Sport'
encoding = 'utf-8'
cover_url = 'http://storage0.dms.mpinteractiv.ro/media/401/581/7946/3688311/1/logo-pro.jpg?width=610'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='h1', attrs={'class':'a-title'})
,dict(name='div', attrs={'class':'a-entry'})
]
remove_tags = [ dict(name='div', attrs={'class':'utils'})
,dict(name='div', attrs={'class':'g-slide'})
]
feeds = [ (u'\u0218tiri', u'http://www.prosport.ro/rss.xml')]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
realitatea.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Realitatea(BasicNewsRecipe):
title = 'Realitatea'
__author__ = u'Silviu Cotoar\u0103'
publisher = 'Realitatea'
description = u'\u0218tiri din Rom\u00e2nia'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://assets.realitatea.ro/images/logo.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'articleTitle '})
,dict(name='div', attrs={'class':'articleBody'})
]
remove_tags = [ dict(name='div', attrs={'id':'aus'}) ]
feeds = [ (u'\u0218tiri', u'http://realitatea.feedsportal.com/c/32533/fe.ed/rss.realitatea.net/stiri.xml') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
smh.com.au
'''
@ -22,7 +22,11 @@ class Smh_au(BasicNewsRecipe):
remove_empty_feeds = True
masthead_url = 'http://images.smh.com.au/2010/02/02/1087188/smh-620.jpg'
publication_type = 'newspaper'
extra_css = ' h1{font-family: Georgia,"Times New Roman",Times,serif } body{font-family: Arial,Helvetica,sans-serif} .cT-imageLandscape{font-size: x-small} '
extra_css = """
h1{font-family: Georgia,"Times New Roman",Times,serif }
body{font-family: Arial,Helvetica,sans-serif}
.cT-imageLandscape,.cT-imagePortrait{font-size: x-small}
"""
conversion_options = {
'comment' : description
@ -38,7 +42,11 @@ class Smh_au(BasicNewsRecipe):
]
remove_tags_after = [dict(name='div',attrs={'class':'articleBody'})]
keep_only_tags = [dict(name='div',attrs={'id':'content'})]
remove_attributes = ['width','height']
remove_tags = [
dict(attrs={'class':'hidden'}),
dict(name=['link','meta','base','embed','object','iframe'])
]
remove_attributes = ['width','height','lang']
def parse_index(self):
articles = []
@ -66,3 +74,14 @@ class Smh_au(BasicNewsRecipe):
,'description':description
})
return [(self.tag_to_string(soup.find('title')), articles)]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('body'):
item.name = 'div'
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup


@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
standard.money.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class StandardMoneyRo(BasicNewsRecipe):
title = 'Standard Money Ro'
__author__ = u'Silviu Cotoar\u0103'
publisher = 'Standard Money'
description = 'Portal de Business'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://assets.standard.ro/wp-content/themes/standard/images/standard-logo.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='h1', attrs={'class':'post-title'})
, dict(name='div', attrs={'class':'content_post'})
]
feeds = [
(u'Actualitate', u'http://standard.money.ro/feed')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -0,0 +1,26 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class WorkersWorld(BasicNewsRecipe):
title = u'Workers World'
description = u'Socialist news and analysis'
__author__ = u'urslnx'
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
oldest_article = 7
max_articles_per_feed = 100
encoding = 'utf8'
publisher = 'workers.org'
category = 'news, politics, USA, world'
language = 'en'
publication_type = 'newsportal'
extra_css = ' body{ font-family: Verdana,Arial,Helvetica,sans-serif; } h1{ font-size: x-large; text-align: left; margin-top:0.5em; margin-bottom:0.25em; } h2{ font-size: large; } p{ text-align: left; } .published{ font-size: small; } .byline{ font-size: small; } .copyright{ font-size: small; } '
remove_tags_before = dict(name='div', attrs={'id':'evernote'})
remove_tags_after = dict(name='div', attrs={'id':'footer'})
masthead_url='http://www.workers.org/graphics/wwlogo300.gif'
cover_url = 'http://www.workers.org/pdf/current.jpg'
feeds = [(u'Headlines', u'http://www.workers.org/rss/nonstandard_rss.xml'),
]


@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
zf.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ZiarulFinanciar(BasicNewsRecipe):
title = 'Ziarul Financiar'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Business'
publisher = 'Ziarul Financiar'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/7462721/1/ziarul-financiar-big.jpg?width=400'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'article'})
]
feeds = [
(u'\u0218tiri', u'http://www.zf.ro/rss/zf-24/')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -6,9 +6,10 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, cPickle
import os, cPickle, re, anydbm, shutil
from zlib import compress
from setup import Command, basenames
from setup import Command, basenames, __appname__
def get_opts_from_parser(parser):
def do_opt(opt):
@ -26,6 +27,9 @@ class Resources(Command):
description = 'Compile various needed calibre resources'
KAKASI_PATH = os.path.join(Command.SRC, __appname__,
'ebooks', 'unihandecode', 'pykakasi')
def run(self, opts):
scripts = {}
for x in ('console', 'gui'):
@ -101,11 +105,107 @@ class Resources(Command):
import json
json.dump(function_dict, open(dest, 'wb'), indent=4)
self.run_kakasi(opts)
def run_kakasi(self, opts):
self.records = {}
src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanwadict2.db')
base = os.path.dirname(dest)
if not os.path.exists(base):
os.makedirs(base)
if self.newer(dest, src):
self.info('\tGenerating Kanwadict')
for line in open(src, "r"):
self.parsekdict(line)
self.kanwaout(dest)
src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','itaijidict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating Itaijidict')
self.mkitaiji(src, dest)
src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanadict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating kanadict')
self.mkkanadict(src, dest)
return
def mkitaiji(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
dic[pair[0]] = pair[1]
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def mkkanadict(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
(alpha, kana) = line.split(' ')
dic[kana] = alpha
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def parsekdict(self, line):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
return
(yomi, kanji) = line.split(' ')
if ord(yomi[-1:]) <= ord('z'):
tail = yomi[-1:]
yomi = yomi[:-1]
else:
tail = ''
self.updaterec(kanji, yomi, tail)
def updaterec(self, kanji, yomi, tail):
key = "%04x"%ord(kanji[0])
if key in self.records:
if kanji in self.records[key]:
rec = self.records[key][kanji]
rec.append((yomi,tail))
self.records[key].update( {kanji: rec} )
else:
self.records[key][kanji]=[(yomi, tail)]
else:
self.records[key] = {}
self.records[key][kanji]=[(yomi, tail)]
def kanwaout(self, out):
dic = anydbm.open(out, 'c')
for (k, v) in self.records.iteritems():
dic[k] = compress(cPickle.dumps(v, -1))
dic.close()
def clean(self):
for x in ('scripts', 'recipes', 'ebook-convert-complete'):
x = self.j(self.RESOURCES, x+'.pickle')
if os.path.exists(x):
os.remove(x)
kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
if os.path.exists(kakasi):
shutil.rmtree(kakasi)
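
To make the on-disk format concrete, a minimal sketch of reading one record back out of the generated database, mirroring kanwaout() above (the path and the sample kanji are assumptions; the lookup only succeeds if the dictionary has an entry for that character):

import anydbm, cPickle
from zlib import decompress

db = anydbm.open('resources/localization/pykakasi/kanwadict2.db', 'r')
key = '%04x' % ord(u'\u4e9c')  # keys are the first kanji's codepoint in hex
print cPickle.loads(decompress(db[key]))  # -> {kanji: [(yomi, tail), ...]}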


@ -90,6 +90,11 @@ class Plugin(object): # {{{
an optional method validate() that takes no arguments and is called
immediately after the user clicks OK. Changes are applied if and only
if the method returns True.
If for some reason you cannot perform the configuration at this time,
return a tuple of two strings (message, details); these will be
displayed as a warning dialog to the user and the process will be
aborted.
'''
raise NotImplementedError()
@ -133,6 +138,12 @@ class Plugin(object): # {{{
except NotImplementedError:
config_widget = None
if isinstance(config_widget, tuple):
from calibre.gui2 import warning_dialog
warning_dialog(parent, _('Cannot configure'), config_widget[0],
det_msg=config_widget[1], show=True)
return False
if config_widget is not None:
v.addWidget(config_widget)
v.addWidget(button_box)
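
A minimal sketch (an entirely hypothetical plugin, not one from this commit) of the new contract: returning a (message, details) tuple from config_widget() aborts configuration with the warning dialog shown above:

from calibre.customize import Plugin

class ExamplePlugin(Plugin):  # hypothetical
    name = 'Example Plugin'
    supported_platforms = ['windows', 'osx', 'linux']

    def config_widget(self):
        device_ready = False  # stand-in for a real readiness check
        if not device_ready:
            return ('Cannot configure right now',
                    'Connect the device first, then try again.')
        return None  # real code would build and return a QWidget here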


@ -2,11 +2,13 @@ import os.path
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import textwrap, os, glob, functools
import textwrap, os, glob, functools, re
from calibre import guess_type
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
from calibre.ebooks.oeb.base import OEB_IMAGES
# To archive plugins {{{
class HTML2ZIP(FileTypePlugin):
@ -82,6 +84,66 @@ class PML2PMLZ(FileTypePlugin):
return of.name
class TXT2TXTZ(FileTypePlugin):
name = 'TXT to TXTZ'
author = 'John Schember'
description = _('Create a TXTZ archive when a TXT file is imported '
'containing Markdown or Textile references to images. The referenced '
'images as well as the TXT file are added to the archive.')
version = numeric_version
file_types = set(['txt'])
supported_platforms = ['windows', 'osx', 'linux']
on_import = True
def _get_image_references(self, txt, base_dir):
images = []
# Textile
for m in re.finditer(ur'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))', txt):
path = m.group('path')
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
images.append(path)
# Markdown inline
for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)', txt):
path = m.group('path')
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
images.append(path)
# Markdown reference
refs = {}
for m in re.finditer(ur'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$', txt):
if m.group('id') and m.group('path'):
refs[m.group('id')] = m.group('path')
for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]', txt):
path = refs.get(m.group('id'), None)
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
images.append(path)
# Remove duplicates
return list(set(images))
def run(self, path_to_ebook):
with open(path_to_ebook, 'rb') as ebf:
txt = ebf.read()
base_dir = os.path.dirname(path_to_ebook)
images = self._get_image_references(txt, base_dir)
if images:
# Create TXTZ and put file plus images inside of it.
import zipfile
of = self.temporary_file('_plugin_txt2txtz.txtz')
txtz = zipfile.ZipFile(of.name, 'w')
txtz.write(path_to_ebook, os.path.basename(path_to_ebook), zipfile.ZIP_DEFLATED)
for image in images:
txtz.write(os.path.join(base_dir, image), image)
txtz.close()
return of.name
else:
# No images so just import the TXT file.
return path_to_ebook
# }}}
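
To make the three regexes in TXT2TXTZ above concrete, a sketch of input they are meant to match (a path survives only if it is relative, resolves to an existing file under base_dir, and has an image MIME type):

txt = (
    'Textile: !images/photo.jpg!\n'
    'Markdown inline: ![cover](images/cover.jpg)\n'
    'Markdown reference: ![logo][1]\n'
    '   [1]: images/logo.png\n'
)
# plugin._get_image_references(txt, base_dir) would return these three
# paths in some order (duplicates are removed via a set).
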
# Metadata reader plugins {{{
@ -511,14 +573,14 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
LibraryThingCovers, DoubanCovers
AmazonCovers, DoubanCovers
from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers,
Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers,
NiceBooksCovers]
plugins += [
ComicInput,


@ -19,7 +19,7 @@ class ANDROID(USBMS):
VENDOR_ID = {
# HTC
0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226],
0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226, 0x222],
0x0c01 : [0x100, 0x0227, 0x0226],
0x0ff9 : [0x0100, 0x0227, 0x0226],
0x0c87 : [0x0100, 0x0227, 0x0226],


@ -40,6 +40,7 @@ if iswindows:
class DriverBase(DeviceConfig, DevicePlugin):
# Needed for config_widget to work
FORMATS = ['epub', 'pdf']
USER_CAN_ADD_NEW_FORMATS = False
SUPPORTS_SUB_DIRS = True # To enable second checkbox in customize widget
@classmethod


@ -32,6 +32,7 @@ class BAMBOOK(DeviceConfig, DevicePlugin):
ip = None
FORMATS = [ "snb" ]
USER_CAN_ADD_NEW_FORMATS = False
VENDOR_ID = 0x230b
PRODUCT_ID = 0x0001
BCD = None
@ -421,7 +422,7 @@ class BAMBOOK(DeviceConfig, DevicePlugin):
from calibre.gui2.device_drivers.configwidget import ConfigWidget
cw = ConfigWidget(cls.settings(), cls.FORMATS, cls.SUPPORTS_SUB_DIRS,
cls.MUST_READ_METADATA, cls.SUPPORTS_USE_AUTHOR_SORT,
cls.EXTRA_CUSTOMIZATION_MESSAGE)
cls.EXTRA_CUSTOMIZATION_MESSAGE, cls)
# Turn off the Save template
cw.opt_save_template.setVisible(False)
cw.label.setVisible(False)


@ -93,11 +93,11 @@ class MIBUK(USBMS):
VENDOR_ID = [0x0525]
PRODUCT_ID = [0xa4a5]
BCD = [0x314]
BCD = [0x314, 0x319]
SUPPORTS_SUB_DIRS = True
VENDOR_NAME = 'LINUX'
WINDOWS_MAIN_MEM = 'WOLDERMIBUK'
VENDOR_NAME = ['LINUX', 'FILE_BAC']
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['WOLDERMIBUK', 'KED_STORAGE_GADG']
class JETBOOK_MINI(USBMS):


@ -11,22 +11,42 @@ Generates and writes an APNX page mapping file.
import struct
import uuid
from calibre.ebooks.mobi.reader import MobiReader
from calibre.ebooks.pdb.header import PdbHeaderReader
from calibre.utils.logging import default_log
class APNXBuilder(object):
'''
Currently uses the Adobe 1024 byte count equal one page formula.
Create an APNX file using a pseudo page mapping.
'''
def write_apnx(self, mobi_file_path, apnx_path):
def write_apnx(self, mobi_file_path, apnx_path, accurate=True):
# Check that this is really a MOBI file.
with open(mobi_file_path, 'rb') as mf:
phead = PdbHeaderReader(mf)
r0 = phead.section_data(0)
text_length = struct.unpack('>I', r0[4:8])[0]
ident = PdbHeaderReader(mf).identity()
if ident != 'BOOKMOBI':
raise Exception(_('Not a valid MOBI file. Reports identity of %s') % ident)
pages = self.get_pages(text_length)
# Get the pages depending on the chosen parser
pages = []
if accurate:
try:
pages = self.get_pages_accurate(mobi_file_path)
except:
# Fall back to the fast parser if we can't
# use the accurate one. Typically this is
# due to the file having DRM.
pages = self.get_pages_fast(mobi_file_path)
else:
pages = self.get_pages_fast(mobi_file_path)
if not pages:
raise Exception(_('Could not generate page mapping.'))
# Generate the APNX file from the page mapping.
apnx = self.generate_apnx(pages)
# Write the APNX.
with open(apnx_path, 'wb') as apnxf:
apnxf.write(apnx)
@ -51,18 +71,126 @@ class APNXBuilder(object):
apnx += struct.pack('>H', 32)
apnx += page_header
# write page values to apnx
# Write page values to APNX.
for page in pages:
apnx += struct.pack('>L', page)
apnx += struct.pack('>I', page)
return apnx
def get_pages(self, text_length):
def get_pages_fast(self, mobi_file_path):
'''
2300 characters of uncompressed text per page. This is
not meant to map 1 to 1 to a print book but to be a
close enough measure.
A test book was chosen and the characters were counted
on one page. This number was rounded to 2240, then 60
characters of markup were added to the total giving
2300.
Uncompressed text length is used because it's easily
accessible in MOBI files (part of the header). It's also
faster to work from the length than to decompress and
parse the actual text.
'''
text_length = 0
pages = []
count = 0
with open(mobi_file_path, 'rb') as mf:
phead = PdbHeaderReader(mf)
r0 = phead.section_data(0)
text_length = struct.unpack('>I', r0[4:8])[0]
while count < text_length:
pages.append(count)
count += 1024
count += 2300
return pages
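
A quick sanity check of the arithmetic above:

# 690,000 characters of uncompressed text at 2300 characters per
# pseudo page gives markers at offsets 0, 2300, 4600, ...
assert len(range(0, 690000, 2300)) == 300
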
def get_pages_accurate(self, mobi_file_path):
'''
A more accurate but much more resource intensive and slower
method to calculate the page length.
Parses the uncompressed text. In an average paperback book
there are 32 lines per page and a maximum of 70 characters
per line.
Each paragraph starts a new line and every 70 characters
(minus markup) in a paragraph starts a new line. The
position after every 32 lines will be marked as a new
page.
This can be made more accurate by accounting for
<div class="mbp_pagebreak" /> as a new page marker
and <br> elements as empty lines.
'''
pages = []
# Get the MOBI html.
mr = MobiReader(mobi_file_path, default_log)
if mr.book_header.encryption_type != 0:
# DRMed book
return self.get_pages_fast(mobi_file_path)
mr.extract_text()
# States
in_tag = False
in_p = False
check_p = False
closing = False
p_char_count = 0
# Get positions of every line
# A line is either a paragraph starting
# or every 70 characters in a paragraph.
lines = []
pos = -1
# We want this to be as fast as possible so we
# are going to do one pass across the text. re
# and string functions will parse the text each
# time they are called.
#
# We can use .lower() here because we are
# not modifying the text; for our purposes the case
# doesn't matter, just the absolute character and
# its position within the stream.
for c in mr.mobi_html.lower():
pos += 1
# Check if we are starting or stopping a p tag.
if check_p:
if c == '/':
closing = True
continue
elif c == 'p':
if closing:
in_p = False
else:
in_p = True
lines.append(pos - 2)
check_p = False
closing = False
continue
if c == '<':
in_tag = True
check_p = True
continue
elif c == '>':
in_tag = False
check_p = False
continue
if in_p and not in_tag:
p_char_count += 1
if p_char_count == 70:
lines.append(pos)
p_char_count = 0
# Every 32 lines is a new page
for i in xrange(0, len(lines), 32):
pages.append(lines[i])
return pages
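
A usage sketch of the new signature (the file paths are placeholders, and the import location is assumed from where the Kindle driver uses the builder):

from calibre.devices.kindle.apnx import APNXBuilder

builder = APNXBuilder()
# accurate=True parses the uncompressed text (slower); DRMed books
# silently fall back to the fast 2300-characters-per-page estimate.
builder.write_apnx('/tmp/book.mobi', '/tmp/book.mobi.apnx', accurate=True)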


@ -176,6 +176,28 @@ class KINDLE2(KINDLE):
PRODUCT_ID = [0x0002, 0x0004]
BCD = [0x0100]
EXTRA_CUSTOMIZATION_MESSAGE = [
_('Send page number information when sending books') +
':::' +
_('The Kindle 3 and newer versions can use page number information '
'in MOBI files. With this option, calibre will calculate and send'
' this information to the Kindle when uploading MOBI files by'
' USB. Note that the page numbers do not correspond to any paper'
' book.'),
_('Use slower but more accurate page number generation') +
':::' +
_('There are two ways to generate the page number information. Using the more accurate '
'generator will produce pages that correspond better to a printed book. '
'However, this method is slower and will slow down sending files '
'to the Kindle.'),
]
EXTRA_CUSTOMIZATION_DEFAULT = [
True,
False,
]
OPT_APNX = 0
OPT_APNX_ACCURATE = 1
def books(self, oncard=None, end_session=True):
bl = USBMS.books(self, oncard=oncard, end_session=end_session)
# Read collections information
@ -212,13 +234,17 @@ class KINDLE2(KINDLE):
'''
Hijacking this function to write the apnx file.
'''
if not filepath.lower().endswith('.mobi'):
opts = self.settings()
if not opts.extra_customization[self.OPT_APNX]:
return
if os.path.splitext(filepath.lower())[1] not in ('.azw', '.mobi', '.prc'):
return
apnx_path = '%s.apnx' % os.path.join(path, filename)
apnx_builder = APNXBuilder()
try:
apnx_builder.write_apnx(filepath, apnx_path)
apnx_builder.write_apnx(filepath, apnx_path, accurate=opts.extra_customization[self.OPT_APNX_ACCURATE])
except:
print 'Failed to generate APNX'
import traceback


@ -98,7 +98,6 @@ class KOBO(USBMS):
def update_booklist(prefix, path, title, authors, mime, date, ContentType, ImageID, readstatus, MimeType):
changed = False
# if path_to_ext(path) in self.FORMATS:
try:
lpath = path.partition(self.normalize_path(prefix))[2]
if lpath.startswith(os.sep):
@ -220,7 +219,7 @@ class KOBO(USBMS):
# 2) volume_shorcover
# 2) content
debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType)
debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType)
connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
cursor = connection.cursor()
t = (ContentID,)
@ -532,7 +531,7 @@ class KOBO(USBMS):
if result is None:
datelastread = '1970-01-01T00:00:00'
else:
datelastread = result[0] if result[0] is not None else '1970-01-01T00:00:00'
datelastread = result[0] if result[0] is not None else '1970-01-01T00:00:00'
t = (datelastread,ContentID,)


@ -232,16 +232,37 @@ class Device(DeviceConfig, DevicePlugin):
time.sleep(5)
drives = {}
seen = set()
prod_pat = re.compile(r'PROD_(.+?)&')
dup_prod_id = False
def check_for_dups(pnp_id):
try:
match = prod_pat.search(pnp_id)
if match is not None:
prodid = match.group(1)
if prodid in seen:
return True
else:
seen.add(prodid)
except:
pass
return False
for drive, pnp_id in win_pnp_drives().items():
if self.windows_match_device(pnp_id, 'WINDOWS_CARD_A_MEM') and \
not drives.get('carda', False):
drives['carda'] = drive
dup_prod_id |= check_for_dups(pnp_id)
elif self.windows_match_device(pnp_id, 'WINDOWS_CARD_B_MEM') and \
not drives.get('cardb', False):
drives['cardb'] = drive
dup_prod_id |= check_for_dups(pnp_id)
elif self.windows_match_device(pnp_id, 'WINDOWS_MAIN_MEM') and \
not drives.get('main', False):
drives['main'] = drive
dup_prod_id |= check_for_dups(pnp_id)
if 'main' in drives.keys() and 'carda' in drives.keys() and \
'cardb' in drives.keys():
@ -263,7 +284,8 @@ class Device(DeviceConfig, DevicePlugin):
# Sort drives by their PNP drive numbers if the CARD and MAIN
# MEM strings are identical
if self.WINDOWS_MAIN_MEM in (self.WINDOWS_CARD_A_MEM,
if dup_prod_id or \
self.WINDOWS_MAIN_MEM in (self.WINDOWS_CARD_A_MEM,
self.WINDOWS_CARD_B_MEM) or \
self.WINDOWS_CARD_A_MEM == self.WINDOWS_CARD_B_MEM:
letters = sorted(drives.values(), cmp=drivecmp)
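
An illustration of the duplicate-product check with a made-up Windows PnP id:

import re
prod_pat = re.compile(r'PROD_(.+?)&')
pnp_id = 'USBSTOR\\DISK&VEN_ACME&PROD_EREADER&REV_0100\\12345&0'
print prod_pat.search(pnp_id).group(1)  # -> 'EREADER'
# Seeing the same product id on two drives sets dup_prod_id, which forces
# the PNP-drive-number sort above even when the MEM strings differ.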


@ -34,6 +34,10 @@ class DeviceConfig(object):
#: If None the default is used
SAVE_TEMPLATE = None
#: If True the user can add new formats to the driver
USER_CAN_ADD_NEW_FORMATS = True
@classmethod
def _default_save_template(cls):
from calibre.library.save_to_disk import config
@ -73,7 +77,7 @@ class DeviceConfig(object):
from calibre.gui2.device_drivers.configwidget import ConfigWidget
cw = ConfigWidget(cls.settings(), cls.FORMATS, cls.SUPPORTS_SUB_DIRS,
cls.MUST_READ_METADATA, cls.SUPPORTS_USE_AUTHOR_SORT,
cls.EXTRA_CUSTOMIZATION_MESSAGE)
cls.EXTRA_CUSTOMIZATION_MESSAGE, cls)
return cw
@classmethod


@ -93,9 +93,11 @@ class USBMS(CLI, Device):
for idx,b in enumerate(bl):
bl_cache[b.lpath] = idx
all_formats = set(self.settings().format_map) | set(self.FORMATS)
def update_booklist(filename, path, prefix):
changed = False
if path_to_ext(filename) in self.FORMATS:
if path_to_ext(filename) in all_formats:
try:
lpath = os.path.join(path, filename).partition(self.normalize_path(prefix))[2]
if lpath.startswith(os.sep):


@ -152,8 +152,17 @@ def check_ebook_format(stream, current_guess):
stream.seek(0)
return ans
def normalize(x):
if isinstance(x, unicode):
import unicodedata
x = unicodedata.normalize('NFKC', x)
return x
def calibre_cover(title, author_string, series_string=None,
output_format='jpg', title_size=46, author_size=36):
title = normalize(title)
author_string = normalize(author_string)
series_string = normalize(series_string)
from calibre.utils.magick.draw import create_cover_page, TextLine
lines = [TextLine(title, title_size), TextLine(author_string, author_size)]
if series_string:
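
What the NFKC normalization buys for cover text, in a two-line sketch (standard library behaviour):

import unicodedata
print unicodedata.normalize('NFKC', u'\ufb01nd')  # ligature fi -> u'find'
print unicodedata.normalize('NFKC', u'\u2460')    # circled one -> u'1'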


@ -402,8 +402,8 @@ OptionRecommendation(name='asciiize',
'with "Mikhail Gorbachiov". Also, note that in '
'cases where there are multiple representations of a character '
'(characters shared by Chinese and Japanese for instance) the '
'representation used by the largest number of people will be '
'used (Chinese in the previous example).')%\
'representation based on the current calibre interface language will be '
'used.')%\
u'\u041c\u0438\u0445\u0430\u0438\u043b '
u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432'
)


@ -543,9 +543,9 @@ class HTMLPreProcessor(object):
html = XMLDECL_RE.sub('', html)
if getattr(self.extra_opts, 'asciiize', False):
from calibre.ebooks.unidecode.unidecoder import Unidecoder
unidecoder = Unidecoder()
html = unidecoder.decode(html)
from calibre.utils.localization import get_udc
unihandecoder = get_udc()
html = unihandecoder.decode(html)
if getattr(self.extra_opts, 'enable_heuristics', False):
from calibre.ebooks.conversion.utils import HeuristicProcessor
@ -557,10 +557,10 @@ class HTMLPreProcessor(object):
unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
if unsupported_unicode_chars:
from calibre.ebooks.unidecode.unidecoder import Unidecoder
unidecoder = Unidecoder()
from calibre.utils.localization import get_udc
unihandecoder = get_udc()
for char in unsupported_unicode_chars:
asciichar = unidecoder.decode(char)
asciichar = unihandecoder.decode(char)
html = html.replace(char, asciichar)
return html
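
A sketch of the replacement in action; get_udc() returns a transliterator keyed to the current interface language, so exact output can vary with locale:

from calibre.utils.localization import get_udc
print get_udc().decode(u'\u041c\u0438\u0445\u0430\u0438\u043b')
# -> roughly 'Mikhail', per the option text above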


@ -156,17 +156,17 @@ class HeuristicProcessor(object):
]
ITALICIZE_STYLE_PATS = [
r'(?msu)(?<=[\s>])_(?P<words>[^_]+)?_',
r'(?msu)(?<=[\s>])/(?P<words>[^/]+)?/',
r'(?msu)(?<=[\s>])~~(?P<words>[^~]+)?~~',
r'(?msu)(?<=[\s>])\*(?P<words>[^\*]+)?\*',
r'(?msu)(?<=[\s>])~(?P<words>[^~]+)?~',
r'(?msu)(?<=[\s>])_/(?P<words>[^/_]+)?/_',
r'(?msu)(?<=[\s>])_\*(?P<words>[^\*_]+)?\*_',
r'(?msu)(?<=[\s>])\*/(?P<words>[^/\*]+)?/\*',
r'(?msu)(?<=[\s>])_\*/(?P<words>[^\*_]+)?/\*_',
r'(?msu)(?<=[\s>])/:(?P<words>[^:/]+)?:/',
r'(?msu)(?<=[\s>])\|:(?P<words>[^:\|]+)?:\|',
r'(?msu)(?<=[\s>])_(?P<words>[^_]+)_',
r'(?msu)(?<=[\s>])/(?P<words>[^/]+)/',
r'(?msu)(?<=[\s>])~~(?P<words>[^~]+)~~',
r'(?msu)(?<=[\s>])\*(?P<words>[^\*]+)\*',
r'(?msu)(?<=[\s>])~(?P<words>[^~]+)~',
r'(?msu)(?<=[\s>])_/(?P<words>[^/_]+)/_',
r'(?msu)(?<=[\s>])_\*(?P<words>[^\*_]+)\*_',
r'(?msu)(?<=[\s>])\*/(?P<words>[^/\*]+)/\*',
r'(?msu)(?<=[\s>])_\*/(?P<words>[^\*_]+)/\*_',
r'(?msu)(?<=[\s>])/:(?P<words>[^:/]+):/',
r'(?msu)(?<=[\s>])\|:(?P<words>[^:\|]+):\|',
]
for word in ITALICIZE_WORDS:
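
Why the trailing '?' was dropped from every (?P<words>...) group, sketched with the underscore pattern: the optional group let an empty pair like '__' match with words=None (which the italicizing substitution then rendered as literal text), while the new form requires at least one character between the markers:

import re
pat = re.compile(r'(?msu)(?<=[\s>])_(?P<words>[^_]+)_')
print pat.sub(lambda m: '<i>%s</i>' % m.group('words'), u'a _word_ here')
# -> a <i>word</i> here   (a bare ' __ ' is now left alone)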


@ -271,6 +271,8 @@ def check_isbn13(isbn):
return None
def check_isbn(isbn):
if not isbn:
return None
isbn = re.sub(r'[^0-9X]', '', isbn.upper())
if len(isbn) == 10:
return check_isbn10(isbn)
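
With the new guard, callers can hand check_isbn empty input safely; a sketch (assuming, as elsewhere in calibre, that check_isbn10 returns the cleaned ISBN on a valid checksum):

print check_isbn(None)             # -> None (previously a TypeError)
print check_isbn('0-7432-7356-7')  # hyphens stripped -> '0743273567'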


@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
Fetch metadata using Amazon AWS
'''
import sys, re
from threading import RLock
from lxml import html
from lxml.html import soupparser
@ -17,6 +18,10 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
from calibre.library.comments import sanitize_comments_html
asin_cache = {}
cover_url_cache = {}
cache_lock = RLock()
def find_asin(br, isbn):
q = 'http://www.amazon.com/s?field-keywords='+isbn
raw = br.open_novisit(q).read()
@ -29,6 +34,12 @@ def find_asin(br, isbn):
return revs[0]
def to_asin(br, isbn):
with cache_lock:
ans = asin_cache.get(isbn, None)
if ans:
return ans
if ans is False:
return None
if len(isbn) == 13:
try:
asin = find_asin(br, isbn)
@ -38,8 +49,11 @@ def to_asin(br, isbn):
asin = None
else:
asin = isbn
with cache_lock:
asin_cache[isbn] = asin if asin else False
return asin
def get_social_metadata(title, authors, publisher, isbn):
mi = Metadata(title, authors)
if not isbn:
@ -58,6 +72,68 @@ def get_social_metadata(title, authors, publisher, isbn):
return mi
return mi
def get_cover_url(isbn, br):
isbn = check_isbn(isbn)
if not isbn:
return None
with cache_lock:
ans = cover_url_cache.get(isbn, None)
if ans:
return ans
if ans is False:
return None
asin = to_asin(br, isbn)
if asin:
ans = _get_cover_url(br, asin)
if ans:
with cache_lock:
cover_url_cache[isbn] = ans
return ans
from calibre.ebooks.metadata.xisbn import xisbn
for i in xisbn.get_associated_isbns(isbn):
asin = to_asin(br, i)
if asin:
ans = _get_cover_url(br, asin)
if ans:
with cache_lock:
cover_url_cache[isbn] = ans
cover_url_cache[i] = ans
return ans
with cache_lock:
cover_url_cache[isbn] = False
return None
def _get_cover_url(br, asin):
q = 'http://amzn.com/'+asin
try:
raw = br.open_novisit(q).read()
except Exception, e:
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return None
raise
if '<title>404 - ' in raw:
return None
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
root = soupparser.fromstring(raw)
except:
return False
imgs = root.xpath('//img[@id="prodImage" and @src]')
if imgs:
src = imgs[0].get('src')
parts = src.split('/')
if len(parts) > 3:
bn = parts[-1]
sparts = bn.split('_')
if len(sparts) > 2:
bn = sparts[0] + sparts[-1]
return ('/'.join(parts[:-1]))+'/'+bn
return None
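
An illustration of the src rewrite above with a made-up Amazon image URL:

src = 'http://ecx.images-amazon.com/images/I/51abcDEF._SL500_AA300_.jpg'
parts = src.split('/')
sparts = parts[-1].split('_')  # ['51abcDEF.', 'SL500', 'AA300', '.jpg']
print '/'.join(parts[:-1]) + '/' + sparts[0] + sparts[-1]
# -> .../images/I/51abcDEF..jpg, the unscaled full-size image
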
def get_metadata(br, asin, mi):
q = 'http://amzn.com/'+asin
try:
@ -111,18 +187,25 @@ def get_metadata(br, asin, mi):
def main(args=sys.argv):
# Test xisbn
print get_social_metadata('Learning Python', None, None, '8324616489')
print
import tempfile, os
tdir = tempfile.gettempdir()
br = browser()
for title, isbn in [
('Learning Python', '8324616489'), # Test xisbn
('Angels & Demons', '9781416580829'), # Test sophisticated comment formatting
# Random tests
('Star Trek: Destiny: Mere Mortals', '9781416551720'),
('The Great Gatsby', '0743273567'),
]:
cpath = os.path.join(tdir, title+'.jpg')
curl = get_cover_url(isbn, br)
if curl is None:
print 'No cover found for', title
else:
open(cpath, 'wb').write(br.open_novisit(curl).read())
print 'Cover for', title, 'saved to', cpath
# Test sophisticated comment formatting
print get_social_metadata('Angels & Demons', None, None, '9781416580829')
print
# Random tests
print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')
print
print get_social_metadata('The Great Gatsby', None, None, '0743273567')
print get_social_metadata(title, None, None, isbn)
return 0


@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import traceback, socket, re, sys
import traceback, socket, sys
from functools import partial
from threading import Thread, Event
from Queue import Queue, Empty
@ -15,7 +15,6 @@ import mechanize
from calibre.customize import Plugin
from calibre import browser, prints
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.constants import preferred_encoding, DEBUG
class CoverDownload(Plugin):
@ -112,72 +111,38 @@ class OpenLibraryCovers(CoverDownload): # {{{
# }}}
class LibraryThingCovers(CoverDownload): # {{{
class AmazonCovers(CoverDownload): # {{{
name = 'librarything.com covers'
description = _('Download covers from librarything.com')
name = 'amazon.com covers'
description = _('Download covers from amazon.com')
author = 'Kovid Goyal'
LIBRARYTHING = 'http://www.librarything.com/isbn/'
def get_cover_url(self, isbn, br, timeout=5.):
try:
src = br.open_novisit('http://www.librarything.com/isbn/'+isbn,
timeout=timeout).read().decode('utf-8', 'replace')
except Exception, err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
err = Exception(_('LibraryThing.com timed out. Try again later.'))
raise err
else:
if '/wiki/index.php/HelpThing:Verify' in src:
raise Exception('LibraryThing is blocking calibre.')
s = BeautifulSoup(src)
url = s.find('td', attrs={'class':'left'})
if url is None:
if s.find('div', attrs={'class':'highloadwarning'}) is not None:
raise Exception(_('Could not fetch cover as server is experiencing high load. Please try again later.'))
raise Exception(_('ISBN: %s not found')%isbn)
url = url.find('img')
if url is None:
raise Exception(_('LibraryThing.com server error. Try again later.'))
url = re.sub(r'_S[XY]\d+', '', url['src'])
return url
def has_cover(self, mi, ans, timeout=5.):
if not mi.isbn or not self.site_customization:
if not mi.isbn:
return False
from calibre.ebooks.metadata.library_thing import get_browser, login
br = get_browser()
un, _, pw = self.site_customization.partition(':')
login(br, un, pw)
from calibre.ebooks.metadata.amazon import get_cover_url
br = browser()
try:
self.get_cover_url(mi.isbn, br, timeout=timeout)
get_cover_url(mi.isbn, br)
self.debug('cover for', mi.isbn, 'found')
ans.set()
except Exception, e:
self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.):
if not mi.isbn or not self.site_customization:
if not mi.isbn:
return
from calibre.ebooks.metadata.library_thing import get_browser, login
br = get_browser()
un, _, pw = self.site_customization.partition(':')
login(br, un, pw)
from calibre.ebooks.metadata.amazon import get_cover_url
br = browser()
try:
url = self.get_cover_url(mi.isbn, br, timeout=timeout)
url = get_cover_url(mi.isbn, br)
cover_data = br.open_novisit(url).read()
result_queue.put((True, cover_data, 'jpg', self.name))
except Exception, e:
result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name))
def customization_help(self, gui=False):
ans = _('To use librarything.com you must sign up for a %sfree account%s '
'and enter your username and password separated by a : below.')
return '<p>'+ans%('<a href="http://www.librarything.com">', '</a>')
# }}}
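Cover download plugins communicate through a queue and an event: has_cover() sets the passed Event when a cover exists, and get_covers() puts (success, data, extension, plugin_name) tuples on the result queue. A minimal synchronous driver sketch, assuming a plugin instance like the one above:

from threading import Event
from Queue import Queue, Empty

def fetch_covers(plugin, mi, timeout=5.):
    results, abort = Queue(), Event()
    plugin.get_covers(mi, results, abort, timeout=timeout)
    covers = []
    while True:
        try:
            ok, data, fmt, name = results.get_nowait()
        except Empty:
            break
        if ok:
            covers.append((name, fmt, data))
    return covers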
def check_for_cover(mi, timeout=5.): # {{{

View File

@ -12,6 +12,7 @@ __docformat__ = 'restructuredtext en'
from struct import pack, unpack
from cStringIO import StringIO
from calibre.ebooks import normalize
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
from calibre.ebooks.mobi.langcodes import iana2mobi
@ -311,6 +312,7 @@ class MetadataUpdater(object):
return StreamSlicer(self.stream, start, stop)
def update(self, mi):
mi.title = normalize(mi.title)
def update_exth_record(rec):
recs.append(rec)
if rec[0] in self.original_exth_records:
@ -331,12 +333,12 @@ class MetadataUpdater(object):
kindle_pdoc = None
if mi.author_sort and pas:
authors = mi.author_sort
update_exth_record((100, authors.encode(self.codec, 'replace')))
update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
elif mi.authors:
authors = ';'.join(mi.authors)
update_exth_record((100, authors.encode(self.codec, 'replace')))
update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
if mi.publisher:
update_exth_record((101, mi.publisher.encode(self.codec, 'replace')))
update_exth_record((101, normalize(mi.publisher).encode(self.codec, 'replace')))
if mi.comments:
# Strip user annotations
a_offset = mi.comments.find('<div class="user_annotations">')
@ -345,12 +347,12 @@ class MetadataUpdater(object):
mi.comments = mi.comments[:a_offset]
if ad_offset >= 0:
mi.comments = mi.comments[:ad_offset]
update_exth_record((103, mi.comments.encode(self.codec, 'replace')))
update_exth_record((103, normalize(mi.comments).encode(self.codec, 'replace')))
if mi.isbn:
update_exth_record((104, mi.isbn.encode(self.codec, 'replace')))
if mi.tags:
subjects = '; '.join(mi.tags)
update_exth_record((105, subjects.encode(self.codec, 'replace')))
update_exth_record((105, normalize(subjects).encode(self.codec, 'replace')))
if kindle_pdoc and kindle_pdoc in mi.tags:
update_exth_record((501, str('PDOC')))
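Wrapping every EXTH string in normalize() matters because decomposed unicode cannot be encoded into legacy single-byte MOBI codecs, while the composed form often can (calibre's normalize() is assumed here to perform NFC composition). For example:

import unicodedata

s = u'Caf' + u'e\u0301'        # 'e' followed by a combining acute accent
s.encode('cp1252', 'replace')  # -> 'Cafe?', the accent is lost
unicodedata.normalize('NFC', s).encode('cp1252', 'replace')  # -> 'Caf\xe9'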

View File

@ -367,6 +367,9 @@ class MobiMLizer(object):
istate.attrib['src'] = elem.attrib['src']
istate.attrib['align'] = 'baseline'
cssdict = style.cssdict()
valign = cssdict.get('vertical-align', None)
if valign in ('top', 'bottom', 'middle'):
istate.attrib['align'] = valign
for prop in ('width', 'height'):
if cssdict[prop] != 'auto':
value = style[prop]
@ -451,8 +454,11 @@ class MobiMLizer(object):
text = COLLAPSE.sub(' ', elem.text)
valign = style['vertical-align']
not_baseline = valign in ('super', 'sub', 'text-top',
'text-bottom')
vtag = 'sup' if valign in ('super', 'text-top') else 'sub'
'text-bottom') or (
isinstance(valign, (float, int)) and abs(valign) != 0)
issup = valign in ('super', 'text-top') or (
isinstance(valign, (float, int)) and valign > 0)
vtag = 'sup' if issup else 'sub'
if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
vbstate = BlockState(etree.SubElement(nroot, XHTML('body')))
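The rewritten test treats any nonzero numeric vertical-align as off-baseline, not just the CSS keywords, and picks sup for positive offsets. A condensed restatement of the decision:

def choose_vtag(valign):
    # valign: CSS keyword string or a numeric offset from the baseline
    numeric = isinstance(valign, (float, int))
    not_baseline = valign in ('super', 'sub', 'text-top', 'text-bottom') \
            or (numeric and abs(valign) != 0)
    if not not_baseline:
        return None           # stays on the baseline, no sup/sub tag
    issup = valign in ('super', 'text-top') or (numeric and valign > 0)
    return 'sup' if issup else 'sub'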

View File

@ -14,8 +14,9 @@ import re
from struct import pack
import time
from urlparse import urldefrag
from cStringIO import StringIO
from calibre.ebooks import normalize
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.ebooks.mobi.mobiml import MBP_NS
from calibre.ebooks.oeb.base import OEB_DOCS
@ -1365,7 +1366,7 @@ class MobiWriter(object):
self._text_length,
self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
uid = random.randint(0, 0xffffffff)
title = unicode(metadata.title[0]).encode('utf-8')
title = normalize(unicode(metadata.title[0])).encode('utf-8')
# The MOBI Header
# 0x0 - 0x3
@ -1523,12 +1524,12 @@ class MobiWriter(object):
items = oeb.metadata[term]
if term == 'creator':
if self._prefer_author_sort:
creators = [unicode(c.file_as or c) for c in items]
creators = [normalize(unicode(c.file_as or c)) for c in items]
else:
creators = [unicode(c) for c in items]
creators = [normalize(unicode(c)) for c in items]
items = ['; '.join(creators)]
for item in items:
data = self.COLLAPSE_RE.sub(' ', unicode(item))
data = self.COLLAPSE_RE.sub(' ', normalize(unicode(item)))
if term == 'identifier':
if data.lower().startswith('urn:isbn:'):
data = data[9:]
@ -1542,7 +1543,7 @@ class MobiWriter(object):
nrecs += 1
if term == 'rights' :
try:
rights = unicode(oeb.metadata.rights[0]).encode('utf-8')
rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8')
except:
rights = 'Unknown'
exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))

View File

@ -207,7 +207,14 @@ class CSSFlattener(object):
font_size = self.sbase if self.sbase is not None else \
self.context.source.fbase
if 'align' in node.attrib:
cssdict['text-align'] = node.attrib['align']
if tag != 'img':
cssdict['text-align'] = node.attrib['align']
else:
val = node.attrib['align']
if val in ('middle', 'bottom', 'top'):
cssdict['vertical-align'] = val
elif val in ('left', 'right'):
cssdict['text-align'] = val
del node.attrib['align']
if node.tag == XHTML('font'):
node.tag = XHTML('span')
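The branch above changes how a legacy align attribute is translated: on images, middle/bottom/top now become vertical-align (picked up by the MOBI output code), left/right remain text-align, and other tags keep the old behaviour. As a pure-function sketch:

def align_to_css(tag, val):
    # Mirrors the branch above; returns (css_property, value) or None.
    if tag != 'img':
        return ('text-align', val)
    if val in ('middle', 'bottom', 'top'):
        return ('vertical-align', val)
    if val in ('left', 'right'):
        return ('text-align', val)
    return None  # unrecognized img alignment is simply dropped

align_to_css('img', 'middle')  # ('vertical-align', 'middle')
align_to_css('p', 'center')    # ('text-align', 'center')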

View File

@ -56,7 +56,7 @@ def add_options(parser):
group = OptionGroup(parser, _('Merge Options:'), _('Options to control the transformation of pdf'))
parser.add_option_group(group)
add_option = group.add_option
for rec in OPTIONS:
option_recommendation_to_cli_option(add_option, rec)
@ -82,15 +82,15 @@ def main(args=sys.argv, name=''):
log = Log()
parser = option_parser(name)
add_options(parser)
opts, args = parser.parse_args(args)
args = args[1:]
if len(args) < 2:
print 'Error: Two or more PDF files are required.\n'
print_help(parser, log)
return 1
bad_pdfs = is_valid_pdfs(args)
if bad_pdfs != []:
for pdf in bad_pdfs:
@ -104,7 +104,7 @@ def main(args=sys.argv, name=''):
print 'Error: file `%s` is encrypted.' % pdf
if enc:
return 1
mi = metadata_from_formats([args[0]])
merge_files(args, opts.output, mi)

View File

@ -4,10 +4,9 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import glob
import os
from calibre import _ent_pat, xml_entity_to_unicode
from calibre import _ent_pat, walk, xml_entity_to_unicode
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from calibre.ebooks.chardet import detect
@ -16,7 +15,6 @@ from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
preserve_spaces, detect_paragraph_type, detect_formatting_type, \
normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \
separate_hard_scene_breaks
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
class TXTInput(InputFormatPlugin):
@ -28,20 +26,23 @@ class TXTInput(InputFormatPlugin):
options = set([
OptionRecommendation(name='paragraph_type', recommended_value='auto',
choices=['auto', 'block', 'single', 'print', 'unformatted'],
choices=['auto', 'block', 'single', 'print', 'unformatted', 'off'],
help=_('Paragraph structure.\n'
'choices are [\'auto\', \'block\', \'single\', \'print\', \'unformatted\']\n'
'choices are [\'auto\', \'block\', \'single\', \'print\', \'unformatted\', \'off\']\n'
'* auto: Try to auto detect paragraph type.\n'
'* block: Treat a blank line as a paragraph break.\n'
'* single: Assume every line is a paragraph.\n'
'* print: Assume every line starting with 2+ spaces or a tab '
'starts a paragraph.'
'* unformatted: Most lines have hard line breaks, few/no blank lines or indents.')),
'starts a paragraph.\n'
'* unformatted: Most lines have hard line breaks, few/no blank lines or indents. '
'Tries to determine structure and reformat to differentiate elements.\n'
'* off: Don\'t modify the paragraph structure. This is useful when combined with '
'Markdown or Textile formatting to ensure no formatting is lost.')),
OptionRecommendation(name='formatting_type', recommended_value='auto',
choices=['auto', 'none', 'heuristic', 'textile', 'markdown'],
choices=['auto', 'plain', 'heuristic', 'textile', 'markdown'],
help=_('Formatting used within the document.'
'* auto: Automatically decide which formatting processor to use.\n'
'* none: Do not process the document formatting. Everything is a '
'* plain: Do not process the document formatting. Everything is a '
'paragraph and no styling is applied.\n'
'* heuristic: Process using heuristics to determine formatting such '
'as chapter headings and italic text.\n'
@ -64,18 +65,17 @@ class TXTInput(InputFormatPlugin):
txt = ''
log.debug('Reading text from file...')
length = 0
# [(u'path', mime),]
# Extract content from zip archive.
if file_ext == 'txtz':
log.debug('De-compressing content to temporary directory...')
with TemporaryDirectory('_untxtz') as tdir:
zf = ZipFile(stream)
zf.extractall(tdir)
zf = ZipFile(stream)
zf.extractall('.')
txts = glob.glob(os.path.join(tdir, '*.txt'))
for t in txts:
with open(t, 'rb') as tf:
txt += tf.read()
for x in walk('.'):
if os.path.splitext(x)[1].lower() == '.txt':
with open(x, 'rb') as tf:
txt += tf.read() + '\n\n'
else:
txt = stream.read()
@ -134,7 +134,7 @@ class TXTInput(InputFormatPlugin):
preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None))
txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
txt = separate_paragraphs_single_line(txt)
else:
elif options.paragraph_type == 'block':
txt = separate_hard_scene_breaks(txt)
txt = block_to_single_line(txt)
@ -178,7 +178,7 @@ class TXTInput(InputFormatPlugin):
setattr(options, opt.option.name, opt.recommended_value)
options.input_encoding = 'utf-8'
base = os.getcwdu()
if hasattr(stream, 'name'):
if file_ext != 'txtz' and hasattr(stream, 'name'):
base = os.path.dirname(stream.name)
fname = os.path.join(base, 'index.html')
c = 0
@ -190,16 +190,16 @@ class TXTInput(InputFormatPlugin):
htmlfile.write(html.encode('utf-8'))
odi = options.debug_pipeline
options.debug_pipeline = None
# Generate oeb from htl conversion.
# Generate oeb from html conversion.
oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log,
{})
options.debug_pipeline = odi
os.remove(htmlfile.name)
# Set metadata from file.
from calibre.customize.ui import get_file_type_metadata
from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
mi = get_file_type_metadata(stream, file_ext)
meta_info_to_oeb_metadata(mi, oeb.metadata, log)
return oeb

View File

@ -126,7 +126,7 @@ def separate_hard_scene_breaks(txt):
return '\n%s\n' % line
else:
return line
txt = re.sub(u'(?miu)^[ \t-=~\/]+$', lambda mo: sep_break(mo.group()), txt)
txt = re.sub(u'(?miu)^[ \t-=~\/_]+$', lambda mo: sep_break(mo.group()), txt)
return txt
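Adding '_' to the character class makes a line of underscores count as a hard scene break, so it gets padded with blank lines like the other separator styles. For example:

import re

def sep_break(line):
    # simplified stand-in for the nested helper above
    return '\n%s\n' % line

txt = 'end of scene\n______\nnext scene'
txt = re.sub(u'(?miu)^[ \t-=~\/_]+$', lambda mo: sep_break(mo.group()), txt)
# txt is now 'end of scene\n\n______\n\nnext scene'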
def block_to_single_line(txt):

File diff suppressed because it is too large

View File

@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
__all__ = ["Unihandecoder"]
'''
Decode unicode text to an ASCII representation of the text.
Translate unicode characters to ASCII.
Inspired by John Schember's unidecode library, which was created as part
of calibre.
Copyright(c) 2009, John Schember
Transliterate the string from unicode characters to ASCII for Chinese and other scripts.
'''
import unicodedata
class Unihandecoder(object):
preferred_encoding = None
decoder = None
def __init__(self, lang="zh", encoding='utf-8'):
self.preferred_encoding = encoding
lang = lang.lower()
if lang[:2] == u'ja':
from calibre.ebooks.unihandecode.jadecoder import Jadecoder
self.decoder = Jadecoder()
elif lang[:2] == u'kr' or lang == u'korean':
from calibre.ebooks.unihandecode.krdecoder import Krdecoder
self.decoder = Krdecoder()
elif lang[:2] == u'vn' or lang == u'vietnum':
from calibre.ebooks.unihandecode.vndecoder import Vndecoder
self.decoder = Vndecoder()
else: #zh and others
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
self.decoder = Unidecoder()
def decode(self, text):
try:
unicode # python2
if not isinstance(text, unicode):
try:
text = unicode(text)
except:
try:
text = text.decode(self.preferred_encoding)
except:
text = text.decode('utf-8', 'replace')
except: # python3, str is unicode
pass
#at first unicode normalize it. (see Unicode standards)
ntext = unicodedata.normalize('NFKC', text)
return self.decoder.decode(ntext)
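Typical use of the class above: choose a decoder by language code, then transliterate. A hedged usage sketch (the exact romanization depends on the bundled dictionaries):

from calibre.ebooks.unihandecode import Unihandecoder

d = Unihandecoder(lang='ja')
d.decode(u'\u65e5\u672c\u8a9e')  # u'日本語' -> something like 'Nihongo'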

File diff suppressed because it is too large

View File

@ -0,0 +1,41 @@
# coding:utf8
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text for Japanese.
Translate unicode string to ASCII roman string.
API is based on the python unidecode,
which is based on Ruby gem (http://rubyforge.org/projects/unidecode/)
and perl module Text::Unidecode
(http://search.cpan.org/~sburke/Text-Unidecode-0.04/).
This functionality is provided by the KAKASI Japanese processing engine.
Copyright (c) 2010 Hiroshi Miura
'''
import re
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
from calibre.ebooks.unihandecode.jacodepoints import CODEPOINTS as JACODES
from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi
class Jadecoder(Unidecoder):
kakasi = None
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(JACODES)
self.kakasi = kakasi()
def decode(self, text):
try:
result=self.kakasi.do(text)
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),result)
except:
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),text)

File diff suppressed because it is too large

View File

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text in Korean.
Based on unidecoder.
'''
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
from calibre.ebooks.unihandecode.krcodepoints import CODEPOINTS as HANCODES
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
class Krdecoder(Unidecoder):
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(HANCODES)

View File

@ -0,0 +1,5 @@
from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi
kakasi
__all__ = ["pykakasi"]

View File

@ -0,0 +1,185 @@
# -*- coding: utf-8 -*-
# h2a.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
class H2a (object):
H2a_table = {
u"\u3041":"a", u"\u3042":"a",
u"\u3043":"i", u"\u3044":"i",
u"\u3045":"u", u"\u3046":"u",
u"\u3046\u309b":"vu", u"\u3046\u309b\u3041":"va",
u"\u3046\u309b\u3043":"vi", u"\u3046\u309b\u3047":"ve",
u"\u3046\u309b\u3049":"vo",
u"\u3047":"e", u"\u3048":"e",
u"\u3049":"o", u"\u304a":"o",
u"\u304b":"ka", u"\u304c":"ga",
u"\u304d":"ki", u"\u304d\u3041":"kya",
u"\u304d\u3045":"kyu", u"\u304d\u3049":"kyo",
u"\u304e":"gi", u"\u3050\u3083":"gya",
u"\u304e\u3045":"gyu", u"\u304e\u3087":"gyo",
u"\u304f":"ku", u"\u3050":"gu",
u"\u3051":"ke", u"\u3052":"ge",
u"\u3053":"ko", u"\u3054":"go",
u"\u3055":"sa", u"\u3056":"za",
u"\u3057":"shi", u"\u3057\u3083":"sha",
u"\u3057\u3085":"shu", u"\u3057\u3087":"sho",
u"\u3058":"ji", u"\u3058\u3083":"ja",
u"\u3058\u3085":"ju", u"\u3058\u3087":"jo",
u"\u3059":"su", u"\u305a":"zu",
u"\u305b":"se", u"\u305c":"ze",
u"\u305d":"so", u"\u305e":"zo",
u"\u305f":"ta", u"\u3060":"da",
u"\u3061":"chi", u"\u3061\u3047":"che", u"\u3061\u3083":"cha",
u"\u3061\u3085":"chu", u"\u3061\u3087":"cho",
u"\u3062":"ji", u"\u3062\u3083":"ja",
u"\u3062\u3085":"ju", u"\u3062\u3087":"jo",
u"\u3063":"tsu",
u"\u3063\u3046\u309b":"vvu",
u"\u3063\u3046\u309b\u3041":"vva",
u"\u3063\u3046\u309b\u3043":"vvi",
u"\u3063\u3046\u309b\u3047":"vve",
u"\u3063\u3046\u309b\u3049":"vvo",
u"\u3063\u304b":"kka", u"\u3063\u304c":"gga",
u"\u3063\u304d":"kki", u"\u3063\u304d\u3083":"kkya",
u"\u3063\u304d\u3085":"kkyu", u"\u3063\u304d\u3087":"kkyo",
u"\u3063\u304e":"ggi", u"\u3063\u304e\u3083":"ggya",
u"\u3063\u304e\u3085":"ggyu", u"\u3063\u304e\u3087":"ggyo",
u"\u3063\u304f":"kku", u"\u3063\u3050":"ggu",
u"\u3063\u3051":"kke", u"\u3063\u3052":"gge",
u"\u3063\u3053":"kko", u"\u3063\u3054":"ggo",
u"\u3063\u3055":"ssa", u"\u3063\u3056":"zza",
u"\u3063\u3057":"sshi", u"\u3063\u3057\u3083":"ssha",
u"\u3063\u3057\u3085":"sshu", u"\u3063\u3057\u3087":"ssho",
u"\u3063\u3058":"jji", u"\u3063\u3058\u3083":"jja",
u"\u3063\u3058\u3085":"jju", u"\u3063\u3058\u3087":"jjo",
u"\u3063\u3059":"ssu", u"\u3063\u305a":"zzu",
u"\u3063\u305b":"sse", u"\u3063\u305e":"zze",
u"\u3063\u305d":"sso", u"\u3063\u305e":"zzo",
u"\u3063\u305f":"tta", u"\u3063\u3060":"dda",
u"\u3063\u3061":"tchi", u"\u3063\u3061\u3083":"tcha",
u"\u3063\u3061\u3085":"tchu", u"\u3063\u3061\u3087":"tcho",
u"\u3063\u3062":"jji", u"\u3063\u3062\u3083":"jjya",
u"\u3063\u3062\u3085":"jjyu", u"\u3063\u3062\u3087":"jjyo",
u"\u3063\u3064":"ttsu", u"\u3063\u3065":"zzu",
u"\u3063\u3066":"tte", u"\u3063\u3067":"dde",
u"\u3063\u3068":"tto", u"\u3063\u3069":"ddo",
u"\u3063\u306f":"hha", u"\u3063\u3070":"bba",
u"\u3063\u3071":"ppa",
u"\u3063\u3072":"hhi", u"\u3063\u3072\u3083":"hhya",
u"\u3063\u3072\u3085":"hhyu", u"\u3063\u3072\u3087":"hhyo",
u"\u3063\u3073":"bbi", u"\u3063\u3073\u3083":"bbya",
u"\u3063\u3073\u3085":"bbyu", u"\u3063\u3073\u3087":"bbyo",
u"\u3063\u3074":"ppi", u"\u3063\u3074\u3083":"ppya",
u"\u3063\u3074\u3085":"ppyu", u"\u3063\u3074\u3087":"ppyo",
u"\u3063\u3075":"ffu", u"\u3063\u3075\u3041":"ffa",
u"\u3063\u3075\u3043":"ffi", u"\u3063\u3075\u3047":"ffe",
u"\u3063\u3075\u3049":"ffo",
u"\u3063\u3076":"bbu", u"\u3063\u3077":"ppu",
u"\u3063\u3078":"hhe", u"\u3063\u3079":"bbe",
u"\u3063\u307a":"ppe",
u"\u3063\u307b":"hho", u"\u3063\u307c":"bbo",
u"\u3063\u307d":"ppo",
u"\u3063\u3084":"yya", u"\u3063\u3086":"yyu",
u"\u3063\u3088":"yyo",
u"\u3063\u3089":"rra", u"\u3063\u308a":"rri",
u"\u3063\u308a\u3083":"rrya", u"\u3063\u308a\u3085":"rryu",
u"\u3063\u308a\u3087":"rryo",
u"\u3063\u308b":"rru", u"\u3063\u308c":"rre",
u"\u3063\u308d":"rro",
u"\u3064":"tsu", u"\u3065":"zu",
u"\u3066":"te", u"\u3067":"de", u"\u3067\u3043":"di",
u"\u3068":"to", u"\u3069":"do",
u"\u306a":"na",
u"\u306b":"ni", u"\u306b\u3083":"nya",
u"\u306b\u3085":"nyu", u"\u306b\u3087":"nyo",
u"\u306c":"nu", u"\u306d":"ne", u"\u306e":"no",
u"\u306f":"ha", u"\u3070":"ba", u"\u3071":"pa",
u"\u3072":"hi", u"\u3072\u3083":"hya",
u"\u3072\u3085":"hyu", u"\u3072\u3087":"hyo",
u"\u3073":"bi", u"\u3073\u3083":"bya",
u"\u3073\u3085":"byu", u"\u3073\u3087":"byo",
u"\u3074":"pi", u"\u3074\u3083":"pya",
u"\u3074\u3085":"pyu", u"\u3074\u3087":"pyo",
u"\u3075":"fu", u"\u3075\u3041":"fa",
u"\u3075\u3043":"fi", u"\u3075\u3047":"fe",
u"\u3075\u3049":"fo",
u"\u3076":"bu", u"\u3077":"pu",
u"\u3078":"he", u"\u3079":"be", u"\u307a":"pe",
u"\u307b":"ho", u"\u307c":"bo", u"\u307d":"po",
u"\u307e":"ma",
u"\u307f":"mi", u"\u307f\u3083":"mya",
u"\u307f\u3085":"myu", u"\u307f\u3087":"myo",
u"\u3080":"mu", u"\u3081":"me", u"\u3082":"mo",
u"\u3083":"ya", u"\u3084":"ya",
u"\u3085":"yu", u"\u3086":"yu",
u"\u3087":"yo", u"\u3088":"yo",
u"\u3089":"ra",
u"\u308a":"ri", u"\u308a\u3083":"rya",
u"\u308a\u3085":"ryu", u"\u308a\u3087":"ryo",
u"\u308b":"ru", u"\u308c":"re", u"\u308d":"ro",
u"\u308e":"wa", u"\u308f":"wa",
u"\u3090":"i", u"\u3091":"e",
u"\u3092":"wo", u"\u3093":"n",
u"\u3093\u3042":"n'a", u"\u3093\u3044":"n'i",
u"\u3093\u3046":"n'u", u"\u3093\u3048":"n'e",
u"\u3093\u304a":"n'o",
}
# this class is Borg
_shared_state = {}
def __new__(cls, *p, **k):
self = object.__new__(cls, *p, **k)
self.__dict__ = cls._shared_state
return self
def isHiragana(self, char):
return ( 0x3040 < ord(char) and ord(char) < 0x3094)
def convert(self, text):
Hstr = ""
max_len = -1
r = min(4, len(text)+1)
for x in xrange(r):
if text[:x] in self.H2a_table:
if max_len < x:
max_len = x
Hstr = self.H2a_table[text[:x]]
return (Hstr, max_len)
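convert() is a greedy longest-prefix match against the table: it tries prefixes of up to three characters and keeps the longest hit, returning the romaji and the number of characters consumed (-1 when nothing matched). For example:

h2a = H2a()
h2a.convert(u'\u3057\u3083\u304b')  # 'しゃか' -> ('sha', 2); 'しゃ' beats 'し'
h2a.convert(u'\u304b')              # 'か' -> ('ka', 1)
h2a.convert(u'abc')                 # no match -> ('', -1)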

View File

@ -0,0 +1,564 @@
芦蘆
壱一
苅刈
舘館
曽曾
菟兎
島嶋
盃杯
冨富
峯峰
亘亙
弌一
乘乗
亂乱
豫予
亊事
弍二
亞亜
亰京
从従
仭仞
佛仏
來来
儘侭
伜倅
假仮
會会
做作
傳伝
僞偽
價価
儉倹
兒児
兔兎
竸競
兩両
囘回
册冊
冢塚
冩写
决決
冱冴
冰氷
况況
凉涼
處処
凾函
刄刃
刔抉
刧劫
剩剰
劍剣
劔剣
劒剣
剱剣
劑剤
辨弁
勞労
勳勲
勵励
勸勧
區区
卆卒
丗世
凖準
夘卯
卻却
卷巻
厠廁
厦廈
厮廝
厰廠
參参
雙双
咒呪
單単
噐器
營営
嚏嚔
嚴厳
囑嘱
囓齧
圀国
圈圏
國国
圍囲
圓円
團団
圖図
埀垂
埓埒
塲場
壞壊
墮堕
壓圧
壘塁
壥廛
壤壌
壯壮
壺壷
壹一
壻婿
壽寿
夂夊
夛多
梦夢
竒奇
奧奥
奬奨
侫佞
姙妊
嫻嫺
孃嬢
學学
斈学
寃冤
寇冦
寢寝
寫写
寶宝
寳宝
尅剋
將将
專専
對対
尓爾
尢尤
屆届
屬属
峽峡
嶌嶋
嵜崎
崙崘
嵳嵯
嶽岳
巛川
巵卮
帋紙
帶帯
幤幣
廐厩
廏厩
廣広
廚厨
廢廃
廳庁
廰庁
廸迪
弃棄
弉奘
彜彝
彈弾
彌弥
弯彎
徃往
徑径
從従
徠来
悳徳
恠怪
恆恒
悧俐
惡悪
惠恵
忰悴
惱悩
愼慎
愽博
慘惨
慚慙
憇憩
應応
懷懐
懴懺
戀恋
戞戛
戰戦
戲戯
拔抜
拏拿
擔担
拜拝
拂払
挾挟
搜捜
插挿
搖揺
攝摂
攪撹
據拠
擇択
擧拳
舉拳
抬擡
擴拡
攜携
攵攴
攷考
收収
效効
敕勅
敍叙
敘叙
數数
變変
斷断
旙旛
昜陽
晄晃
晉晋
晝昼
晰晢
暎映
曉暁
暸瞭
昿曠
曵曳
朖朗
朞期
霸覇
杤栃
杰傑
枩松
檜桧
條条
檮梼
梹檳
棊棋
棧桟
棕椶
楙茂
榮栄
槨椁
樂楽
權権
樞枢
樣様
樓楼
橢楕
檢検
櫻桜
鬱欝
盜盗
飮飲
歐嘔
歡歓
歸帰
殘残
殱殲
殼殻
毆殴
毓育
氣気
沒没
泪涙
濤涛
渕淵
渊淵
淨浄
淺浅
滿満
溂剌
溪渓
灌潅
滯滞
澁渋
澀渋
潛潜
濳潜
澂澄
澑溜
澤沢
濟済
濕湿
濱浜
濾滬
灣湾
烱炯
烟煙
熈煕
熏燻
燒焼
爐炉
爭争
爲為
爼俎
犁犂
犹猶
犲豺
狹狭
獎奨
默黙
獨独
獸獣
獵猟
獻献
珎珍
璢瑠
瑯琅
珱瓔
瓣弁
甞嘗
甼町
畄留
畍界
畊耕
畆畝
畧略
畫画
當当
畴疇
疊畳
疉畳
疂畳
癡痴
發発
皃猊
皈帰
皹皸
盖蓋
盡尽
蘯盪
眞真
眦眥
礦鉱
礪砺
碎砕
碯瑙
祕秘
祿禄
齋斎
禪禅
禮礼
禀稟
稱称
稻稲
稾稿
穗穂
穩穏
龝穐
穰穣
窗窓
竈竃
窰窯
竊窃
竝並
筺筐
笋筍
箟箘
筝箏
簔蓑
籠篭
籘籐
籖籤
粹粋
糺糾
絲糸
經経
總総
緜綿
縣県
縱縦
繪絵
繩縄
繼継
緕纃
續続
纖繊
纎繊
纜繿
缺欠
罐缶
罸罰
羃冪
羣群
羮羹
譱善
翆翠
翦剪
耻恥
聟婿
聨聯
聲声
聰聡
聽聴
肅粛
冐冒
脉脈
腦脳
腟膣
膓腸
膸髄
膽胆
臈臘
臟臓
臺台
與与
舊旧
舍舎
舖舗
舩船
艢檣
舮艫
艷艶
莖茎
莊荘
莵兎
菷帚
萠萌
蕚萼
蒂蔕
萬万
葢蓋
蘂蕊
蕋蕊
藪薮
藏蔵
藝芸
藥薬
蘓蘇
乕虎
號号
蠣蛎
蝨虱
蠅蝿
螢蛍
蟆蟇
蟲虫
蠏蟹
蟷螳
蟒蠎
蠶蚕
蠧蠹
蠻蛮
衂衄
衞衛
袵衽
裝装
襃褒
褝襌
覩睹
覺覚
覽覧
觀観
觧解
觸触
誡戒
謌歌
諡謚
謠謡
證証
譛譖
譯訳
譽誉
讀読
讓譲
讚賛
豐豊
貉狢
貍狸
貎猊
豼貔
貘獏
戝財
貭質
貳弐
貮弐
賤賎
賣売
贊賛
賍贓
赱走
踈疎
踴踊
躰体
軆体
軈軅
軣轟
輕軽
輙輒
輌輛
轉転
辭辞
辯弁
迯逃
逹達
逎遒
遞逓
遲遅
邊辺
邉辺
邨村
鄰隣
醉酔
醫医
釀醸
釋釈
釡釜
釼剣
銕鉄
錢銭
鎭鎮
鐵鉄
鐡鉄
鑒鑑
鑄鋳
鑛鉱
鈩鑪
鑚鑽
閇閉
濶闊
關関
阯址
陷陥
險険
隱隠
隸隷
襍雑
雜雑
靈霊
靜静
靱靭
韭韮
韲齏
韵韻
顏顔
顯顕
飃飄
餘余
餝飾
餠餅
騷騒
驅駆
驛駅
驗験
髓髄
體体
髮髪
鬪闘
鰺鯵
鰛鰮
鳬鳧
鳫鴈
鵄鴟
鵞鵝
鷄鶏
鷏鷆
鹽塩
麥麦
麸麩
麪麺
點点
黨党
皷鼓
鼡鼠
齊斉
齒歯
齡齢
龜亀
槇槙
遙遥
瑤瑶
凜凛
熙煕

View File

@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
# j2h.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original Copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo
import re
class J2H (object):
kanwa = None
cl_table = [
"","aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow",
"aiueow", "aiueow", "aiueow", "k", "g", "k", "g", "k", "g", "k", "g", "k",
"g", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "t", "d", "tc",
"d", "aiueokstchgzjfdbpw", "t", "d", "t", "d", "t", "d", "n", "n", "n", "n",
"n", "h", "b", "p", "h", "b", "p", "hf", "b", "p", "h", "b", "p", "h", "b",
"p", "m", "m", "m", "m", "m", "y", "y", "y", "y", "y", "y", "rl", "rl",
"rl", "rl", "rl", "wiueo", "wiueo", "wiueo", "wiueo", "w", "n", "v", "k",
"k", "", "", "", "", "", "", "", "", ""]
def __init__(self):
self.kanwa = jisyo()
def isKanji(self, c):
return ( 0x3400 <= ord(c) and ord(c) < 0xfa2e)
def isCletter(self, l, c):
if (ord(u"") <= ord(c) and ord(c) <= 0x309f) and ( l in self.cl_table[ord(c) - ord(u"")-1]):
return True
return False
def itaiji_conv(self, text):
r = []
for c in text:
if c in self.kanwa.itaijidict:
r.append(c)
for c in r:
text = re.sub(c, self.kanwa.itaijidict[c], text)
return text
def convert(self, text):
max_len = 0
Hstr = ""
table = self.kanwa.load_jisyo(text[0])
if table is None:
return ("", 0)
for (k,v) in table.iteritems():
length = len(k)
if len(text) >= length:
if text.startswith(k):
for (yomi, tail) in v:
if tail == '':
if max_len < length:
Hstr = yomi
max_len = length
elif max_len < length+1 and len(text) > length and self.isCletter(tail, text[length]):
Hstr=''.join([yomi,text[length]])
max_len = length+1
return (Hstr, max_len)

View File

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
# jisyo.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
from cPickle import load
import anydbm,marshal
from zlib import decompress
import os
import calibre.utils.resources as resources
class jisyo (object):
kanwadict = None
itaijidict = None
kanadict = None
jisyo_table = {}
# this class is Borg
_shared_state = {}
def __new__(cls, *p, **k):
self = object.__new__(cls, *p, **k)
self.__dict__ = cls._shared_state
return self
def __init__(self):
if self.kanwadict is None:
dictpath = resources.get_path(os.path.join('localization','pykakasi','kanwadict2.db'))
self.kanwadict = anydbm.open(dictpath,'r')
if self.itaijidict is None:
itaijipath = resources.get_path(os.path.join('localization','pykakasi','itaijidict2.pickle'))
itaiji_pkl = open(itaijipath, 'rb')
self.itaijidict = load(itaiji_pkl)
if self.kanadict is None:
kanadictpath = resources.get_path(os.path.join('localization','pykakasi','kanadict2.pickle'))
kanadict_pkl = open(kanadictpath, 'rb')
self.kanadict = load(kanadict_pkl)
def load_jisyo(self, char):
try:#python2
key = "%04x"%ord(unicode(char))
except:#python3
key = "%04x"%ord(char)
try: #already exist?
table = self.jisyo_table[key]
except:
try:
table = self.jisyo_table[key] = marshal.loads(decompress(self.kanwadict[key]))
except:
return None
return table
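Like H2a above, jisyo is a Borg: every instance shares one __dict__, so the pickled dictionaries are loaded at most once no matter how many objects get constructed. The pattern in isolation:

class Borg(object):
    _shared_state = {}

    def __new__(cls, *p, **k):
        self = object.__new__(cls)
        self.__dict__ = cls._shared_state  # all instances share state
        return self

a, b = Borg(), Borg()
a.x = 1
assert b.x == 1 and a is not b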

View File

@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
# k2a.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo
class K2a (object):
kanwa = None
def __init__(self):
self.kanwa = jisyo()
def isKatakana(self, char):
return ( 0x30a0 < ord(char) and ord(char) < 0x30f7)
def convert(self, text):
Hstr = ""
max_len = -1
r = min(10, len(text)+1)
for x in xrange(r):
if text[:x] in self.kanwa.kanadict:
if max_len < x:
max_len = x
Hstr = self.kanwa.kanadict[text[:x]]
return (Hstr, max_len)

View File

@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
# kakasi.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original Copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
from calibre.ebooks.unihandecode.pykakasi.j2h import J2H
from calibre.ebooks.unihandecode.pykakasi.h2a import H2a
from calibre.ebooks.unihandecode.pykakasi.k2a import K2a
class kakasi(object):
j2h = None
h2a = None
k2a = None
def __init__(self):
self.j2h = J2H()
self.h2a = H2a()
self.k2a = K2a()
def do(self, text):
otext = ''
i = 0
while True:
if i >= len(text):
break
if self.j2h.isKanji(text[i]):
(t, l) = self.j2h.convert(text[i:])
if l <= 0:
otext = otext + text[i]
i = i + 1
continue
i = i + l
m = 0
tmptext = ""
while True:
if m >= len(t):
break
(s, n) = self.h2a.convert(t[m:])
if n <= 0:
break
m = m + n
tmptext = tmptext+s
if i >= len(text):
otext = otext + tmptext.capitalize()
else:
otext = otext + tmptext.capitalize() +' '
elif self.h2a.isHiragana(text[i]):
tmptext = ''
while True:
(t, l) = self.h2a.convert(text[i:])
tmptext = tmptext+t
i = i + l
if i >= len(text):
otext = otext + tmptext
break
elif not self.h2a.isHiragana(text[i]):
otext = otext + tmptext + ' '
break
elif self.k2a.isKatakana(text[i]):
tmptext = ''
while True:
(t, l) = self.k2a.convert(text[i:])
tmptext = tmptext+t
i = i + l
if i >= len(text):
otext = otext + tmptext
break
elif not self.k2a.isKatakana(text[i]):
otext = otext + tmptext + ' '
break
else:
otext = otext + text[i]
i += 1
return otext
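do() walks the string and dispatches runs of kanji, hiragana and katakana to the three converters, capitalizing each kanji word and separating segments with spaces. Hedged usage (the output is approximate and depends on the kanwa dictionary):

from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi

k = kakasi()
k.do(u'\u6f22\u5b57\u3068\u304b\u306a')  # u'漢字とかな' -> e.g. 'Kanji tokana'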

File diff suppressed because it is too large

View File

@ -0,0 +1,317 @@
;; Kana-Alphabet mapping dictionary
;;
;; To use this mapping table,
;; you should unicode normalize to NFKC form.
;;
;; basic mapping
;;
a ァ
a ア
ba バ
bba ッバ
bbe ッベ
bbi ッビ
bbo ッボ
bbu ッブ
bbya ッビャ
bbyo ッビョ
bbyu ッビュ
be ベ
bi ビ
bo ボ
bu ブ
bya ビャ
byo ビョ
byu ビュ
cha チャ
che チェ
chi チ
cho チョ
chu チュ
da ダ
dda ッダ
dde ッデ
ddo ッド
de デ
di ディ
do ド
e ェ
e エ
e ヱ
fa ファ
fe フェ
ffa ッファ
ffe ッフェ
ffi ッフィ
ffo ッフォ
ffu ッフ
fi フィ
fo フォ
fu フ
ga ガ
ge ゲ
gga ッガ
gge ッゲ
ggi ッギ
ggo ッゴ
ggu ッグ
ggya ッギャ
ggyo ッギョ
ggyu ッギュ
gi ギ
go ゴ
gu グ
gya グャ
gyo ギョ
gyu ギゥ
ha ハ
he ヘ
hha ッハ
hhe ッヘ
hhi ッヒ
hho ッホ
hhya ッヒャ
hhyo ッヒョ
hhyu ッヒュ
hi ヒ
ho ホ
hya ヒャ
hyo ヒョ
hyu ヒュ
i ィ
i イ
i ヰ
ja ジャ
ja ヂャ
ji ジ
ji ヂ
jja ッジャ
jji ッジ
jji ッヂ
jjo ッジョ
jju ッジュ
jjya ッヂャ
jjyo ッヂョ
jjyu ッヂュ
jo ジョ
jo ヂョ
ju ジュ
ju ヂュ
ka カ
ka ヵ
ke ケ
ke ヶ
ki キ
kka ッカ
kke ッケ
kki ッキ
kko ッコ
kku ック
kkya ッキャ
kkyo ッキョ
kkyu ッキュ
ko コ
ku ク
kya キァ
kyo キォ
kyu キゥ
ma マ
me メ
mi ミ
mo モ
mu ム
mya ミャ
myo ミョ
myu ミュ
n ン
n'a ンア
n'e ンエ
n'i ンイ
n'o ンオ
n'u ンウ
na ナ
ne ネ
ni ニ
no ノ
nu ヌ
nya ニャ
nyo ニョ
nyu ニュ
o ォ
o オ
pa パ
pe ペ
pi ピ
po ポ
ppa ッパ
ppe ッペ
ppi ッピ
ppo ッポ
ppu ップ
ppya ッピャ
ppyo ッピョ
ppyu ッピュ
pu プ
pya ピャ
pyo ピョ
pyu ピュ
ra ラ
re レ
ri リ
ro ロ
rra ッラ
rre ッレ
rri ッリ
rro ッロ
rru ッル
rrya ッリャ
rryo ッリョ
rryu ッリュ
ru ル
rya リャ
ryo リョ
ryu リュ
sa サ
se セ
sha シャ
shi シ
sho ショ
shu シュ
so ソ
ssa ッサ
sse ッセ
ssha ッシャ
sshi ッシ
ssho ッショ
sshu ッシュ
sso ッソ
ssu ッス
su ス
ta タ
tcha ッチャ
tchi ッチ
tcho ッチョ
tchu ッチュ
te テ
to ト
tsu ッ
tsu ツ
tta ッタ
tte ッテ
tto ット
ttsu ッツ
u ゥ
u ウ
va ヴァ
ve ヴェ
vi ヴィ
vo ヴォ
vu ヴ
vva ッヴァ
vve ッヴェ
vvi ッヴィ
vvo ッヴォ
vvu ッヴ
wa ヮ
wa ワ
wo ヲ
ya ャ
ya ヤ
yo ョ
yo ヨ
yu ュ
yu ユ
yya ッヤ
yyo ッヨ
yyu ッユ
za ザ
ze ゼ
zo ゾ
zu ズ
zu ヅ
zza ッザ
zzo ッゾ
zzu ッズ
zzu ッヅ
;;
;; extended characters
;;
;;
;; gairai terms
;;
all オール
algrism アルゴリズム
answer アンサー
base ベース
begineer ビギナー
connection コネクション
contents コンテンツ
creator クリエーター
comic コミック
comics コミックス
culture カルチャー
debug デバッグ
debugging デバッギング
design デザイン
digital デジタル
dillenma ジレンマ
directory ディレクトリ
disk ディスク
document ドキュメント
download ダウンロード
electric エレクトリック
facebook フェイスブック
firefox ファイアーフォックス
folder フォルダ
format フォーマット
forum フォーラム
fox フォックス
free フリー
gnome ノーム
gnu グヌー
gozilla ゴジラ
guide ガイド
harvard ハーバード
help ヘルプ
highlight ハイライト
japan ジャパン
journal ジャーナル
library ライブラリ
line ライン
love ラヴ
love ラブ
mail メール
main メイン
mystery ミステリ
mozilla モジラ
network ネットワーク
next ネクスト
new ニュー
news ニュース
native ネイティブ
online オンライン
open オープン
professional プロフェッショナル
profile プロファイル
programmer プログラマ
sample サンプル
series シリーズ
share シェア
social ソーシャル
society ソサエティ
software ソフトウエア
source ソース
street ストリート
system システム
tag タグ
text テキスト
thunderbird サンダーバード
training トレーニング
twitter ツイッター
unicode ユニコード
wall ウオール
wall ウォール
welcome ウェルカム
welcome ウエルカム
wikinomics ウィキノミクス
york ヨーク

File diff suppressed because it is too large

View File

@ -1,12 +1,17 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text. Transliterate
unicode characters to ASCII.
Decode unicode text to an ASCII representation of the text in Chinese.
Transliterate unicode characters to ASCII based on chinese pronounce.
Derived from John Schember's unidecode library, which was created
as part of calibre.
Copyright(c) 2009, John Schember <john@nachtimwald.com>
Based on the ruby unidecode gem (http://rubyforge.org/projects/unidecode/) which
is based on the perl module Text::Unidecode
@ -55,29 +60,20 @@ it under the same terms as Perl itself.
'''
import re
from calibre.ebooks.unidecode.unicodepoints import CODEPOINTS
from calibre.constants import preferred_encoding
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
from calibre.ebooks.unihandecode.zhcodepoints import CODEPOINTS as HANCODES
class Unidecoder(object):
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(HANCODES)
def decode(self, text):
'''
Transliterate the string from unicode characters to ASCII.
'''
# The keys for CODEPOINTS is unicode characters, we want to be sure the
# input text is unicode.
if not isinstance(text, unicode):
try:
text = unicode(text)
except:
try:
text = text.decode(preferred_encoding)
except:
text = text.decode('utf-8', 'replace')
# Replace characters larger than 127 with their ASCII equivalent.
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),
text)
return re.sub('[^\x00-\x7f]',lambda x: self.replace_point(x.group()), text)
def replace_point(self, codepoint):
'''
@ -87,7 +83,7 @@ class Unidecoder(object):
# Split the unicode character xABCD into parts 0xAB and 0xCD.
# 0xAB represents the group within CODEPOINTS to query and 0xCD
# represents the position in the list of characters for the group.
return CODEPOINTS[self.code_group(codepoint)][self.grouped_point(
return self.codepoints[self.code_group(codepoint)][self.grouped_point(
codepoint)]
except:
return '?'
@ -97,12 +93,18 @@ class Unidecoder(object):
Find what group the character is a part of.
'''
# Code groups within CODEPOINTS take the form 'xAB'
return u'x%02x' % (ord(unicode(character)) >> 8)
try:#python2
return 'x%02x' % (ord(unicode(character)) >> 8)
except:
return 'x%02x' % (ord(character) >> 8)
def grouped_point(self, character):
'''
Return the location of the replacement character in the list for the
group the character is a part of.
'''
return ord(unicode(character)) & 255
try:#python2
return ord(unicode(character)) & 255
except:
return ord(character) & 255

File diff suppressed because it is too large

View File

@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text in Vietnamese.
'''
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
from calibre.ebooks.unihandecode.vncodepoints import CODEPOINTS as HANCODES
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
class Vndecoder(Unidecoder):
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(HANCODES)

File diff suppressed because it is too large

View File

@ -137,14 +137,18 @@ def _config():
help=_('Automatically download the cover, if available'))
c.add_opt('enforce_cpu_limit', default=True,
help=_('Limit max simultaneous jobs to number of CPUs'))
c.add_opt('tag_browser_hidden_categories', default=set(),
help=_('tag browser categories not to display'))
c.add_opt('gui_layout', choices=['wide', 'narrow'],
help=_('The layout of the user interface'), default='wide')
c.add_opt('show_avg_rating', default=True,
help=_('Show the average rating per item indication in the tag browser'))
c.add_opt('disable_animations', default=False,
help=_('Disable UI animations'))
# This option is no longer used. It remains for compatibility with upgrades
# so the value can be migrated
c.add_opt('tag_browser_hidden_categories', default=set(),
help=_('tag browser categories not to display'))
c.add_opt
return ConfigProxy(c)

View File

@ -204,7 +204,8 @@ class AddAction(InterfaceAction):
]
to_device = self.gui.stack.currentIndex() != 0
if to_device:
filters = [(_('Supported books'), self.gui.device_manager.device.FORMATS)]
fmts = self.gui.device_manager.device.settings().format_map
filters = [(_('Supported books'), fmts)]
books = choose_files(self.gui, 'add books dialog dir', 'Select books',
filters=filters)

View File

@ -271,11 +271,6 @@ class DeleteAction(InterfaceAction):
partial(self.library_ids_deleted, current_row=row))
# Device view is visible.
else:
if not confirm('<p>'+_('The selected books will be '
'<b>permanently deleted</b> '
'from your device. Are you sure?')
+'</p>', 'device_delete_books', self.gui):
return
if self.gui.stack.currentIndex() == 1:
view = self.gui.memory_view
elif self.gui.stack.currentIndex() == 2:
@ -283,8 +278,14 @@ class DeleteAction(InterfaceAction):
else:
view = self.gui.card_b_view
paths = view.model().paths(rows)
ids = view.model().indices(rows)
if not confirm('<p>'+_('The selected books will be '
'<b>permanently deleted</b> '
'from your device. Are you sure?')
+'</p>', 'device_delete_books', self.gui):
return
job = self.gui.remove_paths(paths)
self.delete_memory[job] = (paths, view.model())
view.model().mark_for_deletion(job, rows)
view.model().mark_for_deletion(job, ids, rows_are_ids=True)
self.gui.status_bar.show_message(_('Deleting books from device.'), 1000)

View File

@ -158,6 +158,8 @@ class MultiCompleteComboBox(EnComboBox):
# item that matches case insensitively
c = self.lineEdit().completer()
c.setCaseSensitivity(Qt.CaseSensitive)
self.dummy_model = CompleteModel(self)
c.setModel(self.dummy_model)
def update_items_cache(self, complete_items):
self.lineEdit().update_items_cache(complete_items)

View File

@ -551,7 +551,11 @@ class BulkBool(BulkBase, Bool):
def setup_ui(self, parent):
self.make_widgets(parent, QComboBox)
items = [_('Yes'), _('No'), _('Undefined')]
items = [_('Yes'), _('No')]
if tweaks['bool_custom_columns_are_tristate'] == 'no':
items.append('')
else:
items.append(_('Undefined'))
icons = [I('ok.png'), I('list_remove.png'), I('blank.png')]
self.main_widget.blockSignals(True)
for icon, text in zip(icons, items):
@ -560,7 +564,10 @@ class BulkBool(BulkBase, Bool):
def getter(self):
val = self.main_widget.currentIndex()
return {2: None, 1: False, 0: True}[val]
if tweaks['bool_custom_columns_are_tristate'] == 'no':
return {2: False, 1: False, 0: True}[val]
else:
return {2: None, 1: False, 0: True}[val]
def setter(self, val):
val = {None: 2, False: 1, True: 0}[val]
@ -576,6 +583,14 @@ class BulkBool(BulkBase, Bool):
val = False
self.db.set_custom_bulk(book_ids, val, num=self.col_id, notify=notify)
def a_c_checkbox_changed(self):
if not self.ignore_change_signals:
if tweaks['bool_custom_columns_are_tristate'] == 'no' and \
self.main_widget.currentIndex() == 2:
self.a_c_checkbox.setChecked(False)
else:
self.a_c_checkbox.setChecked(True)
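When the bool_custom_columns_are_tristate tweak is 'no', the third combo entry is blank and maps to False rather than None, so bulk edits can never write an 'undefined' value. The effective mapping, restated:

def bulk_bool_value(index, tristate):
    # index is the combo position: 0 = Yes, 1 = No, 2 = third entry
    if tristate:
        return {0: True, 1: False, 2: None}[index]
    return {0: True, 1: False, 2: False}[index]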
class BulkInt(BulkBase):
def setup_ui(self, parent):

View File

@ -1292,6 +1292,16 @@ class DeviceMixin(object): # {{{
to both speed up matching and to count matches.
'''
if not self.device_manager.is_device_connected:
return False
# It might be possible to get here without having initialized the
# library view. In this case, simply give up
try:
db = self.library_view.model().db
except:
return False
string_pat = re.compile('(?u)\W|[_]')
def clean_string(x):
x = x.lower() if x else ''
@ -1299,26 +1309,19 @@ class DeviceMixin(object): # {{{
update_metadata = prefs['manage_device_metadata'] == 'on_connect'
get_covers = False
if update_metadata and self.device_manager.is_device_connected:
if self.device_manager.device.WANTS_UPDATED_THUMBNAILS:
get_covers = True
# Force a reset if the caches are not initialized
if reset or not hasattr(self, 'db_book_title_cache'):
# Build a cache (map) of the library, so the search isn't On**2
db_book_title_cache = {}
db_book_uuid_cache = {}
# It might be possible to get here without having initialized the
# library view. In this case, simply give up
try:
db = self.library_view.model().db
except:
return False
get_covers = False
if update_metadata and self.device_manager.is_device_connected:
if self.device_manager.device.WANTS_UPDATED_THUMBNAILS:
get_covers = True
for id in db.data.iterallids():
mi = db.get_metadata(id, index_is_id=True, get_cover=get_covers)
title = clean_string(mi.title)
for id_ in db.data.iterallids():
title = clean_string(db.title(id_, index_is_id=True))
if title not in db_book_title_cache:
db_book_title_cache[title] = \
{'authors':{}, 'author_sort':{}, 'db_ids':{}}
@ -1326,14 +1329,14 @@ class DeviceMixin(object): # {{{
# and author, then remember the last one. That is OK, because as
# we can't tell the difference between the books, one is as good
# as another.
if mi.authors:
authors = clean_string(authors_to_string(mi.authors))
db_book_title_cache[title]['authors'][authors] = mi
if mi.author_sort:
aus = clean_string(mi.author_sort)
db_book_title_cache[title]['author_sort'][aus] = mi
db_book_title_cache[title]['db_ids'][mi.application_id] = mi
db_book_uuid_cache[mi.uuid] = mi
authors = clean_string(db.authors(id_, index_is_id=True))
if authors:
db_book_title_cache[title]['authors'][authors] = id_
if db.author_sort(id_, index_is_id=True):
aus = clean_string(db.author_sort(id_, index_is_id=True))
db_book_title_cache[title]['author_sort'][aus] = id_
db_book_title_cache[title]['db_ids'][id_] = id_
db_book_uuid_cache[db.uuid(id_, index_is_id=True)] = id_
self.db_book_title_cache = db_book_title_cache
self.db_book_uuid_cache = db_book_uuid_cache
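The rewritten loop caches only cleaned strings and database ids instead of full Metadata objects, deferring the expensive db.get_metadata() call until a book actually matches. Condensed, the cache builder looks roughly like this (field accessors as used above):

def build_caches(db, clean_string):
    title_cache, uuid_cache = {}, {}
    for id_ in db.data.iterallids():
        title = clean_string(db.title(id_, index_is_id=True))
        entry = title_cache.setdefault(title,
                {'authors': {}, 'author_sort': {}, 'db_ids': {}})
        authors = clean_string(db.authors(id_, index_is_id=True))
        if authors:
            entry['authors'][authors] = id_
        if db.author_sort(id_, index_is_id=True):
            aus = clean_string(db.author_sort(id_, index_is_id=True))
            entry['author_sort'][aus] = id_
        entry['db_ids'][id_] = id_
        uuid_cache[db.uuid(id_, index_is_id=True)] = id_
    return title_cache, uuid_cache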
@ -1341,19 +1344,22 @@ class DeviceMixin(object): # {{{
# in_library field. If the UUID matches a book in the library, then
# do not consider that book for other matching. In all cases set
# the application_id to the db_id of the matching book. This value
# will be used by books_on_device to indicate matches.
# will be used by books_on_device to indicate matches. While we are
# going by, update the metadata for a book if automatic management is on
for booklist in booklists:
for book in booklist:
book.in_library = None
if getattr(book, 'uuid', None) in self.db_book_uuid_cache:
id_ = db_book_uuid_cache[book.uuid]
if update_metadata:
book.smart_update(self.db_book_uuid_cache[book.uuid],
book.smart_update(db.get_metadata(id_,
index_is_id=True,
get_cover=get_covers),
replace_metadata=True)
book.in_library = 'UUID'
# ensure that the correct application_id is set
book.application_id = \
self.db_book_uuid_cache[book.uuid].application_id
book.application_id = id_
continue
# No UUID exact match. Try metadata matching.
book_title = clean_string(book.title)
@ -1363,21 +1369,25 @@ class DeviceMixin(object): # {{{
# will match if any of the db_id, author, or author_sort
# also match.
if getattr(book, 'application_id', None) in d['db_ids']:
# app_id already matches a db_id. No need to set it.
if update_metadata:
book.smart_update(d['db_ids'][book.application_id],
id_ = getattr(book, 'application_id', None)
book.smart_update(db.get_metadata(id_,
index_is_id=True,
get_cover=get_covers),
replace_metadata=True)
book.in_library = 'APP_ID'
# app_id already matches a db_id. No need to set it.
continue
# Sonys know their db_id independent of the application_id
# in the metadata cache. Check that as well.
if getattr(book, 'db_id', None) in d['db_ids']:
if update_metadata:
book.smart_update(d['db_ids'][book.db_id],
book.smart_update(db.get_metadata(book.db_id,
index_is_id=True,
get_cover=get_covers),
replace_metadata=True)
book.in_library = 'DB_ID'
book.application_id = \
d['db_ids'][book.db_id].application_id
book.application_id = book.db_id
continue
# We now know that the application_id is not right. Set it
# to None to prevent book_on_device from accidentally
@ -1389,19 +1399,23 @@ class DeviceMixin(object): # {{{
# either can appear as the author
book_authors = clean_string(authors_to_string(book.authors))
if book_authors in d['authors']:
id_ = d['authors'][book_authors]
if update_metadata:
book.smart_update(d['authors'][book_authors],
replace_metadata=True)
book.smart_update(db.get_metadata(id_,
index_is_id=True,
get_cover=get_covers),
replace_metadata=True)
book.in_library = 'AUTHOR'
book.application_id = \
d['authors'][book_authors].application_id
book.application_id = id_
elif book_authors in d['author_sort']:
id_ = d['author_sort'][book_authors]
if update_metadata:
book.smart_update(d['author_sort'][book_authors],
book.smart_update(db.get_metadata(id_,
index_is_id=True,
get_cover=get_covers),
replace_metadata=True)
book.in_library = 'AUTH_SORT'
book.application_id = \
d['author_sort'][book_authors].application_id
book.application_id = id_
else:
# Book definitely not matched. Clear its application ID
book.application_id = None

View File

@ -9,15 +9,16 @@ import textwrap
from PyQt4.Qt import QWidget, QListWidgetItem, Qt, QVariant, SIGNAL, \
QLabel, QLineEdit, QCheckBox
from calibre.gui2 import error_dialog
from calibre.gui2 import error_dialog, question_dialog
from calibre.gui2.device_drivers.configwidget_ui import Ui_ConfigWidget
from calibre.utils.formatter import validation_formatter
from calibre.ebooks import BOOK_EXTENSIONS
class ConfigWidget(QWidget, Ui_ConfigWidget):
def __init__(self, settings, all_formats, supports_subdirs,
must_read_metadata, supports_use_author_sort,
extra_customization_message):
extra_customization_message, device):
QWidget.__init__(self)
Ui_ConfigWidget.__init__(self)
@ -25,9 +26,15 @@ class ConfigWidget(QWidget, Ui_ConfigWidget):
self.settings = settings
all_formats = set(all_formats)
self.calibre_known_formats = device.FORMATS
self.device_name = device.get_gui_name()
if device.USER_CAN_ADD_NEW_FORMATS:
all_formats = set(all_formats) | set(BOOK_EXTENSIONS)
format_map = settings.format_map
disabled_formats = list(set(all_formats).difference(format_map))
for format in format_map + disabled_formats:
for format in format_map + list(sorted(disabled_formats)):
item = QListWidgetItem(format, self.columns)
item.setData(Qt.UserRole, QVariant(format))
item.setFlags(Qt.ItemIsEnabled|Qt.ItemIsUserCheckable|Qt.ItemIsSelectable)
@ -110,6 +117,18 @@ class ConfigWidget(QWidget, Ui_ConfigWidget):
return self.opt_use_author_sort.isChecked()
def validate(self):
formats = set(self.format_map())
extra = formats - set(self.calibre_known_formats)
if extra:
fmts = sorted([x.upper() for x in extra])
if not question_dialog(self, _('Unknown formats'),
_('You have enabled the <b>{0}</b> formats for'
' your {1}. The {1} may not support them.'
' If you send these formats to your {1} they '
'may not work. Are you sure?').format(
(', '.join(fmts)), self.device_name)):
return False
tmpl = unicode(self.opt_save_template.text())
try:
validation_formatter.validate(tmpl)

View File

@ -213,6 +213,8 @@ class CheckLibraryDialog(QDialog):
self.log = QTreeWidget(self)
self.log.itemChanged.connect(self.item_changed)
self.log.itemExpanded.connect(self.item_expanded_or_collapsed)
self.log.itemCollapsed.connect(self.item_expanded_or_collapsed)
self._layout.addWidget(self.log)
self.check_button = QPushButton(_('&Run the check again'))
@ -333,11 +335,15 @@ class CheckLibraryDialog(QDialog):
for check in CHECKS:
builder(t, checker, check)
t.setColumnWidth(0, 200)
t.setColumnWidth(1, 400)
t.resizeColumnToContents(0)
t.resizeColumnToContents(1)
self.delete_button.setEnabled(False)
self.text_results = '\n'.join(plaintext)
def item_expanded_or_collapsed(self, item):
self.log.resizeColumnToContents(0)
self.log.resizeColumnToContents(1)
def item_changed(self, item, column):
self.fix_button.setEnabled(False)
for it in self.top_level_items.values():

View File

@ -912,6 +912,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
def series_changed(self, *args):
self.write_series = True
self.autonumber_series.setEnabled(True)
def s_r_remove_query(self, *args):
if self.query_field.currentIndex() == 0:

View File

@ -303,6 +303,9 @@
<layout class="QHBoxLayout" name="HLayout_3">
<item>
<widget class="QCheckBox" name="autonumber_series">
<property name="enabled">
<bool>false</bool>
</property>
<property name="toolTip">
<string>If not checked, the series number for the books will be set to 1.
If checked, selected books will be automatically numbered, in the order
@ -1006,8 +1009,8 @@ not multiple and the destination field is multiple</string>
<rect>
<x>0</x>
<y>0</y>
<width>938</width>
<height>268</height>
<width>197</width>
<height>60</height>
</rect>
</property>
<layout class="QGridLayout" name="testgrid">

View File

@ -99,8 +99,8 @@ class TagListEditor(QDialog, Ui_TagListEditor):
return
self.available_tags.editItem(item)
def delete_tags(self, item=None):
deletes = self.available_tags.selectedItems() if item is None else [item]
def delete_tags(self):
deletes = self.available_tags.selectedItems()
if not deletes:
error_dialog(self, _('No items selected'),
_('You must select at least one item from the list.')).exec_()

View File

@ -120,11 +120,10 @@ class BooksModel(QAbstractTableModel): # {{{
def set_device_connected(self, is_connected):
self.device_connected = is_connected
self.refresh_ondevice()
def refresh_ondevice(self):
self.db.refresh_ondevice()
self.refresh() # does a resort()
self.resort()
self.research()
def set_book_on_device_func(self, func):
@ -685,7 +684,7 @@ class BooksModel(QAbstractTableModel): # {{{
self.dc[col] = functools.partial(bool_type, idx=idx)
self.dc_decorator[col] = functools.partial(
bool_type_decorator, idx=idx,
bool_cols_are_tristate=tweaks['bool_custom_columns_are_tristate'] == 'yes')
bool_cols_are_tristate=tweaks['bool_custom_columns_are_tristate'] != 'no')
elif datatype == 'rating':
self.dc[col] = functools.partial(rating_type, idx=idx)
elif datatype == 'series':
@ -826,7 +825,7 @@ class BooksModel(QAbstractTableModel): # {{{
return False
val = int(value.toInt()[0]) if column == 'rating' else \
value.toDate() if column in ('timestamp', 'pubdate') else \
unicode(value.toString())
unicode(value.toString()).strip()
id = self.db.id(row)
books_to_refresh = set([id])
if column == 'rating':

View File

@ -26,12 +26,19 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
r('limit_search_columns_to', prefs, setting=CommaSeparatedList)
fl = gui.library_view.model().db.field_metadata.get_search_terms()
self.opt_limit_search_columns_to.update_items_cache(fl)
self.clear_history_button.clicked.connect(self.clear_histories)
def refresh_gui(self, gui):
gui.search.search_as_you_type(config['search_as_you_type'])
gui.library_view.model().set_highlight_only(config['highlight_search_matches'])
gui.search.do_search()
def clear_histories(self, *args):
for key, val in config.defaults.iteritems():
if key.endswith('_search_history') and isinstance(val, list):
config[key] = []
self.gui.search.clear_history()
if __name__ == '__main__':
app = QApplication([])
test_widget('Interface', 'Search')

View File

@ -77,7 +77,7 @@
</layout>
</widget>
</item>
<item row="3" column="0">
<item row="4" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -90,13 +90,23 @@
</property>
</spacer>
</item>
<item row="3" column="0">
<widget class="QPushButton" name="clear_history_button">
<property name="toolTip">
<string>Clear search histories from all over calibre. Including the book list, e-book viewer, fetch news dialog, etc.</string>
</property>
<property name="text">
<string>Clear search &amp;histories</string>
</property>
</widget>
</item>
</layout>
</widget>
<customwidgets>
<customwidget>
<class>MultiCompleteLineEdit</class>
<extends>QLineEdit</extends>
<header>calibre/gui2.complete.h</header>
<header>calibre/gui2/complete.h</header>
</customwidget>
</customwidgets>
<resources/>

View File

@ -114,6 +114,9 @@ class SearchBox2(QComboBox): # {{{
def text(self):
return self.currentText()
def clear_history(self, *args):
QComboBox.clear(self)
def clear(self, emit_search=True):
self.normalize_state()
self.setEditText('')

View File

@ -116,7 +116,14 @@ class TagsView(QTreeView): # {{{
self.set_new_model(self._model.get_filter_categories_by())
def set_database(self, db, tag_match, sort_by):
self.hidden_categories = config['tag_browser_hidden_categories']
self.hidden_categories = db.prefs.get('tag_browser_hidden_categories', None)
# migrate from config to db prefs
if self.hidden_categories is None:
self.hidden_categories = config['tag_browser_hidden_categories']
db.prefs.set('tag_browser_hidden_categories', list(self.hidden_categories))
else:
self.hidden_categories = set(self.hidden_categories)
old = getattr(self, '_model', None)
if old is not None:
old.break_cycles()
@@ -234,7 +241,7 @@ class TagsView(QTreeView): # {{{
                 gprefs['tags_browser_partition_method'] = category
             elif action == 'defaults':
                 self.hidden_categories.clear()
-                config.set('tag_browser_hidden_categories', self.hidden_categories)
+                self.db.prefs.set('tag_browser_hidden_categories', list(self.hidden_categories))
             self.set_new_model()
         except:
             return
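
Both TagsView hunks move the hidden-categories setting from the global config into per-library db.prefs, with a one-time migration on first access. Note the list()/set() round-trip: the pref store persists JSON-friendly lists, while the in-memory value is a set for fast membership tests. A self-contained sketch of the idiom, using plain dicts in place of the real stores (hypothetical data):

    old_config = {'tag_browser_hidden_categories': ['news', 'rating']}
    prefs = {}  # stands in for the per-library db.prefs store

    hidden = prefs.get('tag_browser_hidden_categories', None)
    if hidden is None:
        # first run after the upgrade: seed the new store from the old config
        hidden = set(old_config['tag_browser_hidden_categories'])
        prefs['tag_browser_hidden_categories'] = list(hidden)  # JSON-serializable
    else:
        hidden = set(hidden)  # persisted as a list, used in memory as a set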

View File

@@ -17,16 +17,16 @@ from calibre.gui2.viewer.bookmarkmanager import BookmarkManager
 from calibre.gui2.widgets import ProgressIndicator
 from calibre.gui2.main_window import MainWindow
 from calibre.gui2 import Application, ORG_NAME, APP_UID, choose_files, \
-    info_dialog, error_dialog, open_url, available_height
+    info_dialog, error_dialog, open_url, available_height, gprefs
 from calibre.ebooks.oeb.iterator import EbookIterator
 from calibre.ebooks import DRMError
-from calibre.constants import islinux, isfreebsd, isosx
+from calibre.constants import islinux, isfreebsd, isosx, filesystem_encoding
 from calibre.utils.config import Config, StringConfig, dynamic
 from calibre.gui2.search_box import SearchBox2
 from calibre.ebooks.metadata import MetaInformation
 from calibre.customize.ui import available_input_formats
 from calibre.gui2.viewer.dictionary import Lookup
-from calibre import as_unicode
+from calibre import as_unicode, force_unicode, isbytestring
 
 
 class TOCItem(QStandardItem):
@@ -160,6 +160,12 @@ class HelpfulLineEdit(QLineEdit):
         self.setPalette(self.gray)
         self.setText(self.HELP_TEXT)
 
+class RecentAction(QAction):
+
+    def __init__(self, path, parent):
+        self.path = path
+        QAction.__init__(self, os.path.basename(path), parent)
+
 class EbookViewer(MainWindow, Ui_EbookViewer):
 
     STATE_VERSION = 1
@@ -284,8 +290,26 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
         ca = self.view.copy_action
         ca.setShortcut(QKeySequence.Copy)
         self.addAction(ca)
+        self.open_history_menu = QMenu()
+        self.build_recent_menu()
+        self.action_open_ebook.setMenu(self.open_history_menu)
+        self.open_history_menu.triggered[QAction].connect(self.open_recent)
+        w = self.tool_bar.widgetForAction(self.action_open_ebook)
+        w.setPopupMode(QToolButton.MenuButtonPopup)
+
         self.restore_state()
 
+    def build_recent_menu(self):
+        m = self.open_history_menu
+        m.clear()
+        count = 0
+        for path in gprefs.get('viewer_open_history', []):
+            if count > 9:
+                break
+            if os.path.exists(path):
+                m.addAction(RecentAction(path, m))
+                count += 1
+
     def closeEvent(self, e):
         self.save_state()
         return MainWindow.closeEvent(self, e)
@@ -425,6 +449,9 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
         if files:
             self.load_ebook(files[0])
 
+    def open_recent(self, action):
+        self.load_ebook(action.path)
+
     def font_size_larger(self, checked):
         frac = self.view.magnify_fonts()
         self.action_font_size_larger.setEnabled(self.view.multiplier() < 3)
@@ -647,6 +674,17 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
             self.action_table_of_contents.setChecked(True)
         else:
             self.action_table_of_contents.setChecked(False)
+        if isbytestring(pathtoebook):
+            pathtoebook = force_unicode(pathtoebook, filesystem_encoding)
+        vh = gprefs.get('viewer_open_history', [])
+        try:
+            vh.remove(pathtoebook)
+        except:
+            pass
+        vh.insert(0, pathtoebook)
+        gprefs.set('viewer_open_history', vh[:50])
+        self.build_recent_menu()
+
         self.action_table_of_contents.setDisabled(not self.iterator.toc)
         self.current_book_has_toc = bool(self.iterator.toc)
         self.current_title = title
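
Taken together, the viewer hunks add a recent-files menu: load_ebook() normalizes the path to unicode, maintains a capped most-recently-used list in gprefs, and build_recent_menu() shows at most the ten most recent paths that still exist on disk. A standalone sketch of the MRU update, assuming prefs behaves like a persistent dict (the helper name is hypothetical):

    def remember(path, prefs, keep=50):
        history = prefs.get('viewer_open_history', [])
        try:
            history.remove(path)  # de-duplicate: an existing entry moves to the front
        except ValueError:
            pass
        history.insert(0, path)
        prefs['viewer_open_history'] = history[:keep]  # cap what is persisted

    prefs = {}
    remember(u'/tmp/a.epub', prefs)
    remember(u'/tmp/b.epub', prefs)
    remember(u'/tmp/a.epub', prefs)
    print prefs['viewer_open_history']  # [u'/tmp/a.epub', u'/tmp/b.epub']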

View File

@@ -528,7 +528,7 @@ class ResultCache(SearchQueryParser): # {{{
                 location[i] = db_col[loc]
 
         # get the tweak here so that the string lookup and compare aren't in the loop
-        bools_are_tristate = tweaks['bool_custom_columns_are_tristate'] == 'yes'
+        bools_are_tristate = tweaks['bool_custom_columns_are_tristate'] != 'no'
 
         for loc in location: # location is now an array of field indices
             if loc == db_col['authors']:
@@ -812,7 +812,10 @@ class SortKeyGenerator(object):
                     val = self.string_sort_key(val)
 
                 elif dt == 'bool':
-                    val = {True: 1, False: 2, None: 3}.get(val, 3)
+                    if tweaks['bool_custom_columns_are_tristate'] == 'no':
+                        val = {True: 1, False: 2, None: 2}.get(val, 2)
+                    else:
+                        val = {True: 1, False: 2, None: 3}.get(val, 3)
 
                 yield val
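
With tristate disabled, the sort key for an unset bool (None) collapses onto the key for False, so "unset" and "no" sort together instead of "unset" always sorting last. A small standalone illustration of the two mappings:

    tristate = {True: 1, False: 2, None: 3}
    binary = {True: 1, False: 2, None: 2}

    vals = [None, True, False]
    print sorted(vals, key=tristate.get)  # [True, False, None] - unset sorts last
    print sorted(vals, key=binary.get)    # [True, None, False] - unset ties with False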

View File

@@ -12,6 +12,8 @@ def clean_ascii_chars(txt, charlist=None):
     Remove ASCII control chars: 0 to 8 and 11, 12, 14-31 by default
     This is all control chars except \\t,\\n and \\r
     '''
+    if not txt:
+        return ''
     global _ascii_pat
     if _ascii_pat is None:
         chars = list(range(8)) + [0x0B, 0x0C] + list(range(0x0E, 0x1F))
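
The added guard short-circuits empty input: passing None previously fell through to the regex substitution and raised a TypeError. A standalone sketch of the guarded behaviour (the character class here approximates the pattern built above; it is not the exact calibre pattern):

    import re
    pat = re.compile(u'[\x00-\x08\x0b\x0c\x0e-\x1f]')

    def clean(txt):
        if not txt:
            return ''              # None and '' return early
        return pat.sub('', txt)

    print repr(clean(None))          # '' rather than a TypeError
    print repr(clean(u'a\x00b\tc'))  # u'ab\tc' - tab survives, NUL is stripped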

View File

@@ -6,12 +6,12 @@ meaning as possible.
 import os
 from math import ceil
 
-from calibre.ebooks.unidecode.unidecoder import Unidecoder
 from calibre import sanitize_file_name
 from calibre.constants import preferred_encoding, iswindows
+from calibre.utils.localization import get_udc
 
-udc = Unidecoder()
 def ascii_text(orig):
+    udc = get_udc()
     try:
         ascii = udc.decode(orig)
     except:
View File

@@ -169,3 +169,13 @@ def set_qt_translator(translator):
             return translator.load(p)
     return False
 
+
+_udc = None
+
+def get_udc():
+    global _udc
+    if _udc is None:
+        from calibre.ebooks.unihandecode import Unihandecoder
+        _udc = Unihandecoder(lang=get_lang())
+    return _udc
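
These last two hunks replace the module-level Unidecoder instance in filenames.py with a lazily built, language-aware Unihandecoder shared through get_udc(): the import happens inside the function, so the transliteration tables load on first use rather than at program startup, and every caller shares one instance. The same lazy-singleton idiom in a self-contained generic sketch (the class here is a hypothetical stand-in; calibre additionally defers the import of Unihandecoder to the first call):

    _instance = None

    class HeavyObject(object):      # stand-in for an expensive-to-build object
        def __init__(self):
            print 'loading large tables...'

    def get_instance():
        global _instance
        if _instance is None:
            _instance = HeavyObject()  # built once, on first call
        return _instance

    print get_instance() is get_instance()  # True: constructed only once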