merge from trunk

This commit is contained in:
ldolse 2011-02-16 09:37:24 +08:00
commit 3d20aadbd3
75 changed files with 148407 additions and 4075 deletions

View File

@ -193,6 +193,33 @@ License: GPL-3
The full text of the GPL is distributed as in The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL-3 on Debian systems. /usr/share/common-licenses/GPL-3 on Debian systems.
Files: src/calibre/ebooks/unihandecode/pykakasi/*
Copyright: 2011, Hiroshi Miura <miurahr@linux.com>
Copyright: 1992, Hironobu Takahashi
License: GPL-2+
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL on Debian systems.
Files: resources/kanwadict2.db
Files: resources/itaijidict2.pickle
Copyright: 2011, Hiroshi Miura <miurahr@linux.com>
Copyright: 1992 1993 1994, Hironobu Takahashi (takahasi@tiny.or.jp),
Copyright: 1992 1993 1994, Masahiko Sato (masahiko@sato.riec.tohoku.ac.jp),
Copyright: 1992 1993 1994, Yukiyoshi Kameyama, Miki Inooka, Akihiko Sasaki, Dai Ando, Junichi Okukawa,
Copyright: 1992 1993 1994, Katsushi Sato and Nobuhiro Yamagishi
License: GPL-2+
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL on Debian systems.
Files: src/calibre/ebooks/unihandecode/*
Copyright: 2010-2011, Hiroshi Miura <miurahr@linux.com>
Copyright: 2009, John Schember
Copyright: 2007, Russell Norris
Copyright: 2001, Sean M. Burke
License: GPL-3, Perl
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL-3 on Debian systems.
Files: src/encutils/__init__.py Files: src/encutils/__init__.py
Copyright: 2005-2008: Christof Hoeke Copyright: 2005-2008: Christof Hoeke
License: LGPL-3+, CC-BY-3.0 License: LGPL-3+, CC-BY-3.0

View File

@ -13,12 +13,12 @@
id="Layer_1" id="Layer_1"
x="0px" x="0px"
y="0px" y="0px"
width="134.77701" width="200"
height="199.99901" height="200"
viewBox="0 0 134.777 199.999" viewBox="0 0 199.99999 199.99999"
enable-background="new 0 0 595.28 841.89" enable-background="new 0 0 595.28 841.89"
xml:space="preserve" xml:space="preserve"
inkscape:version="0.47 r22583" inkscape:version="0.48.0 r9654"
sodipodi:docname="news.svg"><metadata sodipodi:docname="news.svg"><metadata
id="metadata26"><rdf:RDF><cc:Work id="metadata26"><rdf:RDF><cc:Work
rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type
@ -38,22 +38,22 @@
guidetolerance="10" guidetolerance="10"
inkscape:pageopacity="0" inkscape:pageopacity="0"
inkscape:pageshadow="2" inkscape:pageshadow="2"
inkscape:window-width="640" inkscape:window-width="1680"
inkscape:window-height="489" inkscape:window-height="997"
id="namedview22" id="namedview22"
showgrid="false" showgrid="false"
inkscape:zoom="0.28032165" inkscape:zoom="0.28032165"
inkscape:cx="67.389001" inkscape:cx="67.389001"
inkscape:cy="99.722002" inkscape:cy="99.722002"
inkscape:window-x="0" inkscape:window-x="-4"
inkscape:window-y="41" inkscape:window-y="30"
inkscape:window-maximized="0" inkscape:window-maximized="1"
inkscape:current-layer="Layer_1" /> inkscape:current-layer="Layer_1" />
<g <g
id="g3" id="g3"
transform="translate(-230.25101,-320.668)"> transform="translate(-194.57771,-320.66701)">
<polygon <polygon
points="360.241,366.109 345.29,359.678 345.29,343.405 329.945,343.405 324.265,329.15 309.147,335.175 297.64,323.667 286.79,334.517 272.693,328.454 266.263,343.405 249.988,343.405 249.988,358.749 235.734,364.429 241.759,379.548 230.251,391.056 241.101,401.906 235.039,416.002 249.988,422.432 249.988,438.706 265.333,438.706 271.013,452.961 277.817,450.25 277.817,475.111 252.085,475.111 297.64,520.667 343.193,475.111 317.463,475.111 317.463,451.453 322.585,453.656 329.016,438.706 345.29,438.706 345.29,423.362 359.546,417.682 353.521,402.563 365.028,391.056 354.178,380.205 " points="286.79,334.517 272.693,328.454 266.263,343.405 249.988,343.405 249.988,358.749 235.734,364.429 241.759,379.548 230.251,391.056 241.101,401.906 235.039,416.002 249.988,422.432 249.988,438.706 265.333,438.706 271.013,452.961 277.817,450.25 277.817,475.111 252.085,475.111 297.64,520.667 343.193,475.111 317.463,475.111 317.463,451.453 322.585,453.656 329.016,438.706 345.29,438.706 345.29,423.362 359.546,417.682 353.521,402.563 365.028,391.056 354.178,380.205 360.241,366.109 345.29,359.678 345.29,343.405 329.945,343.405 324.265,329.15 309.147,335.175 297.64,323.667 "
id="polygon5" id="polygon5"
style="fill:#ffffff" /> style="fill:#ffffff" />
<linearGradient <linearGradient
@ -73,7 +73,7 @@
id="stop10" /> id="stop10" />
</linearGradient> </linearGradient>
<polygon <polygon
points="360.241,363.11 345.29,356.679 345.29,340.406 329.945,340.406 324.265,326.151 309.147,332.176 297.64,320.668 286.79,331.518 272.693,325.455 266.263,340.406 249.988,340.406 249.988,355.75 235.734,361.43 241.759,376.549 230.251,388.057 241.101,398.907 235.039,413.003 249.988,419.433 249.988,435.707 265.333,435.707 271.013,449.962 277.817,447.251 277.817,472.112 252.085,472.112 297.64,517.668 343.193,472.112 317.463,472.112 317.463,448.454 322.585,450.657 329.016,435.707 345.29,435.707 345.29,420.363 359.546,414.683 353.521,399.564 365.028,388.057 354.178,377.206 " points="286.79,331.518 272.693,325.455 266.263,340.406 249.988,340.406 249.988,355.75 235.734,361.43 241.759,376.549 230.251,388.057 241.101,398.907 235.039,413.003 249.988,419.433 249.988,435.707 265.333,435.707 271.013,449.962 277.817,447.251 277.817,472.112 252.085,472.112 297.64,517.668 343.193,472.112 317.463,472.112 317.463,448.454 322.585,450.657 329.016,435.707 345.29,435.707 345.29,420.363 359.546,414.683 353.521,399.564 365.028,388.057 354.178,377.206 360.241,363.11 345.29,356.679 345.29,340.406 329.945,340.406 324.265,326.151 309.147,332.176 297.64,320.668 "
id="polygon12" id="polygon12"
style="fill:url(#SVGID_1_)" /> style="fill:url(#SVGID_1_)" />
<g <g
@ -81,14 +81,16 @@
<path <path
d="m 273.311,419.168 v -56.752 h 17.935 l 9.01,17.43 c 3.115,5.641 6.399,13.22 8.926,19.873 h 0.252 c -0.842,-7.494 -1.178,-15.41 -1.178,-23.83 v -13.472 h 13.893 v 56.752 H 306.15 l -9.684,-18.861 c -3.116,-5.978 -6.82,-13.641 -9.515,-20.461 h -0.336 c 0.42,7.663 0.589,16.167 0.589,25.345 v 13.978 h -13.893 z" d="m 273.311,419.168 v -56.752 h 17.935 l 9.01,17.43 c 3.115,5.641 6.399,13.22 8.926,19.873 h 0.252 c -0.842,-7.494 -1.178,-15.41 -1.178,-23.83 v -13.472 h 13.893 v 56.752 H 306.15 l -9.684,-18.861 c -3.116,-5.978 -6.82,-13.641 -9.515,-20.461 h -0.336 c 0.42,7.663 0.589,16.167 0.589,25.345 v 13.978 h -13.893 z"
id="path16" id="path16"
style="fill:#993720" /> style="fill:#993720"
inkscape:connector-curvature="0" />
</g> </g>
<g <g
id="g18"> id="g18">
<path <path
d="m 273.311,416.873 v -56.752 h 17.935 l 9.01,17.43 c 3.115,5.641 6.399,13.22 8.926,19.873 h 0.252 c -0.842,-7.494 -1.178,-15.41 -1.178,-23.83 v -13.472 h 13.893 v 56.752 H 306.15 l -9.684,-18.861 c -3.116,-5.978 -6.82,-13.641 -9.515,-20.461 h -0.336 c 0.42,7.663 0.589,16.167 0.589,25.345 v 13.978 h -13.893 z" d="m 273.311,416.873 v -56.752 h 17.935 l 9.01,17.43 c 3.115,5.641 6.399,13.22 8.926,19.873 h 0.252 c -0.842,-7.494 -1.178,-15.41 -1.178,-23.83 v -13.472 h 13.893 v 56.752 H 306.15 l -9.684,-18.861 c -3.116,-5.978 -6.82,-13.641 -9.515,-20.461 h -0.336 c 0.42,7.663 0.589,16.167 0.589,25.345 v 13.978 h -13.893 z"
id="path20" id="path20"
style="fill:#f0efef" /> style="fill:#f0efef"
inkscape:connector-curvature="0" />
</g> </g>
</g> </g>
</svg> </svg>


View File

@ -105,7 +105,7 @@ bool_custom_columns_are_tristate = 'yes'
# title within authors. # title within authors.
sort_columns_at_startup = None sort_columns_at_startup = None
#; Control how dates are displayed #: Control how dates are displayed
# Format to be used for publication date and the timestamp (date). # Format to be used for publication date and the timestamp (date).
# A string controlling how the publication date is displayed in the GUI # A string controlling how the publication date is displayed in the GUI
# d the day as number without a leading zero (1 to 31) # d the day as number without a leading zero (1 to 31)
@ -245,19 +245,6 @@ sony_collection_name_template='{value}{category:| (|)}'
sony_collection_sorting_rules = [] sony_collection_sorting_rules = []
#: Create search terms to apply a query across several built-in search terms.
# Syntax: {'new term':['existing term 1', 'term 2', ...], 'new':['old'...] ...}
# Example: create the term 'myseries' that when used as myseries:foo would
# search all of the search categories 'series', '#myseries', and '#myseries2':
# grouped_search_terms={'myseries':['series','#myseries', '#myseries2']}
# Example: two search terms 'a' and 'b' both that search 'tags' and '#mytags':
# grouped_search_terms={'a':['tags','#mytags'], 'b':['tags','#mytags']}
# Note: You cannot create a search term that is a duplicate of an existing term.
# Such duplicates will be silently ignored. Also note that search terms ignore
# case. 'MySearch' and 'mysearch' are the same term.
grouped_search_terms = {}
#: Control how tags are applied when copying books to another library #: Control how tags are applied when copying books to another library
# Set this to True to ensure that tags in 'Tags to add when adding # Set this to True to ensure that tags in 'Tags to add when adding
# a book' are added when copying books to another library # a book' are added when copying books to another library

Binary file not shown.


Binary file not shown.


View File

@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
adevarul.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Adevarul(BasicNewsRecipe):
title = u'Adev\u0103rul'
language = 'ro'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'Adevarul'
category = 'Ziare,Stiri,Romania'
oldest_article = 5
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
remove_javascript = True
cover_url = 'http://upload.wikimedia.org/wikipedia/en/d/d6/Logo_noul_adevarul.png'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [ dict(name='div', attrs={'class':'article_header'})
,dict(name='div', attrs={'class':'bd'})
]
remove_tags = [ dict(name='div', attrs={'class':'bb-wg-article_related_attachements'})
,dict(name='div', attrs={'class':'bb-md bb-md-article_comments'})
,dict(name='form', attrs={'id':'bb-comment-create-form'})
]
remove_tags_after = [ dict(name='form', attrs={'id':'bb-comment-create-form'}) ]
feeds = [ (u'\u0218tiri', u'http://www.adevarul.ro/rss/latest') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)
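A recipe like this can be test-built from the command line before it is added to calibre; assuming it is saved as adevarul.recipe, something like:

    ebook-convert adevarul.recipe adevarul.epub --test

The --test flag fetches only a couple of articles per feed, which keeps the edit/run cycle short.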

resources/recipes/aprospect.recipe Executable file → Normal file
View File

View File

@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
capital.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Capital(BasicNewsRecipe):
title = 'Capital'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
remove_javascript = True
publisher = 'Capital'
cover_url = 'http://www.mediapress.ro/imagini/sigla-capital-s16.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [ dict(name='div', attrs={'class':'single one_article'})
]
remove_tags = [ dict(name='div', attrs={'class':'single_details'})
, dict(name='div', attrs={'class':'tx-addoceansbanners-pi1'})
]
feeds = [(u'\u0218tiri', u'http://www.capital.ro/rss.html') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
catavencu.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Catavencu(BasicNewsRecipe):
title = u'Academia Ca\u0163avencu'
__author__ = u'Silviu Cotoar\u0103'
description = 'Tagma cum laude'
publisher = 'Catavencu'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare'
encoding = 'utf-8'
cover_url = 'http://upload.wikimedia.org/wikipedia/en/1/1e/Academia_Catavencu.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='ul', attrs={'class':'articles'})
]
remove_tags = [
dict(name='div', attrs={'class':['tools']})
, dict(name='div', attrs={'class':['share']})
, dict(name='div', attrs={'class':['category']})
, dict(name='div', attrs={'id':['comments']})
]
remove_tags_after = [
dict(name='div', attrs={'id':'comments'})
]
feeds = [
(u'Feeds', u'http://catavencu.ro/feed/rss')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
standaard.be standaard.be
''' '''
@ -9,14 +9,15 @@ from calibre.web.feeds.news import BasicNewsRecipe
class DeStandaard(BasicNewsRecipe): class DeStandaard(BasicNewsRecipe):
title = u'De Standaard' title = u'De Standaard'
__author__ = u'Darko Miletic' __author__ = u'Darko Miletic'
language = 'nl_BE' language = 'nl_BE'
description = u'News from Belgium in Dutch' description = u'News from Belgium in Dutch'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf-8' encoding = 'utf-8'
masthead_url = 'http://www.standaard.be/extra/css/images/masthead/logo_340x45.png'
publication_type = 'newspaper'
keep_only_tags = [dict(name='div' , attrs={'id':['intro','continued']})] keep_only_tags = [dict(name='div' , attrs={'id':['intro','continued']})]
@ -27,4 +28,4 @@ class DeStandaard(BasicNewsRecipe):
return article.get('guid', None) return article.get('guid', None)
def print_version(self, url): def print_version(self, url):
return url.replace('/Detail.aspx?','/PrintArtikel.aspx?') return url.replace('/artikel/detail.aspx?','/Artikel/PrintArtikel.aspx?')

View File

@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
gandul.info
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Gandul(BasicNewsRecipe):
title = u'G\u00E2ndul'
__author__ = u'Silviu Cotoar\u0103'
publisher = 'Gandul'
description = 'Cotidian Online'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/1064063/1/logo.jpg?width=400'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'article'})
]
remove_tags = [
dict(name='a', attrs={'class':'photo'})
, dict(name='div', attrs={'class':'ad'})
]
feeds = [
(u'\u0218tiri', u'http://www.gandul.info/rss-stiri-prima-pagina.xml')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
hotnews.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Hotnews(BasicNewsRecipe):
title = 'Hotnews'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'Hotnews'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://www.hotnews.ro/images/new/logo.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='h1', attrs={'class':'title'})
,dict(name='div', attrs={'id':'articleContent'})
]
feeds = [ (u'\u0218tiri', u'http://www.hotnews.ro/rss/actualitate')
,(u'English', u'http://www.hotnews.ro/rss/english')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
jurnalul.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class JurnalulNational(BasicNewsRecipe):
title = u'Jurnalul Na\u0163ional'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'Jurnalul National'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://www.jurnalul.ro/images/sigla.png'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='h1', attrs={'class':'h3 art_title'})
,dict(name='div', attrs={'class':'only_text'})
]
feeds = [
(u'\u0218tiri', u'http://www.jurnalul.ro/rss/stiri-3028.html')
,(u'Special', u'http://www.jurnalul.ro/rss/special-3001.html')
,(u'Sport', u'http://www.jurnalul.ro/rss/sport-3035.html')
,(u'Bani Afaceri', u'http://www.jurnalul.ro/rss/bani-afaceri-3006.html')
,(u'Viata Sanatoasa', u'http://www.jurnalul.ro/rss/viata-sanatoasa-3010.html')
,(u'Stiinta Tehnica', u'http://www.jurnalul.ro/rss/stiinta-tehnica-3019.html')
,(u'Timp Liber', u'http://www.jurnalul.ro/rss/timp-liber-3022.html')
,(u'Fun', u'http://www.jurnalul.ro/rss/fun-3038.html')
,(u'Acum 20 de ani', u'http://www.jurnalul.ro/rss/acum-20-de-ani-3073.html')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -8,7 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Lifehacker(BasicNewsRecipe): class Lifehacker(BasicNewsRecipe):
title = 'Lifehacker' title = 'Lifehacker'
__author__ = 'NA' __author__ = 'Kovid Goyal'
description = "Computers make us more productive. Yeah, right. Lifehacker recommends the software downloads and web sites that actually save time. Don't live to geek; geek to live." description = "Computers make us more productive. Yeah, right. Lifehacker recommends the software downloads and web sites that actually save time. Don't live to geek; geek to live."
publisher = 'lifehacker.com' publisher = 'lifehacker.com'
category = 'news, IT, Internet, gadgets, tips and tricks, howto, diy' category = 'news, IT, Internet, gadgets, tips and tricks, howto, diy'
@ -32,14 +32,20 @@ class Lifehacker(BasicNewsRecipe):
, 'language' : language , 'language' : language
} }
remove_attributes = ['width','height'] remove_attributes = ['width', 'height', 'style']
keep_only_tags = [dict(attrs={'class':'content permalink'})]
remove_tags_before = dict(name='h1') remove_tags_before = dict(name='h1')
remove_tags = [dict(attrs={'class':'contactinfo'})] keep_only_tags = [dict(id='container')]
remove_tags_after = dict(attrs={'class':'contactinfo'}) remove_tags_after = dict(attrs={'class':'post-body'})
remove_tags = [
dict(id="sharemenu"),
{'class': 'related'},
]
feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/full')] feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/full')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
return self.adeify_images(soup) return self.adeify_images(soup)
def print_version(self, url):
return url.replace('#!', '?_escaped_fragment_=')
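Gawker sites served articles behind hashbang (#!) URLs at the time; the print_version() above rewrites them into Google's AJAX-crawling form, for which the server returns static HTML. Illustratively (the article id is made up):

    http://lifehacker.com/#!5753395/some-post
      -> http://lifehacker.com/?_escaped_fragment_=5753395/some-post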

View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
mediafax.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Mediafax(BasicNewsRecipe):
title = 'Mediafax'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'Mediafax'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/4134575/2/logo-mediafax-mass-media-news.jpg?width=400'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'news tabs-container'})
]
remove_tags = [
dict(name='ul', attrs={'class':['CategoryNews']})
,dict(name='div', attrs={'class':['read']})
]
remove_tags_after = [ dict(name='div', attrs={'class':'cmsItemViews'}) ]
feeds = [
(u'Actualitate', u'http://www.mediafax.ro/rss/')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
money.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MoneyRo(BasicNewsRecipe):
title = 'Money Ro'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Rom\u00e2nia'
publisher = 'MoneyRo'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
remove_javascript = True
cover_url = 'http://assets.moneyweb.ro/images/logo_money.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [ dict(name='div', attrs={'id':'titluArticol'})
, dict(name='img', attrs={'id':'objImage'})
, dict(name='div', attrs={'class':'leftColumnArticle'})
]
remove_tags_after = [ dict(name='div', attrs={'id':'articleTags'}) ]
remove_tags = [ dict(name='div', attrs={'id':'ads'})
, dict(name='div', attrs={'id':'aus'})
, dict(name='div', attrs={'id':'bb-comment-create-form'})
, dict(name='div', attrs={'id':'articleTags'})
, dict(name='div', attrs={'class':'breadcrumb'})
]
feeds = [(u'\u0218tiri', u'http://moneyro.feedsportal.com/c/32533/fe.ed/rss.money.ro/stiri.xml') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -38,7 +38,7 @@ class Pagina12(BasicNewsRecipe):
keep_only_tags = [dict(attrs={'class':'uitstekendekeus'})] keep_only_tags = [dict(attrs={'class':'uitstekendekeus'})]
remove_tags = [ remove_tags = [
dict(name=['meta','base','link','object','embed']) dict(name=['meta','base','link','object','embed'])
,dict(attrs={'class':['reclamespace','tags-and-sharing']}) ,dict(attrs={'class':['reclamespace','tags-and-sharing','sharing-is-caring']})
] ]
remove_attributes=['lang'] remove_attributes=['lang']

View File

@ -668,7 +668,7 @@ class NYTimes(BasicNewsRecipe):
try: try:
#remove "Related content" bar #remove "Related content" bar
runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ']}) runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ','articleInline runaroundLeft lastArticleInline']})
if runAroundsFound: if runAroundsFound:
for runAround in runAroundsFound: for runAround in runAroundsFound:
#find all section headers #find all section headers

View File

@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
prosport.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Prosport(BasicNewsRecipe):
title = 'Prosport'
__author__ = u'Silviu Cotoar\u0103'
publisher = 'Prosport'
description = u'\u0218tiri Sportive din Rom\u00e2nia'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania,Sport'
encoding = 'utf-8'
cover_url = 'http://storage0.dms.mpinteractiv.ro/media/401/581/7946/3688311/1/logo-pro.jpg?width=610'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='h1', attrs={'class':'a-title'})
,dict(name='div', attrs={'class':'a-entry'})
]
remove_tags = [ dict(name='div', attrs={'class':'utils'})
,dict(name='div', attrs={'class':'g-slide'})
]
feeds = [ (u'\u0218tiri', u'http://www.prosport.ro/rss.xml')]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
realitatea.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Realitatea(BasicNewsRecipe):
title = 'Realitatea'
__author__ = u'Silviu Cotoar\u0103'
publisher = 'Realitatea'
description = u'\u0218tiri din Rom\u00e2nia'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://assets.realitatea.ro/images/logo.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'articleTitle '})
,dict(name='div', attrs={'class':'articleBody'})
]
remove_tags = [ dict(name='div', attrs={'id':'aus'}) ]
feeds = [ (u'\u0218tiri', u'http://realitatea.feedsportal.com/c/32533/fe.ed/rss.realitatea.net/stiri.xml') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
smh.com.au smh.com.au
''' '''
@ -22,7 +22,11 @@ class Smh_au(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
masthead_url = 'http://images.smh.com.au/2010/02/02/1087188/smh-620.jpg' masthead_url = 'http://images.smh.com.au/2010/02/02/1087188/smh-620.jpg'
publication_type = 'newspaper' publication_type = 'newspaper'
extra_css = ' h1{font-family: Georgia,"Times New Roman",Times,serif } body{font-family: Arial,Helvetica,sans-serif} .cT-imageLandscape{font-size: x-small} ' extra_css = """
h1{font-family: Georgia,"Times New Roman",Times,serif }
body{font-family: Arial,Helvetica,sans-serif}
.cT-imageLandscape,.cT-imagePortrait{font-size: x-small}
"""
conversion_options = { conversion_options = {
'comment' : description 'comment' : description
@ -38,7 +42,11 @@ class Smh_au(BasicNewsRecipe):
] ]
remove_tags_after = [dict(name='div',attrs={'class':'articleBody'})] remove_tags_after = [dict(name='div',attrs={'class':'articleBody'})]
keep_only_tags = [dict(name='div',attrs={'id':'content'})] keep_only_tags = [dict(name='div',attrs={'id':'content'})]
remove_attributes = ['width','height'] remove_tags = [
dict(attrs={'class':'hidden'}),
dict(name=['link','meta','base','embed','object','iframe'])
]
remove_attributes = ['width','height','lang']
def parse_index(self): def parse_index(self):
articles = [] articles = []
@ -66,3 +74,14 @@ class Smh_au(BasicNewsRecipe):
,'description':description ,'description':description
}) })
return [(self.tag_to_string(soup.find('title')), articles)] return [(self.tag_to_string(soup.find('title')), articles)]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('body'):
item.name = 'div'
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup

View File

@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
standard.money.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class StandardMoneyRo(BasicNewsRecipe):
title = 'Standard Money Ro'
__author__ = u'Silviu Cotoar\u0103'
publisher = 'Standard Money'
description = 'Portal de Business'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://assets.standard.ro/wp-content/themes/standard/images/standard-logo.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='h1', attrs={'class':'post-title'})
, dict(name='div', attrs={'class':'content_post'})
]
feeds = [
(u'Actualitate', u'http://standard.money.ro/feed')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
zf.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ZiarulFinanciar(BasicNewsRecipe):
title = 'Ziarul Financiar'
__author__ = u'Silviu Cotoar\u0103'
description = u'\u0218tiri din Business'
publisher = 'Ziarul Financiar'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Stiri,Romania'
encoding = 'utf-8'
cover_url = 'http://storage0.dms.mpinteractiv.ro/media/1/1/1706/7462721/1/ziarul-financiar-big.jpg?width=400'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'class':'article'})
]
feeds = [
(u'\u0218tiri', u'http://www.zf.ro/rss/zf-24/')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -6,9 +6,10 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, cPickle import os, cPickle, re, anydbm, shutil
from zlib import compress
from setup import Command, basenames from setup import Command, basenames, __appname__
def get_opts_from_parser(parser): def get_opts_from_parser(parser):
def do_opt(opt): def do_opt(opt):
@ -26,6 +27,9 @@ class Resources(Command):
description = 'Compile various needed calibre resources' description = 'Compile various needed calibre resources'
KAKASI_PATH = os.path.join(Command.SRC, __appname__,
'ebooks', 'unihandecode', 'pykakasi')
def run(self, opts): def run(self, opts):
scripts = {} scripts = {}
for x in ('console', 'gui'): for x in ('console', 'gui'):
@ -101,11 +105,107 @@ class Resources(Command):
import json import json
json.dump(function_dict, open(dest, 'wb'), indent=4) json.dump(function_dict, open(dest, 'wb'), indent=4)
self.run_kakasi(opts)
def run_kakasi(self, opts):
self.records = {}
src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanwadict2.db')
base = os.path.dirname(dest)
if not os.path.exists(base):
os.makedirs(base)
if self.newer(dest, src):
self.info('\tGenerating Kanwadict')
for line in open(src, "r"):
self.parsekdict(line)
self.kanwaout(dest)
src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','itaijidict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating Itaijidict')
self.mkitaiji(src, dest)
src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanadict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating kanadict')
self.mkkanadict(src, dest)
return
def mkitaiji(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
dic[pair[0]] = pair[1]
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def mkkanadict(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
(alpha, kana) = line.split(' ')
dic[kana] = alpha
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def parsekdict(self, line):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
return
(yomi, kanji) = line.split(' ')
if ord(yomi[-1:]) <= ord('z'):
tail = yomi[-1:]
yomi = yomi[:-1]
else:
tail = ''
self.updaterec(kanji, yomi, tail)
def updaterec(self, kanji, yomi, tail):
key = "%04x"%ord(kanji[0])
if key in self.records:
if kanji in self.records[key]:
rec = self.records[key][kanji]
rec.append((yomi,tail))
self.records[key].update( {kanji: rec} )
else:
self.records[key][kanji]=[(yomi, tail)]
else:
self.records[key] = {}
self.records[key][kanji]=[(yomi, tail)]
def kanwaout(self, out):
dic = anydbm.open(out, 'c')
for (k, v) in self.records.iteritems():
dic[k] = compress(cPickle.dumps(v, -1))
dic.close()
def clean(self): def clean(self):
for x in ('scripts', 'recipes', 'ebook-convert-complete'): for x in ('scripts', 'recipes', 'ebook-convert-complete'):
x = self.j(self.RESOURCES, x+'.pickle') x = self.j(self.RESOURCES, x+'.pickle')
if os.path.exists(x): if os.path.exists(x):
os.remove(x) os.remove(x)
kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
if os.path.exists(kakasi):
shutil.rmtree(kakasi)
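A note on the kakasi build above: kanwaout() keys each record set by the zero-padded hex codepoint of the record's first kanji and stores the value as a zlib-compressed cPickle dump. A reader would presumably invert that transformation; a minimal sketch (Python 2, matching the modules used above; the database path argument is illustrative):

    import anydbm, cPickle
    from zlib import decompress

    def kanwa_lookup(db_path, kanji):
        # Key mirrors updaterec(): "%04x" of the first character.
        key = "%04x" % ord(kanji[0])
        dic = anydbm.open(db_path, 'r')
        try:
            # Values were written as compress(cPickle.dumps(records, -1)).
            records = cPickle.loads(decompress(dic[key]))
        finally:
            dic.close()
        # records maps kanji -> [(yomi, tail), ...]
        return records.get(kanji, [])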

View File

@ -2,11 +2,14 @@ import os.path
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import textwrap, os, glob, functools import textwrap, os, glob, functools, re
from calibre import guess_type
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase
from calibre.constants import numeric_version from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ebooks.oeb.base import OEB_IMAGES
# To archive plugins {{{ # To archive plugins {{{
class HTML2ZIP(FileTypePlugin): class HTML2ZIP(FileTypePlugin):
@ -82,6 +85,77 @@ class PML2PMLZ(FileTypePlugin):
return of.name return of.name
class TXT2TXTZ(FileTypePlugin):
name = 'TXT to TXTZ'
author = 'John Schember'
description = _('Create a TXTZ archive when a TXT file is imported '
'containing Markdown or Textile references to images. The referenced '
'images as well as the TXT file are added to the archive.')
version = numeric_version
file_types = set(['txt'])
supported_platforms = ['windows', 'osx', 'linux']
on_import = True
def _get_image_references(self, txt, base_dir):
images = []
# Textile
for m in re.finditer(ur'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))', txt):
path = m.group('path')
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
images.append(path)
# Markdown inline
for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)', txt):
path = m.group('path')
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
images.append(path)
# Markdown reference
refs = {}
for m in re.finditer(ur'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$', txt):
if m.group('id') and m.group('path'):
refs[m.group('id')] = m.group('path')
for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]', txt):
path = refs.get(m.group('id'), None)
if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
images.append(path)
# Remove duplicates
return list(set(images))
def run(self, path_to_ebook):
with open(path_to_ebook, 'rb') as ebf:
txt = ebf.read()
base_dir = os.path.dirname(path_to_ebook)
images = self._get_image_references(txt, base_dir)
if images:
# Create TXTZ and put file plus images inside of it.
import zipfile
of = self.temporary_file('_plugin_txt2txtz.txtz')
txtz = zipfile.ZipFile(of.name, 'w')
# Add selected TXT file to archive.
txtz.write(path_to_ebook, os.path.basename(path_to_ebook), zipfile.ZIP_DEFLATED)
# metadata.opf
if os.path.exists(os.path.join(base_dir, 'metadata.opf')):
txtz.write(os.path.join(base_dir, 'metadata.opf'), 'metadata.opf', zipfile.ZIP_DEFLATED)
else:
from calibre.ebooks.metadata.txt import get_metadata
with open(path_to_ebook, 'rb') as ebf:
mi = get_metadata(ebf)
opf = metadata_to_opf(mi)
txtz.writestr('metadata.opf', opf, zipfile.ZIP_DEFLATED)
# images
for image in images:
txtz.write(os.path.join(base_dir, image), image)
txtz.close()
return of.name
else:
# No images so just import the TXT file.
return path_to_ebook
# }}} # }}}
# Metadata reader plugins {{{ # Metadata reader plugins {{{
@ -511,14 +585,14 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
from calibre.ebooks.metadata.douban import DoubanBooks from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \ from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
LibraryThingCovers, DoubanCovers AmazonCovers, DoubanCovers
from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
from calibre.ebooks.epub.fix.unmanifested import Unmanifested from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested, KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers, Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers,
NiceBooksCovers] NiceBooksCovers]
plugins += [ plugins += [
ComicInput, ComicInput,
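For reference, the three image syntaxes that TXT2TXTZ._get_image_references() above detects, shown on sample text (file names are illustrative; only relative paths to existing image files count):

    !images/cover.jpg!                    Textile inline image
    ![The cover](images/cover.jpg)        Markdown inline image
    ![The cover][cover]                   Markdown reference image
    [cover]: images/cover.jpg             the matching reference definition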

View File

@ -24,6 +24,7 @@ from calibre.utils.logging import Log
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
from PIL import Image as PILImage from PIL import Image as PILImage
from lxml import etree
if isosx: if isosx:
try: try:
@ -2515,23 +2516,23 @@ class ITUNES(DriverBase):
fnames = zf_opf.namelist() fnames = zf_opf.namelist()
opf = [x for x in fnames if '.opf' in x][0] opf = [x for x in fnames if '.opf' in x][0]
if opf: if opf:
opf_raw = cStringIO.StringIO(zf_opf.read(opf)) opf_tree = etree.fromstring(zf_opf.read(opf))
soup = BeautifulSoup(opf_raw.getvalue()) ns_map = opf_tree.nsmap.keys()
opf_raw.close() for item in ns_map:
ns = opf_tree.nsmap[item]
# Touch existing calibre timestamp md_el = opf_tree.find(".//{%s}metadata" % ns)
md = soup.find('metadata') if md_el is not None:
if md: ts = md_el.find('.//{%s}meta[@name="calibre:timestamp"]' % ns)
ts = md.find('meta',attrs={'name':'calibre:timestamp'}) if ts is not None:
if ts: timestamp = ts.get('content')
timestamp = ts['content'] old_ts = parse_date(timestamp)
old_ts = parse_date(timestamp) metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour, old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo) else:
else: metadata.timestamp = now()
metadata.timestamp = now() if DEBUG:
if DEBUG: self.log.info(" add timestamp: %s" % metadata.timestamp)
self.log.info(" add timestamp: %s" % metadata.timestamp) break
else: else:
metadata.timestamp = now() metadata.timestamp = now()
if DEBUG: if DEBUG:
@ -2839,7 +2840,7 @@ class ITUNES(DriverBase):
def _xform_metadata_via_plugboard(self, book, format): def _xform_metadata_via_plugboard(self, book, format):
''' Transform book metadata from plugboard templates ''' ''' Transform book metadata from plugboard templates '''
if DEBUG: if DEBUG:
self.log.info(" ITUNES._update_metadata_from_plugboard()") self.log.info(" ITUNES._xform_metadata_via_plugboard()")
if self.plugboard_func: if self.plugboard_func:
pb = self.plugboard_func(self.DEVICE_PLUGBOARD_NAME, format, self.plugboards) pb = self.plugboard_func(self.DEVICE_PLUGBOARD_NAME, format, self.plugboards)
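The lxml lookup above targets the timestamp that calibre writes into OPF metadata. A minimal, self-contained illustration (the namespace is the usual OPF 2.0 one; the timestamp value is made up):

    from lxml import etree

    OPF_NS = 'http://www.idpf.org/2007/opf'
    raw = '''<package xmlns="%s">
      <metadata>
        <meta name="calibre:timestamp" content="2011-02-16T09:37:24+08:00"/>
      </metadata>
    </package>''' % OPF_NS

    tree = etree.fromstring(raw)
    md = tree.find('.//{%s}metadata' % OPF_NS)
    ts = md.find('.//{%s}meta[@name="calibre:timestamp"]' % OPF_NS)
    timestamp = ts.get('content') if ts is not None else None
    # timestamp -> '2011-02-16T09:37:24+08:00'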

View File

@ -11,44 +11,42 @@ Generates and writes an APNX page mapping file.
import struct import struct
import uuid import uuid
from calibre.ebooks.mobi.reader import MobiReader
from calibre.ebooks.pdb.header import PdbHeaderReader from calibre.ebooks.pdb.header import PdbHeaderReader
from calibre.utils.logging import default_log
class APNXBuilder(object): class APNXBuilder(object):
''' '''
2300 characters of uncompressed text per page. This is Create an APNX file using a pseudo page mapping.
not meant to map 1 to 1 to a print book but to be a
close enough measure.
A test book was chosen and the characters were counted
on one page. This number was round to 2240 then 60
characters of markup were added to the total giving
2300.
Uncompressed text length is used because it's easily
accessible in MOBI files (part of the header). Also,
It's faster to work off of the length then to
decompress and parse the actual text.
A better but much more resource intensive and slower
method to calculate the page length would be to parse
the uncompressed text. For each paragraph we would
want to find how many lines it would occupy in a paper
back book. 70 characters per line and 32 lines per page.
So divide the number of characters (minus markup) in
each paragraph by 70. If there are less than 70
characters in the paragraph then it is 1 line. Then,
count every 32 lines and mark that location as a page.
''' '''
def write_apnx(self, mobi_file_path, apnx_path): def write_apnx(self, mobi_file_path, apnx_path, accurate=True):
# Check that this is really a MOBI file.
with open(mobi_file_path, 'rb') as mf: with open(mobi_file_path, 'rb') as mf:
phead = PdbHeaderReader(mf) ident = PdbHeaderReader(mf).identity()
r0 = phead.section_data(0) if ident != 'BOOKMOBI':
text_length = struct.unpack('>I', r0[4:8])[0] raise Exception(_('Not a valid MOBI file. Reports identity of %s') % ident)
pages = self.get_pages(text_length) # Get the pages depending on the chosen parser
pages = []
if accurate:
try:
pages = self.get_pages_accurate(mobi_file_path)
except:
# Fall back to the fast parser if we can't
# use the accurate one. Typically this is
# due to the file having DRM.
pages = self.get_pages_fast(mobi_file_path)
else:
pages = self.get_pages_fast(mobi_file_path)
if not pages:
raise Exception(_('Could not generate page mapping.'))
# Generate the APNX file from the page mapping.
apnx = self.generate_apnx(pages) apnx = self.generate_apnx(pages)
# Write the APNX.
with open(apnx_path, 'wb') as apnxf: with open(apnx_path, 'wb') as apnxf:
apnxf.write(apnx) apnxf.write(apnx)
@ -73,18 +71,126 @@ class APNXBuilder(object):
apnx += struct.pack('>H', 32) apnx += struct.pack('>H', 32)
apnx += page_header apnx += page_header
# write page values to apnx # Write page values to APNX.
for page in pages: for page in pages:
apnx += struct.pack('>L', page) apnx += struct.pack('>I', page)
return apnx return apnx
def get_pages(self, text_length): def get_pages_fast(self, mobi_file_path):
'''
2300 characters of uncompressed text per page. This is
not meant to map 1 to 1 to a print book but to be a
close enough measure.
A test book was chosen and the characters were counted
on one page. This number was rounded to 2240, then 60
characters of markup were added to the total giving
2300.
Uncompressed text length is used because it's easily
accessible in MOBI files (part of the header). Also,
it's faster to work from the length than to
decompress and parse the actual text.
'''
text_length = 0
pages = [] pages = []
count = 0 count = 0
with open(mobi_file_path, 'rb') as mf:
phead = PdbHeaderReader(mf)
r0 = phead.section_data(0)
text_length = struct.unpack('>I', r0[4:8])[0]
while count < text_length: while count < text_length:
pages.append(count) pages.append(count)
count += 2300 count += 2300
return pages return pages
def get_pages_accurate(self, mobi_file_path):
'''
A more accurate but much more resource intensive and slower
method to calculate the page length.
Parses the uncompressed text. In an average paperback book
there are 32 lines per page and a maximum of 70 characters
per line.
Each paragraph starts a new line and every 70 characters
(minus markup) in a paragraph starts a new line. The
position after every 32 lines will be marked as a new
page.
This could be made more accurate by accounting for
<div class="mbp_pagebreak" /> as a new page marker,
and <br> elements as empty lines.
'''
pages = []
# Get the MOBI html.
mr = MobiReader(mobi_file_path, default_log)
if mr.book_header.encryption_type != 0:
# DRMed book
return self.get_pages_fast(mobi_file_path)
mr.extract_text()
# States
in_tag = False
in_p = False
check_p = False
closing = False
p_char_count = 0
# Get positions of every line
# A line is either a paragraph starting
# or every 70 characters in a paragraph.
lines = []
pos = -1
# We want this to be as fast as possible so we
# are going to do one pass across the text. re
# and string functions will parse the text each
# time they are called.
#
# We can use .lower() here because we are
# not modifying the text; the case doesn't
# matter, just the absolute character and
# the position within the stream.
for c in mr.mobi_html.lower():
pos += 1
# Check if we are starting or stopping a p tag.
if check_p:
if c == '/':
closing = True
continue
elif c == 'p':
if closing:
in_p = False
else:
in_p = True
lines.append(pos - 2)
check_p = False
closing = False
continue
if c == '<':
in_tag = True
check_p = True
continue
elif c == '>':
in_tag = False
check_p = False
continue
if in_p and not in_tag:
p_char_count += 1
if p_char_count == 70:
lines.append(pos)
p_char_count = 0
# Every 32 lines is a new page
for i in xrange(0, len(lines), 32):
pages.append(lines[i])
return pages
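To put the two generators in context: the fast path emits one page marker per 2300 characters of uncompressed text, so a 690,000-character book maps to 300 pages; the accurate path re-derives pages from 70-character lines, 32 lines to a page. A usage sketch mirroring the Kindle driver below (the module path and file names are assumptions):

    from calibre.devices.kindle.apnx import APNXBuilder  # path assumed

    builder = APNXBuilder()
    # Fast: page count from the uncompressed text length in the MOBI header.
    builder.write_apnx('book.mobi', 'book.mobi.apnx', accurate=False)
    # Accurate: parses the text; silently falls back to fast for DRMed files.
    builder.write_apnx('book.mobi', 'book.mobi.apnx', accurate=True)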

View File

@ -176,6 +176,28 @@ class KINDLE2(KINDLE):
PRODUCT_ID = [0x0002, 0x0004] PRODUCT_ID = [0x0002, 0x0004]
BCD = [0x0100] BCD = [0x0100]
EXTRA_CUSTOMIZATION_MESSAGE = [
_('Send page number information when sending books') +
':::' +
_('The Kindle 3 and newer versions can use page number information '
'in MOBI files. With this option, calibre will calculate and send'
' this information to the Kindle when uploading MOBI files by'
' USB. Note that the page numbers do not correspond to any paper'
' book.'),
_('Use slower but more accurate page number generation') +
':::' +
_('There are two ways to generate the page number information. Using the more accurate '
'generator will produce pages that correspond better to a printed book. '
'However, this method is slower and will slow down sending files '
'to the Kindle.'),
]
EXTRA_CUSTOMIZATION_DEFAULT = [
True,
False,
]
OPT_APNX = 0
OPT_APNX_ACCURATE = 1
def books(self, oncard=None, end_session=True): def books(self, oncard=None, end_session=True):
bl = USBMS.books(self, oncard=oncard, end_session=end_session) bl = USBMS.books(self, oncard=oncard, end_session=end_session)
# Read collections information # Read collections information
@ -212,13 +234,17 @@ class KINDLE2(KINDLE):
''' '''
Hijacking this function to write the apnx file. Hijacking this function to write the apnx file.
''' '''
if not filepath.lower().endswith('.mobi'): opts = self.settings()
if not opts.extra_customization[self.OPT_APNX]:
return
if os.path.splitext(filepath.lower())[1] not in ('.azw', '.mobi', '.prc'):
return return
apnx_path = '%s.apnx' % os.path.join(path, filename) apnx_path = '%s.apnx' % os.path.join(path, filename)
apnx_builder = APNXBuilder() apnx_builder = APNXBuilder()
try: try:
apnx_builder.write_apnx(filepath, apnx_path) apnx_builder.write_apnx(filepath, apnx_path, accurate=opts.extra_customization[self.OPT_APNX_ACCURATE])
except: except:
print 'Failed to generate APNX' print 'Failed to generate APNX'
import traceback import traceback

View File

@ -152,8 +152,17 @@ def check_ebook_format(stream, current_guess):
stream.seek(0) stream.seek(0)
return ans return ans
def normalize(x):
if isinstance(x, unicode):
import unicodedata
x = unicodedata.normalize('NFKC', x)
return x
def calibre_cover(title, author_string, series_string=None, def calibre_cover(title, author_string, series_string=None,
output_format='jpg', title_size=46, author_size=36): output_format='jpg', title_size=46, author_size=36):
title = normalize(title)
author_string = normalize(author_string)
series_string = normalize(series_string)
from calibre.utils.magick.draw import create_cover_page, TextLine from calibre.utils.magick.draw import create_cover_page, TextLine
lines = [TextLine(title, title_size), TextLine(author_string, author_size)] lines = [TextLine(title, title_size), TextLine(author_string, author_size)]
if series_string: if series_string:
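NFKC normalization folds compatibility characters into their plain equivalents before the cover text is rendered, which avoids glyphs the cover font may not carry. Two typical foldings:

    import unicodedata

    unicodedata.normalize('NFKC', u'\ufb01')  # LATIN SMALL LIGATURE FI -> u'fi'
    unicodedata.normalize('NFKC', u'\uff21')  # FULLWIDTH LATIN CAPITAL A -> u'A'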

View File

@ -402,8 +402,8 @@ OptionRecommendation(name='asciiize',
'with "Mikhail Gorbachiov". Also, note that in ' 'with "Mikhail Gorbachiov". Also, note that in '
'cases where there are multiple representations of a character ' 'cases where there are multiple representations of a character '
'(characters shared by Chinese and Japanese for instance) the ' '(characters shared by Chinese and Japanese for instance) the '
'representation used by the largest number of people will be ' 'representation based on the current calibre interface language will be '
'used (Chinese in the previous example).')%\ 'used.')%\
u'\u041c\u0438\u0445\u0430\u0438\u043b ' u'\u041c\u0438\u0445\u0430\u0438\u043b '
u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432' u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432'
) )

View File

@ -543,9 +543,9 @@ class HTMLPreProcessor(object):
html = XMLDECL_RE.sub('', html) html = XMLDECL_RE.sub('', html)
if getattr(self.extra_opts, 'asciiize', False): if getattr(self.extra_opts, 'asciiize', False):
from calibre.ebooks.unidecode.unidecoder import Unidecoder from calibre.utils.localization import get_udc
unidecoder = Unidecoder() unihandecoder = get_udc()
html = unidecoder.decode(html) html = unihandecoder.decode(html)
if getattr(self.extra_opts, 'enable_heuristics', False): if getattr(self.extra_opts, 'enable_heuristics', False):
from calibre.ebooks.conversion.utils import HeuristicProcessor from calibre.ebooks.conversion.utils import HeuristicProcessor
@ -557,10 +557,10 @@ class HTMLPreProcessor(object):
unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
if unsupported_unicode_chars: if unsupported_unicode_chars:
from calibre.ebooks.unidecode.unidecoder import Unidecoder from calibre.utils.localization import get_udc
unidecoder = Unidecoder() unihandecoder = get_udc()
for char in unsupported_unicode_chars: for char in unsupported_unicode_chars:
asciichar = unidecoder.decode(char) asciichar = unihandecoder.decode(char)
html = html.replace(char, asciichar) html = html.replace(char, asciichar)
return html return html
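The object returned by get_udc() is used exactly like the old Unidecoder: it exposes decode(), which transliterates Unicode text to ASCII. Reusing the example from the asciiize option help above:

    from calibre.utils.localization import get_udc

    udc = get_udc()
    # Expected to yield 'Mikhail Gorbachiov' per the option help above.
    ascii_text = udc.decode(u'\u041c\u0438\u0445\u0430\u0438\u043b '
                            u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432')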

View File

@ -271,6 +271,8 @@ def check_isbn13(isbn):
return None return None
def check_isbn(isbn): def check_isbn(isbn):
if not isbn:
return None
isbn = re.sub(r'[^0-9X]', '', isbn.upper()) isbn = re.sub(r'[^0-9X]', '', isbn.upper())
if len(isbn) == 10: if len(isbn) == 10:
return check_isbn10(isbn) return check_isbn10(isbn)
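check_isbn10() itself is outside this hunk; for reference, the standard ISBN-10 rule it implements weights the ten digits 10 down to 1 and requires the weighted sum to be divisible by 11, with 'X' standing for 10. A sketch of that rule (not calibre's code):

    def isbn10_valid(isbn):
        # isbn is assumed already normalized to ten characters of [0-9X].
        if len(isbn) != 10:
            return False
        total = sum((10 - i) * (10 if d == 'X' else int(d))
                    for i, d in enumerate(isbn))
        return total % 11 == 0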

View File

@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
Fetch metadata using Amazon AWS Fetch metadata using Amazon AWS
''' '''
import sys, re import sys, re
from threading import RLock
from lxml import html from lxml import html
from lxml.html import soupparser from lxml.html import soupparser
@ -17,6 +18,10 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from calibre.library.comments import sanitize_comments_html from calibre.library.comments import sanitize_comments_html
asin_cache = {}
cover_url_cache = {}
cache_lock = RLock()
def find_asin(br, isbn): def find_asin(br, isbn):
q = 'http://www.amazon.com/s?field-keywords='+isbn q = 'http://www.amazon.com/s?field-keywords='+isbn
raw = br.open_novisit(q).read() raw = br.open_novisit(q).read()
@ -29,6 +34,12 @@ def find_asin(br, isbn):
return revs[0] return revs[0]
def to_asin(br, isbn): def to_asin(br, isbn):
with cache_lock:
ans = asin_cache.get(isbn, None)
if ans:
return ans
if ans is False:
return None
if len(isbn) == 13: if len(isbn) == 13:
try: try:
asin = find_asin(br, isbn) asin = find_asin(br, isbn)
@ -38,8 +49,11 @@ def to_asin(br, isbn):
asin = None asin = None
else: else:
asin = isbn asin = isbn
with cache_lock:
asin_cache[isbn] = asin if asin else False
return asin return asin
def get_social_metadata(title, authors, publisher, isbn): def get_social_metadata(title, authors, publisher, isbn):
mi = Metadata(title, authors) mi = Metadata(title, authors)
if not isbn: if not isbn:
@ -58,6 +72,68 @@ def get_social_metadata(title, authors, publisher, isbn):
return mi return mi
return mi return mi
def get_cover_url(isbn, br):
isbn = check_isbn(isbn)
if not isbn:
return None
with cache_lock:
ans = cover_url_cache.get(isbn, None)
if ans:
return ans
if ans is False:
return None
asin = to_asin(br, isbn)
if asin:
ans = _get_cover_url(br, asin)
if ans:
with cache_lock:
cover_url_cache[isbn] = ans
return ans
from calibre.ebooks.metadata.xisbn import xisbn
for i in xisbn.get_associated_isbns(isbn):
asin = to_asin(br, i)
if asin:
ans = _get_cover_url(br, asin)
if ans:
with cache_lock:
cover_url_cache[isbn] = ans
cover_url_cache[i] = ans
return ans
with cache_lock:
cover_url_cache[isbn] = False
return None
def _get_cover_url(br, asin):
q = 'http://amzn.com/'+asin
try:
raw = br.open_novisit(q).read()
except Exception, e:
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return None
raise
if '<title>404 - ' in raw:
return None
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
try:
root = soupparser.fromstring(raw)
except:
return False
imgs = root.xpath('//img[@id="prodImage" and @src]')
if imgs:
src = imgs[0].get('src')
parts = src.split('/')
if len(parts) > 3:
bn = parts[-1]
sparts = bn.split('_')
if len(sparts) > 2:
bn = sparts[0] + sparts[-1]
return ('/'.join(parts[:-1]))+'/'+bn
return None
def get_metadata(br, asin, mi): def get_metadata(br, asin, mi):
q = 'http://amzn.com/'+asin q = 'http://amzn.com/'+asin
try: try:
@ -111,18 +187,25 @@ def get_metadata(br, asin, mi):
def main(args=sys.argv): def main(args=sys.argv):
# Test xisbn import tempfile, os
print get_social_metadata('Learning Python', None, None, '8324616489') tdir = tempfile.gettempdir()
print br = browser()
for title, isbn in [
('Learning Python', '8324616489'), # Test xisbn
('Angels & Demons', '9781416580829'), # Test sophisticated comment formatting
# Random tests
('Star Trek: Destiny: Mere Mortals', '9781416551720'),
('The Great Gatsby', '0743273567'),
]:
cpath = os.path.join(tdir, title+'.jpg')
curl = get_cover_url(isbn, br)
if curl is None:
print 'No cover found for', title
else:
open(cpath, 'wb').write(br.open_novisit(curl).read())
print 'Cover for', title, 'saved to', cpath
# Test sophisticated comment formatting print get_social_metadata(title, None, None, isbn)
print get_social_metadata('Angels & Demons', None, None, '9781416580829')
print
# Random tests
print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')
print
print get_social_metadata('The Great Gatsby', None, None, '0743273567')
return 0 return 0
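Both module-level caches above follow the same tri-state convention: a missing key means never looked up, a truthy value is a positive hit, and False records a failed lookup so the network round-trip is not repeated. The pattern in isolation (names are generic):

    from threading import RLock

    _cache = {}
    _lock = RLock()

    def cached_lookup(key, compute):
        with _lock:
            ans = _cache.get(key, None)
        if ans:
            return ans
        if ans is False:
            return None           # known negative: skip the lookup
        ans = compute(key)        # may legitimately return None/False
        with _lock:
            _cache[key] = ans if ans else False
        return ans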

View File

@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import traceback, socket, re, sys import traceback, socket, sys
from functools import partial from functools import partial
from threading import Thread, Event from threading import Thread, Event
from Queue import Queue, Empty from Queue import Queue, Empty
@ -15,7 +15,6 @@ import mechanize
from calibre.customize import Plugin from calibre.customize import Plugin
from calibre import browser, prints from calibre import browser, prints
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.constants import preferred_encoding, DEBUG from calibre.constants import preferred_encoding, DEBUG
class CoverDownload(Plugin): class CoverDownload(Plugin):
@ -112,73 +111,38 @@ class OpenLibraryCovers(CoverDownload): # {{{
# }}} # }}}
class LibraryThingCovers(CoverDownload): # {{{ class AmazonCovers(CoverDownload): # {{{
name = 'librarything.com covers' name = 'amazon.com covers'
description = _('Download covers from librarything.com') description = _('Download covers from amazon.com')
author = 'Kovid Goyal' author = 'Kovid Goyal'
LIBRARYTHING = 'http://www.librarything.com/isbn/'
def get_cover_url(self, isbn, br, timeout=5.):
try:
src = br.open_novisit('http://www.librarything.com/isbn/'+isbn,
timeout=timeout).read().decode('utf-8', 'replace')
except Exception, err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
err = Exception(_('LibraryThing.com timed out. Try again later.'))
raise err
else:
if '/wiki/index.php/HelpThing:Verify' in src:
raise Exception('LibraryThing is blocking calibre.')
s = BeautifulSoup(src)
url = s.find('td', attrs={'class':'left'})
if url is None:
if s.find('div', attrs={'class':'highloadwarning'}) is not None:
raise Exception(_('Could not fetch cover as server is experiencing high load. Please try again later.'))
raise Exception(_('ISBN: %s not found')%isbn)
url = url.find('img')
if url is None:
raise Exception(_('LibraryThing.com server error. Try again later.'))
url = re.sub(r'_S[XY]\d+', '', url['src'])
return url
def has_cover(self, mi, ans, timeout=5.): def has_cover(self, mi, ans, timeout=5.):
return False if not mi.isbn:
if not mi.isbn or not self.site_customization:
return False return False
from calibre.ebooks.metadata.library_thing import get_browser, login from calibre.ebooks.metadata.amazon import get_cover_url
br = get_browser() br = browser()
un, _, pw = self.site_customization.partition(':')
login(br, un, pw)
try: try:
self.get_cover_url(mi.isbn, br, timeout=timeout) get_cover_url(mi.isbn, br)
self.debug('cover for', mi.isbn, 'found') self.debug('cover for', mi.isbn, 'found')
ans.set() ans.set()
except Exception, e: except Exception, e:
self.debug(e) self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.): def get_covers(self, mi, result_queue, abort, timeout=5.):
if not mi.isbn or not self.site_customization: if not mi.isbn:
return return
from calibre.ebooks.metadata.library_thing import get_browser, login from calibre.ebooks.metadata.amazon import get_cover_url
br = get_browser() br = browser()
un, _, pw = self.site_customization.partition(':')
login(br, un, pw)
try: try:
url = self.get_cover_url(mi.isbn, br, timeout=timeout) url = get_cover_url(mi.isbn, br)
cover_data = br.open_novisit(url).read() cover_data = br.open_novisit(url).read()
result_queue.put((True, cover_data, 'jpg', self.name)) result_queue.put((True, cover_data, 'jpg', self.name))
except Exception, e: except Exception, e:
result_queue.put((False, self.exception_to_string(e), result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name)) traceback.format_exc(), self.name))
def customization_help(self, gui=False):
ans = _('To use librarything.com you must sign up for a %sfree account%s '
'and enter your username and password separated by a : below.')
return '<p>'+ans%('<a href="http://www.librarything.com">', '</a>')
# }}} # }}}
def check_for_cover(mi, timeout=5.): # {{{ def check_for_cover(mi, timeout=5.): # {{{

View File

@ -12,6 +12,7 @@ __docformat__ = 'restructuredtext en'
from struct import pack, unpack from struct import pack, unpack
from cStringIO import StringIO from cStringIO import StringIO
from calibre.ebooks import normalize
from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.ebooks.mobi.langcodes import iana2mobi
@ -311,6 +312,7 @@ class MetadataUpdater(object):
return StreamSlicer(self.stream, start, stop) return StreamSlicer(self.stream, start, stop)
def update(self, mi): def update(self, mi):
mi.title = normalize(mi.title)
def update_exth_record(rec): def update_exth_record(rec):
recs.append(rec) recs.append(rec)
if rec[0] in self.original_exth_records: if rec[0] in self.original_exth_records:
@ -331,12 +333,12 @@ class MetadataUpdater(object):
kindle_pdoc = None kindle_pdoc = None
if mi.author_sort and pas: if mi.author_sort and pas:
authors = mi.author_sort authors = mi.author_sort
update_exth_record((100, authors.encode(self.codec, 'replace'))) update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
elif mi.authors: elif mi.authors:
authors = ';'.join(mi.authors) authors = ';'.join(mi.authors)
update_exth_record((100, authors.encode(self.codec, 'replace'))) update_exth_record((100, normalize(authors).encode(self.codec, 'replace')))
if mi.publisher: if mi.publisher:
update_exth_record((101, mi.publisher.encode(self.codec, 'replace'))) update_exth_record((101, normalize(mi.publisher).encode(self.codec, 'replace')))
if mi.comments: if mi.comments:
# Strip user annotations # Strip user annotations
a_offset = mi.comments.find('<div class="user_annotations">') a_offset = mi.comments.find('<div class="user_annotations">')
@ -345,12 +347,12 @@ class MetadataUpdater(object):
mi.comments = mi.comments[:a_offset] mi.comments = mi.comments[:a_offset]
if ad_offset >= 0: if ad_offset >= 0:
mi.comments = mi.comments[:ad_offset] mi.comments = mi.comments[:ad_offset]
update_exth_record((103, mi.comments.encode(self.codec, 'replace'))) update_exth_record((103, normalize(mi.comments).encode(self.codec, 'replace')))
if mi.isbn: if mi.isbn:
update_exth_record((104, mi.isbn.encode(self.codec, 'replace'))) update_exth_record((104, mi.isbn.encode(self.codec, 'replace')))
if mi.tags: if mi.tags:
subjects = '; '.join(mi.tags) subjects = '; '.join(mi.tags)
update_exth_record((105, subjects.encode(self.codec, 'replace'))) update_exth_record((105, normalize(subjects).encode(self.codec, 'replace')))
if kindle_pdoc and kindle_pdoc in mi.tags: if kindle_pdoc and kindle_pdoc in mi.tags:
update_exth_record((501, str('PDOC'))) update_exth_record((501, str('PDOC')))

View File

@ -367,6 +367,9 @@ class MobiMLizer(object):
istate.attrib['src'] = elem.attrib['src'] istate.attrib['src'] = elem.attrib['src']
istate.attrib['align'] = 'baseline' istate.attrib['align'] = 'baseline'
cssdict = style.cssdict() cssdict = style.cssdict()
valign = cssdict.get('vertical-align', None)
if valign in ('top', 'bottom', 'middle'):
istate.attrib['align'] = valign
for prop in ('width', 'height'): for prop in ('width', 'height'):
if cssdict[prop] != 'auto': if cssdict[prop] != 'auto':
value = style[prop] value = style[prop]
@ -451,8 +454,11 @@ class MobiMLizer(object):
text = COLLAPSE.sub(' ', elem.text) text = COLLAPSE.sub(' ', elem.text)
valign = style['vertical-align'] valign = style['vertical-align']
not_baseline = valign in ('super', 'sub', 'text-top', not_baseline = valign in ('super', 'sub', 'text-top',
'text-bottom') 'text-bottom') or (
vtag = 'sup' if valign in ('super', 'text-top') else 'sub' isinstance(valign, (float, int)) and abs(valign) != 0)
issup = valign in ('super', 'text-top') or (
isinstance(valign, (float, int)) and valign > 0)
vtag = 'sup' if issup else 'sub'
if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock: if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP) nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
vbstate = BlockState(etree.SubElement(nroot, XHTML('body'))) vbstate = BlockState(etree.SubElement(nroot, XHTML('body')))

View File

@ -14,8 +14,9 @@ import re
from struct import pack from struct import pack
import time import time
from urlparse import urldefrag from urlparse import urldefrag
from cStringIO import StringIO from cStringIO import StringIO
from calibre.ebooks import normalize
from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.ebooks.mobi.mobiml import MBP_NS from calibre.ebooks.mobi.mobiml import MBP_NS
from calibre.ebooks.oeb.base import OEB_DOCS from calibre.ebooks.oeb.base import OEB_DOCS
@ -1365,7 +1366,7 @@ class MobiWriter(object):
self._text_length, self._text_length,
self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf) self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
uid = random.randint(0, 0xffffffff) uid = random.randint(0, 0xffffffff)
title = unicode(metadata.title[0]).encode('utf-8') title = normalize(unicode(metadata.title[0])).encode('utf-8')
# The MOBI Header # The MOBI Header
# 0x0 - 0x3 # 0x0 - 0x3
@ -1523,12 +1524,12 @@ class MobiWriter(object):
items = oeb.metadata[term] items = oeb.metadata[term]
if term == 'creator': if term == 'creator':
if self._prefer_author_sort: if self._prefer_author_sort:
creators = [unicode(c.file_as or c) for c in items] creators = [normalize(unicode(c.file_as or c)) for c in items]
else: else:
creators = [unicode(c) for c in items] creators = [normalize(unicode(c)) for c in items]
items = ['; '.join(creators)] items = ['; '.join(creators)]
for item in items: for item in items:
data = self.COLLAPSE_RE.sub(' ', unicode(item)) data = self.COLLAPSE_RE.sub(' ', normalize(unicode(item)))
if term == 'identifier': if term == 'identifier':
if data.lower().startswith('urn:isbn:'): if data.lower().startswith('urn:isbn:'):
data = data[9:] data = data[9:]
@ -1542,7 +1543,7 @@ class MobiWriter(object):
nrecs += 1 nrecs += 1
if term == 'rights' : if term == 'rights' :
try: try:
rights = unicode(oeb.metadata.rights[0]).encode('utf-8') rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8')
except: except:
rights = 'Unknown' rights = 'Unknown'
exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8)) exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))

View File

@ -207,7 +207,14 @@ class CSSFlattener(object):
font_size = self.sbase if self.sbase is not None else \ font_size = self.sbase if self.sbase is not None else \
self.context.source.fbase self.context.source.fbase
if 'align' in node.attrib: if 'align' in node.attrib:
cssdict['text-align'] = node.attrib['align'] if tag != 'img':
cssdict['text-align'] = node.attrib['align']
else:
val = node.attrib['align']
if val in ('middle', 'bottom', 'top'):
cssdict['vertical-align'] = val
elif val in ('left', 'right'):
cssdict['text-align'] = val
del node.attrib['align'] del node.attrib['align']
if node.tag == XHTML('font'): if node.tag == XHTML('font'):
node.tag = XHTML('span') node.tag = XHTML('span')

View File

@ -4,10 +4,9 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import glob
import os import os
from calibre import _ent_pat, xml_entity_to_unicode from calibre import _ent_pat, walk, xml_entity_to_unicode
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from calibre.ebooks.chardet import detect from calibre.ebooks.chardet import detect
@ -16,7 +15,6 @@ from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
preserve_spaces, detect_paragraph_type, detect_formatting_type, \ preserve_spaces, detect_paragraph_type, detect_formatting_type, \
normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \ normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \
separate_hard_scene_breaks separate_hard_scene_breaks
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
class TXTInput(InputFormatPlugin): class TXTInput(InputFormatPlugin):
@ -28,20 +26,23 @@ class TXTInput(InputFormatPlugin):
options = set([ options = set([
OptionRecommendation(name='paragraph_type', recommended_value='auto', OptionRecommendation(name='paragraph_type', recommended_value='auto',
choices=['auto', 'block', 'single', 'print', 'unformatted'], choices=['auto', 'block', 'single', 'print', 'unformatted', 'off'],
help=_('Paragraph structure.\n' help=_('Paragraph structure.\n'
'choices are [\'auto\', \'block\', \'single\', \'print\', \'unformatted\']\n' 'choices are [\'auto\', \'block\', \'single\', \'print\', \'unformatted\', \'off\']\n'
'* auto: Try to auto detect paragraph type.\n' '* auto: Try to auto detect paragraph type.\n'
'* block: Treat a blank line as a paragraph break.\n' '* block: Treat a blank line as a paragraph break.\n'
'* single: Assume every line is a paragraph.\n' '* single: Assume every line is a paragraph.\n'
'* print: Assume every line starting with 2+ spaces or a tab ' '* print: Assume every line starting with 2+ spaces or a tab '
'starts a paragraph.' 'starts a paragraph.\n'
'* unformatted: Most lines have hard line breaks, few/no blank lines or indents.')), '* unformatted: Most lines have hard line breaks, few/no blank lines or indents. '
'Tries to determine structure and reformat the differentiated elements.\n'
'* off: Don\'t modify the paragraph structure. This is useful when combined with '
'Markdown or Textile formatting to ensure no formatting is lost.')),
OptionRecommendation(name='formatting_type', recommended_value='auto', OptionRecommendation(name='formatting_type', recommended_value='auto',
choices=['auto', 'none', 'heuristic', 'textile', 'markdown'], choices=['auto', 'plain', 'heuristic', 'textile', 'markdown'],
help=_('Formatting used within the document.' help=_('Formatting used within the document.'
'* auto: Automatically decide which formatting processor to use.\n' '* auto: Automatically decide which formatting processor to use.\n'
'* none: Do not process the document formatting. Everything is a ' '* plain: Do not process the document formatting. Everything is a '
'paragraph and no styling is applied.\n' 'paragraph and no styling is applied.\n'
'* heuristic: Process using heuristics to determine formatting such ' '* heuristic: Process using heuristics to determine formatting such '
'as chapter headings and italic text.\n' 'as chapter headings and italic text.\n'
@ -64,18 +65,17 @@ class TXTInput(InputFormatPlugin):
txt = '' txt = ''
log.debug('Reading text from file...') log.debug('Reading text from file...')
length = 0 length = 0
# [(u'path', mime),]
# Extract content from zip archive. # Extract content from zip archive.
if file_ext == 'txtz': if file_ext == 'txtz':
log.debug('De-compressing content to temporary directory...') zf = ZipFile(stream)
with TemporaryDirectory('_untxtz') as tdir: zf.extractall('.')
zf = ZipFile(stream)
zf.extractall(tdir)
txts = glob.glob(os.path.join(tdir, '*.txt')) for x in walk('.'):
for t in txts: if os.path.splitext(x)[1].lower() == '.txt':
with open(t, 'rb') as tf: with open(x, 'rb') as tf:
txt += tf.read() txt += tf.read() + '\n\n'
else: else:
txt = stream.read() txt = stream.read()
@ -134,7 +134,7 @@ class TXTInput(InputFormatPlugin):
preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None)) preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None))
txt = preprocessor.punctuation_unwrap(length, txt, 'txt') txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
txt = separate_paragraphs_single_line(txt) txt = separate_paragraphs_single_line(txt)
else: elif options.paragraph_type == 'block':
txt = separate_hard_scene_breaks(txt) txt = separate_hard_scene_breaks(txt)
txt = block_to_single_line(txt) txt = block_to_single_line(txt)
@ -178,7 +178,7 @@ class TXTInput(InputFormatPlugin):
setattr(options, opt.option.name, opt.recommended_value) setattr(options, opt.option.name, opt.recommended_value)
options.input_encoding = 'utf-8' options.input_encoding = 'utf-8'
base = os.getcwdu() base = os.getcwdu()
if hasattr(stream, 'name'): if file_ext != 'txtz' and hasattr(stream, 'name'):
base = os.path.dirname(stream.name) base = os.path.dirname(stream.name)
fname = os.path.join(base, 'index.html') fname = os.path.join(base, 'index.html')
c = 0 c = 0
@ -190,7 +190,7 @@ class TXTInput(InputFormatPlugin):
htmlfile.write(html.encode('utf-8')) htmlfile.write(html.encode('utf-8'))
odi = options.debug_pipeline odi = options.debug_pipeline
options.debug_pipeline = None options.debug_pipeline = None
# Generate oeb from htl conversion. # Generate oeb from html conversion.
oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log, oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log,
{}) {})
options.debug_pipeline = odi options.debug_pipeline = odi

View File

@ -126,7 +126,7 @@ def separate_hard_scene_breaks(txt):
return '\n%s\n' % line return '\n%s\n' % line
else: else:
return line return line
txt = re.sub(u'(?miu)^[ \t-=~\/]+$', lambda mo: sep_break(mo.group()), txt) txt = re.sub(u'(?miu)^[ \t-=~\/_]+$', lambda mo: sep_break(mo.group()), txt)
return txt return txt
def block_to_single_line(txt): def block_to_single_line(txt):

File diff suppressed because it is too large

View File

@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
__all__ = ["Unihandecoder"]
'''
Decode unicode text to an ASCII representation of the text.
Translate unicode characters to ASCII.
Inspired by John Schember's unidecode library, which was created as part
of calibre.
Copyright(c) 2009, John Schember
Transliterate the string from unicode characters to ASCII, for Chinese and other languages.
'''
import unicodedata
class Unihandecoder(object):
preferred_encoding = None
decoder = None
def __init__(self, lang="zh", encoding='utf-8'):
self.preferred_encoding = encoding
lang = lang.lower()
if lang[:2] == u'ja':
from calibre.ebooks.unihandecode.jadecoder import Jadecoder
self.decoder = Jadecoder()
elif lang[:2] == u'kr' or lang == u'korean':
from calibre.ebooks.unihandecode.krdecoder import Krdecoder
self.decoder = Krdecoder()
elif lang[:2] == u'vn' or lang == u'vietnum':
from calibre.ebooks.unihandecode.vndecoder import Vndecoder
self.decoder = Vndecoder()
else: #zh and others
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
self.decoder = Unidecoder()
def decode(self, text):
try:
unicode # python2
if not isinstance(text, unicode):
try:
text = unicode(text)
except:
try:
text = text.decode(self.preferred_encoding)
except:
text = text.decode('utf-8', 'replace')
except: # python3, str is unicode
pass
# First, unicode normalize the text using NFKC (see the Unicode standard).
ntext = unicodedata.normalize('NFKC', text)
return self.decoder.decode(ntext)
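A usage sketch of the dispatcher above; the outputs in the comments are illustrative, not verified against the bundled dictionaries:

from calibre.ebooks.unihandecode import Unihandecoder

d = Unihandecoder(lang='ja')      # a 'ja*' language selects the kakasi-backed Jadecoder
print d.decode(u'\u6f22\u5b57')   # kanji in, e.g. 'Kanji' out
d = Unihandecoder(lang='zh')      # anything unrecognized falls back to Unidecoder
print d.decode(u'\u4e2d\u6587')   # codepoint-table ASCII out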

File diff suppressed because it is too large

View File

@ -0,0 +1,41 @@
# coding:utf8
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text for Japanese.
Translate a unicode string to an ASCII roman string.
The API is based on the python unidecode module,
which is based on the Ruby gem (http://rubyforge.org/projects/unidecode/)
and the perl module Text::Unidecode
(http://search.cpan.org/~sburke/Text-Unidecode-0.04/).
This functionality is provided by the KAKASI Japanese processing engine.
Copyright (c) 2010 Hiroshi Miura
'''
import re
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
from calibre.ebooks.unihandecode.jacodepoints import CODEPOINTS as JACODES
from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi
class Jadecoder(Unidecoder):
kakasi = None
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(JACODES)
self.kakasi = kakasi()
def decode(self, text):
try:
result=self.kakasi.do(text)
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),result)
except:
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),text)

File diff suppressed because it is too large

View File

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text in Korean.
Based on unidecoder.
'''
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
from calibre.ebooks.unihandecode.krcodepoints import CODEPOINTS as HANCODES
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
class Krdecoder(Unidecoder):
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(HANCODES)

View File

@ -0,0 +1,5 @@
from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi
kakasi
__all__ = ["pykakasi"]

View File

@ -0,0 +1,185 @@
# -*- coding: utf-8 -*-
# h2a.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
class H2a (object):
H2a_table = {
u"\u3041":"a", u"\u3042":"a",
u"\u3043":"i", u"\u3044":"i",
u"\u3045":"u", u"\u3046":"u",
u"\u3046\u309b":"vu", u"\u3046\u309b\u3041":"va",
u"\u3046\u309b\u3043":"vi", u"\u3046\u309b\u3047":"ve",
u"\u3046\u309b\u3049":"vo",
u"\u3047":"e", u"\u3048":"e",
u"\u3049":"o", u"\u304a":"o",
u"\u304b":"ka", u"\u304c":"ga",
u"\u304d":"ki", u"\u304d\u3041":"kya",
u"\u304d\u3045":"kyu", u"\u304d\u3049":"kyo",
u"\u304e":"gi", u"\u3050\u3083":"gya",
u"\u304e\u3045":"gyu", u"\u304e\u3087":"gyo",
u"\u304f":"ku", u"\u3050":"gu",
u"\u3051":"ke", u"\u3052":"ge",
u"\u3053":"ko", u"\u3054":"go",
u"\u3055":"sa", u"\u3056":"za",
u"\u3057":"shi", u"\u3057\u3083":"sha",
u"\u3057\u3085":"shu", u"\u3057\u3087":"sho",
u"\u3058":"ji", u"\u3058\u3083":"ja",
u"\u3058\u3085":"ju", u"\u3058\u3087":"jo",
u"\u3059":"su", u"\u305a":"zu",
u"\u305b":"se", u"\u305c":"ze",
u"\u305d":"so", u"\u305e":"zo",
u"\u305f":"ta", u"\u3060":"da",
u"\u3061":"chi", u"\u3061\u3047":"che", u"\u3061\u3083":"cha",
u"\u3061\u3085":"chu", u"\u3061\u3087":"cho",
u"\u3062":"ji", u"\u3062\u3083":"ja",
u"\u3062\u3085":"ju", u"\u3062\u3087":"jo",
u"\u3063":"tsu",
u"\u3063\u3046\u309b":"vvu",
u"\u3063\u3046\u309b\u3041":"vva",
u"\u3063\u3046\u309b\u3043":"vvi",
u"\u3063\u3046\u309b\u3047":"vve",
u"\u3063\u3046\u309b\u3049":"vvo",
u"\u3063\u304b":"kka", u"\u3063\u304c":"gga",
u"\u3063\u304d":"kki", u"\u3063\u304d\u3083":"kkya",
u"\u3063\u304d\u3085":"kkyu", u"\u3063\u304d\u3087":"kkyo",
u"\u3063\u304e":"ggi", u"\u3063\u304e\u3083":"ggya",
u"\u3063\u304e\u3085":"ggyu", u"\u3063\u304e\u3087":"ggyo",
u"\u3063\u304f":"kku", u"\u3063\u3050":"ggu",
u"\u3063\u3051":"kke", u"\u3063\u3052":"gge",
u"\u3063\u3053":"kko", u"\u3063\u3054":"ggo",
u"\u3063\u3055":"ssa", u"\u3063\u3056":"zza",
u"\u3063\u3057":"sshi", u"\u3063\u3057\u3083":"ssha",
u"\u3063\u3057\u3085":"sshu", u"\u3063\u3057\u3087":"ssho",
u"\u3063\u3058":"jji", u"\u3063\u3058\u3083":"jja",
u"\u3063\u3058\u3085":"jju", u"\u3063\u3058\u3087":"jjo",
u"\u3063\u3059":"ssu", u"\u3063\u305a":"zzu",
u"\u3063\u305b":"sse", u"\u3063\u305e":"zze",
u"\u3063\u305d":"sso", u"\u3063\u305e":"zzo",
u"\u3063\u305f":"tta", u"\u3063\u3060":"dda",
u"\u3063\u3061":"tchi", u"\u3063\u3061\u3083":"tcha",
u"\u3063\u3061\u3085":"tchu", u"\u3063\u3061\u3087":"tcho",
u"\u3063\u3062":"jji", u"\u3063\u3062\u3083":"jjya",
u"\u3063\u3062\u3085":"jjyu", u"\u3063\u3062\u3087":"jjyo",
u"\u3063\u3064":"ttsu", u"\u3063\u3065":"zzu",
u"\u3063\u3066":"tte", u"\u3063\u3067":"dde",
u"\u3063\u3068":"tto", u"\u3063\u3069":"ddo",
u"\u3063\u306f":"hha", u"\u3063\u3070":"bba",
u"\u3063\u3071":"ppa",
u"\u3063\u3072":"hhi", u"\u3063\u3072\u3083":"hhya",
u"\u3063\u3072\u3085":"hhyu", u"\u3063\u3072\u3087":"hhyo",
u"\u3063\u3073":"bbi", u"\u3063\u3073\u3083":"bbya",
u"\u3063\u3073\u3085":"bbyu", u"\u3063\u3073\u3087":"bbyo",
u"\u3063\u3074":"ppi", u"\u3063\u3074\u3083":"ppya",
u"\u3063\u3074\u3085":"ppyu", u"\u3063\u3074\u3087":"ppyo",
u"\u3063\u3075":"ffu", u"\u3063\u3075\u3041":"ffa",
u"\u3063\u3075\u3043":"ffi", u"\u3063\u3075\u3047":"ffe",
u"\u3063\u3075\u3049":"ffo",
u"\u3063\u3076":"bbu", u"\u3063\u3077":"ppu",
u"\u3063\u3078":"hhe", u"\u3063\u3079":"bbe",
u"\u3063\u307a":"ppe",
u"\u3063\u307b":"hho", u"\u3063\u307c":"bbo",
u"\u3063\u307d":"ppo",
u"\u3063\u3084":"yya", u"\u3063\u3086":"yyu",
u"\u3063\u3088":"yyo",
u"\u3063\u3089":"rra", u"\u3063\u308a":"rri",
u"\u3063\u308a\u3083":"rrya", u"\u3063\u308a\u3085":"rryu",
u"\u3063\u308a\u3087":"rryo",
u"\u3063\u308b":"rru", u"\u3063\u308c":"rre",
u"\u3063\u308d":"rro",
u"\u3064":"tsu", u"\u3065":"zu",
u"\u3066":"te", u"\u3067":"de", u"\u3067\u3043":"di",
u"\u3068":"to", u"\u3069":"do",
u"\u306a":"na",
u"\u306b":"ni", u"\u306b\u3083":"nya",
u"\u306b\u3085":"nyu", u"\u306b\u3087":"nyo",
u"\u306c":"nu", u"\u306d":"ne", u"\u306e":"no",
u"\u306f":"ha", u"\u3070":"ba", u"\u3071":"pa",
u"\u3072":"hi", u"\u3072\u3083":"hya",
u"\u3072\u3085":"hyu", u"\u3072\u3087":"hyo",
u"\u3073":"bi", u"\u3073\u3083":"bya",
u"\u3073\u3085":"byu", u"\u3073\u3087":"byo",
u"\u3074":"pi", u"\u3074\u3083":"pya",
u"\u3074\u3085":"pyu", u"\u3074\u3087":"pyo",
u"\u3075":"fu", u"\u3075\u3041":"fa",
u"\u3075\u3043":"fi", u"\u3075\u3047":"fe",
u"\u3075\u3049":"fo",
u"\u3076":"bu", u"\u3077":"pu",
u"\u3078":"he", u"\u3079":"be", u"\u307a":"pe",
u"\u307b":"ho", u"\u307c":"bo", u"\u307d":"po",
u"\u307e":"ma",
u"\u307f":"mi", u"\u307f\u3083":"mya",
u"\u307f\u3085":"myu", u"\u307f\u3087":"myo",
u"\u3080":"mu", u"\u3081":"me", u"\u3082":"mo",
u"\u3083":"ya", u"\u3084":"ya",
u"\u3085":"yu", u"\u3086":"yu",
u"\u3087":"yo", u"\u3088":"yo",
u"\u3089":"ra",
u"\u308a":"ri", u"\u308a\u3083":"rya",
u"\u308a\u3085":"ryu", u"\u308a\u3087":"ryo",
u"\u308b":"ru", u"\u308c":"re", u"\u308d":"ro",
u"\u308e":"wa", u"\u308f":"wa",
u"\u3090":"i", u"\u3091":"e",
u"\u3092":"wo", u"\u3093":"n",
u"\u3093\u3042":"n'a", u"\u3093\u3044":"n'i",
u"\u3093\u3046":"n'u", u"\u3093\u3048":"n'e",
u"\u3093\u304a":"n'o",
}
# this class is Borg
_shared_state = {}
def __new__(cls, *p, **k):
self = object.__new__(cls, *p, **k)
self.__dict__ = cls._shared_state
return self
def isHiragana(self, char):
return ( 0x3040 < ord(char) and ord(char) < 0x3094)
def convert(self, text):
Hstr = ""
max_len = -1
r = min(4, len(text)+1)
for x in xrange(r):
if text[:x] in self.H2a_table:
if max_len < x:
max_len = x
Hstr = self.H2a_table[text[:x]]
return (Hstr, max_len)
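convert() tests the prefixes text[:1] through text[:3] against the table and keeps the longest hit, returning the romanization together with the number of kana consumed so the caller can advance its cursor. A short sketch, with values taken from the table above:

h2a = H2a()
h2a.convert(u'\u3042\u304b')        # ('a', 1): only the first kana matches
h2a.convert(u'\u3057\u3083\u304b')  # ('sha', 2): the two-kana digraph beats 'shi'
h2a.convert(u'abc')                 # ('', -1): no match, the caller copies a char through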

View File

@ -0,0 +1,564 @@
芦蘆
壱一
苅刈
舘館
曽曾
菟兎
島嶋
盃杯
冨富
峯峰
亘亙
弌一
乘乗
亂乱
豫予
亊事
弍二
亞亜
亰京
从従
仭仞
佛仏
來来
儘侭
伜倅
假仮
會会
做作
傳伝
僞偽
價価
儉倹
兒児
兔兎
竸競
兩両
囘回
册冊
冢塚
冩写
决決
冱冴
冰氷
况況
凉涼
處処
凾函
刄刃
刔抉
刧劫
剩剰
劍剣
劔剣
劒剣
剱剣
劑剤
辨弁
勞労
勳勲
勵励
勸勧
區区
卆卒
丗世
凖準
夘卯
卻却
卷巻
厠廁
厦廈
厮廝
厰廠
參参
雙双
咒呪
單単
噐器
營営
嚏嚔
嚴厳
囑嘱
囓齧
圀国
圈圏
國国
圍囲
圓円
團団
圖図
埀垂
埓埒
塲場
壞壊
墮堕
壓圧
壘塁
壥廛
壤壌
壯壮
壺壷
壹一
壻婿
壽寿
夂夊
夛多
梦夢
竒奇
奧奥
奬奨
侫佞
姙妊
嫻嫺
孃嬢
學学
斈学
寃冤
寇冦
寢寝
寫写
寶宝
寳宝
尅剋
將将
專専
對対
尓爾
尢尤
屆届
屬属
峽峡
嶌嶋
嵜崎
崙崘
嵳嵯
嶽岳
巛川
巵卮
帋紙
帶帯
幤幣
廐厩
廏厩
廣広
廚厨
廢廃
廳庁
廰庁
廸迪
弃棄
弉奘
彜彝
彈弾
彌弥
弯彎
徃往
徑径
從従
徠来
悳徳
恠怪
恆恒
悧俐
惡悪
惠恵
忰悴
惱悩
愼慎
愽博
慘惨
慚慙
憇憩
應応
懷懐
懴懺
戀恋
戞戛
戰戦
戲戯
拔抜
拏拿
擔担
拜拝
拂払
挾挟
搜捜
插挿
搖揺
攝摂
攪撹
據拠
擇択
擧拳
舉拳
抬擡
擴拡
攜携
攵攴
攷考
收収
效効
敕勅
敍叙
敘叙
數数
變変
斷断
旙旛
昜陽
晄晃
晉晋
晝昼
晰晢
暎映
曉暁
暸瞭
昿曠
曵曳
朖朗
朞期
霸覇
杤栃
杰傑
枩松
檜桧
條条
檮梼
梹檳
棊棋
棧桟
棕椶
楙茂
榮栄
槨椁
樂楽
權権
樞枢
樣様
樓楼
橢楕
檢検
櫻桜
鬱欝
盜盗
飮飲
歐嘔
歡歓
歸帰
殘残
殱殲
殼殻
毆殴
毓育
氣気
沒没
泪涙
濤涛
渕淵
渊淵
淨浄
淺浅
滿満
溂剌
溪渓
灌潅
滯滞
澁渋
澀渋
潛潜
濳潜
澂澄
澑溜
澤沢
濟済
濕湿
濱浜
濾滬
灣湾
烱炯
烟煙
熈煕
熏燻
燒焼
爐炉
爭争
爲為
爼俎
犁犂
犹猶
犲豺
狹狭
獎奨
默黙
獨独
獸獣
獵猟
獻献
珎珍
璢瑠
瑯琅
珱瓔
瓣弁
甞嘗
甼町
畄留
畍界
畊耕
畆畝
畧略
畫画
當当
畴疇
疊畳
疉畳
疂畳
癡痴
發発
皃猊
皈帰
皹皸
盖蓋
盡尽
蘯盪
眞真
眦眥
礦鉱
礪砺
碎砕
碯瑙
祕秘
祿禄
齋斎
禪禅
禮礼
禀稟
稱称
稻稲
稾稿
穗穂
穩穏
龝穐
穰穣
窗窓
竈竃
窰窯
竊窃
竝並
筺筐
笋筍
箟箘
筝箏
簔蓑
籠篭
籘籐
籖籤
粹粋
糺糾
絲糸
經経
總総
緜綿
縣県
縱縦
繪絵
繩縄
繼継
緕纃
續続
纖繊
纎繊
纜繿
缺欠
罐缶
罸罰
羃冪
羣群
羮羹
譱善
翆翠
翦剪
耻恥
聟婿
聨聯
聲声
聰聡
聽聴
肅粛
冐冒
脉脈
腦脳
腟膣
膓腸
膸髄
膽胆
臈臘
臟臓
臺台
與与
舊旧
舍舎
舖舗
舩船
艢檣
舮艫
艷艶
莖茎
莊荘
莵兎
菷帚
萠萌
蕚萼
蒂蔕
萬万
葢蓋
蘂蕊
蕋蕊
藪薮
藏蔵
藝芸
藥薬
蘓蘇
乕虎
號号
蠣蛎
蝨虱
蠅蝿
螢蛍
蟆蟇
蟲虫
蠏蟹
蟷螳
蟒蠎
蠶蚕
蠧蠹
蠻蛮
衂衄
衞衛
袵衽
裝装
襃褒
褝襌
覩睹
覺覚
覽覧
觀観
觧解
觸触
誡戒
謌歌
諡謚
謠謡
證証
譛譖
譯訳
譽誉
讀読
讓譲
讚賛
豐豊
貉狢
貍狸
貎猊
豼貔
貘獏
戝財
貭質
貳弐
貮弐
賤賎
賣売
贊賛
賍贓
赱走
踈疎
踴踊
躰体
軆体
軈軅
軣轟
輕軽
輙輒
輌輛
轉転
辭辞
辯弁
迯逃
逹達
逎遒
遞逓
遲遅
邊辺
邉辺
邨村
鄰隣
醉酔
醫医
釀醸
釋釈
釡釜
釼剣
銕鉄
錢銭
鎭鎮
鐵鉄
鐡鉄
鑒鑑
鑄鋳
鑛鉱
鈩鑪
鑚鑽
閇閉
濶闊
關関
阯址
陷陥
險険
隱隠
隸隷
襍雑
雜雑
靈霊
靜静
靱靭
韭韮
韲齏
韵韻
顏顔
顯顕
飃飄
餘余
餝飾
餠餅
騷騒
驅駆
驛駅
驗験
髓髄
體体
髮髪
鬪闘
鰺鯵
鰛鰮
鳬鳧
鳫鴈
鵄鴟
鵞鵝
鷄鶏
鷏鷆
鹽塩
麥麦
麸麩
麪麺
點点
黨党
皷鼓
鼡鼠
齊斉
齒歯
齡齢
龜亀
槇槙
遙遥
瑤瑶
凜凛
熙煕

View File

@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
# j2h.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original Copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo
import re
class J2H (object):
kanwa = None
cl_table = [
"","aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow",
"aiueow", "aiueow", "aiueow", "k", "g", "k", "g", "k", "g", "k", "g", "k",
"g", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "t", "d", "tc",
"d", "aiueokstchgzjfdbpw", "t", "d", "t", "d", "t", "d", "n", "n", "n", "n",
"n", "h", "b", "p", "h", "b", "p", "hf", "b", "p", "h", "b", "p", "h", "b",
"p", "m", "m", "m", "m", "m", "y", "y", "y", "y", "y", "y", "rl", "rl",
"rl", "rl", "rl", "wiueo", "wiueo", "wiueo", "wiueo", "w", "n", "v", "k",
"k", "", "", "", "", "", "", "", "", ""]
def __init__(self):
self.kanwa = jisyo()
def isKanji(self, c):
return ( 0x3400 <= ord(c) and ord(c) < 0xfa2e)
def isCletter(self, l, c):
if (ord(u"") <= ord(c) and ord(c) <= 0x309f) and ( l in self.cl_table[ord(c) - ord(u"")-1]):
return True
return False
def itaiji_conv(self, text):
r = []
for c in text:
if c in self.kanwa.itaijidict:
r.append(c)
for c in r:
text = re.sub(c, self.kanwa.itaijidict[c], text)
return text
def convert(self, text):
max_len = 0
Hstr = ""
table = self.kanwa.load_jisyo(text[0])
if table is None:
return ("", 0)
for (k,v) in table.iteritems():
length = len(k)
if len(text) >= length:
if text.startswith(k):
for (yomi, tail) in v:
if tail == '':
if max_len < length:
Hstr = yomi
max_len = length
elif max_len < length+1 and len(text) > length and self.isCletter(tail, text[length]):
Hstr=''.join([yomi,text[length]])
max_len = length+1
return (Hstr, max_len)
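convert() loads the dictionary bucket for the leading kanji and scans it for the longest key that prefixes the text; when an entry carries a tail letter, isCletter() lets the match also claim the following hiragana if it belongs to that consonant class (okurigana handling). A sketch of the contract, with an assumed dictionary entry, since kanwadict2.db ships as an opaque resource:

# Assume the bucket for u'\u66f8' holds the entry (u'\u304b', 'k'): yomi 'ka'
# whose tail class 'k' accepts okurigana such as u'\u304f' (ku).
j2h = J2H()
hira, consumed = j2h.convert(u'\u66f8\u304f')  # (u'\u304b\u304f', 2) under that entry
if consumed == 0:
    pass  # leading char not in the dictionary; the caller copies it through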

View File

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
# jisyo.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
from cPickle import load
import anydbm,marshal
from zlib import decompress
import os
import calibre.utils.resources as resources
class jisyo (object):
kanwadict = None
itaijidict = None
kanadict = None
jisyo_table = {}
# this class is Borg
_shared_state = {}
def __new__(cls, *p, **k):
self = object.__new__(cls, *p, **k)
self.__dict__ = cls._shared_state
return self
def __init__(self):
if self.kanwadict is None:
dictpath = resources.get_path(os.path.join('localization','pykakasi','kanwadict2.db'))
self.kanwadict = anydbm.open(dictpath,'r')
if self.itaijidict is None:
itaijipath = resources.get_path(os.path.join('localization','pykakasi','itaijidict2.pickle'))
itaiji_pkl = open(itaijipath, 'rb')
self.itaijidict = load(itaiji_pkl)
if self.kanadict is None:
kanadictpath = resources.get_path(os.path.join('localization','pykakasi','kanadict2.pickle'))
kanadict_pkl = open(kanadictpath, 'rb')
self.kanadict = load(kanadict_pkl)
def load_jisyo(self, char):
try:#python2
key = "%04x"%ord(unicode(char))
except:#python3
key = "%04x"%ord(char)
try: #already exist?
table = self.jisyo_table[key]
except:
try:
table = self.jisyo_table[key] = marshal.loads(decompress(self.kanwadict[key]))
except:
return None
return table
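jisyo, like H2a, is a Borg: every instance shares a single __dict__, so the pickles and the anydbm handle are loaded once per process no matter how many decoders get constructed. The pattern in isolation, as a minimal sketch:

class Borg(object):
    _shared_state = {}
    def __new__(cls, *p, **k):
        self = object.__new__(cls)
        self.__dict__ = cls._shared_state  # every instance aliases one dict
        return self

a, b = Borg(), Borg()
a.loaded = True
assert b.loaded and a is not b  # shared state, distinct identities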

View File

@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
# k2a.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo
class K2a (object):
kanwa = None
def __init__(self):
self.kanwa = jisyo()
def isKatakana(self, char):
return ( 0x30a0 < ord(char) and ord(char) < 0x30f7)
def convert(self, text):
Hstr = ""
max_len = -1
r = min(10, len(text)+1)
for x in xrange(r):
if text[:x] in self.kanwa.kanadict:
if max_len < x:
max_len = x
Hstr = self.kanwa.kanadict[text[:x]]
return (Hstr, max_len)

View File

@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
# kakasi.py
#
# Copyright 2011 Hiroshi Miura <miurahr@linux.com>
#
# Original Copyright:
# * KAKASI (Kanji Kana Simple inversion program)
# * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
# * Copyright (C) 1992
# * Hironobu Takahashi (takahasi@tiny.or.jp)
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either versions 2, or (at your option)
# * any later version.
# *
# * This program is distributed in the hope that it will be useful
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with KAKASI, see the file COPYING. If not, write to the Free
# * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
# * 02111-1307, USA.
# */
from calibre.ebooks.unihandecode.pykakasi.j2h import J2H
from calibre.ebooks.unihandecode.pykakasi.h2a import H2a
from calibre.ebooks.unihandecode.pykakasi.k2a import K2a
class kakasi(object):
j2h = None
h2a = None
k2a = None
def __init__(self):
self.j2h = J2H()
self.h2a = H2a()
self.k2a = K2a()
def do(self, text):
otext = ''
i = 0
while True:
if i >= len(text):
break
if self.j2h.isKanji(text[i]):
(t, l) = self.j2h.convert(text[i:])
if l <= 0:
otext = otext + text[i]
i = i + 1
continue
i = i + l
m = 0
tmptext = ""
while True:
if m >= len(t):
break
(s, n) = self.h2a.convert(t[m:])
if n <= 0:
break
m = m + n
tmptext = tmptext+s
if i >= len(text):
otext = otext + tmptext.capitalize()
else:
otext = otext + tmptext.capitalize() +' '
elif self.h2a.isHiragana(text[i]):
tmptext = ''
while True:
(t, l) = self.h2a.convert(text[i:])
tmptext = tmptext+t
i = i + l
if i >= len(text):
otext = otext + tmptext
break
elif not self.h2a.isHiragana(text[i]):
otext = otext + tmptext + ' '
break
elif self.k2a.isKatakana(text[i]):
tmptext = ''
while True:
(t, l) = self.k2a.convert(text[i:])
tmptext = tmptext+t
i = i + l
if i >= len(text):
otext = otext + tmptext
break
elif not self.k2a.isKatakana(text[i]):
otext = otext + tmptext + ' '
break
else:
otext = otext + text[i]
i += 1
return otext
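do() makes a single left-to-right pass: kanji runs go through J2H and then H2a (each converted word capitalized and space-separated), hiragana and katakana runs go through H2a and K2a, and anything else is copied through unchanged. A usage sketch; the romanization in the comment depends on the bundled dictionary and is illustrative:

from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi

k = kakasi()
print k.do(u'\u6771\u4eac 123')  # e.g. 'Toukyou 123': the kanji run romanized, ASCII untouched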

File diff suppressed because it is too large

View File

@ -0,0 +1,317 @@
;; Kana-Alphabet mapping dictionary
;;
;; To use this mapping table,
;; you should first normalize the text to Unicode NFKC form.
;;
;; basic mapping
;;
a ァ
a ア
ba バ
bba ッバ
bbe ッベ
bbi ッビ
bbo ッボ
bbu ッブ
bbya ッビャ
bbyo ッビョ
bbyu ッビュ
be ベ
bi ビ
bo ボ
bu ブ
bya ビャ
byo ビョ
byu ビュ
cha チャ
che チェ
chi チ
cho チョ
chu チュ
da ダ
dda ッダ
dde ッデ
ddo ッド
de デ
di ディ
do ド
e ェ
e エ
e ヱ
fa ファ
fe フェ
ffa ッファ
ffe ッフェ
ffi ッフィ
ffo ッフォ
ffu ッフ
fi フィ
fo フォ
fu フ
ga ガ
ge ゲ
gga ッガ
gge ッゲ
ggi ッギ
ggo ッゴ
ggu ッグ
ggya ッギャ
ggyo ッギョ
ggyu ッギュ
gi ギ
go ゴ
gu グ
gya グャ
gyo ギョ
gyu ギゥ
ha ハ
he ヘ
hha ッハ
hhe ッヘ
hhi ッヒ
hho ッホ
hhya ッヒャ
hhyo ッヒョ
hhyu ッヒュ
hi ヒ
ho ホ
hya ヒャ
hyo ヒョ
hyu ヒュ
i ィ
i イ
i ヰ
ja ジャ
ja ヂャ
ji ジ
ji ヂ
jja ッジャ
jji ッジ
jji ッヂ
jjo ッジョ
jju ッジュ
jjya ッヂャ
jjyo ッヂョ
jjyu ッヂュ
jo ジョ
jo ヂョ
ju ジュ
ju ヂュ
ka カ
ka ヵ
ke ケ
ke ヶ
ki キ
kka ッカ
kke ッケ
kki ッキ
kko ッコ
kku ック
kkya ッキャ
kkyo ッキョ
kkyu ッキュ
ko コ
ku ク
kya キァ
kyo キォ
kyu キゥ
ma マ
me メ
mi ミ
mo モ
mu ム
mya ミャ
myo ミョ
myu ミュ
n ン
n'a ンア
n'e ンエ
n'i ンイ
n'o ンオ
n'u ンウ
na ナ
ne ネ
ni ニ
no ノ
nu ヌ
nya ニャ
nyo ニョ
nyu ニュ
o ォ
o オ
pa パ
pe ペ
pi ピ
po ポ
ppa ッパ
ppe ッペ
ppi ッピ
ppo ッポ
ppu ップ
ppya ッピャ
ppyo ッピョ
ppyu ッピュ
pu プ
pya ピャ
pyo ピョ
pyu ピュ
ra ラ
re レ
ri リ
ro ロ
rra ッラ
rre ッレ
rri ッリ
rro ッロ
rru ッル
rrya ッリャ
rryo ッリョ
rryu ッリュ
ru ル
rya リャ
ryo リョ
ryu リュ
sa サ
se セ
sha シャ
shi シ
sho ショ
shu シュ
so ソ
ssa ッサ
sse ッセ
ssha ッシャ
sshi ッシ
ssho ッショ
sshu ッシュ
sso ッソ
ssu ッス
su ス
ta タ
tcha ッチャ
tchi ッチ
tcho ッチョ
tchu ッチュ
te テ
to ト
tsu ッ
tsu ツ
tta ッタ
tte ッテ
tto ット
ttsu ッツ
u ゥ
u ウ
va ヴァ
ve ヴェ
vi ヴィ
vo ヴォ
vu ヴ
vva ッヴァ
vve ッヴェ
vvi ッヴィ
vvo ッヴォ
vvu ッヴ
wa ヮ
wa ワ
wo ヲ
ya ャ
ya ヤ
yo ョ
yo ヨ
yu ュ
yu ユ
yya ッヤ
yyo ッヨ
yyu ッユ
za ザ
ze ゼ
zo ゾ
zu ズ
zu ヅ
zza ッザ
zzo ッゾ
zzu ッズ
zzu ッヅ
;;
;; extended characters
;;
;;
;; gairai terms
;;
all オール
algrism アルゴリズム
answer アンサー
base ベース
begineer ビギナー
connection コネクション
contents コンテンツ
creator クリエーター
comic コミック
comics コミックス
culture カルチャー
debug デバッグ
debugging デバッギング
design デザイン
digital デジタル
dillenma ジレンマ
directory ディレクトリ
disk ディスク
document ドキュメント
download ダウンロード
electric エレクトリック
facebook フェイスブック
firefox ファイアーフォックス
folder フォルダ
format フォーマット
forum フォーラム
fox フォックス
free フリー
gnome ノーム
gnu グヌー
gozilla ゴジラ
guide ガイド
harvard ハーバード
help ヘルプ
highlight ハイライト
japan ジャパン
journal ジャーナル
library ライブラリ
line ライン
love ラヴ
love ラブ
mail メール
main メイン
mystery ミステリ
mozilla モジラ
network ネットワーク
next ネクスト
new ニュー
news ニュース
native ネイティブ
online オンライン
open オープン
professional プロフェッショナル
profile プロファイル
programmer プログラマ
sample サンプル
series シリーズ
share シェア
social ソーシャル
society ソサエティ
software ソフトウエア
source ソース
street ストリート
system システム
tag タグ
text テキスト
thunderbird サンダーバード
training トレーニング
twitter ツイッター
unicode ユニコード
wall ウオール
wall ウォール
welcome ウェルカム
welcome ウエルカム
wikinomics ウィキノミクス
york ヨーク

File diff suppressed because it is too large

View File

@ -1,12 +1,17 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
''' '''
Decode unicode text to an ASCII representation of the text. Transliterate Decode unicode text to an ASCII representation of the text in Chinese.
unicode characters to ASCII. Transliterate unicode characters to ASCII based on Chinese pronunciation.
Derived from John Schember's unidecode library, which was created
as part of calibre.
Copyright(c) 2009, John Schember <john@nachtimwald.com>
Based on the ruby unidecode gem (http://rubyforge.org/projects/unidecode/) which Based on the ruby unidecode gem (http://rubyforge.org/projects/unidecode/) which
is based on the perl module Text::Unidecode is based on the perl module Text::Unidecode
@ -55,29 +60,20 @@ it under the same terms as Perl itself.
''' '''
import re import re
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
from calibre.ebooks.unidecode.unicodepoints import CODEPOINTS from calibre.ebooks.unihandecode.zhcodepoints import CODEPOINTS as HANCODES
from calibre.constants import preferred_encoding
class Unidecoder(object): class Unidecoder(object):
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(HANCODES)
def decode(self, text): def decode(self, text):
'''
Tranliterate the string from unicode characters to ASCII.
'''
# The keys for CODEPOINTS is unicode characters, we want to be sure the
# input text is unicode.
if not isinstance(text, unicode):
try:
text = unicode(text)
except:
try:
text = text.decode(preferred_encoding)
except:
text = text.decode('utf-8', 'replace')
# Replace characters larger than 127 with their ASCII equivalent. # Replace characters larger than 127 with their ASCII equivalent.
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()), return re.sub('[^\x00-\x7f]',lambda x: self.replace_point(x.group()), text)
text)
def replace_point(self, codepoint): def replace_point(self, codepoint):
''' '''
@ -87,7 +83,7 @@ class Unidecoder(object):
# Split the unicode character xABCD into parts 0xAB and 0xCD. # Split the unicode character xABCD into parts 0xAB and 0xCD.
# 0xAB represents the group within CODEPOINTS to query and 0xCD # 0xAB represents the group within CODEPOINTS to query and 0xCD
# represents the position in the list of characters for the group. # represents the position in the list of characters for the group.
return CODEPOINTS[self.code_group(codepoint)][self.grouped_point( return self.codepoints[self.code_group(codepoint)][self.grouped_point(
codepoint)] codepoint)]
except: except:
return '?' return '?'
@ -97,12 +93,18 @@ class Unidecoder(object):
Find what group character is a part of. Find what group character is a part of.
''' '''
# Code groups withing CODEPOINTS take the form 'xAB' # Code groups withing CODEPOINTS take the form 'xAB'
return u'x%02x' % (ord(unicode(character)) >> 8) try:#python2
return 'x%02x' % (ord(unicode(character)) >> 8)
except:
return 'x%02x' % (ord(character) >> 8)
def grouped_point(self, character): def grouped_point(self, character):
''' '''
Return the location the replacement character is in the list for a Return the location the replacement character is in the list for a
the group character is a part of. the group character is a part of.
''' '''
return ord(unicode(character)) & 255 try:#python2
return ord(unicode(character)) & 255
except:
return ord(character) & 255
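replace_point() addresses the table by high byte: the bucket key is 'x' plus the top byte of the codepoint, and the low byte indexes into that bucket's list of replacements. A worked example:

ch = u'\u4e2d'
group = 'x%02x' % (ord(ch) >> 8)  # 'x4e': which bucket of self.codepoints to use
offset = ord(ch) & 255            # 0x2d == 45: position inside that bucket
# replace_point() returns self.codepoints['x4e'][45], or '?' on any failure.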

File diff suppressed because it is too large

View File

@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
__docformat__ = 'restructuredtext en'
'''
Decode unicode text to an ASCII representation of the text in Vietnamese.
'''
from calibre.ebooks.unihandecode.unidecoder import Unidecoder
from calibre.ebooks.unihandecode.vncodepoints import CODEPOINTS as HANCODES
from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS
class Vndecoder(Unidecoder):
codepoints = {}
def __init__(self):
self.codepoints = CODEPOINTS
self.codepoints.update(HANCODES)

File diff suppressed because it is too large

View File

@ -271,11 +271,6 @@ class DeleteAction(InterfaceAction):
partial(self.library_ids_deleted, current_row=row)) partial(self.library_ids_deleted, current_row=row))
# Device view is visible. # Device view is visible.
else: else:
if not confirm('<p>'+_('The selected books will be '
'<b>permanently deleted</b> '
'from your device. Are you sure?')
+'</p>', 'device_delete_books', self.gui):
return
if self.gui.stack.currentIndex() == 1: if self.gui.stack.currentIndex() == 1:
view = self.gui.memory_view view = self.gui.memory_view
elif self.gui.stack.currentIndex() == 2: elif self.gui.stack.currentIndex() == 2:
@ -283,8 +278,14 @@ class DeleteAction(InterfaceAction):
else: else:
view = self.gui.card_b_view view = self.gui.card_b_view
paths = view.model().paths(rows) paths = view.model().paths(rows)
ids = view.model().indices(rows)
if not confirm('<p>'+_('The selected books will be '
'<b>permanently deleted</b> '
'from your device. Are you sure?')
+'</p>', 'device_delete_books', self.gui):
return
job = self.gui.remove_paths(paths) job = self.gui.remove_paths(paths)
self.delete_memory[job] = (paths, view.model()) self.delete_memory[job] = (paths, view.model())
view.model().mark_for_deletion(job, rows) view.model().mark_for_deletion(job, ids, rows_are_ids=True)
self.gui.status_bar.show_message(_('Deleting books from device.'), 1000) self.gui.status_bar.show_message(_('Deleting books from device.'), 1000)

View File

@ -213,6 +213,8 @@ class CheckLibraryDialog(QDialog):
self.log = QTreeWidget(self) self.log = QTreeWidget(self)
self.log.itemChanged.connect(self.item_changed) self.log.itemChanged.connect(self.item_changed)
self.log.itemExpanded.connect(self.item_expanded_or_collapsed)
self.log.itemCollapsed.connect(self.item_expanded_or_collapsed)
self._layout.addWidget(self.log) self._layout.addWidget(self.log)
self.check_button = QPushButton(_('&Run the check again')) self.check_button = QPushButton(_('&Run the check again'))
@ -333,11 +335,15 @@ class CheckLibraryDialog(QDialog):
for check in CHECKS: for check in CHECKS:
builder(t, checker, check) builder(t, checker, check)
t.setColumnWidth(0, 200) t.resizeColumnToContents(0)
t.setColumnWidth(1, 400) t.resizeColumnToContents(1)
self.delete_button.setEnabled(False) self.delete_button.setEnabled(False)
self.text_results = '\n'.join(plaintext) self.text_results = '\n'.join(plaintext)
def item_expanded_or_collapsed(self, item):
self.log.resizeColumnToContents(0)
self.log.resizeColumnToContents(1)
def item_changed(self, item, column): def item_changed(self, item, column):
self.fix_button.setEnabled(False) self.fix_button.setEnabled(False)
for it in self.top_level_items.values(): for it in self.top_level_items.values():

View File

@ -120,11 +120,10 @@ class BooksModel(QAbstractTableModel): # {{{
def set_device_connected(self, is_connected): def set_device_connected(self, is_connected):
self.device_connected = is_connected self.device_connected = is_connected
self.refresh_ondevice()
def refresh_ondevice(self): def refresh_ondevice(self):
self.db.refresh_ondevice() self.db.refresh_ondevice()
self.refresh() # does a resort() self.resort()
self.research() self.research()
def set_book_on_device_func(self, func): def set_book_on_device_func(self, func):
@ -826,7 +825,7 @@ class BooksModel(QAbstractTableModel): # {{{
return False return False
val = int(value.toInt()[0]) if column == 'rating' else \ val = int(value.toInt()[0]) if column == 'rating' else \
value.toDate() if column in ('timestamp', 'pubdate') else \ value.toDate() if column in ('timestamp', 'pubdate') else \
unicode(value.toString()) unicode(value.toString()).strip()
id = self.db.id(row) id = self.db.id(row)
books_to_refresh = set([id]) books_to_refresh = set([id])
if column == 'rating': if column == 'rating':

View File

@ -551,8 +551,10 @@ class BooksView(QTableView): # {{{
return mods & Qt.ControlModifier or mods & Qt.ShiftModifier return mods & Qt.ControlModifier or mods & Qt.ShiftModifier
def mousePressEvent(self, event): def mousePressEvent(self, event):
if event.button() == Qt.LeftButton and not self.event_has_mods(): ep = event.pos()
self.drag_start_pos = event.pos() if self.indexAt(ep) in self.selectionModel().selectedIndexes() and \
event.button() == Qt.LeftButton and not self.event_has_mods():
self.drag_start_pos = ep
return QTableView.mousePressEvent(self, event) return QTableView.mousePressEvent(self, event)
def mouseMoveEvent(self, event): def mouseMoveEvent(self, event):

View File

@ -10,13 +10,15 @@ from PyQt4.Qt import QApplication
from calibre.gui2.preferences import ConfigWidgetBase, test_widget, \ from calibre.gui2.preferences import ConfigWidgetBase, test_widget, \
CommaSeparatedList CommaSeparatedList
from calibre.gui2.preferences.search_ui import Ui_Form from calibre.gui2.preferences.search_ui import Ui_Form
from calibre.gui2 import config from calibre.gui2 import config, error_dialog
from calibre.utils.config import prefs from calibre.utils.config import prefs
class ConfigWidget(ConfigWidgetBase, Ui_Form): class ConfigWidget(ConfigWidgetBase, Ui_Form):
def genesis(self, gui): def genesis(self, gui):
self.gui = gui self.gui = gui
db = gui.library_view.model().db
self.db = db
r = self.register r = self.register
@ -24,11 +26,153 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
r('highlight_search_matches', config) r('highlight_search_matches', config)
r('limit_search_columns', prefs) r('limit_search_columns', prefs)
r('limit_search_columns_to', prefs, setting=CommaSeparatedList) r('limit_search_columns_to', prefs, setting=CommaSeparatedList)
fl = gui.library_view.model().db.field_metadata.get_search_terms() fl = db.field_metadata.get_search_terms()
self.opt_limit_search_columns_to.update_items_cache(fl) self.opt_limit_search_columns_to.update_items_cache(fl)
self.clear_history_button.clicked.connect(self.clear_histories) self.clear_history_button.clicked.connect(self.clear_histories)
self.gst_explanation.setText('<p>' + _(
"<b>Grouped search terms</b> are search names that permit a query to automatically "
"search across more than one column. For example, if you create a grouped "
"search term <code>allseries</code> with the value "
"<code>series, #myseries, #myseries2</code>, then "
"the query <code>allseries:adhoc</code> will find 'adhoc' in any of the "
"columns <code>series</code>, <code>#myseries</code>, and "
"<code>#myseries2</code>.<p> Enter the name of the "
"grouped search term in the drop-down box, enter the list of columns "
"to search in the value box, then push the Save button. "
"<p>Note: Search terms are forced to lower case; <code>MySearch</code> "
"and <code>mysearch</code> are the same term."
"<p>You can have your grouped search term show up as user categories in "
" the Tag Browser. Just add the grouped search term names to the Make user "
"categories from box. You can add multiple terms separated by commas. "
"The new user category will be automatically "
"populated with all the items in the categories included in the grouped "
"search term. <p>Automatic user categories permit you to see easily "
"all the category items that "
"are in the columns contained in the grouped search term. Using the above "
"<code>allseries</code> example, the automatically-generated user category "
"will contain all the series mentioned in <code>series</code>, "
"<code>#myseries</code>, and <code>#myseries2</code>. This "
"can be useful to check for duplicates, to find which column contains "
"a particular item, or to have hierarchical categories (categories "
"that contain categories)."))
self.gst = db.prefs.get('grouped_search_terms', {})
self.orig_gst_keys = self.gst.keys()
fl = []
for f in db.all_field_keys():
fm = db.metadata_for_field(f)
if not fm['search_terms']:
continue
if not fm['is_category']:
continue
fl.append(f)
self.gst_value.update_items_cache(fl)
self.fill_gst_box(select=None)
self.gst_delete_button.setEnabled(False)
self.gst_save_button.setEnabled(False)
self.gst_names.currentIndexChanged[int].connect(self.gst_index_changed)
self.gst_names.editTextChanged.connect(self.gst_text_changed)
self.gst_value.textChanged.connect(self.gst_text_changed)
self.gst_save_button.clicked.connect(self.gst_save_clicked)
self.gst_delete_button.clicked.connect(self.gst_delete_clicked)
self.gst_changed = False
if db.prefs.get('grouped_search_make_user_categories', None) is None:
db.prefs.set('grouped_search_make_user_categories', [])
r('grouped_search_make_user_categories', db.prefs, setting=CommaSeparatedList)
self.muc_changed = False
self.opt_grouped_search_make_user_categories.editingFinished.connect(
self.muc_box_changed)
def muc_box_changed(self):
self.muc_changed = True
def gst_save_clicked(self):
idx = self.gst_names.currentIndex()
name = icu_lower(unicode(self.gst_names.currentText()))
if not name:
return error_dialog(self.gui, _('Grouped Search Terms'),
_('The search term cannot be blank'),
show=True)
if idx != 0:
orig_name = unicode(self.gst_names.itemData(idx).toString())
else:
orig_name = ''
if name != orig_name:
if name in self.db.field_metadata.get_search_terms() and \
name not in self.orig_gst_keys:
return error_dialog(self.gui, _('Grouped Search Terms'),
_('That name is already used for a column or grouped search term'),
show=True)
if name in [icu_lower(p) for p in self.db.prefs.get('user_categories', {})]:
return error_dialog(self.gui, _('Grouped Search Terms'),
_('That name is already used for a user category'),
show=True)
val = [v.strip() for v in unicode(self.gst_value.text()).split(',') if v.strip()]
if not val:
return error_dialog(self.gui, _('Grouped Search Terms'),
_('The value box cannot be empty'), show=True)
if orig_name and name != orig_name:
del self.gst[orig_name]
self.gst_changed = True
self.gst[name] = val
self.fill_gst_box(select=name)
self.changed_signal.emit()
def gst_delete_clicked(self):
if self.gst_names.currentIndex() == 0:
return error_dialog(self.gui, _('Grouped Search Terms'),
_('The empty grouped search term cannot be deleted'), show=True)
name = unicode(self.gst_names.currentText())
if name in self.gst:
del self.gst[name]
self.fill_gst_box(select='')
self.changed_signal.emit()
self.gst_changed = True
def fill_gst_box(self, select=None):
terms = sorted(self.gst.keys())
self.opt_grouped_search_make_user_categories.update_items_cache(terms)
self.gst_names.blockSignals(True)
self.gst_names.clear()
self.gst_names.addItem('', '')
for t in terms:
self.gst_names.addItem(t, t)
self.gst_names.blockSignals(False)
if select is not None:
if select == '':
self.gst_index_changed(0)
elif select in terms:
self.gst_names.setCurrentIndex(self.gst_names.findText(select))
def gst_text_changed(self):
self.gst_delete_button.setEnabled(False)
self.gst_save_button.setEnabled(True)
def gst_index_changed(self, idx):
self.gst_delete_button.setEnabled(idx != 0)
self.gst_save_button.setEnabled(False)
self.gst_value.blockSignals(True)
if idx == 0:
self.gst_value.setText('')
else:
name = unicode(self.gst_names.itemData(idx).toString())
self.gst_value.setText(','.join(self.gst[name]))
self.gst_value.blockSignals(False)
def commit(self):
if self.gst_changed:
self.db.prefs.set('grouped_search_terms', self.gst)
self.db.field_metadata.add_grouped_search_terms(self.gst)
return ConfigWidgetBase.commit(self)
def refresh_gui(self, gui): def refresh_gui(self, gui):
if self.muc_changed:
gui.tags_view.set_new_model()
gui.search.search_as_you_type(config['search_as_you_type']) gui.search.search_as_you_type(config['search_as_you_type'])
gui.library_view.model().set_highlight_only(config['highlight_search_matches']) gui.library_view.model().set_highlight_only(config['highlight_search_matches'])
gui.search.do_search() gui.search.do_search()

View File

@@ -7,7 +7,7 @@
     <x>0</x>
     <y>0</y>
     <width>670</width>
-    <height>392</height>
+    <height>556</height>
    </rect>
   </property>
   <property name="windowTitle">
@@ -77,19 +77,6 @@
      </layout>
     </widget>
    </item>
-   <item row="4" column="0">
-    <spacer name="verticalSpacer">
-     <property name="orientation">
-      <enum>Qt::Vertical</enum>
-     </property>
-     <property name="sizeHint" stdset="0">
-      <size>
-       <width>0</width>
-       <height>0</height>
-      </size>
-     </property>
-    </spacer>
-   </item>
    <item row="3" column="0">
     <widget class="QPushButton" name="clear_history_button">
      <property name="toolTip">
@@ -100,6 +87,120 @@
      </property>
     </widget>
    </item>
+   <item row="4" column="0">
+    <widget class="QGroupBox" name="groupBox_2">
+     <property name="title">
+      <string>Grouped Search Terms</string>
+     </property>
+     <layout class="QGridLayout" name="gridLayout_3">
+      <item row="0" column="0">
+       <layout class="QHBoxLayout" name="l12">
+        <item>
+         <widget class="QLabel" name="la10">
+          <property name="text">
+           <string>&amp;Names:</string>
+          </property>
+          <property name="buddy">
+           <cstring>gst_names</cstring>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="QComboBox" name="gst_names">
+          <property name="editable">
+           <bool>true</bool>
+          </property>
+          <property name="minimumContentsLength">
+           <number>10</number>
+          </property>
+          <property name="toolTip">
+           <string>Contains the names of the currently-defined grouped search terms.
+Create a new name by entering it into the empty box, then
+pressing Save. Rename a search term by selecting it, then
+changing the name and pressing Save. Change the value of
+a search term by changing the value box, then pressing Save.</string>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="QToolButton" name="gst_delete_button">
+          <property name="toolTip">
+           <string>Delete the current search term</string>
+          </property>
+          <property name="text">
+           <string>...</string>
+          </property>
+          <property name="icon">
+           <iconset resource="../../../../resources/images.qrc">
+            <normaloff>:/images/trash.png</normaloff>:/images/trash.png</iconset>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="MultiCompleteLineEdit" name="gst_value"/>
+        </item>
+        <item>
+         <widget class="QToolButton" name="gst_save_button">
+          <property name="toolTip">
+           <string>Save the current search term. You can rename a search term by
+changing the name, then pressing Save. You can change the value
+of a search term by changing the value box, then pressing Save.</string>
+          </property>
+          <property name="text">
+           <string>&amp;Save</string>
+          </property>
+         </widget>
+        </item>
+       </layout>
+      </item>
+      <item row="0" column="1" rowspan="3">
+       <widget class="QTextBrowser" name="gst_explanation">
+        <property name="sizePolicy">
+         <sizepolicy hsizetype="Expanding" vsizetype="Expanding">
+          <horstretch>0</horstretch>
+          <verstretch>100</verstretch>
+         </sizepolicy>
+        </property>
+       </widget>
+      </item>
+      <item row="1" column="0">
+       <layout class="QHBoxLayout">
+        <item>
+         <widget class="QLabel" name="l11">
+          <property name="text">
+           <string>Make &amp;user categories from:</string>
+          </property>
+          <property name="buddy">
+           <cstring>opt_grouped_search_make_user_categories</cstring>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="MultiCompleteLineEdit" name="opt_grouped_search_make_user_categories">
+          <property name="toolTip">
+           <string>Enter the names of any grouped search terms you wish
+to be shown as user categories</string>
+          </property>
+         </widget>
+        </item>
+       </layout>
+      </item>
+      <item row="2" column="0">
+       <spacer name="verticalSpacer">
+        <property name="orientation">
+         <enum>Qt::Vertical</enum>
+        </property>
+        <property name="sizeHint" stdset="0">
+         <size>
+          <width>20</width>
+          <height>40</height>
+         </size>
+        </property>
+       </spacer>
+      </item>
+     </layout>
+    </widget>
+   </item>
  </layout>
 </widget>
 <customwidgets>
@@ -109,6 +210,8 @@
   <header>calibre/gui2/complete.h</header>
  </customwidget>
 </customwidgets>
-<resources/>
+<resources>
+ <include location="../../../../resources/images.qrc"/>
+</resources>
 <connections/>
</ui>
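
The widgets declared above (gst_names, gst_value, the Save and Delete buttons) are the ones the Python handlers in the previous file wire up. As a rough sketch, a form like this can be previewed outside calibre's build system as follows (PyQt4-era code; calibre itself compiles .ui files to Python at build time rather than loading them at runtime, and the path here is assumed):

    import sys
    from PyQt4 import QtGui, uic

    app = QtGui.QApplication(sys.argv)
    form = uic.loadUi('search.ui')  # the form edited above
    form.show()
    app.exec_()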

@@ -466,10 +466,7 @@ class TagTreeItem(object): # {{{
             icon_map[0] = data.icon
         self.tag, self.icon_state_map = data, list(map(QVariant, icon_map))
         if tooltip:
-            if tooltip.endswith(':'):
-                self.tooltip = tooltip + ' '
-            else:
-                self.tooltip = tooltip + ': '
+            self.tooltip = tooltip + ' '
         else:
             self.tooltip = ''
@@ -589,11 +586,17 @@ class TagsModel(QAbstractItemModel): # {{{
         # get_node_tree cannot return None here, because row_map is empty
         data = self.get_node_tree(config['sort_tags_by'])
+        gst = db.prefs.get('grouped_search_terms', {})
         self.root_item = TagTreeItem()
         for i, r in enumerate(self.row_map):
             if self.hidden_categories and self.categories[i] in self.hidden_categories:
                 continue
-            tt = _(u'The lookup/search name is "{0}"').format(r)
+            if r.startswith('@') and r[1:] in gst:
+                tt = _(u'The grouped search term name is "{0}"').format(r[1:])
+            elif r == 'news':
+                tt = ''
+            else:
+                tt = _(u'The lookup/search name is "{0}"').format(r)
             TagTreeItem(parent=self.root_item,
                     data=self.categories[i],
                     category_icon=self.category_icon_map[r],
@@ -735,6 +738,14 @@ class TagsModel(QAbstractItemModel): # {{{
         self.row_map = []
         self.categories = []
+        # Get the categories
+        if self.search_restriction:
+            data = self.db.get_categories(sort=sort,
+                        icon_map=self.category_icon_map,
+                        ids=self.db.search('', return_matches=True))
+        else:
+            data = self.db.get_categories(sort=sort, icon_map=self.category_icon_map)
+
         # Reconstruct the user categories, putting them into metadata
         self.db.field_metadata.remove_dynamic_categories()
         tb_cats = self.db.field_metadata
@@ -746,17 +757,16 @@ class TagsModel(QAbstractItemModel): # {{{
             except ValueError:
                 import traceback
                 traceback.print_exc()
+        for cat in sorted(self.db.prefs.get('grouped_search_terms', {}),
+                          key=sort_key):
+            if (u'@' + cat) in data:
+                tb_cats.add_user_category(label=u'@' + cat, name=cat)
+        self.db.data.change_search_locations(self.db.field_metadata.get_search_terms())
+
         if len(saved_searches().names()):
             tb_cats.add_search_category(label='search', name=_('Searches'))
-        # Now get the categories
-        if self.search_restriction:
-            data = self.db.get_categories(sort=sort,
-                        icon_map=self.category_icon_map,
-                        ids=self.db.search('', return_matches=True))
-        else:
-            data = self.db.get_categories(sort=sort, icon_map=self.category_icon_map)
         if self.filter_categories_by:
             for category in data.keys():
                 data[category] = [t for t in data[category]
@@ -767,6 +777,7 @@ class TagsModel(QAbstractItemModel): # {{{
             if category in data: # The search category can come and go
                 self.row_map.append(category)
                 self.categories.append(tb_categories[category]['name'])
+
         if len(old_row_map) != 0 and len(old_row_map) != len(self.row_map):
             # A category has been added or removed. We must force a rebuild of
             # the model
@@ -822,6 +833,7 @@ class TagsModel(QAbstractItemModel): # {{{
                     not self.db.field_metadata[r]['is_custom'] and \
                     not self.db.field_metadata[r]['kind'] == 'user' \
                     else False
+            tt = r if self.db.field_metadata[r]['kind'] == 'user' else None
             for idx,tag in enumerate(data[r]):
                 if clear_rating:
                     tag.avg_rating = None
@@ -861,10 +873,10 @@ class TagsModel(QAbstractItemModel): # {{{
                                 category_icon = category_node.icon,
                                 tooltip = None,
                                 category_key=category_node.category_key)
-                    t = TagTreeItem(parent=sub_cat, data=tag, tooltip=r,
+                    t = TagTreeItem(parent=sub_cat, data=tag, tooltip=tt,
                                     icon_map=self.icon_state_map)
                 else:
-                    t = TagTreeItem(parent=category, data=tag, tooltip=r,
+                    t = TagTreeItem(parent=category, data=tag, tooltip=tt,
                                     icon_map=self.icon_state_map)
         self.endInsertRows()
         return True
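
Condensed, the tooltip selection this diff adds to TagsModel reads as follows (a restatement of the logic above, with plain strings standing in for the translated templates):

    def category_tooltip(r, gst):
        # Rows prefixed with '@' that are backed by a grouped search term
        # get a GST tooltip; the news category gets none.
        if r.startswith('@') and r[1:] in gst:
            return u'The grouped search term name is "%s"' % r[1:]
        if r == 'news':
            return u''
        return u'The lookup/search name is "%s"' % r

    assert category_tooltip('@allseries', {'allseries': []}).endswith('"allseries"')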

@@ -433,6 +433,10 @@ class ResultCache(SearchQueryParser): # {{{
         if len(candidates) == 0:
             return matches
+        if len(location) > 2 and location.startswith('@') and \
+                    location[1:] in self.db_prefs['grouped_search_terms']:
+            location = location[1:]
+
         if query and query.strip():
             # get metadata key associated with the search term. Eliminates
             # dealing with plurals and other aliases
@@ -440,9 +444,16 @@ class ResultCache(SearchQueryParser): # {{{
         # grouped search terms
         if isinstance(location, list):
             if allow_recursion:
+                if query.lower() == 'false':
+                    invert = True
+                    query = 'true'
+                else:
+                    invert = False
                 for loc in location:
                     matches |= self.get_matches(loc, query,
                             candidates=candidates, allow_recursion=False)
+                if invert:
+                    matches = self.universal_set() - matches
                 return matches
             raise ParseException(query, len(query), 'Recursive query group detected', self)
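
The 'false' handling above is De Morgan's law in action: a grouped term is false for a book exactly when none of its member locations match true, so the code searches the members for 'true' and complements the union. A self-contained illustration with plain sets:

    # Books 1-5; two member locations of a grouped term match these ids:
    universe = {1, 2, 3, 4, 5}
    matches_by_location = [{1, 2}, {2, 3}]

    # Searching the group for 'true' ORs the member matches together...
    true_matches = set().union(*matches_by_location)  # {1, 2, 3}
    # ...and 'false' is the complement against the universal set:
    false_matches = universe - true_matches
    assert false_matches == {4, 5}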

@@ -1481,23 +1481,36 @@ class EPUB_MOBI(CatalogPlugin):
             current_author = authors[0]
             for (i,author) in enumerate(authors):
                 if author != current_author and i:
-                    # Exit if author matches previous, but author_sort doesn't match
                     if author[0] == current_author[0]:
-                        error_msg = _('''
-Inconsistent Author Sort values for Author '{0}':
-'{1}' <> '{2}',
-unable to build catalog.\n
-Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog,
-then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
-                        self.opts.log.warn('\n*** Metadata error ***')
-                        self.opts.log.warn(error_msg)
-                        self.error.append('Metadata error')
-                        self.error.append(error_msg)
-                        return False
+                        if self.opts.fmt == 'mobi':
+                            # Exit if building MOBI
+                            error_msg = _(
+'''Inconsistent Author Sort values for
+Author '{0}':
+'{1}' <> '{2}'
+Unable to build MOBI catalog.\n
+Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog, then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
+                            self.opts.log.warn('\n*** Metadata error ***')
+                            self.opts.log.warn(error_msg)
+                            self.error.append('Author Sort mismatch')
+                            self.error.append(error_msg)
+                            return False
+                        else:
+                            # Warning if building non-MOBI
+                            if not self.error:
+                                self.error.append('Author Sort mismatch')
+                            error_msg = _(
+'''Warning: inconsistent Author Sort values for
+Author '{0}':
+'{1}' <> '{2}'\n''').format(author[0],author[1],current_author[1])
+                            self.opts.log.warn('\n*** Metadata warning ***')
+                            self.opts.log.warn(error_msg)
+                            self.error.append(error_msg)
                 current_author = author
         self.booksByAuthor = sorted(self.booksByAuthor, key=self.booksByAuthorSorter_author_sort)
         # Build the unique_authors set from existing data
@@ -2135,7 +2148,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
                         if author_count == 1:
                             divOpeningTag.insert(dotc, pBookTag)
                             dotc += 1
-                        else:
+                        elif divRunningTag:
                             divRunningTag.insert(drtc,pBookTag)
                             drtc += 1
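
The check above fires when two adjacent entries share an author name but carry different author_sort values; MOBI builds abort, other formats only log a warning. A standalone sketch of the same detection (sample data hypothetical):

    from collections import defaultdict

    # (author name, author_sort) pairs as the catalog generator sees them
    authors = [('Jane Doe', 'Doe, Jane'), ('Jane Doe', 'Jane Doe')]

    sorts_seen = defaultdict(set)
    for name, author_sort in authors:
        sorts_seen[name].add(author_sort)

    conflicts = [n for n, s in sorts_seen.items() if len(s) > 1]
    assert conflicts == ['Jane Doe']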

@@ -188,6 +188,17 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             migrate_preference('saved_searches', {})
             set_saved_searches(self, 'saved_searches')
+            # migrate grouped_search_terms
+            if self.prefs.get('grouped_search_terms', None) is None:
+                try:
+                    ogst = tweaks.get('grouped_search_terms', {})
+                    ngst = {}
+                    for t in ogst:
+                        ngst[icu_lower(t)] = ogst[t]
+                    self.prefs.set('grouped_search_terms', ngst)
+                except:
+                    pass
+
         # Rename any user categories with names that differ only in case
         user_cats = self.prefs.get('user_categories', [])
         catmap = {}
@@ -349,12 +360,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         if len(saved_searches().names()):
             tb_cats.add_search_category(label='search', name=_('Searches'))
-        gst = tweaks['grouped_search_terms']
-        for t in gst:
-            try:
-                self.field_metadata._add_search_terms_to_map(gst[t], [t])
-            except ValueError:
-                traceback.print_exc()
+        self.field_metadata.add_grouped_search_terms(
+                        self.prefs.get('grouped_search_terms', {}))
         self.book_on_device_func = None
         self.data = ResultCache(self.FIELD_MAP, self.field_metadata, db_prefs=self.prefs)
@@ -1293,7 +1300,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         # icon_map is not None if get_categories is to store an icon and
         # possibly a tooltip in the tag structure.
         icon = None
-        tooltip = ''
+        tooltip = '(' + category + ')'
         label = tb_cats.key_to_label(category)
         if icon_map:
             if not tb_cats.is_custom_field(category):
@@ -1379,7 +1386,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         categories['formats'].sort(key = lambda x:x.name)
         #### Now do the user-defined categories. ####
-        user_categories = self.prefs['user_categories']
+        user_categories = dict.copy(self.prefs['user_categories'])
         # We want to use same node in the user category as in the source
         # category. To do that, we need to find the original Tag node. There is
@@ -1390,6 +1397,17 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         for c in categories.keys():
             taglist[c] = dict(map(lambda t:(t.name, t), categories[c]))
+        muc = self.prefs.get('grouped_search_make_user_categories', [])
+        gst = self.prefs.get('grouped_search_terms', {})
+        for c in gst:
+            if c not in muc:
+                continue
+            user_categories[c] = []
+            for sc in gst[c]:
+                if sc in categories.keys():
+                    for t in categories[sc]:
+                        user_categories[c].append([t.name, sc, 0])
+
         for user_cat in sorted(user_categories.keys(), key=sort_key):
             items = []
             for (name,label,ign) in user_categories[user_cat]:
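
The loop above materialises each grouped search term the user has opted into (via grouped_search_make_user_categories) as a user category, one [name, source, 0] row per tag found in a member category. A sketch with hypothetical data, using plain strings where the real code stores Tag objects:

    gst = {'allseries': ['series', '#myseries']}
    muc = ['allseries']
    categories = {'series': ['Dune'], '#myseries': ['Foundation']}

    user_categories = {}
    for c in gst:
        if c not in muc:
            continue
        user_categories[c] = []
        for sc in gst[c]:
            if sc in categories:
                for t in categories[sc]:
                    user_categories[c].append([t, sc, 0])

    assert user_categories == {'allseries': [['Dune', 'series', 0],
                                             ['Foundation', '#myseries', 0]]}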

@@ -3,7 +3,7 @@ Created on 25 May 2010
 @author: charles
 '''
-import copy
+import copy, traceback
 from calibre.utils.ordered_dict import OrderedDict
 from calibre.utils.config import tweaks
@@ -488,6 +488,20 @@ class FieldMetadata(dict):
                 del self._search_term_map[k]
         del self._tb_cats[key]
+    def _remove_grouped_search_terms(self):
+        to_remove = [v for v in self._search_term_map
+                     if isinstance(self._search_term_map[v], list)]
+        for v in to_remove:
+            del self._search_term_map[v]
+
+    def add_grouped_search_terms(self, gst):
+        self._remove_grouped_search_terms()
+        for t in gst:
+            try:
+                self._add_search_terms_to_map(gst[t], [t])
+            except ValueError:
+                traceback.print_exc()
+
     def cc_series_index_column_for(self, key):
         return self._tb_cats[key]['rec_index'] + 1
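
add_grouped_search_terms relies on a convention visible in _remove_grouped_search_terms: grouped entries are the only values in the search-term map that are lists, so they can be cleared and rebuilt wholesale on every save. A minimal model of that map (shapes assumed for illustration):

    # Ordinary terms map to a single location; grouped terms map to a list,
    # which is how _remove_grouped_search_terms finds them again.
    search_term_map = {
        'title': 'title',
        'allseries': ['series', '#myseries'],
    }
    grouped = [k for k, v in search_term_map.items() if isinstance(v, list)]
    assert grouped == ['allseries']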

File diff suppressed because it is too large

@@ -6,12 +6,12 @@ meaning as possible.
 import os
 from math import ceil
-from calibre.ebooks.unidecode.unidecoder import Unidecoder
 from calibre import sanitize_file_name
 from calibre.constants import preferred_encoding, iswindows
+from calibre.utils.localization import get_udc
-udc = Unidecoder()
 def ascii_text(orig):
+    udc = get_udc()
     try:
         ascii = udc.decode(orig)
     except:
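
With the module-level Unidecoder gone, ascii_text now fetches the shared decoder lazily, so the transliteration tables are only loaded on first use. Hypothetical usage, assuming a working calibre environment:

    from calibre.utils.filenames import ascii_text

    # The first call constructs the decoder via get_udc(); later calls reuse it.
    print(ascii_text(u'D\xe9j\xe0 vu'))  # e.g. 'Deja vu', depending on the decoder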

@@ -169,3 +169,13 @@ def set_qt_translator(translator):
             return translator.load(p)
     return False
+_udc = None
+
+def get_udc():
+    global _udc
+    if _udc is None:
+        from calibre.ebooks.unihandecode import Unihandecoder
+        _udc = Unihandecoder(lang=get_lang())
+    return _udc
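
get_udc is a lazy module-level singleton: constructing Unihandecoder (and importing its tables) is deferred until the first caller needs transliteration, then the instance is cached for the life of the process. The same pattern in isolation:

    _cache = None

    def get_expensive():
        # Nothing is built until the first call; later calls reuse the instance.
        global _cache
        if _cache is None:
            _cache = object()  # stand-in for Unihandecoder(lang=get_lang())
        return _cache

    assert get_expensive() is get_expensive()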