Sengian 2011-05-14 09:48:59 +02:00
commit 918a5ee79d
129 changed files with 87393 additions and 88488 deletions


@@ -19,6 +19,83 @@
# new recipes:
#   - title:
- version: 0.8.1
  date: 2011-05-13

  new features:
    - title: "Add Amazon DE, Beam EBooks, Beam DE, Weightless Books, Wizards Tower Books to the list of ebook stores searched by Get Books"

    - title: "TXT output: All new Textile output with much greater preservation of formatting from the input document"

    - title: "Migrate metadata plugin for Douban Books to the 0.8 API"

    - title: "Driver for Dell Streak on windows"

    - title: "Add menu items to Get Books action to search by title and author of current book"

    - title: "Add title_sort as available field to CSV/XML catalogs"

    - title: "Add a context menu to the manage authors dialog"

    - title: "Add a button to paste isbn into the identifiers field in the edit metadata dialog automatically"

  bug fixes:
    - title: "Amazon metadata download plugin: Fix links being stripped from comments. Also fix ratings/isbn not being parsed from kindle edition pages."
      tickets: [782012]

    - title: "Fix one source of segfaults on shutdown in the linux binary builds."

    - title: "Allow the use of condensed/expanded fonts as interface fonts"

    - title: "EPUB Input: Ignore missing cover file when converting, instead of erroring out."
      tickets: [781848]

    - title: "Fix custom identifier being erased by metadata download"
      tickets: [781759]

    - title: "Fix regression that broke various things when using Japanese language calibre on windows"
      tickets: [780804]

    - title: "RTF Input: Handle null color codes correctly"
      tickets: [780728]

    - title: "ODT Input: Handle inline special styles defined on <text:span> tags."
      tickets: [780250]

    - title: "Fix error when pressing next previous button with an empty search in the Plugins preferences"
      tickets: [781135]

    - title: "Ignore 'Unknown' author when downloading metadata."
      tickets: [779348]

    - title: "Fix timezone bug when setting dates in the edit metadata dialog"
      tickets: [779497]

    - title: "Fix ebook-convert not recognizing output paths starting with .."
      tickets: [779322]

  improved recipes:
    - "Strategy+Business"
    - Readers Digest
    - Ming Pao
    - Telepolis
    - Fronda
    - Rzeczpospolita

  new recipes:
    - title: "Various Taiwanese news sources"
      author: Eddie Lau

    - title: Replica Vedetelor, Ziua Veche
      author: Silviu Cotoara

    - title: Welt der Physik
      author: schuster

    - title: Korea Herald
      author: Seongkyoun Yoo


- version: 0.8.0
  date: 2010-05-06


@@ -93,7 +93,7 @@ class Arcamax(BasicNewsRecipe):
        for page in pages:
            page_soup = self.index_to_soup(url)
            if page_soup:
-                title = page_soup.find(name='div', attrs={'class':'comics-header'}).h1.contents[0]
+                title = self.tag_to_string(page_soup.find(name='div', attrs={'class':'comics-header'}).h1.contents[0])
                page_url = url
                # orig prev_page_url = 'http://www.arcamax.com' + page_soup.find('a', attrs={'class':'prev'}, text='Previous').parent['href']
                prev_page_url = 'http://www.arcamax.com' + page_soup.find('span', text='Previous').parent.parent['href']
@@ -127,4 +127,3 @@ class Arcamax(BasicNewsRecipe):
                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''
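The Arcamax change is defensive: `.h1.contents[0]` can be a Tag rather than a plain string, and wrapping it in `self.tag_to_string()` reduces either case to text. An illustrative check, not part of the commit (BeautifulSoup 4 is used here for convenience; calibre bundles its own BeautifulSoup and `tag_to_string` is a recipe helper):

    # Illustrative only: .contents[0] may be a Tag, not a string.
    from bs4 import BeautifulSoup

    h1 = BeautifulSoup('<h1><em>Garfield</em></h1>', 'html.parser').h1
    print(h1.contents[0])    # <em>Garfield</em>  -- a Tag object, not text
    print(h1.get_text())     # Garfield           -- roughly what tag_to_string() yields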


@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
# dug from http://www.mobileread.com/forums/showthread.php?p=1012294

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1277443634(BasicNewsRecipe):
    title = u'中時電子報'
    oldest_article = 1
    max_articles_per_feed = 100
    feeds = [(u'焦點', u'http://rss.chinatimes.com/rss/focus-u.rss'),
             (u'政治', u'http://rss.chinatimes.com/rss/Politic-u.rss'),
             (u'社會', u'http://rss.chinatimes.com/rss/social-u.rss'),
             (u'國際', u'http://rss.chinatimes.com/rss/international-u.rss'),
             (u'兩岸', u'http://rss.chinatimes.com/rss/mainland-u.rss'),
             (u'地方', u'http://rss.chinatimes.com/rss/local-u.rss'),
             (u'言論', u'http://rss.chinatimes.com/rss/comment-u.rss'),
             (u'科技', u'http://rss.chinatimes.com/rss/technology-u.rss'),
             (u'運動', u'http://rss.chinatimes.com/rss/sport-u.rss'),
             (u'藝文', u'http://rss.chinatimes.com/rss/philology-u.rss'),
             #(u'旺報', u'http://rss.chinatimes.com/rss/want-u.rss'),
             #(u'財經', u'http://rss.chinatimes.com/rss/finance-u.rss'), # broken links
             #(u'股市', u'http://rss.chinatimes.com/rss/stock-u.rss') # broken links
            ]
    __author__ = 'einstuerzende, updated by Eddie Lau'
    __version__ = '1.0'
    language = 'zh'
    publisher = 'China Times Group'
    description = 'China Times (Taiwan)'
    category = 'News, Chinese, Taiwan'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'big5'
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'
    cover_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'
    keep_only_tags = [dict(name='div', attrs={'class':['articlebox','articlebox clearfix']})]
    remove_tags = [dict(name='div', attrs={'class':['focus-news']})]
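All of the recipes added in this commit follow the same declarative pattern: keep_only_tags whitelists the article container and remove_tags prunes unwanted children from it. A minimal sketch of that pruning, not part of the commit, with a toy page and the standalone BeautifulSoup 4 package standing in for calibre's bundled parser (class names taken from the recipe above):

    # Minimal sketch: what keep_only_tags/remove_tags amount to here.
    from bs4 import BeautifulSoup

    html = '''<html><body><div class="sidebar">site chrome</div>
    <div class="articlebox"><h1>Headline</h1><p>Story text.</p>
    <div class="focus-news">related links</div></div></body></html>'''

    soup = BeautifulSoup(html, 'html.parser')
    kept = soup.find_all('div', attrs={'class': 'articlebox'})    # keep_only_tags
    for node in kept:
        for junk in node.find_all('div', attrs={'class': 'focus-news'}):
            junk.decompose()                                      # remove_tags
    print(''.join(str(n) for n in kept))   # only the cleaned article remains

Recipes like this are usually iterated on from the command line with something like ebook-convert myrecipe.recipe .epub --test, which fetches only a couple of articles per feed.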

recipes/divahair.recipe (new file, 53 lines)

@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
divahair.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class DivaHair(BasicNewsRecipe):
    title = u'Diva Hair'
    language = 'ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Coafuri, frizuri, tunsori ..'
    publisher = u'Diva Hair'
    category = u'Ziare,Stiri,Coafuri,Femei'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    cover_url = 'http://www.divahair.ro/imgs/logo.jpg'

    conversion_options = {
                          'comments'  : description
                         ,'tags'      : category
                         ,'language'  : language
                         ,'publisher' : publisher
                         }

    keep_only_tags = [
                      dict(name='td', attrs={'class':'spatiuart'})
                    , dict(name='div', attrs={'class':'spatiuart'})
                     ]

    remove_tags = [
                    dict(name='div', attrs={'class':'categorie'})
                  , dict(name='div', attrs={'class':'gri gri2 detaliiart'})
                  , dict(name='div', attrs={'class':'articol_box_bottom'})
                  ]

    remove_tags_after = [
                         dict(name='div', attrs={'class':'articol_box_bottom'})
                        ]

    feeds = [ (u'\u0218tiri', u'http://www.divahair.ro/feed') ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)


@@ -0,0 +1,64 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.financialsense.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class FinancialSense(BasicNewsRecipe):
    title = 'Financial Sense'
    __author__ = 'Darko Miletic'
    description = 'Uncommon News & Views for the Wise Investor'
    publisher = 'Financial Sense'
    category = 'news, finances, politics, USA'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'newsportal'
    masthead_url = 'http://www.financialsense.com/sites/default/files/logo.jpg'
    extra_css = """
                body{font-family: Arial,"Helvetica Neue",Helvetica,sans-serif }
                img{margin-bottom: 0.4em; display:block}
                h2{color: gray}
                .name{margin-right: 5em}
                """

    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        }

    remove_tags       = [dict(name=['meta','link','base','object','embed','iframe'])]
    remove_tags_after = dict(attrs={'class':'vcard'})
    keep_only_tags    = [dict(attrs={'class':['title','post-meta','content','item-title','vcard']})]
    remove_attributes = ['lang','type']

    feeds = [(u'Articles', u'http://feeds.feedburner.com/fso')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup
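The preprocess_html() above (the iProfesional recipe below ships a nearly identical copy) strips live links from converted pages: an anchor that is pure text collapses to its string, an anchor wrapping an image becomes a bare <div>, and anything else is replaced by its visible text; finally every <img> without an alt attribute gets alt='image'. Roughly, as an illustration (not from the commit):

    <a href="/x">Read more</a>           becomes   Read more
    <a href="/x"><img src="i.jpg"/></a>  becomes   <div><img src="i.jpg"/></div>
    <a href="/x"><b>Hot</b> topic</a>    becomes   Hot topic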

recipes/icons/divahair.png: new binary file, 675 B
New binary file, 702 B
New binary file, 1.1 KiB
recipes/icons/mayra.png: new binary file, 620 B
New binary file, 243 B
New binary file, 837 B


@@ -0,0 +1,79 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.iprofesional.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class iProfesional(BasicNewsRecipe):
    title = 'iProfesional.com'
    __author__ = 'Darko Miletic'
    description = 'Las ultimas noticias sobre profesionales'
    publisher = 'Emprendimientos Corporativos S.A.'
    category = 'news, IT, impuestos, negocios, politics, Argentina'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'es_AR'
    remove_empty_feeds = True
    publication_type = 'nesportal'
    masthead_url = 'http://www.iprofesional.com/img/logo-iprofesional.png'
    extra_css = """
                body{font-family: Arial,Helvetica,sans-serif }
                img{margin-bottom: 0.4em; display:block}
                .titulo-interior{font-family: Georgia,"Times New Roman",Times,serif}
                .autor-nota{font-size: small; font-weight: bold; font-style: italic; color: gray}
                """

    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        }

    keep_only_tags = [dict(attrs={'class':['fecha','interior-nota']})]
    remove_tags = [
                    dict(name=['meta','link','base','embed','object','iframe'])
                   ,dict(attrs={'class':['menu-imprimir','guardarNota','IN-widget','fin','permalink']})
                  ]
    remove_attributes = ['lang','xmlns:og','xmlns:fb']

    feeds = [
              (u'Ultimas noticias' , u'http://feeds.feedburner.com/iprofesional-principales-noticias')
             ,(u'Finanzas'         , u'http://feeds.feedburner.com/iprofesional-finanzas'            )
             ,(u'Impuestos'        , u'http://feeds.feedburner.com/iprofesional-impuestos'           )
             ,(u'Negocios'         , u'http://feeds.feedburner.com/iprofesional-economia'            )
             ,(u'Comercio Exterior', u'http://feeds.feedburner.com/iprofesional-comercio-exterior'   )
             ,(u'Tecnologia'       , u'http://feeds.feedburner.com/iprofesional-tecnologia'          )
             ,(u'Management'       , u'http://feeds.feedburner.com/iprofesional-managment'           )
             ,(u'Marketing'        , u'http://feeds.feedburner.com/iprofesional-marketing'           )
             ,(u'Legales'          , u'http://feeds.feedburner.com/iprofesional-legales'             )
             ,(u'Autos'            , u'http://feeds.feedburner.com/iprofesional-autos'               )
             ,(u'Vinos'            , u'http://feeds.feedburner.com/iprofesional-vinos-bodegas'       )
            ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup


@@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
# dug from http://www.mobileread.com/forums/showthread.php?p=1012294

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1277443634(BasicNewsRecipe):
    title = u'自由電子報'
    oldest_article = 1
    max_articles_per_feed = 100
    feeds = [(u'焦點新聞', u'http://www.libertytimes.com.tw/rss/fo.xml'),
             (u'政治新聞', u'http://www.libertytimes.com.tw/rss/p.xml'),
             (u'生活新聞', u'http://www.libertytimes.com.tw/rss/life.xml'),
             (u'國際新聞', u'http://www.libertytimes.com.tw/rss/int.xml'),
             (u'自由廣場', u'http://www.libertytimes.com.tw/rss/o.xml'),
             (u'社會新聞', u'http://www.libertytimes.com.tw/rss/so.xml'),
             (u'體育新聞', u'http://www.libertytimes.com.tw/rss/sp.xml'),
             (u'財經焦點', u'http://www.libertytimes.com.tw/rss/e.xml'),
             (u'證券理財', u'http://www.libertytimes.com.tw/rss/stock.xml'),
             (u'影視焦點', u'http://www.libertytimes.com.tw/rss/show.xml'),
             (u'北部新聞', u'http://www.libertytimes.com.tw/rss/north.xml'),
             (u'中部新聞', u'http://www.libertytimes.com.tw/rss/center.xml'),
             (u'南部新聞', u'http://www.libertytimes.com.tw/rss/south.xml'),
             (u'大台北新聞', u'http://www.libertytimes.com.tw/rss/taipei.xml'),
             (u'藝術文化', u'http://www.libertytimes.com.tw/rss/art.xml'),
            ]
    extra_css = '''span[class='insubject1'][id='newtitle'] {font-size:200%; font-weight:bold;}'''
    __author__ = 'einstuerzende, updated by Eddie Lau'
    __version__ = '1.1'
    language = 'zh'
    publisher = 'Liberty Times Group'
    description = 'Liberty Times (Taiwan)'
    category = 'News, Chinese, Taiwan'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'big5'
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif'
    cover_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif'
    keep_only_tags = [dict(name='td', attrs={'id':['newsContent']})]

recipes/mayra.recipe (new file, 51 lines)

@@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
mayra.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Mayra(BasicNewsRecipe):
    title = u'Mayra'
    language = 'ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Traieste urban, cool, sexy'
    publisher = 'Mayra'
    category = 'Ziare,Stiri,Reviste'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    cover_url = 'http://img.konkurs.ro/img/concursuri-cu-premii/147/14672_front.jpg'

    conversion_options = {
                          'comments'  : description
                         ,'tags'      : category
                         ,'language'  : language
                         ,'publisher' : publisher
                         }

    keep_only_tags = [
                      dict(name='div', attrs={'id':'article_details'})
                     ]

    remove_tags = [
                    dict(name='div', attrs={'id':'LikePluginPagelet'})
                  , dict(name='p', attrs={'id':'tags'})
                  , dict(name='span', attrs={'id':'tweet-button'})
                  ]

    remove_tags_after = [
                         dict(name='div', attrs={'id':'LikePluginPagelet'})
                        ]

    feeds = [ (u'\u0218tiri', u'http://www.mayra.ro/rss') ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)


@@ -1,15 +1,18 @@
+# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'
-# Users of Kindle 3 (with limited system-level CJK support)
+# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
__MakePeriodical__ = True
-# Turn it to True if your device supports display of CJK titles
+# Turn below to true if your device supports display of CJK titles
__UseChineseTitle__ = False
+# Trun below to true if you wish to use life.mingpao.com as the main article source
+__UseLife__ = True
'''
Change Log:
+2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
@@ -32,41 +35,43 @@ import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation

class MPHKRecipe(BasicNewsRecipe):
    title = 'Ming Pao - Hong Kong'
    oldest_article = 1
    max_articles_per_feed = 100
    __author__ = 'Eddie Lau'
    description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
    publisher = 'MingPao'
    category = 'Chinese, News, Hong Kong'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'zh'
    encoding = 'Big5-HKSCS'
    recursions = 0
    conversion_options = {'linearize_tables':True}
    timefmt = ''
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
    masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
    keep_only_tags = [dict(name='h1'),
                      dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
                      dict(name='font', attrs={'color':['AA0000']}), # for column articles title
                      dict(attrs={'id':['newscontent']}), # entertainment and column page content
                      dict(attrs={'id':['newscontent01','newscontent02']}),
-                     dict(attrs={'class':['photo']})
+                     dict(attrs={'class':['photo']}),
+                     dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
                      ]
    remove_tags = [dict(name='style'),
-                  dict(attrs={'id':['newscontent135']}), # for the finance page
+                  dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
                   dict(name='table')] # for content fetched from life.mingpao.com
    remove_attributes = ['width']
    preprocess_regexps = [
                          (re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
                           lambda match: '<h1>'),
                          (re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
@@ -80,10 +85,10 @@ class MPHKRecipe(BasicNewsRecipe):
                           lambda match: "</b>")
                         ]

    def image_url_processor(cls, baseurl, url):
        # trick: break the url at the first occurance of digit, add an additional
        # '_' at the front
        # not working, may need to move this to preprocess_html() method
        # minIdx = 10000
        # i0 = url.find('0')
        # if i0 >= 0 and i0 < minIdx:
@@ -115,314 +120,357 @@ class MPHKRecipe(BasicNewsRecipe):
        # i9 = url.find('9')
        # if i9 >= 0 and i9 < minIdx:
        #     minIdx = i9
        return url

    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at around HKT 6.00am, all news are available
        dt_local = dt_utc - datetime.timedelta(-2.0/24)
        return dt_local

    def get_fetchdate(self):
        return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
        return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchday(self):
+        # dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at around HKT 6.00am, all news are available
+        # dt_local = dt_utc - datetime.timedelta(-2.0/24)
        return self.get_dtlocal().strftime("%d")

    def get_cover_url(self):
        cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except:
            cover = None
        return cover

    def parse_index(self):
        feeds = []
        dateStr = self.get_fetchdate()
-        for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
-                           (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
-                           (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
-            articles = self.parse_section(url)
-            if articles:
-                feeds.append((title, articles))
-        # special- editorial
-        ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
-        if ed_articles:
-            feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
-        for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
-                           (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
-                           (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
-            articles = self.parse_section(url)
-            if articles:
-                feeds.append((title, articles))
-        # special - finance
-        #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
-        fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
-        if fin_articles:
-            feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
-        for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
-                           (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
-            articles = self.parse_section(url)
-            if articles:
-                feeds.append((title, articles))
-        # special - entertainment
-        ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
-        if ent_articles:
-            feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
-        for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
-                           (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
-            articles = self.parse_section(url)
-            if articles:
-                feeds.append((title, articles))
-        # special- columns
-        col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
-        if col_articles:
-            feeds.append((u'\u5c08\u6b04 Columns', col_articles))
+        if __UseLife__:
+            for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
+                                       (u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
+                                       (u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
+                                       (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
+                                       (u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
+                                       (u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
+                                       (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
+                                       (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
+                                       (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
+                                       (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
+                                       (u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
+                articles = self.parse_section2(url, keystr)
+                if articles:
+                    feeds.append((title, articles))
+
+            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
+                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
+                articles = self.parse_section(url)
+                if articles:
+                    feeds.append((title, articles))
+        else:
+            for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
+                               (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
+                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
+                articles = self.parse_section(url)
+                if articles:
+                    feeds.append((title, articles))
+            # special- editorial
+            ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
+            if ed_articles:
+                feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
+            for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
+                               (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
+                               (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
+                articles = self.parse_section(url)
+                if articles:
+                    feeds.append((title, articles))
+            # special - finance
+            #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
+            fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
+            if fin_articles:
+                feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
+            for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
+                               (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
+                articles = self.parse_section(url)
+                if articles:
+                    feeds.append((title, articles))
+            # special - entertainment
+            ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
+            if ent_articles:
+                feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
+            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
+                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
+                articles = self.parse_section(url)
+                if articles:
+                    feeds.append((title, articles))
+            # special- columns
+            col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
+            if col_articles:
+                feeds.append((u'\u5c08\u6b04 Columns', col_articles))
        return feeds
+    # parse from news.mingpao.com
    def parse_section(self, url):
        dateStr = self.get_fetchdate()
        soup = self.index_to_soup(url)
        divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
        current_articles = []
        included_urls = []
        divs.reverse()
        for i in divs:
            a = i.find('a', href = True)
            title = self.tag_to_string(a)
            url = a.get('href', False)
            url = 'http://news.mingpao.com/' + dateStr + '/' +url
            if url not in included_urls and url.rfind('Redirect') == -1:
                current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

+    # parse from life.mingpao.com
+    def parse_section2(self, url, keystr):
+        self.get_fetchdate()
+        soup = self.index_to_soup(url)
+        a = soup.findAll('a', href=True)
+        a.reverse()
+        current_articles = []
+        included_urls = []
+        for i in a:
+            title = self.tag_to_string(i)
+            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
+            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
+                current_articles.append({'title': title, 'url': url, 'description': ''})
+                included_urls.append(url)
+        current_articles.reverse()
+        return current_articles

    def parse_ed_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def parse_fin_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href= True)
        current_articles = []
        included_urls = []
        for i in a:
            #url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            #if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
            if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
                title = self.tag_to_string(i)
                current_articles.append({'title': title, 'url': url, 'description':''})
                included_urls.append(url)
        return current_articles

    def parse_ent_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def parse_col_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('ncl') == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles
    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll(style=True):
            del item['width']
        for item in soup.findAll(stype=True):
            del item['absmiddle']
        return soup

    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        if __UseChineseTitle__ == True:
            title = u'\u660e\u5831 (\u9999\u6e2f)'
        else:
            title = self.short_title()
        # if not generating a periodical, force date to apply in title
        if __MakePeriodical__ == False:
            title = title + ' ' + self.get_fetchformatteddate()
        if True:
            mi = MetaInformation(title, [self.publisher])
            mi.publisher = self.publisher
            mi.author_sort = self.publisher
            if __MakePeriodical__ == True:
                mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
            else:
                mi.publication_type = self.publication_type+':'+self.short_title()
            #mi.timestamp = nowf()
            mi.timestamp = self.get_dtlocal()
            mi.comments = self.description
            if not isinstance(mi.comments, unicode):
                mi.comments = mi.comments.decode('utf-8', 'replace')
            #mi.pubdate = nowf()
            mi.pubdate = self.get_dtlocal()
            opf_path = os.path.join(dir, 'index.opf')
            ncx_path = os.path.join(dir, 'index.ncx')
            opf = OPFCreator(dir, mi)
            # Add mastheadImage entry to <guide> section
            mp = getattr(self, 'masthead_path', None)
            if mp is not None and os.access(mp, os.R_OK):
                from calibre.ebooks.metadata.opf2 import Guide
                ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
                ref.type = 'masthead'
                ref.title = 'Masthead Image'
                opf.guide.append(ref)

            manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
            manifest.append(os.path.join(dir, 'index.html'))
            manifest.append(os.path.join(dir, 'index.ncx'))

            # Get cover
            cpath = getattr(self, 'cover_path', None)
            if cpath is None:
                pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
                if self.default_cover(pf):
                    cpath = pf.name
            if cpath is not None and os.access(cpath, os.R_OK):
                opf.cover = cpath
                manifest.append(cpath)

            # Get masthead
            mpath = getattr(self, 'masthead_path', None)
            if mpath is not None and os.access(mpath, os.R_OK):
                manifest.append(mpath)

            opf.create_manifest_from_files_in(manifest)
            for mani in opf.manifest:
                if mani.path.endswith('.ncx'):
                    mani.id = 'ncx'
                if mani.path.endswith('mastheadImage.jpg'):
                    mani.id = 'masthead-image'
            entries = ['index.html']
            toc = TOC(base_path=dir)
            self.play_order_counter = 0
            self.play_order_map = {}

            def feed_index(num, parent):
                f = feeds[num]
                for j, a in enumerate(f):
                    if getattr(a, 'downloaded', False):
                        adir = 'feed_%d/article_%d/'%(num, j)
                        auth = a.author
                        if not auth:
                            auth = None
                        desc = a.text_summary
                        if not desc:
                            desc = None
                        else:
                            desc = self.description_limiter(desc)
                        entries.append('%sindex.html'%adir)
                        po = self.play_order_map.get(entries[-1], None)
                        if po is None:
                            self.play_order_counter += 1
                            po = self.play_order_counter
                        parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
                                        play_order=po, author=auth, description=desc)
                        last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                        for sp in a.sub_pages:
                            prefix = os.path.commonprefix([opf_path, sp])
                            relp = sp[len(prefix):]
                            entries.append(relp.replace(os.sep, '/'))
                            last = sp
                        if os.path.exists(last):
                            with open(last, 'rb') as fi:
                                src = fi.read().decode('utf-8')
                            soup = BeautifulSoup(src)
                            body = soup.find('body')
                            if body is not None:
                                prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
                                templ = self.navbar.generate(True, num, j, len(f),
                                                             not self.has_single_feed,
                                                             a.orig_url, self.publisher, prefix=prefix,
                                                             center=self.center_navbar)
                                elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                                body.insert(len(body.contents), elem)
                                with open(last, 'wb') as fi:
                                    fi.write(unicode(soup).encode('utf-8'))

            if len(feeds) == 0:
                raise Exception('All feeds are empty, aborting.')
            if len(feeds) > 1:
                for i, f in enumerate(feeds):
                    entries.append('feed_%d/index.html'%i)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
                    auth = getattr(f, 'author', None)
                    if not auth:
                        auth = None
                    desc = getattr(f, 'description', None)
                    if not desc:
                        desc = None
                    feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                                               f.title, play_order=po, description=desc, author=auth))
            else:
                entries.append('feed_%d/index.html'%0)
                feed_index(0, toc)
            for i, p in enumerate(entries):
                entries[i] = os.path.join(dir, p.replace('/', os.sep))
            opf.create_spine(entries)
            opf.set_toc(toc)

            with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
                opf.render(opf_file, ncx_file)
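A note on the date arithmetic in get_dtlocal() above, since it is easy to misread: subtracting a negative timedelta of 2/24 day adds two hours to UTC. Hong Kong is UTC+8 and the day's Ming Pao content is complete at about 06:00 HKT, so a UTC+2 clock rolls over to the next date exactly when 06:00 HKT passes. A worked check (illustrative, not part of the recipe; Python 2, matching the recipe's idiom):

    # 06:00 HKT equals 22:00 UTC of the previous day; adding 2 hours to UTC
    # therefore crosses midnight at exactly that moment.
    import datetime

    for utc in (datetime.datetime(2011, 5, 13, 21, 59),  # 05:59 HKT on May 14
                datetime.datetime(2011, 5, 13, 22, 0)):  # 06:00 HKT on May 14
        dt_local = utc - datetime.timedelta(-2.0/24)
        print utc.isoformat(), '->', dt_local.strftime('%Y%m%d')
    # prints 20110513 for the first timestamp and 20110514 for the second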

recipes/moldovaazi.recipe (new file, 50 lines)

@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
azi.md
'''

from calibre.web.feeds.news import BasicNewsRecipe

class MoldovaAzi(BasicNewsRecipe):
    title = u'Moldova Azi'
    language = 'ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Moldova pe internet'
    publisher = 'Moldova Azi'
    category = 'Ziare,Stiri,Moldova'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    cover_url = 'http://www.azi.md/images/logo.gif'

    conversion_options = {
                          'comments'  : description
                         ,'tags'      : category
                         ,'language'  : language
                         ,'publisher' : publisher
                         }

    keep_only_tags = [ dict(name='div', attrs={'id':'in'})
                     ]

    remove_tags = [
                    dict(name='div', attrs={'class':'in-more-stories'})
                  ]

    remove_tags_after = [
                         dict(name='div', attrs={'id':'comment_wrapper'})
                       , dict(name='div', attrs={'class':'box-title4'})
                        ]

    feeds = [ (u'\u0218tiri', u'http://www.azi.md/ro/feeds/0/rss201') ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)


@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
newsmoldova.md
'''

from calibre.web.feeds.news import BasicNewsRecipe

class NewsMoldova(BasicNewsRecipe):
    title = u'Agen\u0163ia de \u015ftiri Moldova'
    language = 'ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Agen\u0163ia de \u015ftiri Moldova'
    publisher = 'Moldova'
    category = 'Ziare,Stiri,Moldova'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    cover_url = 'http://www.newsmoldova.md/i/logo_top_md.gif'

    conversion_options = {
                          'comments'  : description
                         ,'tags'      : category
                         ,'language'  : language
                         ,'publisher' : publisher
                         }

    keep_only_tags = [ dict(name='div', attrs={'class':'main-article-index article'})
                     ]

    remove_tags = [
                    dict(name='div', attrs={'id':'actions'})
                  , dict(name='li', attrs={'class':'invisible'})
                  ]

    remove_tags_after = [
                         dict(name='div', attrs={'id':'actions'})
                        ]

    feeds = [ (u'\u0218tiri', u'http://newsmoldova.md/export/rss2/archive/index.xml') ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)


@@ -3,7 +3,6 @@ __license__ = 'GPL v3'
'''
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.web.feeds import Feed

class ReadersDigest(BasicNewsRecipe):
@@ -38,151 +37,20 @@ class ReadersDigest(BasicNewsRecipe):
    '''

-    remove_tags = [
-        dict(name='h4', attrs={'class':'close'}),
-        dict(name='div', attrs={'class':'fromLine'}),
-        dict(name='img', attrs={'class':'colorTag'}),
-        dict(name='div', attrs={'id':'sponsorArticleHeader'}),
-        dict(name='div', attrs={'class':'horizontalAd'}),
-        dict(name='div', attrs={'id':'imageCounterLeft'}),
-        dict(name='div', attrs={'id':'commentsPrint'})
-    ]
-
    feeds = [
-        ('New in RD', 'http://feeds.rd.com/ReadersDigest'),
-        ('Jokes', 'http://feeds.rd.com/ReadersDigestJokes'),
-        ('Cartoons', 'http://feeds.rd.com/ReadersDigestCartoons'),
-        ('Blogs','http://feeds.rd.com/ReadersDigestBlogs')
+        ('Food', 'http://www.rd.com/food/feed'),
+        ('Health', 'http://www.rd.com/health/feed'),
+        ('Home', 'http://www.rd.com/home/feed'),
+        ('Family', 'http://www.rd.com/family/feed'),
+        ('Money', 'http://www.rd.com/money/feed'),
+        ('Travel', 'http://www.rd.com/travel/feed'),
    ]

    cover_url = 'http://www.rd.com/images/logo-main-rd.gif'
+    keep_only_tags = dict(id='main-content')
+    remove_tags = [
+        {'class':['post-categories']},
+    ]

-    #-------------------------------------------------------------------------------------------------
-    def print_version(self, url):
-        # Get the identity number of the current article and append it to the root print URL
-        if url.find('/article') > 0:
-            ident = url[url.find('/article')+8:url.find('.html?')-4]
-            url = 'http://www.rd.com/content/printContent.do?contentId=' + ident
-        elif url.find('/post') > 0:
-            # in this case, have to get the page itself to derive the Print page.
-            soup = self.index_to_soup(url)
-            newsoup = soup.find('ul',attrs={'class':'printBlock'})
-            url = 'http://www.rd.com' + newsoup('a')[0]['href']
-            url = url[0:url.find('&Keep')]
-        return url
-
-    #-------------------------------------------------------------------------------------------------
-    def parse_index(self):
-        pages = [
-            ('Your America','http://www.rd.com/your-america-inspiring-people-and-stories', 'channelLeftContainer',{'class':'moreLeft'}),
-            # useless recipes ('Living Healthy','http://www.rd.com/living-healthy', 'channelLeftContainer',{'class':'moreLeft'}),
-            ('Advice and Know-How','http://www.rd.com/advice-and-know-how', 'channelLeftContainer',{'class':'moreLeft'})
-        ]
-        feeds = []
-        for page in pages:
-            section, url, divider, attrList = page
-            newArticles = self.page_parse(url, divider, attrList)
-            feeds.append((section,newArticles))
-        # after the pages of the site have been processed, parse several RSS feeds for additional sections
-        newfeeds = Feed()
-        newfeeds = self.parse_rss()
-        # The utility code in parse_rss returns a Feed object. Convert each feed/article combination into a form suitable
-        # for this module (parse_index).
-        for feed in newfeeds:
-            newArticles = []
-            for article in feed.articles:
-                newArt = {
-                    'title' : article.title,
-                    'url' : article.url,
-                    'date' : article.date,
-                    'description' : article.text_summary
-                }
-                newArticles.append(newArt)
-            # New and Blogs should be the first two feeds.
-            if feed.title == 'New in RD':
-                feeds.insert(0,(feed.title,newArticles))
-            elif feed.title == 'Blogs':
-                feeds.insert(1,(feed.title,newArticles))
-            else:
-                feeds.append((feed.title,newArticles))
-        return feeds
-
-    #-------------------------------------------------------------------------------------------------
-    def page_parse(self, mainurl, divider, attrList):
-        articles = []
-        mainsoup = self.index_to_soup(mainurl)
-        for item in mainsoup.findAll(attrs=attrList):
-            newArticle = {
-                'title' : item('img')[0]['alt'],
-                'url' : 'http://www.rd.com'+item('a')[0]['href'],
-                'date' : '',
-                'description' : ''
-            }
-            articles.append(newArticle)
-        return articles
-
-    #-------------------------------------------------------------------------------------------------
-    def parse_rss (self):
-        # Do the "official" parse_feeds first
-        feeds = BasicNewsRecipe.parse_feeds(self)
-        # Loop thru the articles in all feeds to find articles with "recipe" in it
-        recipeArticles = []
-        for curfeed in feeds:
-            delList = []
-            for a,curarticle in enumerate(curfeed.articles):
-                if curarticle.title.upper().find('RECIPE') >= 0:
-                    recipeArticles.append(curarticle)
-                    delList.append(curarticle)
-            if len(delList)>0:
-                for d in delList:
-                    index = curfeed.articles.index(d)
-                    curfeed.articles[index:index+1] = []
-        # If there are any recipes found, create a new Feed object and append.
-        if len(recipeArticles) > 0:
-            pfeed = Feed()
-            pfeed.title = 'Recipes'
-            pfeed.descrition = 'Recipe Feed (Virtual)'
-            pfeed.image_url = None
-            pfeed.oldest_article = 30
-            pfeed.id_counter = len(recipeArticles)
-            # Create a new Feed, add the recipe articles, and then append
-            # to "official" list of feeds
-            pfeed.articles = recipeArticles[:]
-            feeds.append(pfeed)
-        return feeds


@@ -33,7 +33,7 @@ class StrategyBusinessRecipe(BasicNewsRecipe):
            elif c.name.endswith('_password'):
                br[c.name] = self.password
        raw = br.submit().read()
-        if '>Logout' not in raw:
+        if 'You have been logged in' not in raw:
            raise ValueError('Failed to login, check your username and password')
        return br
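The fix replaces the marker used to confirm a successful login: the site's post-login page evidently no longer contains a '>Logout' link but does contain the phrase 'You have been logged in'. For context, a sketch of the mechanize flow this check sits in (the URL and field names here are assumptions, not taken from the recipe, which iterates the form's controls):

    import mechanize

    br = mechanize.Browser()
    br.open('http://www.strategy-business.com/registration')  # hypothetical login URL
    br.select_form(nr=0)
    br['login_username'] = 'user@example.com'   # hypothetical field name
    br['login_password'] = 'secret'             # hypothetical field name
    raw = br.submit().read()
    if 'You have been logged in' not in raw:    # the marker string from the patch
        raise ValueError('Failed to login, check your username and password')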


@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'

from calibre.web.feeds.news import BasicNewsRecipe

class UnitedDaily(BasicNewsRecipe):
    title = u'聯合新聞網'
    oldest_article = 1
    max_articles_per_feed = 100
    feeds = [(u'焦點', u'http://udn.com/udnrss/focus.xml'),
             (u'政治', u'http://udn.com/udnrss/politics.xml'),
             (u'社會', u'http://udn.com/udnrss/social.xml'),
             (u'生活', u'http://udn.com/udnrss/life.xml'),
             (u'綜合', u'http://udn.com/udnrss/education.xml'),
             (u'意見評論', u'http://udn.com/udnrss/opinion.xml'),
             (u'大台北', u'http://udn.com/udnrss/local_taipei.xml'),
             (u'桃竹苗', u'http://udn.com/udnrss/local_tyhcml.xml'),
             (u'中彰投', u'http://udn.com/udnrss/local_tcchnt.xml'),
             (u'雲嘉南', u'http://udn.com/udnrss/local_ylcytn.xml'),
             (u'高屏離島', u'http://udn.com/udnrss/local_ksptisland.xml'),
             (u'基宜花東', u'http://udn.com/udnrss/local_klilhltt.xml'),
             (u'台灣百寶鄉', u'http://udn.com/udnrss/local_oddlyenough.xml'),
             (u'兩岸要聞', u'http://udn.com/udnrss/mainland.xml'),
             (u'國際焦點', u'http://udn.com/udnrss/international.xml'),
             (u'台商經貿', u'http://udn.com/udnrss/financechina.xml'),
             (u'國際財經', u'http://udn.com/udnrss/financeworld.xml'),
             (u'財經焦點', u'http://udn.com/udnrss/financesfocus.xml'),
             (u'股市要聞', u'http://udn.com/udnrss/stock.xml'),
             (u'股市快訊', u'http://udn.com/udnrss/stklatest.xml'),
             (u'稅務法務', u'http://udn.com/udnrss/tax.xml'),
             (u'房市情報', u'http://udn.com/udnrss/houses.xml'),
             (u'棒球', u'http://udn.com/udnrss/baseball.xml'),
             (u'籃球', u'http://udn.com/udnrss/basketball.xml'),
             (u'體壇動態', u'http://udn.com/udnrss/sportsfocus.xml'),
             (u'熱門星聞', u'http://udn.com/udnrss/starsfocus.xml'),
             (u'廣電港陸', u'http://udn.com/udnrss/tv.xml'),
             (u'海外星球', u'http://udn.com/udnrss/starswestern.xml'),
             (u'日韓星情', u'http://udn.com/udnrss/starsjk.xml'),
             (u'電影世界', u'http://udn.com/udnrss/movie.xml'),
             (u'流行音樂', u'http://udn.com/udnrss/music.xml'),
             (u'觀點專題', u'http://udn.com/udnrss/starssubject.xml'),
             (u'食樂指南', u'http://udn.com/udnrss/food.xml'),
             (u'折扣好康', u'http://udn.com/udnrss/shopping.xml'),
             (u'醫藥新聞', u'http://udn.com/udnrss/health.xml'),
             (u'家婦繽紛', u'http://udn.com/udnrss/benfen.xml'),
             (u'談星論命', u'http://udn.com/udnrss/astrology.xml'),
             (u'文化副刊', u'http://udn.com/udnrss/reading.xml'),
            ]
    extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;}'''
    __author__ = 'Eddie Lau'
    __version__ = '1.0'
    language = 'zh'
    publisher = 'United Daily News Group'
    description = 'United Daily (Taiwan)'
    category = 'News, Chinese, Taiwan'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'big5'
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
    cover_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
    keep_only_tags = [dict(name='div', attrs={'id':['story_title','story_author', 'story']})]
    remove_tags = [dict(name='div', attrs={'id':['mvouter']})]


@@ -11,7 +11,7 @@ __all__ = [
        'build', 'build_pdf2xml', 'server',
        'gui',
        'develop', 'install',
-        'resources',
+        'kakasi', 'resources',
        'check',
        'sdist',
        'manual', 'tag_release',
@@ -49,8 +49,9 @@ gui = GUI()
from setup.check import Check
check = Check()

-from setup.resources import Resources
+from setup.resources import Resources, Kakasi
resources = Resources()
+kakasi = Kakasi()

from setup.publish import Manual, TagRelease, Stage1, Stage2, \
        Stage3, Stage4, Publish


@@ -30,11 +30,12 @@ int report_libc_error(const char *msg) {
}

int pyobject_to_int(PyObject *res) {
-    int ret; PyObject *tmp;
-    tmp = PyNumber_Int(res);
-    if (tmp == NULL) ret = (PyObject_IsTrue(res)) ? 1 : 0;
-    else ret = (int)PyInt_AS_LONG(tmp);
+    int ret = 0; PyObject *tmp;
+    if (res != NULL) {
+        tmp = PyNumber_Int(res);
+        if (tmp == NULL) ret = (PyObject_IsTrue(res)) ? 1 : 0;
+        else ret = (int)PyInt_AS_LONG(tmp);
+    }
    return ret;
}
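The patch makes pyobject_to_int() safe against a NULL PyObject: ret is now initialized and the conversion only runs when res is non-NULL, so a Python callback that failed (returning NULL) can no longer cause a null-pointer dereference here. A rough Python analogue of the patched behaviour (an analogy only, not code from the commit):

    def pyobject_to_int(res):
        ret = 0                          # NULL input now yields 0
        if res is not None:              # the new guard
            try:
                ret = int(res)           # PyNumber_Int succeeded
            except Exception:
                ret = 1 if res else 0    # PyObject_IsTrue fallback
        return ret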


@@ -32,6 +32,7 @@ class Win32(VMInstaller):
    FREEZE_TEMPLATE = 'python -OO setup.py {freeze_command} --no-ice'
    INSTALLER_EXT = 'msi'
    SHUTDOWN_CMD = ['shutdown.exe', '-s', '-f', '-t', '0']
+    BUILD_BUILD = ['python setup.py kakasi',] + VMInstaller.BUILD_BUILD

    def download_installer(self):
        installer = self.installer()


@@ -14,7 +14,7 @@ from setup.build_environment import msvc, MT, RC
from setup.installer.windows.wix import WixMixIn

OPENSSL_DIR = r'Q:\openssl'
-QT_DIR = 'Q:\\Qt\\4.7.2'
+QT_DIR = 'Q:\\Qt\\4.7.3'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUSB_DIR = 'C:\\libusb'
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'


@@ -11,9 +11,6 @@
        SummaryCodepage='1252' />
    <Media Id="1" Cabinet="{app}.cab" CompressionLevel="{compression}" EmbedCab="yes" />

-    <!-- The following line is needed because of the patch to QtCore4.dll. You can remove this line
-        after you update Qt beyond 4.7.2. 'emus' means re-install even if version is the same not just if it is older. -->
-    <Property Id='REINSTALLMODE' Value='emus'/>

    <Upgrade Id="{upgrade_code}">
        <UpgradeVersion Maximum="{version}"


@@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
import os, cPickle, re, anydbm, shutil, marshal, zipfile, glob
from zlib import compress

-from setup import Command, basenames, __appname__
+from setup import Command, basenames, __appname__, iswindows

def get_opts_from_parser(parser):
    def do_opt(opt):
@ -23,13 +23,119 @@ def get_opts_from_parser(parser):
for o in g.option_list: for o in g.option_list:
for x in do_opt(o): yield x for x in do_opt(o): yield x
class Resources(Command): class Kakasi(Command):
description = 'Compile various needed calibre resources' description = 'Compile resources for unihandecode'
KAKASI_PATH = os.path.join(Command.SRC, __appname__, KAKASI_PATH = os.path.join(Command.SRC, __appname__,
'ebooks', 'unihandecode', 'pykakasi') 'ebooks', 'unihandecode', 'pykakasi')
def run(self, opts):
self.records = {}
src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanwadict2.db')
base = os.path.dirname(dest)
if not os.path.exists(base):
os.makedirs(base)
if self.newer(dest, src) or iswindows:
self.info('\tGenerating Kanwadict')
for line in open(src, "r"):
self.parsekdict(line)
self.kanwaout(dest)
src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','itaijidict2.pickle')
if self.newer(dest, src) or iswindows:
self.info('\tGenerating Itaijidict')
self.mkitaiji(src, dest)
src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanadict2.pickle')
if self.newer(dest, src) or iswindows:
self.info('\tGenerating kanadict')
self.mkkanadict(src, dest)
return
def mkitaiji(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
dic[pair[0]] = pair[1]
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def mkkanadict(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
(alpha, kana) = line.split(' ')
dic[kana] = alpha
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def parsekdict(self, line):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
return
(yomi, kanji) = line.split(' ')
if ord(yomi[-1:]) <= ord('z'):
tail = yomi[-1:]
yomi = yomi[:-1]
else:
tail = ''
self.updaterec(kanji, yomi, tail)
def updaterec(self, kanji, yomi, tail):
key = "%04x"%ord(kanji[0])
if key in self.records:
if kanji in self.records[key]:
rec = self.records[key][kanji]
rec.append((yomi,tail))
self.records[key].update( {kanji: rec} )
else:
self.records[key][kanji]=[(yomi, tail)]
else:
self.records[key] = {}
self.records[key][kanji]=[(yomi, tail)]
def kanwaout(self, out):
try:
# Needed as otherwise anydbm tries to create a gdbm db when the db
# created on Unix is found
os.remove(out)
except:
pass
dic = anydbm.open(out, 'n')
for (k, v) in self.records.iteritems():
dic[k] = compress(marshal.dumps(v))
dic.close()
def clean(self):
kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
if os.path.exists(kakasi):
shutil.rmtree(kakasi)
class Resources(Command):
description = 'Compile various needed calibre resources'
sub_commands = ['kakasi']
    def run(self, opts):
        scripts = {}
        for x in ('console', 'gui'):
@ -117,108 +223,13 @@ class Resources(Command):
        import json
        json.dump(function_dict, open(dest, 'wb'), indent=4)
self.run_kakasi(opts)
def run_kakasi(self, opts):
self.records = {}
src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanwadict2.db')
base = os.path.dirname(dest)
if not os.path.exists(base):
os.makedirs(base)
if self.newer(dest, src):
self.info('\tGenerating Kanwadict')
for line in open(src, "r"):
self.parsekdict(line)
self.kanwaout(dest)
src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','itaijidict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating Itaijidict')
self.mkitaiji(src, dest)
src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanadict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating kanadict')
self.mkkanadict(src, dest)
return
def mkitaiji(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
dic[pair[0]] = pair[1]
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def mkkanadict(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
(alpha, kana) = line.split(' ')
dic[kana] = alpha
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def parsekdict(self, line):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
return
(yomi, kanji) = line.split(' ')
if ord(yomi[-1:]) <= ord('z'):
tail = yomi[-1:]
yomi = yomi[:-1]
else:
tail = ''
self.updaterec(kanji, yomi, tail)
def updaterec(self, kanji, yomi, tail):
key = "%04x"%ord(kanji[0])
if key in self.records:
if kanji in self.records[key]:
rec = self.records[key][kanji]
rec.append((yomi,tail))
self.records[key].update( {kanji: rec} )
else:
self.records[key][kanji]=[(yomi, tail)]
else:
self.records[key] = {}
self.records[key][kanji]=[(yomi, tail)]
def kanwaout(self, out):
dic = anydbm.open(out, 'c')
for (k, v) in self.records.iteritems():
dic[k] = compress(marshal.dumps(v))
dic.close()
    def clean(self):
        for x in ('scripts', 'recipes', 'ebook-convert-complete'):
            x = self.j(self.RESOURCES, x+'.pickle')
            if os.path.exists(x):
                os.remove(x)
-       kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
-       if os.path.exists(kakasi):
-           shutil.rmtree(kakasi)
        from setup.commands import kakasi
        kakasi.clean()
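The kanwadict built by the Kakasi command above keys its records on the hex code point of the first kanji of each entry, mapping every kanji string to a list of (yomi, tail) reading pairs. A minimal standalone sketch of that record layout (updaterec here is an illustrative helper, not calibre's API):

records = {}

def updaterec(records, kanji, yomi, tail):
    # key: zero-padded hex code point of the first kanji
    key = "%04x" % ord(kanji[0])
    records.setdefault(key, {}).setdefault(kanji, []).append((yomi, tail))

updaterec(records, u'\u66f8', u'sho', '')
print records # {'66f8': {u'\u66f8': [(u'sho', '')]}}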

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__   = u'calibre'
-numeric_version = (0, 8, 0)
numeric_version = (0, 8, 1)
__version__   = u'.'.join(map(unicode, numeric_version))
__author__    = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -628,8 +628,9 @@ from calibre.ebooks.metadata.sources.amazon import Amazon
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
from calibre.ebooks.metadata.sources.overdrive import OverDrive
from calibre.ebooks.metadata.sources.douban import Douban

-plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive]
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban]

# }}}
# }}} # }}}

View File

@ -253,7 +253,7 @@ class OutputProfile(Plugin):
    periodical_date_in_title = True

    #: Characters used in jackets and catalogs
    missing_char = u'x'
    ratings_char = u'*'
    empty_ratings_char = u' '
    read_char = u'+'
@ -293,38 +293,38 @@ class iPadOutput(OutputProfile):
            }
        ]

    missing_char = u'\u2715\u200a' # stylized 'x' plus hair space
    ratings_char = u'\u2605' # filled star
    empty_ratings_char = u'\u2606' # hollow star
    read_char = u'\u2713' # check mark

    touchscreen = True
    # touchscreen_news_css {{{
    touchscreen_news_css = u'''
            /* hr used in articles */
            .article_articles_list {
                width:18%;
            }
            .article_link {
                color: #593f29;
                font-style: italic;
            }
            .article_next {
                -webkit-border-top-right-radius:4px;
                -webkit-border-bottom-right-radius:4px;
                font-style: italic;
                width:32%;
            }
            .article_prev {
                -webkit-border-top-left-radius:4px;
                -webkit-border-bottom-left-radius:4px;
                font-style: italic;
                width:32%;
            }
            .article_sections_list {
                width:18%;
            }
            .articles_link {
                font-weight: bold;
            }
@ -334,8 +334,8 @@ class iPadOutput(OutputProfile):
            .caption_divider {
                border:#ccc 1px solid;
            }
            .touchscreen_navbar {
                background:#c3bab2;
@ -357,50 +357,50 @@ class iPadOutput(OutputProfile):
                text-align:center;
            }
            .touchscreen_navbar td a:link {
                color: #593f29;
                text-decoration: none;
            }
            /* Index formatting */
            .publish_date {
                text-align:center;
            }
            .divider {
                border-bottom:1em solid white;
                border-top:1px solid gray;
            }
            hr.caption_divider {
                border-color:black;
                border-style:solid;
                border-width:1px;
            }
            /* Feed summary formatting */
            .article_summary {
                display:inline-block;
            }
            .feed {
                font-family:sans-serif;
                font-weight:bold;
                font-size:larger;
            }
            .feed_link {
                font-style: italic;
            }
            .feed_next {
                -webkit-border-top-right-radius:4px;
                -webkit-border-bottom-right-radius:4px;
                font-style: italic;
                width:40%;
            }
            .feed_prev {
                -webkit-border-top-left-radius:4px;
                -webkit-border-bottom-left-radius:4px;
                font-style: italic;
                width:40%;
            }
@ -410,24 +410,24 @@ class iPadOutput(OutputProfile):
                font-size: 160%;
            }
            .feed_up {
                font-weight: bold;
                width:20%;
            }
            .summary_headline {
                font-weight:bold;
                text-align:left;
            }
            .summary_byline {
                text-align:left;
                font-family:monospace;
            }
            .summary_text {
                text-align:left;
            }
            '''
    # }}}
@ -617,8 +617,8 @@ class KindleOutput(OutputProfile):
    supports_mobi_indexing = True
    periodical_date_in_title = False
    missing_char = u'x\u2009'
    empty_ratings_char = u'\u2606'
    ratings_char = u'\u2605'
    read_char = u'\u2713'
@ -642,8 +642,8 @@ class KindleDXOutput(OutputProfile):
    #comic_screen_size = (741, 1022)
    supports_mobi_indexing = True
    periodical_date_in_title = False
    missing_char = u'x\u2009'
    empty_ratings_char = u'\u2606'
    ratings_char = u'\u2605'
    read_char = u'\u2713'
    mobi_ems_per_blockquote = 2.0

View File

@ -92,7 +92,7 @@ def restore_plugin_state_to_default(plugin_or_name):
    config['enabled_plugins'] = ep

default_disabled_plugins = set([
-   'Overdrive',
    'Overdrive', 'Douban Books',
])

def is_disabled(plugin):

View File

@ -109,7 +109,7 @@ class ANDROID(USBMS):
            'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
            'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
            '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
-           'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB']
            'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD']

View File

@ -103,10 +103,11 @@ class EPUBInput(InputFormatPlugin):
                t.set('href', guide_cover)
                t.set('title', 'Title Page')
            from calibre.ebooks import render_html_svg_workaround
-           renderer = render_html_svg_workaround(guide_cover, log)
-           if renderer is not None:
-               open('calibre_raster_cover.jpg', 'wb').write(
-                   renderer)
            if os.path.exists(guide_cover):
                renderer = render_html_svg_workaround(guide_cover, log)
                if renderer is not None:
                    open('calibre_raster_cover.jpg', 'wb').write(
                        renderer)

    def find_opf(self):
        def attr(n, attr):

View File

@ -83,6 +83,7 @@ class ArchiveExtract(FileTypePlugin):
    return of.name

def get_comic_book_info(d, mi):
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    series = d.get('series', '')
    if series.strip():
        mi.series = series
@ -111,6 +112,7 @@ def get_comic_book_info(d, mi):

def get_cbz_metadata(stream):
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    from calibre.utils.zipfile import ZipFile
    from calibre.ebooks.metadata import MetaInformation
    import json

View File

@ -112,10 +112,15 @@ class Metadata(object):
        Be careful with numeric fields since this will return True for zero as
        well as None.

        Also returns True if the field does not exist.
        '''
-       null_val = NULL_VALUES.get(field, None)
-       val = getattr(self, field, None)
-       return not val or val == null_val
        try:
            null_val = NULL_VALUES.get(field, None)
            val = getattr(self, field, None)
            return not val or val == null_val
        except:
            return True

    def __getattribute__(self, field):
        _data = object.__getattribute__(self, '_data')
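A small illustration of the guarded behaviour above, using a toy stand-in for Metadata (the real class resolves fields through _data; NULL_VALUES here is abridged and illustrative):

NULL_VALUES = {'series_index': 1.0}

class Mini(object):
    def is_null(self, field):
        try:
            null_val = NULL_VALUES.get(field, None)
            val = getattr(self, field, None)
            return not val or val == null_val
        except:
            return True

m = Mini()
m.title, m.rating, m.series_index = '', 0, 1.0
print m.is_null('title') # True: empty values are null
print m.is_null('rating') # True: zero counts as null, as the docstring warns
print m.is_null('series_index') # True: matches its NULL_VALUES default
print m.is_null('no_such_field') # True: a missing field no longer raises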

View File

@ -16,7 +16,7 @@ from lxml.html import soupparser, tostring
from calibre import as_unicode from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source from calibre.ebooks.metadata.sources.base import Source, Option
from calibre.utils.cleantext import clean_ascii_chars from calibre.utils.cleantext import clean_ascii_chars
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
@ -37,6 +37,92 @@ class Worker(Thread): # Get details {{{
        self.relevance, self.plugin = relevance, plugin
        self.browser = browser.clone_browser()
        self.cover_url = self.amazon_id = self.isbn = None
self.domain = self.plugin.domain
months = {
'de': {
1 : ['jän'],
3 : ['märz'],
5 : ['mai'],
6 : ['juni'],
7 : ['juli'],
10: ['okt'],
12: ['dez']
},
'it': {
1: ['enn'],
2: ['febbr'],
5: ['magg'],
6: ['giugno'],
7: ['luglio'],
8: ['ag'],
9: ['sett'],
10: ['ott'],
12: ['dic'],
},
'fr': {
1: ['janv'],
2: ['févr'],
3: ['mars'],
4: ['avril'],
5: ['mai'],
6: ['juin'],
7: ['juil'],
8: ['août'],
9: ['sept'],
12: ['déc'],
},
}
self.english_months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
self.months = months.get(self.domain, {})
self.pd_xpath = '''
//h2[text()="Product Details" or \
text()="Produktinformation" or \
text()="Dettagli prodotto" or \
text()="Product details" or \
text()="Détails sur le produit"]/../div[@class="content"]
'''
self.publisher_xpath = '''
descendant::*[starts-with(text(), "Publisher:") or \
starts-with(text(), "Verlag:") or \
starts-with(text(), "Editore:") or \
starts-with(text(), "Editeur")]
'''
self.language_xpath = '''
descendant::*[
starts-with(text(), "Language:") \
or text() = "Language" \
or text() = "Sprache:" \
or text() = "Lingua:" \
or starts-with(text(), "Langue") \
]
'''
self.ratings_pat = re.compile(
r'([0-9.]+) (out of|von|su|étoiles sur) (\d+)( (stars|Sternen|stelle)){0,1}')
lm = {
'en': ('English', 'Englisch'),
'fr': ('French', 'Français'),
'it': ('Italian', 'Italiano'),
'de': ('German', 'Deutsch'),
}
self.lang_map = {}
for code, names in lm.iteritems():
for name in names:
self.lang_map[name] = code
def delocalize_datestr(self, raw):
if not self.months:
return raw
ans = raw.lower()
for i, vals in self.months.iteritems():
for x in vals:
ans = ans.replace(x, self.english_months[i])
return ans
    def run(self):
        try:
@ -132,7 +218,7 @@ class Worker(Thread): # Get details {{{
                self.log.exception('Error parsing cover for url: %r'%self.url)
        mi.has_cover = bool(self.cover_url)

-       pd = root.xpath('//h2[text()="Product Details"]/../div[@class="content"]')
        pd = root.xpath(self.pd_xpath)
        if pd:
            pd = pd[0]
@ -194,30 +280,42 @@ class Worker(Thread): # Get details {{{
    def parse_authors(self, root):
        x = '//h1[@class="parseasinTitle"]/following-sibling::span/*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]'
        aname = root.xpath(x)
        if not aname:
            aname = root.xpath('''
            //h1[@class="parseasinTitle"]/following-sibling::*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]
            ''')
        for x in aname:
            x.tail = ''
        authors = [tostring(x, encoding=unicode, method='text').strip() for x
                in aname]
        authors = [a for a in authors if a]
        return authors
    def parse_rating(self, root):
        ratings = root.xpath('//div[@class="jumpBar"]/descendant::span[@class="asinReviewsSummary"]')
-       pat = re.compile(r'([0-9.]+) out of (\d+) stars')
        if not ratings:
            ratings = root.xpath('//div[@class="buying"]/descendant::span[@class="asinReviewsSummary"]')
        if not ratings:
            ratings = root.xpath('//span[@class="crAvgStars"]/descendant::span[@class="asinReviewsSummary"]')
        if ratings:
            for elem in ratings[0].xpath('descendant::*[@title]'):
                t = elem.get('title').strip()
-               m = pat.match(t)
                m = self.ratings_pat.match(t)
                if m is not None:
-                   return float(m.group(1))/float(m.group(2)) * 5
                    return float(m.group(1))/float(m.group(3)) * 5
    def parse_comments(self, root):
        desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
        if desc:
            desc = desc[0]
            for c in desc.xpath('descendant::*[@class="seeAll" or'
-                   ' @class="emptyClear" or @href]'):
                    ' @class="emptyClear"]'):
                c.getparent().remove(c)
            for a in desc.xpath('descendant::a[@href]'):
                del a.attrib['href']
                a.tag = 'span'
            desc = tostring(desc, method='html', encoding=unicode).strip()
            # Encoding bug in Amazon data U+fffd (replacement char)
            # in some examples it is present in place of '
            desc = desc.replace('\ufffd', "'")
@ -246,41 +344,44 @@ class Worker(Thread): # Get details {{{
        return ('/'.join(parts[:-1]))+'/'+bn
    def parse_isbn(self, pd):
-       for x in reversed(pd.xpath(
-               'descendant::*[starts-with(text(), "ISBN")]')):
        items = pd.xpath(
                'descendant::*[starts-with(text(), "ISBN")]')
        if not items:
            items = pd.xpath(
                'descendant::b[contains(text(), "ISBN:")]')
        for x in reversed(items):
            if x.tail:
                ans = check_isbn(x.tail.strip())
                if ans:
                    return ans

    def parse_publisher(self, pd):
-       for x in reversed(pd.xpath(
-               'descendant::*[starts-with(text(), "Publisher:")]')):
        for x in reversed(pd.xpath(self.publisher_xpath)):
            if x.tail:
                ans = x.tail.partition(';')[0]
                return ans.partition('(')[0].strip()

    def parse_pubdate(self, pd):
-       for x in reversed(pd.xpath(
-               'descendant::*[starts-with(text(), "Publisher:")]')):
        for x in reversed(pd.xpath(self.publisher_xpath)):
            if x.tail:
                ans = x.tail
                date = ans.partition('(')[-1].replace(')', '').strip()
                date = self.delocalize_datestr(date)
                return parse_date(date, assume_utc=True)

    def parse_language(self, pd):
-       for x in reversed(pd.xpath(
-               'descendant::*[starts-with(text(), "Language:")]')):
        for x in reversed(pd.xpath(self.language_xpath)):
            if x.tail:
                ans = x.tail.strip()
-               if ans == 'English':
-                   return 'en'
                ans = self.lang_map.get(ans, None)
                if ans:
                    return ans
    # }}}
class Amazon(Source):

    name = 'Amazon.com'
-   description = _('Downloads metadata from Amazon')
    description = _('Downloads metadata and covers from Amazon')

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
@ -294,8 +395,15 @@ class Amazon(Source):
            'fr' : _('France'),
            'de' : _('Germany'),
            'uk' : _('UK'),
            'it' : _('Italy'),
            }

    options = (
            Option('domain', 'choices', 'com', _('Amazon website to use:'),
                _('Metadata from Amazon will be fetched using this '
                    'country\'s Amazon website.'), choices=AMAZON_DOMAINS),
            )

    def get_book_url(self, identifiers): # {{{
        asin = identifiers.get('amazon', None)
        if asin is None:
@ -304,8 +412,16 @@ class Amazon(Source):
        return ('amazon', asin, 'http://amzn.com/%s'%asin)
    # }}}

    @property
    def domain(self):
        domain = self.prefs['domain']
        if domain not in self.AMAZON_DOMAINS:
            domain = 'com'
        return domain

    def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
-       domain = self.prefs.get('domain', 'com')
        domain = self.domain

        # See the amazon detailed search page to get all options
        q = { 'search-alias' : 'aps',
@ -345,6 +461,8 @@ class Amazon(Source):
        latin1q = dict([(x.encode('latin1', 'ignore'), y.encode('latin1',
            'ignore')) for x, y in
            q.iteritems()])
        if domain == 'uk':
            domain = 'co.uk'
        url = 'http://www.amazon.%s/s/?'%domain + urlencode(latin1q)
        return url
@ -516,11 +634,19 @@ if __name__ == '__main__': # tests {{{
    # src/calibre/ebooks/metadata/sources/amazon.py
    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
            title_test, authors_test)
-   test_identify_plugin(Amazon.name,
-       [
-           ( # An e-book ISBN not on Amazon, one of the authors is
-             # unknown to Amazon, so no popup wrapper
    com_tests = [ # {{{

            ( # Description has links
                {'identifiers':{'isbn': '9780671578275'}},
                [title_test('A Civil Campaign: A Comedy of Biology and Manners',
                    exact=True), authors_test(['Lois McMaster Bujold'])
                ]
            ),

            ( # An e-book ISBN not on Amazon, the title/author search matches
              # the Kindle edition, which has different markup for ratings and
              # isbn
                {'identifiers':{'isbn': '9780307459671'},
                    'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
                [title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
@ -556,6 +682,38 @@ if __name__ == '__main__': # tests {{{
            ),

-       ])
    ] # }}}
de_tests = [ # {{{
(
{'identifiers':{'isbn': '3548283519'}},
[title_test('Wer Wind sät',
exact=True), authors_test(['Nele Neuhaus'])
]
),
] # }}}
it_tests = [ # {{{
(
{'identifiers':{'isbn': '8838922195'}},
[title_test('La briscola in cinque',
exact=True), authors_test(['Marco Malvaldi'])
]
),
] # }}}
fr_tests = [ # {{{
(
{'identifiers':{'isbn': '2221116798'}},
[title_test('L\'étrange voyage de Monsieur Daldry',
exact=True), authors_test(['Marc Levy'])
]
),
] # }}}
test_identify_plugin(Amazon.name, com_tests)
# }}}
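The month tables above exist so dates scraped from localized Amazon sites can be handed to parse_date; a standalone sketch of the delocalization step (German table abridged, purely illustrative):

english_months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
months_de = {3: [u'märz'], 10: [u'okt'], 12: [u'dez']}

def delocalize_datestr(raw, months=months_de):
    ans = raw.lower()
    for i, vals in months.iteritems():
        for x in vals:
            ans = ans.replace(x, english_months[i])
    return ans

print delocalize_datestr(u'1. Dez. 2010') # 1. Dec. 2010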

View File

@ -145,10 +145,13 @@ class Option(object):
        :param default: The default value for this option
        :param label: A short (few words) description of this option
        :param desc: A longer description of this option
-       :param choices: A list of possible values, used only if type='choices'
        :param choices: A dict of possible values, used only if type='choices'.
                        dict is of the form {key:human readable label, ...}
        '''
        self.name, self.type, self.default, self.label, self.desc = (name,
                type_, default, label, desc)
        if choices and not isinstance(choices, dict):
            choices = dict([(x, x) for x in choices])
        self.choices = choices

class Source(Plugin):
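With this normalization, a plugin can pass choices as a plain sequence and downstream code can rely on the dict form; a sketch assuming only the constructor arguments shown above:

opt = Option('domain', 'choices', 'com', 'Amazon website to use:',
        'Which Amazon site to query', choices=['com', 'uk', 'de'])
print opt.choices['uk'] # 'uk': each key now maps to itself as its label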

View File

@ -0,0 +1,347 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>; 2011, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'
import time
from urllib import urlencode
from functools import partial
from Queue import Queue, Empty
from lxml import etree
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars
from calibre import as_unicode
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
'atom' : 'http://www.w3.org/2005/Atom',
'db': 'http://www.douban.com/xmlns/',
'gd': 'http://schemas.google.com/g/2005'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)
total_results = XPath('//openSearch:totalResults')
start_index = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
title = XPath('descendant::atom:title')
description = XPath('descendant::atom:summary')
publisher = XPath("descendant::db:attribute[@name='publisher']")
isbn = XPath("descendant::db:attribute[@name='isbn13']")
date = XPath("descendant::db:attribute[@name='pubdate']")
creator = XPath("descendant::db:attribute[@name='author']")
booktag = XPath("descendant::db:tag/attribute::name")
rating = XPath("descendant::gd:rating/attribute::average")
cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
def get_details(browser, url, timeout): # {{{
try:
raw = browser.open_novisit(url, timeout=timeout).read()
except Exception as e:
gc = getattr(e, 'getcode', lambda : -1)
if gc() != 403:
raise
# Douban is throttling us, wait a little
time.sleep(2)
raw = browser.open_novisit(url, timeout=timeout).read()
return raw
# }}}
def to_metadata(browser, log, entry_, timeout): # {{{
def get_text(extra, x):
try:
ans = x(extra)
if ans:
ans = ans[0].text
if ans and ans.strip():
return ans.strip()
except:
log.exception('Programming error:')
return None
id_url = entry_id(entry_)[0].text
douban_id = id_url.split('/')[-1]
title_ = ': '.join([x.text for x in title(entry_)]).strip()
authors = [x.text.strip() for x in creator(entry_) if x.text]
if not authors:
authors = [_('Unknown')]
if not id_url or not title:
# Silently discard this entry
return None
mi = Metadata(title_, authors)
mi.identifiers = {'douban':douban_id}
try:
raw = get_details(browser, id_url, timeout)
feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
strip_encoding_pats=True)[0])
extra = entry(feed)[0]
except:
log.exception('Failed to get additional details for', mi.title)
return mi
mi.comments = get_text(extra, description)
mi.publisher = get_text(extra, publisher)
# ISBN
isbns = []
for x in [t.text for t in isbn(extra)]:
if check_isbn(x):
isbns.append(x)
if isbns:
mi.isbn = sorted(isbns, key=len)[-1]
mi.all_isbns = isbns
# Tags
try:
btags = [x for x in booktag(extra) if x]
tags = []
for t in btags:
atags = [y.strip() for y in t.split('/')]
for tag in atags:
if tag not in tags:
tags.append(tag)
except:
log.exception('Failed to parse tags:')
tags = []
if tags:
mi.tags = [x.replace(',', ';') for x in tags]
# pubdate
pubdate = get_text(extra, date)
if pubdate:
try:
default = utcnow().replace(day=15)
mi.pubdate = parse_date(pubdate, assume_utc=True, default=default)
except:
log.error('Failed to parse pubdate %r'%pubdate)
# Ratings
if rating(extra):
try:
mi.rating = float(rating(extra)[0]) / 2.0
except:
log.exception('Failed to parse rating')
mi.rating = 0
# Cover
mi.has_douban_cover = None
u = cover_url(extra)
if u:
u = u[0].replace('/spic/', '/lpic/');
# If URL contains "book-default", the book doesn't have a cover
if u.find('book-default') == -1:
mi.has_douban_cover = u
return mi
# }}}
class Douban(Source):
name = 'Douban Books'
author = 'Li Fanxi'
version = (2, 0, 0)
description = _('Downloads metadata and covers from Douban.com')
capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'tags',
'pubdate', 'comments', 'publisher', 'identifier:isbn', 'rating',
'identifier:douban']) # language currently disabled
supports_gzip_transfer_encoding = True
cached_cover_url_is_reliable = True
DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
DOUBAN_BOOK_URL = 'http://book.douban.com/subject/%s/'
def get_book_url(self, identifiers): # {{{
db = identifiers.get('douban', None)
if db is not None:
return ('douban', db, self.DOUBAN_BOOK_URL%db)
# }}}
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
SEARCH_URL = 'http://api.douban.com/book/subjects?'
ISBN_URL = 'http://api.douban.com/book/subject/isbn/'
SUBJECT_URL = 'http://api.douban.com/book/subject/'
q = ''
t = None
isbn = check_isbn(identifiers.get('isbn', None))
subject = identifiers.get('douban', None)
if isbn is not None:
q = isbn
t = 'isbn'
elif subject is not None:
q = subject
t = 'subject'
elif title or authors:
def build_term(prefix, parts):
return ' '.join(x for x in parts)
title_tokens = list(self.get_title_tokens(title))
if title_tokens:
q += build_term('title', title_tokens)
author_tokens = self.get_author_tokens(authors,
only_first_author=True)
if author_tokens:
q += ((' ' if q != '' else '') +
build_term('author', author_tokens))
t = 'search'
q = q.strip()
if isinstance(q, unicode):
q = q.encode('utf-8')
if not q:
return None
url = None
if t == "isbn":
url = ISBN_URL + q
elif t == 'subject':
url = SUBJECT_URL + q
else:
url = SEARCH_URL + urlencode({
'q': q,
})
if self.DOUBAN_API_KEY and self.DOUBAN_API_KEY != '':
url = url + "?apikey=" + self.DOUBAN_API_KEY
return url
# }}}
def download_cover(self, log, result_queue, abort, # {{{
title=None, authors=None, identifiers={}, timeout=30):
cached_url = self.get_cached_cover_url(identifiers)
if cached_url is None:
log.info('No cached cover found, running identify')
rq = Queue()
self.identify(log, rq, abort, title=title, authors=authors,
identifiers=identifiers)
if abort.is_set():
return
results = []
while True:
try:
results.append(rq.get_nowait())
except Empty:
break
results.sort(key=self.identify_results_keygen(
title=title, authors=authors, identifiers=identifiers))
for mi in results:
cached_url = self.get_cached_cover_url(mi.identifiers)
if cached_url is not None:
break
if cached_url is None:
log.info('No cover found')
return
if abort.is_set():
return
br = self.browser
log('Downloading cover from:', cached_url)
try:
cdata = br.open_novisit(cached_url, timeout=timeout).read()
if cdata:
result_queue.put((self, cdata))
except:
log.exception('Failed to download cover from:', cached_url)
# }}}
def get_cached_cover_url(self, identifiers): # {{{
url = None
db = identifiers.get('douban', None)
if db is None:
isbn = identifiers.get('isbn', None)
if isbn is not None:
db = self.cached_isbn_to_identifier(isbn)
if db is not None:
url = self.cached_identifier_to_cover_url(db)
return url
# }}}
def get_all_details(self, br, log, entries, abort, # {{{
result_queue, timeout):
for relevance, i in enumerate(entries):
try:
ans = to_metadata(br, log, i, timeout)
if isinstance(ans, Metadata):
ans.source_relevance = relevance
db = ans.identifiers['douban']
for isbn in getattr(ans, 'all_isbns', []):
self.cache_isbn_to_identifier(isbn, db)
if ans.has_douban_cover:
self.cache_identifier_to_cover_url(db,
ans.has_douban_cover)
self.clean_downloaded_metadata(ans)
result_queue.put(ans)
except:
log.exception(
'Failed to get metadata for identify entry:',
etree.tostring(i))
if abort.is_set():
break
# }}}
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
identifiers={}, timeout=30):
query = self.create_query(log, title=title, authors=authors,
identifiers=identifiers)
if not query:
log.error('Insufficient metadata to construct query')
return
br = self.browser
try:
raw = br.open_novisit(query, timeout=timeout).read()
except Exception as e:
log.exception('Failed to make identify query: %r'%query)
return as_unicode(e)
try:
parser = etree.XMLParser(recover=True, no_network=True)
feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
strip_encoding_pats=True)[0], parser=parser)
entries = entry(feed)
except Exception as e:
log.exception('Failed to parse identify results')
return as_unicode(e)
if not entries and identifiers and title and authors and \
not abort.is_set():
return self.identify(log, result_queue, abort, title=title,
authors=authors, timeout=timeout)
# There is no point running these queries in threads as douban
# throttles requests returning 403 Forbidden errors
self.get_all_details(br, log, entries, abort, result_queue, timeout)
return None
# }}}
if __name__ == '__main__': # tests {{{
# To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/douban.py
from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
title_test, authors_test)
test_identify_plugin(Douban.name,
[
(
{'identifiers':{'isbn': '9787536692930'}, 'title':'三体',
'authors':['刘慈欣']},
[title_test('三体', exact=True),
authors_test(['刘慈欣'])]
),
(
{'title': 'Linux内核修炼之道', 'authors':['任桥伟']},
[title_test('Linux内核修炼之道', exact=False)]
),
])
# }}}
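For reference, create_query above routes an ISBN lookup, a douban subject id and a free-text search to three different endpoints; a condensed sketch of that routing (apikey handling omitted):

from urllib import urlencode

def douban_query(isbn=None, subject=None, text=None):
    if isbn:
        return 'http://api.douban.com/book/subject/isbn/' + isbn
    if subject:
        return 'http://api.douban.com/book/subject/' + subject
    if text:
        return 'http://api.douban.com/book/subjects?' + urlencode({'q': text})

print douban_query(isbn='9787536692930')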

View File

@ -157,7 +157,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{
class GoogleBooks(Source):

    name = 'Google'
-   description = _('Downloads metadata from Google Books')
    description = _('Downloads metadata and covers from Google Books')

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',

View File

@ -372,6 +372,18 @@ def identify(log, abort, # {{{
    longest, lp = -1, ''
    for plugin, presults in results.iteritems():
        presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))
        # Throw away lower priority results from the same source that have exactly the same
        # title and authors as a higher priority result
        filter_results = set()
        filtered_results = []
        for r in presults:
            key = (r.title, tuple(r.authors))
            if key not in filter_results:
                filtered_results.append(r)
                filter_results.add(key)
        results[plugin] = presults = filtered_results
        plog = logs[plugin].getvalue().strip()
        log('\n'+'*'*30, plugin.name, '*'*30)
        log('Request extra headers:', plugin.browser.addheaders)
@ -479,7 +491,7 @@ if __name__ == '__main__': # tests {{{
        (
            {'title':'Magykal Papers',
                'authors':['Sage']},
-           [title_test('The Magykal Papers', exact=True)],
            [title_test('Septimus Heap: The Magykal Papers', exact=True)],
        ),
@ -506,12 +518,6 @@ if __name__ == '__main__': # tests {{{
                exact=True), authors_test(['Dan Brown'])]
        ),
-       ( # No ISBN
-           {'title':'Justine', 'authors':['Durrel']},
-           [title_test('Justine', exact=True),
-               authors_test(['Lawrence Durrel'])]
-       ),
        ( # A newer book
            {'identifiers':{'isbn': '9780316044981'}},
            [title_test('The Heroes', exact=True),
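The filter added above keeps only the first (highest priority) result per (title, authors) pair from each source; the same logic as a standalone helper:

def dedup_results(presults):
    # presults is assumed sorted best-first
    seen, filtered = set(), []
    for r in presults:
        key = (r.title, tuple(r.authors))
        if key not in seen:
            filtered.append(r)
            seen.add(key)
    return filtered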

View File

@ -30,7 +30,7 @@ base_url = 'http://search.overdrive.com/'
class OverDrive(Source):

    name = 'Overdrive'
-   description = _('Downloads metadata from Overdrive\'s Content Reserve')
    description = _('Downloads metadata and covers from Overdrive\'s Content Reserve')

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',

View File

@ -191,7 +191,11 @@ class OEBReader(object):
            if not scheme and href not in known:
                new.add(href)
        elif item.media_type in OEB_STYLES:
-           for url in cssutils.getUrls(item.data):
            try:
                urls = list(cssutils.getUrls(item.data))
            except:
                urls = []
            for url in urls:
                href, _ = urldefrag(url)
                href = item.abshref(urlnormalize(href))
                scheme = urlparse(href).scheme
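The guard above degrades a stylesheet whose URL extraction raises to "no URLs" instead of aborting the manifest walk; the same pattern in isolation (the parseString call is only there to make the sketch runnable):

import cssutils

def stylesheet_urls(sheet):
    try:
        return list(cssutils.getUrls(sheet))
    except Exception:
        return [] # a broken sheet contributes no resource links

sheet = cssutils.parseString('body { background: url(bg.png) }')
print stylesheet_urls(sheet) # [u'bg.png']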

View File

@ -12,7 +12,7 @@ A Humane Web Text Generator
#__date__ = '2009/12/04'

__copyright__ = """
-Copyright (c) 2011, Leigh Parry
Copyright (c) 2011, Leigh Parry <leighparry@blueyonder.co.uk>
Copyright (c) 2011, John Schember <john@nachtimwald.com>
Copyright (c) 2009, Jason Samsa, http://jsamsa.com/
Copyright (c) 2004, Roberto A. F. De Almeida, http://dealmeida.net/
@ -219,14 +219,13 @@ class Textile(object):
    ]
    glyph_defaults = [
        (re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), r'\1\2&#215;\3'), # dimension sign
-       (re.compile(r'(\d+)\'', re.I), r'\1&#8242;'), # prime
-       (re.compile(r'(\d+)\"', re.I), r'\1&#8243;'), # prime-double
        (re.compile(r'(\d+)\'(\s)', re.I), r'\1&#8242;\2'), # prime
        (re.compile(r'(\d+)\"(\s)', re.I), r'\1&#8243;\2'), # prime-double
        (re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), r'<acronym title="\2">\1</acronym>'), # 3+ uppercase acronym
        (re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), r'<span class="caps">\1</span>'), # 3+ uppercase
        (re.compile(r'\b(\s{0,1})?\.{3}'), r'\1&#8230;'), # ellipsis
        (re.compile(r'^[\*_-]{3,}$', re.M), r'<hr />'), # <hr> scene-break
-       (re.compile(r'\b--\b'), r'&#8212;'), # em dash
-       (re.compile(r'(\s)--(\s)'), r'\1&#8212;\2'), # em dash
        (re.compile(r'(^|[^-])--([^-]|$)'), r'\1&#8212;\2'), # em dash
        (re.compile(r'\s-(?:\s|$)'), r' &#8211; '), # en dash
        (re.compile(r'\b( ?)[([]TM[])]', re.I), r'\1&#8482;'), # trademark
        (re.compile(r'\b( ?)[([]R[])]', re.I), r'\1&#174;'), # registered
@ -706,6 +705,21 @@ class Textile(object):
            result.append(line)
        return ''.join(result)
def macros_only(self, text):
# fix: hackish
text = re.sub(r'"\Z', '\" ', text)
result = []
for line in re.compile(r'(<.*?>)', re.U).split(text):
if not re.search(r'<.*>', line):
rules = []
if re.search(r'{.+?}', line):
rules = self.macro_defaults
for s, r in rules:
line = s.sub(r, line)
result.append(line)
return ''.join(result)
    def vAlign(self, input):
        d = {'^':'top', '-':'middle', '~':'bottom'}
        return d.get(input, '')
@ -814,6 +828,7 @@ class Textile(object):
        'fooobar ... and hello world ...'

        """
        text = self.macros_only(text)
        punct = '!"#$%&\'*+,-./:;=?@\\^_`|~'

        pattern = r'''
@ -1044,4 +1059,3 @@ def textile_restricted(text, lite=True, noimage=True, html_type='xhtml'):
    return Textile(restricted=True, lite=lite,
            noimage=noimage).textile(text, rel='nofollow',
            html_type=html_type)
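The revised em-dash rule fires on a double hyphen whether or not it is surrounded by spaces, but no longer inside a longer hyphen run; for example:

import re

em_dash = re.compile(r'(^|[^-])--([^-]|$)')
print em_dash.sub(r'\1&#8212;\2', 'word--word') # word&#8212;word
print em_dash.sub(r'\1&#8212;\2', 'a -- b') # a &#8212; b
print em_dash.sub(r'\1&#8212;\2', 'rule ---- here') # unchanged: longer run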

View File

@ -66,19 +66,26 @@ class TXTOutput(OutputFormatPlugin):
            help=_('Do not remove image references within the document. This is only ' \
                'useful when paired with a txt-output-formatting option that '
                'is not none because links are always removed with plain text output.')),
OptionRecommendation(name='keep_color',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Do not remove font color from output. This is only useful when ' \
'txt-output-formatting is set to textile. Textile is the only ' \
'formatting that supports setting font color. If this option is ' \
'not specified font color will not be set and default to the ' \
'color displayed by the reader (generally this is black).')),
        ])

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        if opts.txt_output_formatting.lower() == 'markdown':
            from calibre.ebooks.txt.markdownml import MarkdownMLizer
-           writer = MarkdownMLizer(log)
            self.writer = MarkdownMLizer(log)
        elif opts.txt_output_formatting.lower() == 'textile':
            from calibre.ebooks.txt.textileml import TextileMLizer
-           writer = TextileMLizer(log)
            self.writer = TextileMLizer(log)
        else:
-           writer = TXTMLizer(log)
            self.writer = TXTMLizer(log)

-       txt = writer.extract_content(oeb_book, opts)
        txt = self.writer.extract_content(oeb_book, opts)
        txt = clean_ascii_chars(txt)

        log.debug('\tReplacing newlines with selected type...')
@ -111,17 +118,28 @@ class TXTZOutput(TXTOutput):
        from calibre.ebooks.oeb.base import OEB_IMAGES

        with TemporaryDirectory('_txtz_output') as tdir:
            # TXT
-           with TemporaryFile('index.txt') as tf:
            txt_name = 'index.txt'
            if opts.txt_output_formatting.lower() == 'textile':
                txt_name = 'index.text'
            with TemporaryFile(txt_name) as tf:
                TXTOutput.convert(self, oeb_book, tf, input_plugin, opts, log)
-               shutil.copy(tf, os.path.join(tdir, 'index.txt'))
                shutil.copy(tf, os.path.join(tdir, txt_name))

            # Images
            for item in oeb_book.manifest:
                if item.media_type in OEB_IMAGES:
-                   path = os.path.join(tdir, os.path.dirname(item.href))
                    if hasattr(self.writer, 'images'):
                        path = os.path.join(tdir, 'images')
                        if item.href in self.writer.images:
                            href = self.writer.images[item.href]
                        else:
                            continue
                    else:
                        path = os.path.join(tdir, os.path.dirname(item.href))
                        href = os.path.basename(item.href)
                    if not os.path.exists(path):
                        os.makedirs(path)
-                   with open(os.path.join(tdir, item.href), 'wb') as imgf:
                    with open(os.path.join(path, href), 'wb') as imgf:
                        imgf.write(item.data)

            # Metadata
View File

@ -243,6 +243,8 @@ def detect_formatting_type(txt):
    textile_count += len(re.findall(r'(?mu)(?<=\!)\S+(?=\!)', txt))
    # Links
    textile_count += len(re.findall(r'"[^"]*":\S+', txt))
    # paragraph blocks
    textile_count += len(re.findall(r'(?mu)^p(<|<>|=|>)?\. ', txt))

    # Decide if either markdown or textile is used in the text
    # based on the number of unique formatting elements found.
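The added heuristic counts explicit Textile paragraph blocks such as 'p. ', 'p<. ', 'p>. ', 'p=. ' and 'p<>. ' at line starts, e.g.:

import re

pat = re.compile(r'(?mu)^p(<|<>|=|>)?\. ')
sample = u'p. a plain paragraph\np=. a centred paragraph\nnot a block\n'
print len(pat.findall(sample)) # 2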

View File

@ -1,62 +1,489 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL 3'
-__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__copyright__ = '2011, Leigh Parry <leighparry@blueyonder.co.uk>'
__docformat__ = 'restructuredtext en'

'''
Transform OEB content into Textile formatted plain text
'''

import re

-from lxml import etree
-
-from calibre.ebooks.oeb.base import XHTML
-from calibre.utils.html2textile import html2textile
from functools import partial

from calibre.ebooks.htmlz.oeb2html import OEB2HTML
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace, rewrite_links
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks import unit_convert
from calibre.ebooks.txt.unsmarten import unsmarten

-class TextileMLizer(object):
-
-    def __init__(self, log):
-        self.log = log
class TextileMLizer(OEB2HTML):

    def extract_content(self, oeb_book, opts):
        self.log.info('Converting XHTML to Textile formatted TXT...')
-       self.oeb_book = oeb_book
        self.opts = opts
        self.in_pre = False
        self.in_table = False
        self.links = {}
        self.list = []
        self.our_links = []
        self.in_a_link = False
        self.our_ids = []
        self.images = {}
        self.id_no_text = u''
        self.style_embed = []
        self.remove_space_after_newline = False
        self.base_hrefs = [item.href for item in oeb_book.spine]
        self.map_resources(oeb_book)

-       return self.mlize_spine()
        self.style_bold = False
        self.style_italic = False
        self.style_under = False
        self.style_strike = False
        self.style_smallcap = False

        txt = self.mlize_spine(oeb_book)
        txt = unsmarten(txt)

        # Do some tidying up
        txt = self.tidy_up(txt)

        return txt

-   def mlize_spine(self):
    def mlize_spine(self, oeb_book):
        output = [u'']
-       for item in self.oeb_book.spine:
        for item in oeb_book.spine:
            self.log.debug('Converting %s to Textile formatted TXT...' % item.href)
            self.rewrite_ids(item.data, item)
            rewrite_links(item.data, partial(self.rewrite_link, page=item))
            stylizer = Stylizer(item.data, item.href, oeb_book, self.opts, self.opts.output_profile)
            output += self.dump_text(item.data.find(XHTML('body')), stylizer)
            output.append('\n\n')
        return ''.join(output)

-           html = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
-
-           if not self.opts.keep_links:
-               html = re.sub(r'<\s*/*\s*a[^>]*>', '', html)
-           if not self.opts.keep_image_references:
-               html = re.sub(r'<\s*img[^>]*>', '', html)
-
-           text = html2textile(html)
-
-           # Ensure the section ends with at least two new line characters.
-           # This is to prevent the last paragraph from a section being
-           # combined into the fist paragraph of the next.
-           end_chars = text[-4:]
-           # Convert all newlines to \n
-           end_chars = end_chars.replace('\r\n', '\n')
-           end_chars = end_chars.replace('\r', '\n')
-           end_chars = end_chars[-2:]
-           if not end_chars[1] == '\n':
-               text += '\n\n'
-           if end_chars[1] == '\n' and not end_chars[0] == '\n':
-               text += '\n'
-
-           output += text
-
-       output = u''.join(output)
-
-       return output

    def tidy_up(self, text):
        # May need tweaking and finetuning
        def check_escaping(text, tests):
            for t in tests:
                # I'm not checking for duplicated spans '%' as any that follow each other were being incorrectly merged
                txt = '%s' % t
                if txt != '%':
                    text = re.sub(r'([^'+t+'|^\n])'+t+'\]\['+t+'([^'+t+'])', r'\1\2', text)
                    text = re.sub(r'([^'+t+'|^\n])'+t+t+'([^'+t+'])', r'\1\2', text)
                text = re.sub(r'(\s|[*_\'"])\[('+t+'[a-zA-Z0-9 \'",.*_]+'+t+')\](\s|[*_\'"?!,.])', r'\1\2\3', text)
            return text

        # Now tidyup links and ids - remove ones that don't have a correponding opposite
        if self.opts.keep_links:
            for i in self.our_links:
                if i[0] == '#':
                    if i not in self.our_ids:
                        text = re.sub(r'"(.+)":'+i+'(\s)', r'\1\2', text)
            for i in self.our_ids:
                if i not in self.our_links:
                    text = re.sub(r'%?\('+i+'\)\xa0?%?', r'', text)

        # Remove obvious non-needed escaping, add sub/sup-script ones
        text = check_escaping(text, ['\*', '_', '\*'])
        # escape the super/sub-scripts if needed
        text = re.sub(r'(\w)([~^]\w+[~^])', r'\1[\2]', text)
        # escape the super/sub-scripts if needed
        text = re.sub(r'([~^]\w+[~^])(\w)', r'[\1]\2', text)

        #remove empty spans
        text = re.sub(r'%\xa0+', r'%', text)
        #remove empty spans - MAY MERGE SOME ?
        text = re.sub(r'%%', r'', text)
        #remove spans from tagged output
        text = re.sub(r'%([_+*-]+)%', r'\1', text)
        #remove spaces before a newline
        text = re.sub(r' +\n', r'\n', text)
        #remove newlines at top of file
        text = re.sub(r'^\n+', r'', text)
        #correct blockcode paras
        text = re.sub(r'\npre\.\n?\nbc\.', r'\nbc.', text)
        #correct blockquote paras
        text = re.sub(r'\nbq\.\n?\np.*\. ', r'\nbq. ', text)

        #reduce blank lines
        text = re.sub(r'\n{3}', r'\n\np. \n\n', text)
        text = re.sub(u'%\n(p[<>=]{1,2}\.|p\.)', r'%\n\n\1', text)
        #Check span following blank para
        text = re.sub(r'\n+ +%', r' %', text)
        text = re.sub(u'p[<>=]{1,2}\.\n\n?', r'', text)
        # blank paragraph
        text = re.sub(r'\n(p.*\.)\n', r'\n\1 \n\n', text)
        # blank paragraph
        text = re.sub(u'\n\xa0', r'\np. ', text)
        # blank paragraph
        text = re.sub(u'\np[<>=]{1,2}?\. \xa0', r'\np. ', text)
        text = re.sub(r'(^|\n)(p.*\. ?\n)(p.*\.)', r'\1\3', text)
        text = re.sub(r'\n(p\. \n)(p.*\.|h.*\.)', r'\n\2', text)
        #sort out spaces in tables
        text = re.sub(r' {2,}\|', r' |', text)

        # Now put back spaces removed earlier as they're needed here
        text = re.sub(r'\np\.\n', r'\np. \n', text)
        #reduce blank lines
        text = re.sub(r' \n\n\n', r' \n\n', text)

        return text

    def remove_newlines(self, text):
text = text.replace('\r\n', ' ')
text = text.replace('\n', ' ')
text = text.replace('\r', ' ')
# Condense redundant spaces created by replacing newlines with spaces.
text = re.sub(r'[ ]{2,}', ' ', text)
text = re.sub(r'\t+', '', text)
if self.remove_space_after_newline == True:
text = re.sub(r'^ +', '', text)
self.remove_space_after_newline = False
return text
def check_styles(self, style):
txt = '{'
if self.opts.keep_color:
if 'color' in style.cssdict() and style['color'] != 'black':
txt += 'color:'+style['color']+';'
if 'background' in style.cssdict():
txt += 'background:'+style['background']+';'
txt += '}'
if txt == '{}': txt = ''
return txt
def check_halign(self, style):
tests = {'left':'<','justify':'<>','center':'=','right':'>'}
for i in tests:
if style['text-align'] == i:
return tests[i]
return ''
def check_valign(self, style):
tests = {'top':'^','bottom':'~'} #, 'middle':'-'}
for i in tests:
if style['vertical-align'] == i:
return tests[i]
return ''
def check_padding(self, style, stylizer):
txt = ''
left_padding_pts = 0
left_margin_pts = 0
if 'padding-left' in style.cssdict() and style['padding-left'] != 'auto':
left_padding_pts = unit_convert(style['padding-left'], style.width, style.fontSize, stylizer.profile.dpi)
if 'margin-left' in style.cssdict() and style['margin-left'] != 'auto':
left_margin_pts = unit_convert(style['margin-left'], style.width, style.fontSize, stylizer.profile.dpi)
left = left_margin_pts + left_padding_pts
emleft = int(round(left / stylizer.profile.fbase))
if emleft >= 1:
txt += '(' * emleft
right_padding_pts = 0
right_margin_pts = 0
if 'padding-right' in style.cssdict() and style['padding-right'] != 'auto':
right_padding_pts = unit_convert(style['padding-right'], style.width, style.fontSize, stylizer.profile.dpi)
if 'margin-right' in style.cssdict() and style['margin-right'] != 'auto':
right_margin_pts = unit_convert(style['margin-right'], style.width, style.fontSize, stylizer.profile.dpi)
right = right_margin_pts + right_padding_pts
emright = int(round(right / stylizer.profile.fbase))
if emright >= 1:
txt += ')' * emright
return txt
def check_id_tag(self, attribs):
txt = ''
if attribs.has_key('id'):
txt = '(#'+attribs['id']+ ')'
self.our_ids.append('#'+attribs['id'])
self.id_no_text = u'\xa0'
return txt
def build_block(self, tag, style, attribs, stylizer):
txt = '\n' + tag
if self.opts.keep_links:
txt += self.check_id_tag(attribs)
txt += self.check_padding(style, stylizer)
txt += self.check_halign(style)
txt += self.check_styles(style)
return txt
def prepare_string_for_textile(self, txt):
if re.search(r'(\s([*&_+\-~@%|]|\?{2})\S)|(\S([*&_+\-~@%|]|\?{2})\s)', txt):
return ' ==%s== ' % txt
return txt
def dump_text(self, elem, stylizer):
'''
@elem: The element in the etree that we are working on.
@stylizer: The style information attached to the element.
'''
# We can only process tags. If there isn't a tag, return any tail text.
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:
p = elem.getparent()
if p is not None and isinstance(p.tag, basestring) and namespace(p.tag) == XHTML_NS \
and elem.tail:
return [elem.tail]
return ['']
# Setup our variables.
text = ['']
style = stylizer.style(elem)
tags = []
tag = barename(elem.tag)
attribs = elem.attrib
# Ignore anything that is set to not be displayed.
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
or style['visibility'] == 'hidden':
return ['']
# Soft scene breaks.
if 'margin-top' in style.cssdict() and style['margin-top'] != 'auto':
ems = int(round(float(style.marginTop) / style.fontSize) - 1)
if ems >= 1:
text.append(u'\n\n\xa0' * ems)
if tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div'):
if tag == 'div':
tag = 'p'
text.append(self.build_block(tag, style, attribs, stylizer))
text.append('. ')
tags.append('\n')
if style['font-style'] == 'italic' or tag in ('i', 'em'):
if tag not in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'cite'):
if self.style_italic == False:
if self.in_a_link:
text.append('_')
tags.append('_')
else:
text.append('[_')
tags.append('_]')
self.style_embed.append('_')
self.style_italic = True
if style['font-weight'] in ('bold', 'bolder') or tag in ('b', 'strong'):
if tag not in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'th'):
if self.style_bold == False:
if self.in_a_link:
text.append('*')
tags.append('*')
else:
text.append('[*')
tags.append('*]')
self.style_embed.append('*')
self.style_bold = True
if style['text-decoration'] == 'underline' or tag in ('u', 'ins'):
if tag != 'a':
if self.style_under == False:
text.append('[+')
tags.append('+]')
self.style_embed.append('+')
self.style_under = True
if style['text-decoration'] == 'line-through' or tag in ('strike', 'del', 's'):
if self.style_strike == False:
text.append('[-')
tags.append('-]')
self.style_embed.append('-')
self.style_strike = True
if tag == 'br':
for i in reversed(self.style_embed):
text.append(i)
text.append('\n')
for i in self.style_embed:
text.append(i)
tags.append('')
self.remove_space_after_newline = True
if tag == 'blockquote':
text.append('\nbq. ')
tags.append('\n')
elif tag in ('abbr', 'acronym'):
text.append('')
txt = attribs['title']
tags.append('(' + txt + ')')
elif tag == 'sup':
text.append('^')
tags.append('^')
elif tag == 'sub':
text.append('~')
tags.append('~')
elif tag == 'code':
if self.in_pre:
text.append('\nbc. ')
tags.append('')
else:
text.append('@')
tags.append('@')
elif tag == 'cite':
text.append('??')
tags.append('??')
elif tag == 'hr':
text.append('\n***')
tags.append('\n')
elif tag == 'pre':
self.in_pre = True
text.append('\npre. ')
tags.append('pre\n')
elif tag == 'a':
if self.opts.keep_links:
if attribs.has_key('href'):
text.append('"')
tags.append('a')
tags.append('":' + attribs['href'])
self.our_links.append(attribs['href'])
if attribs.has_key('title'):
tags.append('(' + attribs['title'] + ')')
self.in_a_link = True
else:
text.append('%')
tags.append('%')
elif tag == 'img':
if self.opts.keep_image_references:
txt = '!' + self.check_halign(style)
txt += self.check_valign(style)
txt += attribs['src']
text.append(txt)
if attribs.has_key('alt'):
txt = attribs['alt']
if txt != '':
text.append('(' + txt + ')')
tags.append('!')
elif tag in ('ol', 'ul'):
self.list.append({'name': tag, 'num': 0})
text.append('')
tags.append(tag)
elif tag == 'li':
if self.list: li = self.list[-1]
else: li = {'name': 'ul', 'num': 0}
text.append('\n')
if li['name'] == 'ul':
text.append('*' * len(self.list) + ' ')
elif li['name'] == 'ol':
text.append('#' * len(self.list) + ' ')
tags.append('')
elif tag == 'dl':
text.append('\n')
tags.append('')
elif tag == 'dt':
text.append('')
tags.append('\n')
elif tag == 'dd':
text.append(' ')
tags.append('')
elif tag == 'table':
txt = self.build_block(tag, style, attribs, stylizer)
txt += '. \n'
if txt != '\ntable. \n':
text.append(txt)
else:
text.append('\n')
tags.append('')
elif tag == 'tr':
txt = self.build_block('', style, attribs, stylizer)
txt += '. '
if txt != '\n. ':
txt = re.sub ('\n', '', txt)
text.append(txt)
tags.append('|\n')
elif tag == 'td':
text.append('|')
txt = ''
txt += self.check_halign(style)
txt += self.check_valign(style)
if attribs.has_key ('colspan'):
txt += '\\' + attribs['colspan']
if attribs.has_key ('rowspan'):
txt += '/' + attribs['rowspan']
txt += self.check_styles(style)
if txt != '':
text.append(txt + '. ')
tags.append('')
elif tag == 'th':
text.append('|_. ')
tags.append('')
elif tag == 'span':
if style['font-variant'] == 'small-caps':
if self.style_smallcap == False:
text.append('&')
tags.append('&')
self.style_smallcap = True
else:
if self.in_a_link == False:
txt = '%'
if self.opts.keep_links:
txt += self.check_id_tag(attribs)
txt += self.check_styles(style)
if txt != '%':
text.append(txt)
tags.append('%')
if self.opts.keep_links and attribs.has_key('id'):
if tag not in ('body', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'span', 'table'):
text.append(self.check_id_tag(attribs))
# Process the styles for any that we want to keep
if tag not in ('body', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'hr', 'a', 'img', \
'span', 'table', 'tr', 'td'):
if not self.in_a_link:
text.append(self.check_styles(style))
# Process tags that contain text.
if hasattr(elem, 'text') and elem.text:
txt = elem.text
if not self.in_pre:
txt = self.prepare_string_for_textile(self.remove_newlines(txt))
text.append(txt)
self.id_no_text = u''
# Recurse down into tags within the tag we are in.
for item in elem:
text += self.dump_text(item, stylizer)
# Close all open tags.
tags.reverse()
for t in tags:
if tag in ('pre', 'ul', 'ol', 'li', 'table'):
if tag == 'pre':
self.in_pre = False
elif tag in ('ul', 'ol'):
if self.list: self.list.pop()
if not self.list: text.append('\n')
else:
if t == 'a':
self.in_a_link = False
t = ''
text.append(self.id_no_text)
self.id_no_text = u''
if t in ('*]', '*'):
self.style_bold = False
elif t in ('_]', '_'):
self.style_italic = False
elif t == '+]':
self.style_under = False
elif t == '-]':
self.style_strike = False
elif t == '&':
self.style_smallcap = False
if t in ('*]', '_]', '+]', '-]', '*', '_'):
txt = self.style_embed.pop()
text.append('%s' % t)
# Soft scene breaks.
if 'margin-bottom' in style.cssdict() and style['margin-bottom'] != 'auto':
ems = int(round((float(style.marginBottom) / style.fontSize) - 1))
if ems >= 1:
text.append(u'\n\n\xa0' * ems)
# Add the text that is outside of the tag.
if hasattr(elem, 'tail') and elem.tail:
tail = elem.tail
if not self.in_pre:
tail = self.prepare_string_for_textile(self.remove_newlines(tail))
text.append(tail)
return text
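Two of the rules above are easy to exercise in isolation. The stand-alone sketch below is not calibre code (the function names are mine): it mirrors check_padding's points-to-em arithmetic, which emits one Textile '(' marker per em of combined left margin and padding, and the pattern prepare_string_for_textile uses to decide when text must be fenced in ==...==:

import re

def indent_markers(left_pts, fbase=12):
    # One '(' per whole em of left indent, as in check_padding above.
    ems = int(round(left_pts / float(fbase)))
    return '(' * ems if ems >= 1 else ''

def needs_textile_escape(txt):
    # Same test as prepare_string_for_textile: markup chars beside whitespace.
    return bool(re.search(r'(\s([*&_+\-~@%|]|\?{2})\S)|(\S([*&_+\-~@%|]|\?{2})\s)', txt))

print(indent_markers(24))            # '((' -- a 24pt indent at a 12pt base is 2em
print(needs_textile_escape('a *b'))  # True -- would be emitted as ' ==a *b== '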
View File
@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-
"""unsmarten : html2textile helper function"""
__version__ = '0.1'
__author__ = 'Leigh Parry'
import re
def unsmarten(txt):
txt = re.sub(u'&#8211;|&ndash;|–', r'-', txt) # en-dash
txt = re.sub(u'&#8212;|&mdash;|—', r'--', txt) # em-dash
txt = re.sub(u'&#8230;|&hellip;|…', r'...', txt) # ellipsis
txt = re.sub(u'&#8220;|&#8221;|&#8243;|&ldquo;|&rdquo;|&Prime;|“|”|″', r'"', txt) # double quote
txt = re.sub(u'(["\'‘“]|\s)', r"\1{'/}", txt) # apostrophe
txt = re.sub(u'&#8216;|&#8217;|&#8242;|&lsquo;|&rsquo;|&prime;|||', r"'", txt) # single quote
txt = re.sub(u'&#162;|&cent;|¢', r'{c\}', txt) # cent
txt = re.sub(u'&#163;|&pound;|£', r'{L-}', txt) # pound
txt = re.sub(u'&#165;|&yen;|¥', r'{Y=}', txt) # yen
txt = re.sub(u'&#169;|&copy;|©', r'{(c)}', txt) # copyright
txt = re.sub(u'&#174;|&reg;|®', r'{(r)}', txt) # registered
txt = re.sub(u'&#188;|&frac14;|¼', r'{1/4}', txt) # quarter
txt = re.sub(u'&#189;|&frac12;|½', r'{1/2}', txt) # half
txt = re.sub(u'&#190;|&frac34;|¾', r'{3/4}', txt) # three-quarter
txt = re.sub(u'&#192;|&Agrave;|À', r'{A`}', txt) # A-grave
txt = re.sub(u'&#193;|&Aacute;|Á', r"{A'}", txt) # A-acute
txt = re.sub(u'&#194;|&Acirc;|Â', r'{A^}', txt) # A-circumflex
txt = re.sub(u'&#195;|&Atilde;|Ã', r'{A~}', txt) # A-tilde
txt = re.sub(u'&#196;|&Auml;|Ä', r'{A"}', txt) # A-umlaut
txt = re.sub(u'&#197;|&Aring;|Å', r'{Ao}', txt) # A-ring
txt = re.sub(u'&#198;|&AElig;|Æ', r'{AE}', txt) # AE
txt = re.sub(u'&#199;|&Ccedil;|Ç', r'{C,}', txt) # C-cedilla
txt = re.sub(u'&#200;|&Egrave;|È', r'{E`}', txt) # E-grave
txt = re.sub(u'&#201;|&Eacute;|É', r"{E'}", txt) # E-acute
txt = re.sub(u'&#202;|&Ecirc;|Ê', r'{E^}', txt) # E-circumflex
txt = re.sub(u'&#203;|&Euml;|Ë', r'{E"}', txt) # E-umlaut
txt = re.sub(u'&#204;|&Igrave;|Ì', r'{I`}', txt) # I-grave
txt = re.sub(u'&#205;|&Iacute;|Í', r"{I'}", txt) # I-acute
txt = re.sub(u'&#206;|&Icirc;|Î', r'{I^}', txt) # I-circumflex
txt = re.sub(u'&#207;|&Iuml;|Ï', r'{I"}', txt) # I-umlaut
txt = re.sub(u'&#208;|&ETH;|Ð', r'{D-}', txt) # ETH
txt = re.sub(u'&#209;|&Ntilde;|Ñ', r'{N~}', txt) # N-tilde
txt = re.sub(u'&#210;|&Ograve;|Ò', r'{O`}', txt) # O-grave
txt = re.sub(u'&#211;|&Oacute;|Ó', r"{O'}", txt) # O-acute
txt = re.sub(u'&#212;|&Ocirc;|Ô', r'{O^}', txt) # O-circumflex
txt = re.sub(u'&#213;|&Otilde;|Õ', r'{O~}', txt) # O-tilde
txt = re.sub(u'&#214;|&Ouml;|Ö', r'{O"}', txt) # O-umlaut
txt = re.sub(u'&#215;|&times;|×', r'{x}', txt) # dimension
txt = re.sub(u'&#216;|&Oslash;|Ø', r'{O/}', txt) # O-slash
txt = re.sub(u'&#217;|&Ugrave;|Ù', r"{U`}", txt) # U-grave
txt = re.sub(u'&#218;|&Uacute;|Ú', r"{U'}", txt) # U-acute
txt = re.sub(u'&#219;|&Ucirc;|Û', r'{U^}', txt) # U-circumflex
txt = re.sub(u'&#220;|&Uuml;|Ü', r'{U"}', txt) # U-umlaut
txt = re.sub(u'&#221;|&Yacute;|Ý', r"{Y'}", txt) # Y-acute
txt = re.sub(u'&#223;|&szlig;|ß', r'{sz}', txt) # sharp-s
txt = re.sub(u'&#224;|&agrave;|à', r'{a`}', txt) # a-grave
txt = re.sub(u'&#225;|&aacute;|á', r"{a'}", txt) # a-acute
txt = re.sub(u'&#226;|&acirc;|â', r'{a^}', txt) # a-circumflex
txt = re.sub(u'&#227;|&atilde;|ã', r'{a~}', txt) # a-tilde
txt = re.sub(u'&#228;|&auml;|ä', r'{a"}', txt) # a-umlaut
txt = re.sub(u'&#229;|&aring;|å', r'{ao}', txt) # a-ring
txt = re.sub(u'&#230;|&aelig;|æ', r'{ae}', txt) # ae
txt = re.sub(u'&#231;|&ccedil;|ç', r'{c,}', txt) # c-cedilla
txt = re.sub(u'&#232;|&egrave;|è', r'{e`}', txt) # e-grave
txt = re.sub(u'&#233;|&eacute;|é', r"{e'}", txt) # e-acute
txt = re.sub(u'&#234;|&ecirc;|ê', r'{e^}', txt) # e-circumflex
txt = re.sub(u'&#235;|&euml;|ë', r'{e"}', txt) # e-umlaut
txt = re.sub(u'&#236;|&igrave;|ì', r'{i`}', txt) # i-grave
txt = re.sub(u'&#237;|&iacute;|í', r"{i'}", txt) # i-acute
txt = re.sub(u'&#238;|&icirc;|î', r'{i^}', txt) # i-circumflex
txt = re.sub(u'&#239;|&iuml;|ï', r'{i"}', txt) # i-umlaut
txt = re.sub(u'&#240;|&eth;|ð', r'{d-}', txt) # eth
txt = re.sub(u'&#241;|&ntilde;|ñ', r'{n~}', txt) # n-tilde
txt = re.sub(u'&#242;|&ograve;|ò', r'{o`}', txt) # o-grave
txt = re.sub(u'&#243;|&oacute;|ó', r"{o'}", txt) # o-acute
txt = re.sub(u'&#244;|&ocirc;|ô', r'{o^}', txt) # o-circumflex
txt = re.sub(u'&#245;|&otilde;|õ', r'{o~}', txt) # o-tilde
txt = re.sub(u'&#246;|&ouml;|ö', r'{o"}', txt) # o-umlaut
txt = re.sub(u'&#248;|&oslash;|ø', r'{o/}', txt) # o-stroke
txt = re.sub(u'&#249;|&ugrave;|ù', r'{u`}', txt) # u-grave
txt = re.sub(u'&#250;|&uacute;|ú', r"{u'}", txt) # u-acute
txt = re.sub(u'&#251;|&ucirc;|û', r'{u^}', txt) # u-circumflex
txt = re.sub(u'&#252;|&uuml;|ü', r'{u"}', txt) # u-umlaut
txt = re.sub(u'&#253;|&yacute;|ý', r"{y'}", txt) # y-acute
txt = re.sub(u'&#255;|&yuml;|ÿ', r'{y"}', txt) # y-umlaut
txt = re.sub(u'&#338;|&OElig;|Œ', r'{OE}', txt) # OE
txt = re.sub(u'&#339;|&oelig;|œ', r'{oe}', txt) # oe
txt = re.sub(u'&#348;|&Scaron;|Ŝ', r'{S^}', txt) # Scaron
txt = re.sub(u'&#349;|&scaron;|ŝ', r'{s^}', txt) # scaron
txt = re.sub(u'&#8226;|&bull;|•', r'{*}', txt) # bullet
txt = re.sub(u'&#8355;|₣', r'{Fr}', txt) # Franc
txt = re.sub(u'&#8356;|₤', r'{L=}', txt) # Lira
txt = re.sub(u'&#8360;|₨', r'{Rs}', txt) # Rupee
txt = re.sub(u'&#8364;|&euro;|€', r'{C=}', txt) # euro
txt = re.sub(u'&#8482;|&trade;|™', r'{tm}', txt) # trademark
txt = re.sub(u'&#9824;|&spades;|♠', r'{spade}', txt) # spade
txt = re.sub(u'&#9827;|&clubs;|♣', r'{club}', txt) # club
txt = re.sub(u'&#9829;|&hearts;|♥', r'{heart}', txt) # heart
txt = re.sub(u'&#9830;|&diams;|♦', r'{diamond}', txt) # diamond
# Move into main code?
# txt = re.sub(u'\xa0', r'p. ', txt) # blank paragraph
# txt = re.sub(u'\n\n\n\n', r'\n\np. \n\n', txt) # blank paragraph
# txt = re.sub(u'\n \n', r'\n<br />\n', txt) # blank paragraph - br tag
return txt
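For a quick check of the helper above (my example input, not from the commit; the import path is assumed from the file's location in the TXT pipeline):

from calibre.ebooks.txt.unsmarten import unsmarten  # path assumed

print(unsmarten(u'\u201cHello\u201d \u2014 caf\xe9 \u20ac5'))
# -> "Hello" -- caf{e'} {C=}5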
View File
@ -620,7 +620,21 @@ class Application(QApplication):
     self.original_font = QFont(QApplication.font())
     fi = gprefs['font']
     if fi is not None:
-        QApplication.setFont(QFont(*fi))
+        font = QFont(*(fi[:4]))
+        s = gprefs.get('font_stretch', None)
+        if s is not None:
+            font.setStretch(s)
+        QApplication.setFont(font)
+    st = self.style()
+    if st is not None:
+        st = unicode(st.objectName()).lower()
+    if (islinux or isfreebsd) and st in ('windows', 'motif', 'cde'):
+        from PyQt4.Qt import QStyleFactory
+        styles = set(map(unicode, QStyleFactory.keys()))
+        if 'Cleanlooks' in styles:
+            self.setStyle('Cleanlooks')
+        else:
+            self.setStyle('Plastique')

 def _send_file_open_events(self):
     with self._file_open_lock:
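A plain-Python sketch of the preference-format change above, with made-up values: the saved font preference grows from four entries to five, and the code slices off the first four because QFont's constructor takes at most (family, pointSize, weight, italic); the fifth value is re-applied through setStretch(), so legacy four-entry preferences keep working.

fi = ['DejaVu Sans', 10, 50, False, 110]  # family, size, weight, italic, stretch
font_args = fi[:4]                        # the most QFont() accepts positionally
stretch = fi[4] if len(fi) > 4 else None  # legacy prefs carry no stretch
print(font_args)  # ['DejaVu Sans', 10, 50, False]
print(stretch)    # 110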
View File
@ -478,6 +478,10 @@ class EditMetadataAction(InterfaceAction):
 try:
     set_title = not mi.is_null('title')
     set_authors = not mi.is_null('authors')
+    idents = db.get_identifiers(i, index_is_id=True)
+    if mi.identifiers:
+        idents.update(mi.identifiers)
+    mi.identifiers = idents
     db.set_metadata(i, mi, commit=False, set_title=set_title,
                     set_authors=set_authors, notify=False)
     self.applied_ids.append(i)
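The fix amounts to a dict merge in which downloaded values win only where the keys overlap, so a custom identifier survives a metadata download. A minimal illustration with hypothetical values:

existing = {'isbn': '9780000000000', 'mylabel': 'x42'}      # custom id present
downloaded = {'isbn': '9781111111111', 'amazon': 'B000XYZ'}
idents = dict(existing)
idents.update(downloaded)
print(sorted(idents.items()))
# [('amazon', 'B000XYZ'), ('isbn', '9781111111111'), ('mylabel', 'x42')]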
View File
@ -10,6 +10,7 @@ from functools import partial
 from PyQt4.Qt import QMenu

+from calibre.gui2 import error_dialog
 from calibre.gui2.actions import InterfaceAction
 from calibre.gui2.dialogs.confirm_delete import confirm
@ -19,24 +20,86 @@ class StoreAction(InterfaceAction):
     action_spec = (_('Get books'), 'store.png', None, None)

     def genesis(self):
-        self.qaction.triggered.connect(self.search)
+        self.qaction.triggered.connect(self.do_search)
         self.store_menu = QMenu()
         self.load_menu()

     def load_menu(self):
         self.store_menu.clear()
-        self.store_menu.addAction(_('Search'), self.search)
+        self.store_menu.addAction(_('Search for ebooks'), self.search)
+        self.store_menu.addAction(_('Search for this author'), self.search_author)
+        self.store_menu.addAction(_('Search for this title'), self.search_title)
+        self.store_menu.addAction(_('Search for this book'), self.search_author_title)
         self.store_menu.addSeparator()
+        self.store_list_menu = self.store_menu.addMenu(_('Stores'))
         for n, p in sorted(self.gui.istores.items(), key=lambda x: x[0].lower()):
-            self.store_menu.addAction(n, partial(self.open_store, p))
+            self.store_list_menu.addAction(n, partial(self.open_store, p))
         self.qaction.setMenu(self.store_menu)

-    def search(self):
+    def do_search(self):
+        return self.search()
+
+    def search(self, query=''):
         self.show_disclaimer()
         from calibre.gui2.store.search.search import SearchDialog
-        sd = SearchDialog(self.gui.istores, self.gui)
+        sd = SearchDialog(self.gui.istores, self.gui, query)
         sd.exec_()

+    def _get_selected_row(self):
+        rows = self.gui.current_view().selectionModel().selectedRows()
+        if not rows or len(rows) == 0:
+            return None
+        return rows[0].row()
+
+    def _get_author(self, row):
+        author = ''
+        if self.gui.current_view() is self.gui.library_view:
+            author = self.gui.library_view.model().authors(row)
+            if author:
+                author = author.replace('|', ' ')
+        else:
+            mi = self.gui.current_view().model().get_book_display_info(row)
+            author = ' & '.join(mi.authors)
+        return author
+
+    def search_author(self):
+        row = self._get_selected_row()
+        if row == None:
+            error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
+            return
+        query = 'author:"%s"' % self._get_author(row)
+        self.search(query)
+
+    def _get_title(self, row):
+        title = ''
+        if self.gui.current_view() is self.gui.library_view:
+            title = self.gui.library_view.model().title(row)
+        else:
+            mi = self.gui.current_view().model().get_book_display_info(row)
+            title = mi.title
+        return title
+
+    def search_title(self):
+        row = self._get_selected_row()
+        if row == None:
+            error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
+            return
+        query = 'title:"%s"' % self._get_title(row)
+        self.search(query)
+
+    def search_author_title(self):
+        row = self._get_selected_row()
+        if row == None:
+            error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
+            return
+        query = 'author:"%s" title:"%s"' % (self._get_author(row), self._get_title(row))
+        self.search(query)
+
     def open_store(self, store_plugin):
         self.show_disclaimer()
         store_plugin.open(self.gui)
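The new menu actions reduce to building a Get Books query string from the selected book and handing it to search(). The query format is the one visible in search_author_title above; with hypothetical values:

author, title = 'Iain M. Banks', 'Excession'
query = 'author:"%s" title:"%s"' % (author, title)
print(query)  # author:"Iain M. Banks" title:"Excession"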
View File
@ -19,7 +19,7 @@ class PluginWidget(Widget, Ui_Form):
         Widget.__init__(self, parent,
                 ['newline', 'max_line_length', 'force_max_line_length',
                     'inline_toc', 'txt_output_formatting', 'keep_links', 'keep_image_references',
-                    'txt_output_encoding'])
+                    'keep_color', 'txt_output_encoding'])
         self.db, self.book_id = db, book_id
         for x in get_option('newline').option.choices:
             self.opt_newline.addItem(x)
View File
@ -122,6 +122,13 @@
      </property>
     </widget>
    </item>
+   <item>
+    <widget class="QCheckBox" name="opt_keep_color">
+     <property name="text">
+      <string>Keep text color, when possible</string>
+     </property>
+    </widget>
+   </item>
   </layout>
  </widget>
 </item>
View File
@ -506,6 +506,9 @@ class BooksModel(QAbstractTableModel): # {{{
     def id(self, row):
         return self.db.id(getattr(row, 'row', lambda:row)())

+    def authors(self, row_number):
+        return self.db.authors(row_number)
+
     def title(self, row_number):
         return self.db.title(row_number)
View File
@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
 import textwrap

 from PyQt4.Qt import (QWidget, QGridLayout, QGroupBox, QListView, Qt, QSpinBox,
-        QDoubleSpinBox, QCheckBox, QLineEdit, QComboBox, QLabel)
+        QDoubleSpinBox, QCheckBox, QLineEdit, QComboBox, QLabel, QVariant)

 from calibre.gui2.preferences.metadata_sources import FieldsModel as FM
@ -95,9 +95,9 @@ class ConfigWidget(QWidget):
             widget.setChecked(bool(val))
         elif opt.type == 'choices':
             widget = QComboBox(self)
-            for x in opt.choices:
-                widget.addItem(x)
-            idx = opt.choices.index(val)
+            for key, label in opt.choices.iteritems():
+                widget.addItem(label, QVariant(key))
+            idx = widget.findData(QVariant(val))
             widget.setCurrentIndex(idx)
             widget.opt = opt
             widget.setToolTip(textwrap.fill(opt.desc))
@ -124,7 +124,8 @@ class ConfigWidget(QWidget):
         elif isinstance(w, QCheckBox):
             val = w.isChecked()
         elif isinstance(w, QComboBox):
-            val = unicode(w.currentText())
+            idx = w.currentIndex()
+            val = unicode(w.itemData(idx).toString())
         self.plugin.prefs[w.opt.name] = val
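The point of the change above is that the combo box now persists the stable option key rather than the translated label, so a saved preference survives a UI language switch. A Qt-free sketch of the same idea (values are hypothetical):

choices = {'rating': 'Ratings', 'tags': 'Tags'}  # key -> display label
items = [(label, key) for key, label in sorted(choices.items())]
label, key = items[0]
print(label)  # 'Ratings' is what the user sees
print(key)    # 'rating' is what gets saved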
View File
@ -336,7 +336,9 @@ class MetadataSingleDialogBase(ResizableDialog):
         if not mi.is_null('tags'):
             self.tags.current_val = mi.tags
         if not mi.is_null('identifiers'):
-            self.identifiers.current_val = mi.identifiers
+            current = self.identifiers.current_val
+            current.update(mi.identifiers)
+            self.identifiers.current_val = current
         if not mi.is_null('pubdate'):
             self.pubdate.current_val = mi.pubdate
         if not mi.is_null('series') and mi.series.strip():
View File
@ -161,7 +161,11 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
     def initialize(self):
         ConfigWidgetBase.initialize(self)
-        self.current_font = self.initial_font = gprefs['font']
+        font = gprefs['font']
+        if font is not None:
+            font = list(font)
+            font.append(gprefs.get('font_stretch', QFont.Unstretched))
+        self.current_font = self.initial_font = font
         self.update_font_display()
         self.display_model.initialize()
@ -178,7 +182,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
     def build_font_obj(self):
         font_info = self.current_font
         if font_info is not None:
-            font = QFont(*font_info)
+            font = QFont(*(font_info[:4]))
+            font.setStretch(font_info[4])
         else:
             font = qt_app.original_font
         return font
@ -215,15 +220,18 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
         if fd.exec_() == fd.Accepted:
             font = fd.selectedFont()
             fi = QFontInfo(font)
-            self.current_font = (unicode(fi.family()), fi.pointSize(),
-                    fi.weight(), fi.italic())
+            self.current_font = [unicode(fi.family()), fi.pointSize(),
+                    fi.weight(), fi.italic(), font.stretch()]
             self.update_font_display()
             self.changed_signal.emit()

     def commit(self, *args):
         rr = ConfigWidgetBase.commit(self, *args)
         if self.current_font != self.initial_font:
-            gprefs['font'] = self.current_font
+            gprefs['font'] = (self.current_font[:4] if self.current_font else
+                    None)
+            gprefs['font_stretch'] = (self.current_font[4] if self.current_font
+                    is not None else QFont.Unstretched)
             QApplication.setFont(self.font_display.font())
             rr = True
         self.display_model.commit()
View File
@ -71,9 +71,10 @@ class SourcesModel(QAbstractTableModel): # {{{
                 plugin.is_configured()):
             return QIcon(I('list_remove.png'))
         elif role == Qt.ToolTipRole:
+            base = plugin.description + '\n\n'
             if plugin.is_configured():
-                return _('This source is configured and ready to go')
-            return _('This source needs configuration')
+                return base + _('This source is configured and ready to go')
+            return base + _('This source needs configuration')
         return NONE

     def setData(self, index, val, role):
View File
@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
 import random
 import re
-import urllib2
+import urllib
 from contextlib import closing

 from lxml import html
@ -48,7 +48,7 @@ class BNStore(BasicStoreConfig, StorePlugin):
     def search(self, query, max_results=10, timeout=60):
         url = 'http://productsearch.barnesandnoble.com/search/results.aspx?STORE=EBOOK&SZE=%s&WRD=' % max_results
-        url += urllib2.quote(query)
+        url += urllib.quote_plus(query)

         br = browser()
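The switch from urllib2.quote to urllib.quote_plus matters for multi-word queries: quote() percent-encodes a space as %20, while quote_plus() encodes it as '+', which is what form-style search endpoints such as this one expect (Python 2 urllib, matching the code above):

import urllib

print(urllib.quote('dark matter'))       # dark%20matter
print(urllib.quote_plus('dark matter'))  # dark+matter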
View File
@ -12,6 +12,7 @@ from threading import Thread
 from Queue import Queue

 from calibre import browser
+from calibre.constants import DEBUG
 from calibre.utils.magick.draw import thumbnail

 class GenericDownloadThreadPool(object):
@ -119,7 +120,8 @@ class SearchThread(Thread):
                 self.results.put((res, store_plugin))
                 self.tasks.task_done()
             except:
-                traceback.print_exc()
+                if DEBUG:
+                    traceback.print_exc()

 class CoverThreadPool(GenericDownloadThreadPool):
@ -157,7 +159,8 @@ class CoverThread(Thread):
                     callback()
                 self.tasks.task_done()
             except:
-                continue
+                if DEBUG:
+                    traceback.print_exc()

 class DetailsThreadPool(GenericDownloadThreadPool):
@ -191,7 +194,8 @@ class DetailsThread(Thread):
                     callback(result)
                 self.tasks.task_done()
             except:
-                continue
+                if DEBUG:
+                    traceback.print_exc()

 class CacheUpdateThreadPool(GenericDownloadThreadPool):
@ -221,4 +225,5 @@ class CacheUpdateThread(Thread):
                 store_plugin, timeout = self.tasks.get()
                 store_plugin.update_cache(timeout=timeout, suppress_progress=True)
             except:
-                traceback.print_exc()
+                if DEBUG:
+                    traceback.print_exc()
View File
@ -23,8 +23,8 @@ TIMEOUT = 75 # seconds
 class SearchDialog(QDialog, Ui_Dialog):

-    def __init__(self, istores, *args):
-        QDialog.__init__(self, *args)
+    def __init__(self, istores, parent=None, query=''):
+        QDialog.__init__(self, parent)
         self.setupUi(self)

         self.config = JSONConfig('store/search')
@ -54,6 +54,9 @@ class SearchDialog(QDialog, Ui_Dialog):
             setattr(self, 'store_check_' + x, cbox)
         stores_group_layout.addStretch()

+        # Set the search query
+        self.search_edit.setText(query)
+
         # Create and add the progress indicator
         self.pi = ProgressIndicator(self, 24)
         self.top_layout.addWidget(self.pi)
@ -93,7 +96,7 @@ class SearchDialog(QDialog, Ui_Dialog):
         # Store / Formats
         self.results_view.setColumnWidth(4, int(total*.25))

-    def do_search(self, checked=False):
+    def do_search(self):
         # Stop all running threads.
         self.checker.stop()
         self.search_pool.abort()
@ -136,14 +139,17 @@ class SearchDialog(QDialog, Ui_Dialog):
         query = query.replace('>', '')
         query = query.replace('<', '')
         # Remove the prefix.
-        for loc in ( 'all', 'author', 'authors', 'title'):
-            query = re.sub(r'%s:"?(?P<a>[^\s"]+)"?' % loc, '\g<a>', query)
+        for loc in ('all', 'author', 'authors', 'title'):
+            query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, '\g<a>', query)
+            query = query.replace('%s:' % loc, '')
         # Remove the prefix and search text.
         for loc in ('cover', 'drm', 'format', 'formats', 'price', 'store'):
             query = re.sub(r'%s:"[^"]"' % loc, '', query)
             query = re.sub(r'%s:[^\s]*' % loc, '', query)
         # Remove logic.
-        query = re.sub(r'(^|\s)(and|not|or)(\s|$)', ' ', query)
+        query = re.sub(r'(^|\s)(and|not|or|a|the|is|of)(\s|$)', ' ', query)
+        # Remove "
+        query = query.replace('"', '')
         # Remove excess whitespace.
         query = re.sub(r'\s{2,}', ' ', query)
         query = query.strip()
@ -252,4 +258,9 @@ class SearchDialog(QDialog, Ui_Dialog):
         self.search_pool.abort()
         self.cache_pool.abort()
         self.save_state()
+
+    def exec_(self):
+        if unicode(self.search_edit.text()).strip():
+            self.do_search()
+        return QDialog.exec_(self)
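The query clean-up above can be rerun outside the dialog; this simplified stand-alone version (the function name is mine, and only the rules touched in this hunk are included) shows what a field-prefixed query is reduced to before it is sent to stores that lack field search:

import re

def clean(query):
    for loc in ('all', 'author', 'authors', 'title'):
        query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, r'\g<a>', query)
        query = query.replace('%s:' % loc, '')
    query = re.sub(r'(^|\s)(and|not|or|a|the|is|of)(\s|$)', ' ', query)
    query = query.replace('"', '')
    return re.sub(r'\s{2,}', ' ', query).strip()

print(clean('author:"banks" and title:"excession"'))  # -> banks excession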
View File
@ -29,7 +29,7 @@ class WizardsTowerBooksStore(BasicStoreConfig, StorePlugin):
             detail_item = self.url + detail_item

         if external or self.config.get('open_external', False):
-            open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
+            open_url(QUrl(url_slash_cleaner(detail_item)))
         else:
             d = WebStoreDialog(self.gui, self.url, parent, detail_item)
             d.setWindowTitle(self.name)
@ -38,9 +38,9 @@ class WizardsTowerBooksStore(BasicStoreConfig, StorePlugin):
     def search(self, query, max_results=10, timeout=60):
         url = 'http://www.wizardstowerbooks.com/search.html?for=' + urllib.quote(query)

         br = browser()

         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
             doc = html.fromstring(f.read())
@ -60,13 +60,13 @@ class WizardsTowerBooksStore(BasicStoreConfig, StorePlugin):
                     price = price.strip()
                     if not price:
                         continue

                     title = ''.join(data.xpath('.//span[@class="prti"]/a/b/text()'))
                     author = ''.join(data.xpath('.//p[@class="last"]/text()'))
                     a, b, author = author.partition(' by ')

                     counter -= 1

                     s = SearchResult()
                     s.cover_url = cover_url
                     s.title = title.strip()
@ -74,15 +74,15 @@ class WizardsTowerBooksStore(BasicStoreConfig, StorePlugin):
                     s.price = price.strip()
                     s.detail_item = id.strip()
                     s.drm = SearchResult.DRM_UNLOCKED

                     yield s

     def get_details(self, search_result, timeout):
         br = browser()
         with closing(br.open(url_slash_cleaner(self.url + search_result.detail_item), timeout=timeout)) as nf:
             idata = html.fromstring(nf.read())

             formats = ', '.join(idata.xpath('//select[@id="N1_"]//option//text()'))
             search_result.formats = formats.upper()

         return True
View File
@ -1263,7 +1263,7 @@ class TagsModel(QAbstractItemModel): # {{{
                     d['last'] = data[key][cat_len-1]
                     name = eval_formatter.safe_format(collapse_template,
                                                       d, 'TAG_VIEW', None)
-                    self.beginInsertRows(category_index, 999999, 1) #len(data[key])-1)
+                    self.beginInsertRows(category_index, 999998, 999999) #len(data[key])-1)
                     sub_cat = TagTreeItem(parent=category, data = name,
                                           tooltip = None, temporary=True,
                                           category_icon = category_node.icon,
@ -1296,7 +1296,7 @@ class TagsModel(QAbstractItemModel): # {{{
                     key in ['authors', 'publisher', 'news', 'formats', 'rating'] or
                     key not in self.db.prefs.get('categories_using_hierarchy', []) or
                     len(components) == 1):
-                self.beginInsertRows(category_index, 999999, 1)
+                self.beginInsertRows(category_index, 999998, 999999)
                 n = TagTreeItem(parent=node_parent, data=tag, tooltip=tt,
                                 icon_map=self.icon_state_map)
                 if tag.id_set is not None:
@ -1332,7 +1332,7 @@ class TagsModel(QAbstractItemModel): # {{{
                         t.is_hierarchical = \
                             '5state' if t.category != 'search' else '3state'
                         t.name = comp
-                        self.beginInsertRows(category_index, 999999, 1)
+                        self.beginInsertRows(category_index, 999998, 999999)
                         node_parent = TagTreeItem(parent=node_parent, data=t,
                                                   tooltip=tt, icon_map=self.icon_state_map)
                         child_map[(comp,tag.category)] = node_parent
View File
@ -633,8 +633,8 @@ class LibraryPage(QWizardPage, LibraryUI):
         try:
             lang = prefs['language'].lower()[:2]
             metadata_plugins = {
-                    'zh' : ('Douban Books', 'Douban.com covers'),
-                    'fr' : ('Nicebooks', 'Nicebooks covers'),
+                    'zh' : ('Douban Books',),
+                    'fr' : ('Nicebooks',),
             }.get(lang, [])
             from calibre.customize.ui import enable_plugin
             for name in metadata_plugins:
38 file diffs suppressed because they are too large.

Some files were not shown because too many files have changed in this diff.