merge from trunk

2025-07-09 03:04:10 -04:00 · 2011-01-07 20:15:41 +00:00 · 2011-01-07 20:15:41 +00:00 · 6a745b68c1
commit 6a745b68c1
parent b0a3912867 83e116d59b
30 changed files with 587 additions and 272 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -4,6 +4,100 @@
 # for important features/bug fixes.
 # Also, each release can have new and improved recipes.
 - version: 0.7.38
  date: 2011-01-07
  new features:
    - title: "Reduce startup time when using a composite custom column"
    - title: "Template language: Add a list_item function for use with tags like columns. See User Manual for details"
    - title: "TXT Input: Attempt to detect the input encoding when not specified. Auto detect paragraph structure and formatting markup."
    - title: "Search & replace: Add ability to manipulate number and boolean columns."
    - title: "Add type ahead completion to the advanced search dialog."
      tickets: [8035]
    - title: "Double click on plugin in Preferences dialog to customize"
      tickets: [8175]
    - title: "Allow customization of the SONY driver to send thumbnail to the device. Useful with newer SONY readers"
      tickets: [8161]
    - title: "Smarten punctuation: Convert double dashes to em dashes. Preprocessing: Various tweaks"
  bug fixes:
    - title: "Fix regression causing the template formatter to interpret a missing format letter as ERROR instead of 's'."
    - title: "Fix regression that broke conversion of PNG images in PDF files on OS X."
      tickets: [8215]
    - title: "Content server: Fix improper XML escaping of category titles in the OPDS feeds"
      tickets: [8225]
    - title: "When decoding XML if the XML starts with a UTF-8 BOM decode as UTF-8. Fixes parsing of FB2 files with UTF-8 BOMs"
    - title: "E-book viewer: When scrolling to a bookmark and the content is wider than the window, do not scroll in the horizontal direction"
    - title: "E-book viewer: Fix next page skipping the bottom of chapters when the content is wider than the window."
      tickets: [8153]
    - title: "FB2 Output: Insert covers."
      tickets: [8172]
    - title: "Content server: When serving OPDS feeds handle html descriptions that have namespaced attributes."
      tickets: [7938]
    - title: "When downloading metadata from isbndb.com, download a maximum of 30 results rather than 1000"
    - title: "Fix sorting of tags column"
    - title: "Change search/replace to show commas instead of vertical bars as the separator for multiple authors"
    - title: "Template language: Make all column names case insensitive"
    - title: "Fix bug that prevented the Disabled option for Tag Browser partitioning from working in the Preferences dialog"
    - title: "Fix bug when using tags like custom column in the template language"
    - title: "Fix bug where composite custom columns using general_program_mode fields are not evaluated correctly when used in a template."
    - title: "ImageMagick interface: Don't crash when asked to open empty image files"
    - title: "Kobo driver: Add TXT,CBZ,CBR to supported formats list"
      tickets: [8124]
    - title: "Don't unnecessarily scroll the book list horizontally when re-selecting previously selected rows."
  new recipes:
    - title: "New London Day"
      author: "Being"
    - title: "Walla"
      author: "marbs"
    - title: "New Journal of Physics"
      author: "Chema Cortes"
    - title: "The Baltimore Sun"
      author: "Josh Hall"
    - title: "Arabian Business and Sunday Times (UK)"
      author: "Darko Miletic"
    - title: "Deia"
      author: "Gerardo Diez"
    - title: "Smarter Planet"
      author: "Jack Mason"
  improved recipes:
    - The Atlantic
    - Danas
    - Ledevoir
 - version: 0.7.37
  date: 2011-01-02
--- a/resources/recipes/new_london_day.recipe
+++ b/resources/recipes/new_london_day.recipe
@ -0,0 +1,74 @@
 __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1294342201(BasicNewsRecipe):
    '''
    News recipe for The Day (New London, CT), built from the theday.com
    RSS section feeds listed in ``feeds``.
    '''
    title          = u'New London Day'
    __author__  = 'Being'
    description = 'State, local and business news from New London, CT'
    # NOTE(review): 'en_GB' looks odd for a Connecticut newspaper -- confirm
    # the intended language code before changing it.
    language = 'en_GB'
    oldest_article = 1
    max_articles_per_feed = 200
    use_embedded_content    = False
    no_stylesheets        = True
    remove_javascript = True
    remove_tags_before = dict(id='article')
    # remove_tags_after and remove_tags used to be assigned twice in this
    # class body; only the last assignment of each ever took effect, so only
    # those effective values are kept here.
    remove_tags_after = [    {'class':['photo_article',]} ]
    remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer"]},
                   {'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent"]},
                   dict(name='font',attrs={'id':["cr-other-headlines"]})]
    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
                    .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
                    .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
                    .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
 		'''
    feeds = [
            (u'All News', u'http://www.theday.com/section/rss'),
            (u'Breaking News', u'http://www.theday.com/section/rss01'),
            (u'Police and Courts', u'http://www.theday.com/section/rss02'),
            (u'State News', u'http://www.theday.com/section/rss03'),
            (u'Local Business', u'http://www.theday.com/section/rss04'),
            (u'Entertainment', u'http://www.theday.com/section/rss05'),
            (u'Opinion', u'http://www.theday.com/section/rss06'),
            (u'Casinos', u'http://www.theday.com/section/rss12'),
            (u'Defense and Military', u'http://www.theday.com/section/rss14'),
            (u'Ann Baldelli Ruminations', u'http://www.theday.com/section/rss20'),
            (u'Paul Choiniere Ruminations', u'http://www.theday.com/section/rss21'),
            (u'Michael Costanza Omnivore', u'http://www.theday.com/section/rss23'),
            (u'Rebecca Dangelo Reel Life', u'http://www.theday.com/section/rss25'),]
    def print_version(self, url):
        '''Return the printer-friendly URL by swapping /index.html for /print.html.'''
        return url.replace('/index.html', '/print.html')
    def get_article_url(self, article):
        '''Pick the article URL: feedburner_origlink, else guid, else link.'''
        return article.get('feedburner_origlink', article.get('guid', article.get('link')))
    def postprocess_html(self, soup, first_fetch):
        '''
        Rename table/tr/td tags to div and drop the comments form and the
        "other headlines" font block, then return the modified soup.
        '''
        for t in soup.findAll(['table', 'tr', 'td']):
            t.name = 'div'
        # Attribute filters must be passed via the attrs= keyword. Handing
        # findAll a dict(attrs=...) positionally makes it look for an HTML
        # attribute literally called "attrs", which never matches.
        for tag in soup.findAll('form', attrs={'name':["comments_form"]}):
            tag.extract()
        for tag in soup.findAll('font', attrs={'id':["cr-other-headlines"]}):
            tag.extract()
        return soup
--- a/resources/recipes/njp.recipe
+++ b/resources/recipes/njp.recipe
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 __license__     = 'GPL v3'
-__copyright__   = 'Chema Cortés - 2011-01-05'
+__copyright__   = u'Chema Cort\xe9s - 2011-01-05'
 __version__     = 'v0.01'
 __date__        = '2011-01-05'
 '''
--- a/resources/recipes/walla.recipe
+++ b/resources/recipes/walla.recipe
@ -0,0 +1,44 @@
 # -*- coding: utf-8 -*-
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    '''
    News recipe for Walla (Hebrew), built from the rss.walla.co.il section
    feeds listed in ``feeds``.
    '''
    description   = 'The WallaNews.'
    cover_url      = 'http://ftp5.bizportal.co.il/web/giflib/news/rsPhoto/sz_5/rsz_220_220_logo_walla.gif'
    title          = u'Walla'
    language              = 'he'
    __author__ = 'marbs'
    # NOTE(review): the commas between rules and the selectors without a
    # leading '.' look unintended; left byte-for-byte pending confirmation.
    extra_css='img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }'
    simultaneous_downloads = 5
 #    remove_javascript     = True
    timefmt        = '[%a, %d %b, %Y]'
    oldest_article = 1
    # Previously assigned twice with the same value; one assignment suffices.
    max_articles_per_feed = 100
 #   remove_attributes = ['width']
    # The recipe API documents keep_only_tags as a list of tag specs.
    keep_only_tags = [dict(name='div', attrs={'class':'wp-0-b w3'})]
    remove_tags = [dict(name='div', attrs={'class':'tagsContainer'})]
 #    preprocess_regexps = [
 #        (re.compile(r'<p>&nbsp;</p>', re.DOTALL|re.IGNORECASE), lambda match: '')
 #        ]
    feeds          = [(u'חדשות', u'http://rss.walla.co.il/?w=/1/0/1/@rss'),
                           (u'עסקים', u'http://rss.walla.co.il/?w=/2/3/1/@rss'),
                           (u'תרבות', u'http://rss.walla.co.il/?w=/4/249/1/@rss'),
                           (u'בריאות', u'http://rss.walla.co.il/?w=/5/18/1/@rss'),
                           (u'TECH', u'http://rss.walla.co.il/?w=/6/4/1/@rss'),
                           (u'אסטרולוגיה', u'http://rss.walla.co.il/?w=/8/3307/1/@rss'),
                           (u'בעלי חיים', u'http://rss.walla.co.il/?w=/59/5703/1/@rss'),
                           (u'רכב', u'http://rss.walla.co.il/?w=/31/4700/1/@rss'),
                           (u'סלבס', u'http://rss.walla.co.il/?w=/22/3600/1/@rss'),
                           (u'אוכל', u'http://rss.walla.co.il/?w=/9/903/1/@rss'),
                           (u'אופנה', u'http://rss.walla.co.il/?w=/24/2120/1/@rss'),
                           (u'ברנזה', u'http://rss.walla.co.il/?w=/27/3900/1/@rss'),
                           (u'ZONE', u'http://rss.walla.co.il/?w=/18/500/1/@rss'),
                           (u'ספורט', u'http://rss.walla.co.il/?w=/3/7/1/@rss')]
    def print_version(self, url):
        '''Return the printer-view URL by appending /@@/item/printer to url.'''
        return url + '/@@/item/printer'
--- a/resources/viewer/bookmarks.js
+++ b/resources/viewer/bookmarks.js
@ -41,6 +41,7 @@ function scroll_to_bookmark(bookmark) {
    $.scrollTo($(bm[0]), 1000,
        {
            over:ratio,
            axis: 'y', // Do not scroll in the x direction
            onAfter:function(){window.py_bridge.animated_scroll_done()}
        }
    );
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.7.37'
+__version__   = '0.7.38'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
 import re
--- a/src/calibre/ebooks/chardet/init.py
+++ b/src/calibre/ebooks/chardet/init.py
@ -18,7 +18,7 @@
 __version__ = "1.0"
-import re
+import re, codecs
 def detect(aBuf):
    import calibre.ebooks.chardet.universaldetector as universaldetector
@ -83,9 +83,11 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
    if not raw:
        return u'', encoding
    if not isinstance(raw, unicode):
-        if raw.startswith('\xff\xfe'):
+        if raw.startswith(codecs.BOM_UTF8):
            raw, encoding = raw.decode('utf-8')[1:], 'utf-8'
        elif raw.startswith(codecs.BOM_UTF16_LE):
            raw, encoding = raw.decode('utf-16-le')[1:], 'utf-16-le'
-        elif raw.startswith('\xfe\xff'):
+        elif raw.startswith(codecs.BOM_UTF16_BE):
            raw, encoding = raw.decode('utf-16-be')[1:], 'utf-16-be'
    if not isinstance(raw, unicode):
        for pat in ENCODING_PATS:
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -51,16 +51,16 @@ def chap_head(match):
    chap = match.group('chap')
    title = match.group('title')
    if not title:
-               return '<h1>'+chap+'</h1><br/>\n'
+        return '<h1>'+chap+'</h1><br/>\n'
    else:
-               return '<h1>'+chap+'</h1>\n<h3>'+title+'</h3>\n'
+        return '<h1>'+chap+'</h1>\n<h3>'+title+'</h3>\n'
 def wrap_lines(match):
    ital = match.group('ital')
    if not ital:
-               return ' '
+        return ' '
    else:
-               return ital+' '
+        return ital+' '
 class DocAnalysis(object):
    '''
@ -191,7 +191,7 @@ class Dehyphenator(object):
        dehyphenated = unicode(firsthalf) + unicode(secondhalf)
        lookupword = self.removesuffixes.sub('', dehyphenated)
        if self.prefixes.match(firsthalf) is None:
-           lookupword = self.removeprefix.sub('', lookupword)
+            lookupword = self.removeprefix.sub('', lookupword)
        #print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
        try:
            searchresult = self.html.find(lookupword.lower())
@ -353,7 +353,7 @@ class HTMLPreProcessor(object):
                  (re.compile(r'((?<=</a>)\s*file:////?[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''),
                  # Center separator lines
-                  (re.compile(u'<br>\s*(?P<break>([*#•]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'),
+                  (re.compile(u'<br>\s*(?P<break>([*#•✦]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'),
                  # Remove page links
                  (re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),
@ -363,13 +363,11 @@ class HTMLPreProcessor(object):
                  # Remove gray background
                  (re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>'),
-                  # Detect Chapters to match default XPATH in GUI
+                  # Convert line breaks to paragraphs
-                  (re.compile(r'<br>\s*(?P<chap>(<[ibu]>){0,2}\s*.?(Introduction|Chapter|Kapitel|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(</[ibu]>){0,2})\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<br>)?', re.IGNORECASE), chap_head),
+                  (re.compile(r'<br[^>]*>\s*'), lambda match : '</p>\n<p>'),
-                  # Cover the case where every letter in a chapter title is separated by a space
+                  (re.compile(r'<body[^>]*>\s*'), lambda match : '<body>\n<p>'),
-                  (re.compile(r'<br>\s*(?P<chap>([A-Z]\s+){4,}\s*([\d\w-]+\s*){0,3}\s*)\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*(<br>))?'), chap_head),
+                  (re.compile(r'\s*</body>'), lambda match : '</p>\n</body>'),
                  # Have paragraphs show better
                  (re.compile(r'<br.*?>'), lambda match : '<p>'),
                  # Clean up spaces
                  (re.compile(u'(?<=[\.,;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '),
                  # Add space before and after italics
@ -455,9 +453,9 @@ class HTMLPreProcessor(object):
        # delete soft hyphens - moved here so it's executed after header/footer removal
        if is_pdftohtml:
            # unwrap/delete soft hyphens
-            end_rules.append((re.compile(u'[](\s*<p>)+\s*(?=[[a-z\d])'), lambda match: ''))
+            end_rules.append((re.compile(u'[](</p>\s*<p>\s*)+\s*(?=[[a-z\d])'), lambda match: ''))
            # unwrap/delete soft hyphens with formatting
-            end_rules.append((re.compile(u'[]\s*(</(i|u|b)>)+(\s*<p>)+\s*(<(i|u|b)>)+\s*(?=[[a-z\d])'), lambda match: ''))
+            end_rules.append((re.compile(u'[]\s*(</(i|u|b)>)+(</p>\s*<p>\s*)+\s*(<(i|u|b)>)+\s*(?=[[a-z\d])'), lambda match: ''))
        # Make the more aggressive chapter marking regex optional with the preprocess option to
        # reduce false positives and move after header/footer removal
@ -475,7 +473,7 @@ class HTMLPreProcessor(object):
                end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*<p>\s*(?=[[a-z\d])' % length), lambda match: ''))
                end_rules.append(
                    # Un wrap using punctuation
-                    (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężı,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
+                    (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðß,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
                )
        for rule in self.PREPROCESS + start_rules:
@ -508,7 +506,15 @@ class HTMLPreProcessor(object):
        if is_pdftohtml and length > -1:
            # Dehyphenate
            dehyphenator = Dehyphenator()
-            html = dehyphenator(html,'pdf', length)
+            html = dehyphenator(html,'html', length)
        if is_pdftohtml:
            from calibre.ebooks.conversion.utils import PreProcessor
            pdf_markup = PreProcessor(self.extra_opts, None)
            totalwords = 0
            totalwords = pdf_markup.get_word_count(html)
            if totalwords > 7000:
                html = pdf_markup.markup_chapters(html, totalwords, True)
        #dump(html, 'post-preprocess')
@ -554,5 +560,9 @@ class HTMLPreProcessor(object):
        html = smartyPants(html)
        html = html.replace(start, '<!--')
        html = html.replace(stop, '-->')
        # convert ellipsis to entities to prevent wrapping
        html = re.sub('(?u)(?<=\w)\s?(\.\s?){2}\.', '&hellip;', html)
        # convert double dashes to em-dash
        html = re.sub('\s--\s', u'\u2014', html)
        return substitute_entites(html)
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@ -6,8 +6,10 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import re
 from math import ceil
 from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
 from calibre.utils.logging import default_log
 from calibre.utils.wordcount import get_wordcount_obj
 class PreProcessor(object):
@ -17,6 +19,9 @@ class PreProcessor(object):
        self.found_indents = 0
        self.extra_opts = extra_opts
    def is_pdftohtml(self, src):
        '''
        Return True when calibre's pdftohtml marker comment occurs entirely
        within the first 1000 characters of src.
        '''
        marker = '<!-- created by calibre\'s pdftohtml -->'
        head = src[:1000]
        return marker in head
    def chapter_head(self, match):
        chap = match.group('chap')
        title = match.group('title')
@ -64,7 +69,7 @@ class PreProcessor(object):
        inspect.  Percent is the minimum percent of line endings which should
        be marked up to return true.
        '''
-        htm_end_ere = re.compile('</p>', re.DOTALL)
+        htm_end_ere = re.compile('</(p|div)>', re.DOTALL)
        line_end_ere = re.compile('(\n|\r|\r\n)', re.DOTALL)
        htm_end = htm_end_ere.findall(raw)
        line_end = line_end_ere.findall(raw)
@ -101,36 +106,125 @@ class PreProcessor(object):
                with open(os.path.join(odir, name), 'wb') as f:
                    f.write(raw.encode('utf-8'))
    def get_word_count(self, html):
        '''
        Strip the <head> section and every tag from html, then return the
        ``words`` attribute of the object built by get_wordcount_obj.
        '''
        # Remove the whole <head>...</head> section first so its text is not counted
        without_head = re.sub(r'(?s)<head[^>]*>.*?</head>', '', html)
        # Then remove every remaining tag, leaving only text
        plain_text = re.sub(r'<[^>]*>', '', without_head)
        return get_wordcount_obj(plain_text).words
    def markup_chapters(self, html, wordcount, blanks_between_paragraphs):
        '''
        Search html for lines that look like chapter headings and pass each
        match to self.chapter_head for replacement.

        :param html: the markup to search.
        :param wordcount: total word count, used to set self.min_chapters
            (one chapter per 7000 words, rounded up, with a minimum of 1).
        :param blanks_between_paragraphs: when true, up to two empty <p>
            paragraphs are allowed after the chapter line.
        :return: html after all attempted substitutions.

        Side effects: sets self.min_chapters and self.html_preprocess_sections
        and writes progress messages through self.log.
        '''
        # Typical chapters are between 2000 and 7000 words, use the larger number to decide the
        # minimum of chapters to search for
        self.min_chapters = 1
        if wordcount > 7000:
            self.min_chapters = int(ceil(wordcount / 7000.))
        #print "minimum chapters required are: "+str(self.min_chapters)
        # Count the h1-h3 headings already present before any markup is attempted
        heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
        self.html_preprocess_sections = len(heading.findall(html))
        self.log("found " + unicode(self.html_preprocess_sections) + " pre-existing headings")
        # Build the Regular Expressions in pieces
        init_lookahead = "(?=<(p|div))"
        chapter_line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*"
        title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*"
        chapter_header_open = r"(?P<chap>"
        title_header_open = r"(?P<title>"
        chapter_header_close = ")\s*"
        title_header_close = ")"
        chapter_line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>"
        title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)>)?\s*</(?P=outer2)>"
        # For pdftohtml output, swap in simpler line patterns limited to <p>
        # with at most one i/b/u wrapper on the chapter line
        is_pdftohtml = self.is_pdftohtml(html)
        if is_pdftohtml:
            chapter_line_open = "<(?P<outer>p)[^>]*>(\s*<[ibu][^>]*>)?\s*"
            chapter_line_close = "\s*(</[ibu][^>]*>\s*)?</(?P=outer)>"
            title_line_open = "<(?P<outer2>p)[^>]*>\s*"
            title_line_close = "\s*</(?P=outer2)>"
        if blanks_between_paragraphs:
            blank_lines = "(\s*<p[^>]*>\s*</p>){0,2}\s*"
        else:
            blank_lines = ""
        opt_title_open = "("
        opt_title_close = ")?"
        n_lookahead_open = "\s+(?!"
        n_lookahead_close = ")"
        default_title = r"(<[ibu][^>]*>)?\s{0,3}([\w\'\"-]+\s{0,3}){1,5}?(</[ibu][^>]*>)?(?=<)"
        # Each entry is [chapter regex, lookahead_ignorecase flag, log message].
        # The flag selects both the regex flags and where the negative
        # lookahead is placed when the full pattern is assembled below.
        chapter_types = [
            [r"[^'\"]?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication|Preface)\s*([\d\w-]+\:?\s*){0,4}", True, "Searching for common Chapter Headings"],
            [r"<b[^>]*>\s*(<span[^>]*>)?\s*(?!([*#•]+\s*)+)(\s*(?=[\d.\w#\-*\s]+<)([\d.\w#-*]+\s*){1,5}\s*)(?!\.)(</span>)?\s*</b>", True, "Searching for emphasized lines"], # Emphasized lines
            [r"[^'\"]?(\d+(\.|:)|CHAPTER)\s*([\dA-Z\-\'\"#,]+\s*){0,7}\s*", True, "Searching for numeric chapter headings"],  # Numeric Chapters
            [r"([A-Z]\s+){3,}\s*([\d\w-]+\s*){0,3}\s*", True, "Searching for letter spaced headings"],  # Spaced Lettering
            [r"[^'\"]?(\d+\.?\s+([\d\w-]+\:?\'?-?\s?){0,5})\s*", True, "Searching for numeric chapters with titles"], # Numeric Titles
            [r"[^'\"]?(\d+|CHAPTER)\s*([\dA-Z\-\'\"\?!#,]+\s*){0,7}\s*", True, "Searching for simple numeric chapter headings"],  # Numeric Chapters, no dot or colon
            [r"\s*[^'\"]?([A-Z#]+(\s|-){0,3}){1,5}\s*", False, "Searching for chapters with Uppercase Characters" ] # Uppercase Chapters
            ]
        # Start with most typical chapter headings, get more aggressive until one works
        for [chapter_type, lookahead_ignorecase, log_message] in chapter_types:
            # NOTE(review): html_preprocess_sections is not modified inside this
            # loop; presumably self.chapter_head increments it -- confirm.
            if self.html_preprocess_sections >= self.min_chapters:
                break
            full_chapter_line = chapter_line_open+chapter_header_open+chapter_type+chapter_header_close+chapter_line_close
            # Rewrite the "ou"/"in"/"cha" substrings so the named groups in the
            # lookahead copy do not collide with those in full_chapter_line
            n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line)
            self.log("Marked " + unicode(self.html_preprocess_sections) + " headings, " + log_message)
            if lookahead_ignorecase:
                # Case-insensitive pattern: negative lookahead sits before the optional title line
                chapter_marker = init_lookahead+full_chapter_line+blank_lines+n_lookahead_open+n_lookahead+n_lookahead_close+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
                chapdetect = re.compile(r'%s' % chapter_marker, re.IGNORECASE)
            else:
                # Case-sensitive pattern: negative lookahead sits after the optional title line
                chapter_marker = init_lookahead+full_chapter_line+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close+n_lookahead_open+n_lookahead+n_lookahead_close
                chapdetect = re.compile(r'%s' % chapter_marker, re.UNICODE)
            html = chapdetect.sub(self.chapter_head, html)
        # Average words per section, used for the log message only; falls back
        # to the total when no sections were counted
        words_per_chptr = wordcount
        if words_per_chptr > 0 and self.html_preprocess_sections > 0:
            words_per_chptr = wordcount / self.html_preprocess_sections
        self.log("Total wordcount is: "+ str(wordcount)+", Average words per section is: "+str(words_per_chptr)+", Marked up "+str(self.html_preprocess_sections)+" chapters")
        return html
    def __call__(self, html):
        self.log("*********  Preprocessing HTML  *********")
        # Count the words in the document to estimate how many chapters to look for and whether
        # other types of processing are attempted
        totalwords = 0
        totalwords = self.get_word_count(html)
        if totalwords < 20:
            self.log("not enough text, not preprocessing")
            return html
        # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
-        html = re.sub(r"\s*</p>", "</p>\n", html)
+        html = re.sub(r"\s*</(?P<tag>p|div)>", "</"+"\g<tag>"+">\n", html)
-        html = re.sub(r"\s*<p(?P<style>[^>]*)>\s*", "\n<p"+"\g<style>"+">", html)
+        html = re.sub(r"\s*<(?P<tag>p|div)(?P<style>[^>]*)>\s*", "\n<"+"\g<tag>"+"\g<style>"+">", html)
        ###### Check Markup ######
        #
        # some lit files don't have any <p> tags or equivalent (generally just plain text between
        # <pre> tags), check and  mark up line endings if required before proceeding
        if self.no_markup(html, 0.1):
-             self.log("not enough paragraph markers, adding now")
+            self.log("not enough paragraph markers, adding now")
-             # check if content is in pre tags, use txt processor to mark up if so
+            # check if content is in pre tags, use txt processor to mark up if so
-             pre = re.compile(r'<pre>', re.IGNORECASE)
+            pre = re.compile(r'<pre>', re.IGNORECASE)
-             if len(pre.findall(html)) == 1:
+            if len(pre.findall(html)) == 1:
-                 self.log("Running Text Processing")
+                self.log("Running Text Processing")
-                 from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
+                from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
-                 separate_paragraphs_single_line
+                separate_paragraphs_single_line
-                 outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*)(?=</pre>).*', re.IGNORECASE|re.DOTALL)
+                outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*)(?=</pre>).*', re.IGNORECASE|re.DOTALL)
-                 html = outerhtml.sub('\g<text>', html)
+                html = outerhtml.sub('\g<text>', html)
-                 html = separate_paragraphs_single_line(html)
+                html = separate_paragraphs_single_line(html)
-                 html = preserve_spaces(html)
+                html = preserve_spaces(html)
-                 html = convert_basic(html, epub_split_size_kb=0)
+                html = convert_basic(html, epub_split_size_kb=0)
-             else:
+            else:
-                 # Add markup naively
+                # Add markup naively
-                 # TODO - find out if there are cases where there are more than one <pre> tag or
+                # TODO - find out if there are cases where there are more than one <pre> tag or
-                 # other types of unmarked html and handle them in some better fashion
+                # other types of unmarked html and handle them in some better fashion
-                 add_markup = re.compile('(?<!>)(\n)')
+                add_markup = re.compile('(?<!>)(\n)')
-                 html = add_markup.sub('</p>\n<p>', html)
+                html = add_markup.sub('</p>\n<p>', html)
        ###### Mark Indents/Cleanup ######
        #
@ -141,12 +235,17 @@ class PreProcessor(object):
            self.log("replaced "+unicode(self.found_indents)+ " nbsp indents with inline styles")
        # remove remaining non-breaking spaces
        html = re.sub(ur'\u00a0', ' ', html)
        # Get rid of various common microsoft specific tags which can cause issues later
        # Get rid of empty <o:p> tags to simplify other processing
        html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
        # Delete microsoft 'smart' tags
        html = re.sub('(?i)</?st1:\w+>', '', html)
        # Get rid of empty span, bold, & italics tags
        html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
        html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html)
        html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
        # ADE doesn't render <br />, change to empty paragraphs
        #html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html)
        # If more than 40% of the lines are empty paragraphs and the user has enabled remove
        # paragraph spacing then delete blank lines to clean up spacing
@ -164,63 +263,16 @@ class PreProcessor(object):
                self.log("deleting blank lines")
                html = blankreg.sub('', html)
            elif float(len(blanklines)) / float(len(lines)) > 0.40:
-               blanks_between_paragraphs = True
+                blanks_between_paragraphs = True
-               #print "blanks between paragraphs is marked True"
+                #print "blanks between paragraphs is marked True"
            else:
                blanks_between_paragraphs = False
        #self.dump(html, 'before_chapter_markup')
        # detect chapters/sections to match xpath or splitting logic
        #
        # Build the Regular Expressions in pieces
        init_lookahead = "(?=<(p|div))"
        chapter_line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*"
        title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*"
        chapter_header_open = r"(?P<chap>"
        title_header_open = r"(?P<title>"
        chapter_header_close = ")\s*"
        title_header_close = ")"
        chapter_line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>"
        title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)>)?\s*</(?P=outer2)>"
-        if blanks_between_paragraphs:
+        html = self.markup_chapters(html, totalwords, blanks_between_paragraphs)
            blank_lines = "(\s*<p[^>]*>\s*</p>){0,2}\s*"
        else:
            blank_lines = ""
        opt_title_open = "("
        opt_title_close = ")?"
        n_lookahead_open = "\s+(?!"
        n_lookahead_close = ")"
        default_title = r"\s{0,3}([\w\'\"-]+\s{0,3}){1,5}?(?=<)"
        min_chapters = 10
        heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
        self.html_preprocess_sections = len(heading.findall(html))
        self.log("found " + unicode(self.html_preprocess_sections) + " pre-existing headings")
        chapter_types = [
            [r"[^'\"]?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,4}", True, "Searching for common Chapter Headings"],
            [r"[^'\"]?(\d+\.?|CHAPTER)\s*([\dA-Z\-\'\"\?\.!#,]+\s*){0,7}\s*", True, "Searching for numeric chapter headings"],  # Numeric Chapters
            [r"<b[^>]*>\s*(<span[^>]*>)?\s*(?!([*#•]+\s*)+)(\s*(?=[\w#\-*\s]+<)([\w#-*]+\s*){1,5}\s*)(</span>)?\s*</b>", True, "Searching for emphasized lines"], # Emphasized lines
            [r"[^'\"]?(\d+\.?\s+([\d\w-]+\:?\'?-?\s?){0,5})\s*", True, "Searching for numeric chapters with titles"], # Numeric Titles
            [r"\s*[^'\"]?([A-Z#]+(\s|-){0,3}){1,5}\s*", False, "Searching for chapters with Uppercase Characters" ] # Uppercase Chapters
            ]
        # Start with most typical chapter headings, get more aggressive until one works
        for [chapter_type, lookahead_ignorecase, log_message] in chapter_types:
            if self.html_preprocess_sections >= min_chapters:
                break
            full_chapter_line = chapter_line_open+chapter_header_open+chapter_type+chapter_header_close+chapter_line_close
            n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line)
            self.log("Marked " + unicode(self.html_preprocess_sections) + " headings, " + log_message)
            if lookahead_ignorecase:
                chapter_marker = init_lookahead+full_chapter_line+blank_lines+n_lookahead_open+n_lookahead+n_lookahead_close+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
                chapdetect = re.compile(r'%s' % chapter_marker, re.IGNORECASE)
            else:
                chapter_marker = init_lookahead+full_chapter_line+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close+n_lookahead_open+n_lookahead+n_lookahead_close
                chapdetect = re.compile(r'%s' % chapter_marker, re.UNICODE)
            html = chapdetect.sub(self.chapter_head, html)
        ###### Unwrap lines ######
@ -247,7 +299,7 @@ class PreProcessor(object):
        # Calculate Length
        unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
        length = docanalysis.line_length(unwrap_factor)
-        self.log("*** Median line length is " + unicode(length) + ", calculated with " + format + " format ***")
+        self.log("Median line length is " + unicode(length) + ", calculated with " + format + " format")
        # only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor
        if hardbreaks or unwrap_factor < 0.4:
            self.log("Unwrapping required, unwrapping Lines")
@ -260,7 +312,7 @@ class PreProcessor(object):
            self.log("Done dehyphenating")
            # Unwrap lines using punctation and line length
            #unwrap_quotes = re.compile(u"(?<=.{%i}\"')\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*(?=[a-z])" % length, re.UNICODE)
-            unwrap = re.compile(u"(?<=.{%i}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężı,:)\IA\u00DF]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
+            unwrap = re.compile(u"(?<=.{%i}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðß,:)\IA\u00DF]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
            html = unwrap.sub(' ', html)
            #check any remaining hyphens, but only unwrap if there is a match
            dehyphenator = Dehyphenator()
@ -276,7 +328,7 @@ class PreProcessor(object):
        html = re.sub(u'\xad\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html)
        # If still no sections after unwrapping mark split points on lines with no punctuation
-        if self.html_preprocess_sections < 5:
+        if self.html_preprocess_sections < self.min_chapters:
            self.log("Looking for more split points based on punctuation,"
                    " currently have " + unicode(self.html_preprocess_sections))
            chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([*#•]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -173,7 +173,7 @@ class FB2MLizer(object):
            if title_item.spine_position is None and title_item.media_type == 'application/xhtml+xml':
                self.oeb_book.spine.insert(0, title_item, True)
        # Create xhtml page to reference cover image so it can be used.
-        if self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
+        if not title_name and self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
            id = unicode(self.oeb_book.metadata.cover[0])
            cover_item = self.oeb_book.manifest.ids[id]
            if cover_item.media_type in OEB_RASTER_IMAGES:
--- a/src/calibre/ebooks/fb2/input.py
+++ b/src/calibre/ebooks/fb2/input.py
@ -46,15 +46,19 @@ class FB2Input(InputFormatPlugin):
        log.debug('Parsing XML...')
        raw = stream.read().replace('\0', '')
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
-            assume_utf8=True)[0]
+            assume_utf8=True, resolve_entities=True)[0]
        try:
            doc = etree.fromstring(raw)
        except etree.XMLSyntaxError:
            try:
                doc = etree.fromstring(raw, parser=RECOVER_PARSER)
                if doc is None:
                    raise Exception('parse failed')
            except:
                doc = etree.fromstring(raw.replace('& ', '&amp;'),
                        parser=RECOVER_PARSER)
        if doc is None:
            raise ValueError('The FB2 file is not valid XML')
        stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
        css = ''
        for s in stylesheets:
--- a/src/calibre/ebooks/pdb/input.py
+++ b/src/calibre/ebooks/pdb/input.py
@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
 import os
-from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
+from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.pdb.header import PdbHeaderReader
 from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
 from calibre.ebooks.conversion.utils import PreProcessor
@ -18,30 +18,6 @@ class PDBInput(InputFormatPlugin):
    description = 'Convert PDB to HTML'
    file_types  = set(['pdb'])
    options = set([
        OptionRecommendation(name='paragraph_type', recommended_value='auto',
            choices=['auto', 'block', 'single', 'print'],
            help=_('Paragraph structure.\n'
                   'choices are [\'auto\', \'block\', \'single\', \'print\', \'markdown\']\n'
                   '* auto: Try to auto detect paragraph type.\n'
                   '* block: Treat a blank line as a paragraph break.\n'
                   '* single: Assume every line is a paragraph.\n'
                   '* print:  Assume every line starting with 2+ spaces or a tab '
                   'starts a paragraph.')),
        OptionRecommendation(name='formatting_type', recommended_value='auto',
            choices=['auto', 'none', 'markdown'],
            help=_('Formatting used within the document.'
                   '* auto: Try to auto detect the document formatting.\n'
                   '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n'
                   '* markdown: Run the input though the markdown pre-processor. '
                   'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
        OptionRecommendation(name='preserve_spaces', recommended_value=False,
            help=_('Normally extra spaces are condensed into a single space. '
                'With this option all spaces will be displayed.')),
        OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
            help=_('Do not insert a Table of Contents into the output text.')),
    ])
    def convert(self, stream, options, file_ext, log,
                accelerators):
        header = PdbHeaderReader(stream)
@ -60,4 +36,4 @@ class PDBInput(InputFormatPlugin):
    def preprocess_html(self, options, html):
        self.options = options
        preprocessor = PreProcessor(self.options, log=getattr(self, 'log', None))
-        return preprocessor(html)
+        return preprocessor(html)
--- a/src/calibre/ebooks/pdb/pdf/reader.py
+++ b/src/calibre/ebooks/pdb/pdf/reader.py
@ -19,9 +19,6 @@ class Reader(FormatReader):
        self.stream = stream
        self.log = log
        self.options = options
        setattr(self.options, 'new_pdf_engine', False)
        setattr(self.options, 'no_images', False)
        setattr(self.options, 'unwrap_factor', 0.45)
    def extract_content(self, output_dir):
        self.log.info('Extracting PDF...')
@ -31,7 +28,12 @@ class Reader(FormatReader):
            for x in xrange(self.header.section_count()):
                pdf.write(self.header.section_data(x))
-            from calibre.customize.ui import plugin_for_input_format
+        from calibre.customize.ui import plugin_for_input_format
-            pdf.seek(0)
+
-            return plugin_for_input_format('pdf').convert(pdf, self.options,
+        pdf_plugin = plugin_for_input_format('pdf')
-                'pdf', self.log, [])
+        for option in pdf_plugin.options:
            if not hasattr(self.options, option.option.name):
                setattr(self.options, option.name, option.recommended_value)
        pdf.seek(0)
        return pdf_plugin.convert(pdf, self.options, 'pdf', self.log, {})
--- a/src/calibre/ebooks/tcr/input.py
+++ b/src/calibre/ebooks/tcr/input.py
@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
 from cStringIO import StringIO
-from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
+from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.compression.tcr import decompress
 class TCRInput(InputFormatPlugin):
@ -16,30 +16,6 @@ class TCRInput(InputFormatPlugin):
    description = 'Convert TCR files to HTML'
    file_types  = set(['tcr'])
    options = set([
        OptionRecommendation(name='paragraph_type', recommended_value='auto',
            choices=['auto', 'block', 'single', 'print'],
            help=_('Paragraph structure.\n'
                   'choices are [\'auto\', \'block\', \'single\', \'print\', \'markdown\']\n'
                   '* auto: Try to auto detect paragraph type.\n'
                   '* block: Treat a blank line as a paragraph break.\n'
                   '* single: Assume every line is a paragraph.\n'
                   '* print:  Assume every line starting with 2+ spaces or a tab '
                   'starts a paragraph.')),
        OptionRecommendation(name='formatting_type', recommended_value='auto',
            choices=['auto', 'none', 'markdown'],
            help=_('Formatting used within the document.'
                   '* auto: Try to auto detect the document formatting.\n'
                   '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n'
                   '* markdown: Run the input though the markdown pre-processor. '
                   'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
        OptionRecommendation(name='preserve_spaces', recommended_value=False,
            help=_('Normally extra spaces are condensed into a single space. '
                'With this option all spaces will be displayed.')),
        OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
            help=_('Do not insert a Table of Contents into the output text.')),
    ])
    def convert(self, stream, options, file_ext, log, accelerators):
        log.info('Decompressing text...')
        raw_txt = decompress(stream)
--- a/src/calibre/gui2/book_details.py
+++ b/src/calibre/gui2/book_details.py
@ -256,8 +256,10 @@ class BookInfo(QWebView):
                    % (left_pane, right_pane)))
    def mouseDoubleClickEvent(self, ev):
-        if self.width() - ev.x() < 25 or \
+        swidth = self.page().mainFrame().scrollBarGeometry(Qt.Vertical).width()
-            self.height() - ev.y() < 25:
+        sheight = self.page().mainFrame().scrollBarGeometry(Qt.Horizontal).height()
        if self.width() - ev.x() < swidth or \
            self.height() - ev.y() < sheight:
            # Filter out double clicks on the scroll bar
            ev.accept()
        else:
--- a/src/calibre/gui2/convert/init.py
+++ b/src/calibre/gui2/convert/init.py
@ -6,7 +6,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-import textwrap
+import textwrap, codecs
 from functools import partial
 from PyQt4.Qt import QWidget, QSpinBox, QDoubleSpinBox, QLineEdit, QTextEdit, \
@ -128,6 +128,7 @@ class Widget(QWidget):
    def get_value(self, g):
        from calibre.gui2.convert.xpath_wizard import XPathEdit
        from calibre.gui2.convert.regex_builder import RegexEdit
        from calibre.gui2.widgets import EncodingComboBox
        ret = self.get_value_handler(g)
        if ret != 'this is a dummy return value, xcswx1avcx4x':
            return ret
@ -139,6 +140,13 @@ class Widget(QWidget):
            if not ans:
                ans = None
            return ans
        elif isinstance(g, EncodingComboBox):
            ans = unicode(g.currentText()).strip()
            try:
                codecs.lookup(ans)
            except:
                ans = ''
            return ans
        elif isinstance(g, QComboBox):
            return unicode(g.currentText())
        elif isinstance(g, QCheckBox):
--- a/src/calibre/gui2/convert/pdb_input.py
+++ b/src/calibre/gui2/convert/pdb_input.py
@ -1,25 +0,0 @@
 # -*- coding: utf-8 -*-
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 from calibre.gui2.convert.txt_input_ui import Ui_Form
 from calibre.gui2.convert import Widget
 class PluginWidget(Widget, Ui_Form):
    TITLE = _('PDB Input')
    HELP = _('Options specific to')+' PDB '+_('input')
    COMMIT_NAME = 'pdb_input'
    ICON = I('mimetypes/txt.png')
    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        Widget.__init__(self, parent,
            ['paragraph_type', 'formatting_type', 'markdown_disable_toc', 'preserve_spaces'])
        self.db, self.book_id = db, book_id
        for x in get_option('paragraph_type').option.choices:
            self.opt_paragraph_type.addItem(x)
        for x in get_option('formatting_type').option.choices:
            self.opt_formatting_type.addItem(x)
        self.initialize_options(get_option, get_help, db, book_id)
--- a/src/calibre/gui2/convert/tcr_input.py
+++ b/src/calibre/gui2/convert/tcr_input.py
@ -1,25 +0,0 @@
 # -*- coding: utf-8 -*-
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 from calibre.gui2.convert.txt_input_ui import Ui_Form
 from calibre.gui2.convert import Widget
 class PluginWidget(Widget, Ui_Form):
    TITLE = _('TCR Input')
    HELP = _('Options specific to')+' TCR '+_('input')
    COMMIT_NAME = 'tcr_input'
    ICON = I('mimetypes/txt.png')
    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        Widget.__init__(self, parent,
            ['paragraph_type', 'formatting_type', 'markdown_disable_toc', 'preserve_spaces'])
        self.db, self.book_id = db, book_id
        for x in get_option('paragraph_type').option.choices:
            self.opt_paragraph_type.addItem(x)
        for x in get_option('formatting_type').option.choices:
            self.opt_formatting_type.addItem(x)
        self.initialize_options(get_option, get_help, db, book_id)
--- a/src/calibre/gui2/tools.py
+++ b/src/calibre/gui2/tools.py
@ -61,7 +61,8 @@ def convert_single_ebook(parent, db, book_ids, auto_conversion=False, out_format
                    dtitle = unicode(mi.title)
                except:
                    dtitle = repr(mi.title)
-                desc = _('Convert book %d of %d (%s)') % (i + 1, total, dtitle)
+                desc = _('Convert book %(num)d of %(total)d (%(title)s)') % \
                        {'num':i + 1, 'total':total, 'title':dtitle}
                recs = cPickle.loads(d.recommendations)
                if d.opf_file is not None:
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@ -449,7 +449,7 @@ class Document(QWebPage): # {{{
        return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results
    def set_bottom_padding(self, amount):
-        s = QSize(-1, -1) if amount == 0 else QSize(self.width,
+        s = QSize(-1, -1) if amount == 0 else QSize(self.viewportSize().width(),
                self.height+amount)
        self.setPreferredContentsSize(s)
@ -820,6 +820,7 @@ class DocumentView(QWebView): # {{{
                        self.flipper.initialize(self.current_page_image())
                    self.manager.next_document()
                return
            #oheight = self.document.height
            lower_limit = opos + delta_y # Max value of top y co-ord after scrolling
            max_y = self.document.height - window_height # The maximum possible top y co-ord
            if max_y < lower_limit:
@ -835,6 +836,7 @@ class DocumentView(QWebView): # {{{
            if epf:
                self.flipper.initialize(self.current_page_image())
            #print 'Document height:', self.document.height
            #print 'Height change:', (self.document.height - oheight)
            max_y = self.document.height - window_height
            lower_limit = min(max_y, lower_limit)
            #print 'Scroll to:', lower_limit
@ -842,6 +844,7 @@ class DocumentView(QWebView): # {{{
                self.document.scroll_to(self.document.xpos, lower_limit)
            actually_scrolled = self.document.ypos - opos
            #print 'After scroll pos:', self.document.ypos
            #print 'Scrolled by:', self.document.ypos - opos
            self.find_next_blank_line(window_height - actually_scrolled)
            #print 'After blank line pos:', self.document.ypos
            if epf:
--- a/src/calibre/library/server/browse.py
+++ b/src/calibre/library/server/browse.py
@ -640,8 +640,8 @@ class BrowseServer(object):
            if fmt:
                href = self.opts.url_prefix + '/get/%s/%s_%d.%s'%(
                        fmt, fname, id_, fmt)
-                rt = xml(_('Read %s in the %s format')%(args['title'],
+                rt = xml(_('Read %(title)s in the %(fmt)s format')% \
-                        fmt.upper()), True)
+                        {'title':args['title'], 'fmt':fmt.upper()}, True)
                args['get_button'] = \
                        '<a href="%s" class="read" title="%s">%s</a>' % \
--- a/src/calibre/library/server/opds.py
+++ b/src/calibre/library/server/opds.py
@ -128,9 +128,9 @@ def CATALOG_ENTRY(item, item_kind, base_href, version, updated,
        count = ''
    if item.category == 'authors' and \
            tweaks['categories_use_field_for_author_name'] == 'author_sort':
-        name = xml(item.sort)
+        name = item.sort
    else:
-        name = xml(item.name)
+        name = item.name
    return E.entry(
            TITLE(name + ('' if not add_kind else ' (%s)'%item_kind)),
            ID(id_),
--- a/src/calibre/translations/cs.po
+++ b/src/calibre/translations/cs.po
@ -8,13 +8,13 @@ msgstr ""
 "Project-Id-Version: calibre\n"
 "Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
 "POT-Creation-Date: 2011-01-02 23:55+0000\n"
-"PO-Revision-Date: 2011-01-04 08:51+0000\n"
+"PO-Revision-Date: 2011-01-06 11:10+0000\n"
-"Last-Translator: TomVal <Unknown>\n"
+"Last-Translator: schunka <Unknown>\n"
 "Language-Team: Czech <cs@li.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-01-05 04:43+0000\n"
+"X-Launchpad-Export-Date: 2011-01-07 04:57+0000\n"
 "X-Generator: Launchpad (build Unknown)\n"
 #: /home/kovid/work/calibre/src/calibre/customize/__init__.py:43
@ -799,7 +799,7 @@ msgstr "Spojit se s Sanda Bambook eBook čtečkou"
 #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:25
 msgid "Li Fanxi"
-msgstr ""
+msgstr "Li Fanxi"
 #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:41
 msgid "Device IP Address (restart calibre after changing)"
@ -1126,11 +1126,11 @@ msgstr "Komunikovat se zařízením Trekstor"
 #: /home/kovid/work/calibre/src/calibre/devices/misc.py:251
 msgid "Communicate with the EEE Reader"
-msgstr ""
+msgstr "Probíhá spojení se čtečkou EEE Reader."
 #: /home/kovid/work/calibre/src/calibre/devices/misc.py:271
 msgid "Communicate with the Nextbook Reader"
-msgstr ""
+msgstr "Probíhá spojení se čtečkou Nextbook Reader."
 #: /home/kovid/work/calibre/src/calibre/devices/nokia/driver.py:17
 msgid "Communicate with the Nokia 770 internet tablet."
@ -1174,11 +1174,11 @@ msgstr "Spojit se se Sony eBook reader"
 #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:61
 msgid "All by title"
-msgstr ""
+msgstr "Vše podle názvu"
 #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:62
 msgid "All by author"
-msgstr ""
+msgstr "Vše podle autora"
 #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:64
 msgid ""
@ -1226,7 +1226,7 @@ msgstr "Spojit se se Sovos reader."
 #: /home/kovid/work/calibre/src/calibre/devices/teclast/driver.py:78
 msgid "Communicate with the Sunstech EB700 reader."
-msgstr ""
+msgstr "Probíhá spojení se čtečkou Sunstech EB700."
 #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:258
 msgid "Unable to detect the %s disk drive. Try rebooting."
--- a/src/calibre/translations/de.po
+++ b/src/calibre/translations/de.po
@ -8,13 +8,13 @@ msgstr ""
 "Project-Id-Version: de\n"
 "Report-Msgid-Bugs-To: \n"
 "POT-Creation-Date: 2011-01-02 23:55+0000\n"
-"PO-Revision-Date: 2011-01-01 21:21+0000\n"
+"PO-Revision-Date: 2011-01-07 02:17+0000\n"
-"Last-Translator: Kovid Goyal <Unknown>\n"
+"Last-Translator: heinz beck <Unknown>\n"
 "Language-Team: American English <kde-i18n-doc@lists.kde.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-01-04 13:52+0000\n"
+"X-Launchpad-Export-Date: 2011-01-07 04:58+0000\n"
 "X-Generator: Launchpad (build Unknown)\n"
 "Generated-By: pygettext.py 1.5\n"
@ -943,7 +943,7 @@ msgstr "Kommunikation mit dem PocketBook 301 Reader."
 #: /home/kovid/work/calibre/src/calibre/devices/eb600/driver.py:233
 msgid "Communicate with the PocketBook 602/603/902/903 reader."
-msgstr ""
+msgstr "Kommunikation mit dem PocketBook 602/603/902/903 Reader."
 #: /home/kovid/work/calibre/src/calibre/devices/eb600/driver.py:252
 msgid "Communicate with the PocketBook 701"
@ -1186,7 +1186,7 @@ msgstr "Kommunikation mit allen Sony eBook Readern."
 #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:61
 msgid "All by title"
-msgstr ""
+msgstr "Alle nach Titel"
 #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:62
 msgid "All by author"
--- a/src/calibre/translations/gl.po
+++ b/src/calibre/translations/gl.po
@ -8,13 +8,13 @@ msgstr ""
 "Project-Id-Version: calibre\n"
 "Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
 "POT-Creation-Date: 2011-01-02 23:55+0000\n"
-"PO-Revision-Date: 2011-01-02 13:21+0000\n"
+"PO-Revision-Date: 2011-01-06 14:46+0000\n"
-"Last-Translator: Calidonia Hibernia <Unknown>\n"
+"Last-Translator: Antón Méixome <meixome@gmail.com>\n"
 "Language-Team: dev@gl.openoffice.org\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-01-04 13:52+0000\n"
+"X-Launchpad-Export-Date: 2011-01-07 04:58+0000\n"
 "X-Generator: Launchpad (build Unknown)\n"
 "Language: gl\n"
@ -5749,7 +5749,7 @@ msgstr "Tamaño da mensaxe para a descrición das miniaturas de portada"
 #: /home/kovid/work/calibre/src/calibre/gui2/catalog/catalog_epub_mobi_ui.py:330
 msgid " inch"
-msgstr ""
+msgstr " polgada"
 #: /home/kovid/work/calibre/src/calibre/gui2/catalog/catalog_epub_mobi_ui.py:331
 msgid "&Description note"
@ -10645,15 +10645,15 @@ msgstr "Nunca"
 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel.py:60
 msgid "By first letter"
-msgstr ""
+msgstr "Pola primeira letra"
 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel.py:60
 msgid "Disabled"
-msgstr ""
+msgstr "Desactivado"
 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel.py:61
 msgid "Partitioned"
-msgstr ""
+msgstr "Particionado"
 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:140
 msgid "User Interface &layout (needs restart):"
@ -10709,7 +10709,7 @@ msgstr "Buscar mentres se escribe"
 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:152
 msgid "Tags browser category partitioning method:"
-msgstr ""
+msgstr "Método de particionado con categorías de etiquetas de navegación:"
 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:153
 msgid ""
@ -10719,10 +10719,15 @@ msgid ""
 "have a list of fixed-sized groups. Set to disabled\n"
 "if you never want subcategories"
 msgstr ""
 "Escoller como as subcategorías de etiquetas de navegación se amosan cando\n"
 "hai máis ítems que os do límite. Seleccione por primeira\n"
 "letra para ver unha lista A, B, C. Escolla particionado para\n"
 "ter unha lista de grupos de tamaño fixo. Escolla desactivado\n"
 "se non vai querer nunca subcategorías"
 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:158
 msgid "Collapse when more items than:"
-msgstr ""
+msgstr "Colapsar cando os ítems son máis de:"
 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:159
 msgid ""
@ -10730,6 +10735,10 @@ msgid ""
 "up into sub-categories. If the partition method is set to disable, this "
 "value is ignored."
 msgstr ""
 "Se unha categoría de etiquetas de navegación ten máis ca este número de "
 "ítems, divídese\n"
 "en subcategorías. Se o método de partición se pon como desactivado, "
 "ignorarase este valor."
 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/look_feel_ui.py:161
 msgid "&Toolbar"
@ -11494,7 +11503,7 @@ msgstr "Mostrar todas as categorías"
 #: /home/kovid/work/calibre/src/calibre/gui2/tag_view.py:300
 msgid "Change sub-categorization scheme"
-msgstr ""
+msgstr "Cambiar o esquema de subcategorización"
 #: /home/kovid/work/calibre/src/calibre/gui2/tag_view.py:625
 msgid ""
--- a/src/calibre/translations/it.po
+++ b/src/calibre/translations/it.po
@ -9,13 +9,13 @@ msgstr ""
 "Project-Id-Version: calibre_calibre-it\n"
 "Report-Msgid-Bugs-To: \n"
 "POT-Creation-Date: 2011-01-02 23:55+0000\n"
-"PO-Revision-Date: 2011-01-02 22:45+0000\n"
+"PO-Revision-Date: 2011-01-06 15:33+0000\n"
-"Last-Translator: Marco Ciampa <ciampix@libero.it>\n"
+"Last-Translator: Francesco Pasa <Unknown>\n"
 "Language-Team: italiano\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-01-04 13:53+0000\n"
+"X-Launchpad-Export-Date: 2011-01-07 04:58+0000\n"
 "X-Generator: Launchpad (build Unknown)\n"
 "X-Poedit-Bookmarks: -1,-1,-1,-1,-1,1105,-1,1312,-1,-1\n"
 "Generated-By: pygettext.py 1.5\n"
@ -5694,7 +5694,7 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/gui2/catalog/catalog_epub_mobi_ui.py:330
 msgid " inch"
-msgstr ""
+msgstr " pollice"
 #: /home/kovid/work/calibre/src/calibre/gui2/catalog/catalog_epub_mobi_ui.py:331
 msgid "&Description note"
--- a/src/calibre/translations/nds.po
+++ b/src/calibre/translations/nds.po
@ -8,13 +8,13 @@ msgstr ""
 "Project-Id-Version: nds\n"
 "Report-Msgid-Bugs-To: \n"
 "POT-Creation-Date: 2011-01-02 23:55+0000\n"
-"PO-Revision-Date: 2010-10-18 00:57+0000\n"
+"PO-Revision-Date: 2011-01-07 02:48+0000\n"
-"Last-Translator: Nils-Christoph Fiedler <ncfiedler@gnome.org>\n"
+"Last-Translator: heinz beck <Unknown>\n"
 "Language-Team: German\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-01-04 13:55+0000\n"
+"X-Launchpad-Export-Date: 2011-01-07 04:59+0000\n"
 "X-Generator: Launchpad (build Unknown)\n"
 "X-Poedit-Country: GERMANY\n"
 "X-Poedit-Language: German\n"
--- a/src/calibre/translations/pt_BR.po
+++ b/src/calibre/translations/pt_BR.po
@ -8,13 +8,13 @@ msgstr ""
 "Project-Id-Version: calibre\n"
 "Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
 "POT-Creation-Date: 2011-01-02 23:55+0000\n"
-"PO-Revision-Date: 2010-12-18 05:47+0000\n"
+"PO-Revision-Date: 2011-01-06 13:01+0000\n"
-"Last-Translator: Kovid Goyal <Unknown>\n"
+"Last-Translator: MoroniGranja <Unknown>\n"
 "Language-Team: American English <kde-i18n-doc@kde.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-01-04 14:00+0000\n"
+"X-Launchpad-Export-Date: 2011-01-07 04:59+0000\n"
 "X-Generator: Launchpad (build Unknown)\n"
 #: /home/kovid/work/calibre/src/calibre/customize/__init__.py:43
@ -172,7 +172,7 @@ msgstr "Leitor de metadados"
 #: /home/kovid/work/calibre/src/calibre/customize/__init__.py:266
 msgid "Metadata writer"
-msgstr ""
+msgstr "Escritor de metadados"
 #: /home/kovid/work/calibre/src/calibre/customize/__init__.py:296
 msgid "Catalog generator"
@ -589,6 +589,8 @@ msgid ""
 "Intended for the Samsung Galaxy and similar tablet devices with a resolution "
 "of 600x1280"
 msgstr ""
 "Planejado para o Samsung Galaxy e tablets similares com uma resolução "
 "de 600x1280"
 #: /home/kovid/work/calibre/src/calibre/customize/profiles.py:471
 msgid "This profile is intended for the Kobo Reader."
@ -695,7 +697,7 @@ msgstr "Desabilitar a extensão com nome"
 #: /home/kovid/work/calibre/src/calibre/debug.py:148
 msgid "Debug log"
-msgstr ""
+msgstr "Log de Debug"
 #: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:13
 msgid "Communicate with Android phones."
@ -808,7 +810,7 @@ msgstr "Comunicar com iTunes."
 #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:24
 msgid "Communicate with the Sanda Bambook eBook reader."
-msgstr ""
+msgstr "Comunicar com o leitor de eBooks Sanda Bambook"
 #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:25
 msgid "Li Fanxi"
@ -817,17 +819,22 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:41
 msgid "Device IP Address (restart calibre after changing)"
 msgstr ""
 "Endereço IP do dispositivo (é necessário reiniciar calibre após modificar)"
 #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:46
 msgid ""
 "Unable to add book to library directly from Bambook. Please save the book to "
 "disk and add the file to library from disk."
 msgstr ""
 "Impossível adicionar livro à biblioteca diretamente do Bambook. Favor salvar "
 "o livro no disco e adicionar o arquivo do disco à biblioteca."
 #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:66
 msgid ""
 "Unable to connect to Bambook, you need to install Bambook library first."
 msgstr ""
 "Não foi possível conectar ao Bambook, é necessário instalar a biblioteca "
 "Bambook."
 #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:74
 msgid ""
@ -835,10 +842,13 @@ msgid ""
 "If you are trying to connect via Wi-Fi, please make sure the IP address of "
 "Bambook has been correctly configured."
 msgstr ""
 "Não foi possível conectar ao Bambook. \n"
 "Se você está tentando conectar por Wi-Fi, favor confirmar se o endereço IP "
 "do Bambook foi configurado corretamente."
 #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:111
 msgid "Bambook"
-msgstr ""
+msgstr "Bambook"
 #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:217
 #: /home/kovid/work/calibre/src/calibre/devices/bambook/driver.py:233
@ -899,7 +909,7 @@ msgstr "Enviando metadados ao dispositivo..."
 #: /home/kovid/work/calibre/src/calibre/devices/bambook/libbambookcore.py:132
 msgid "Bambook SDK has not been installed."
-msgstr ""
+msgstr "Bambook SDK não foi instalado."
 #: /home/kovid/work/calibre/src/calibre/devices/binatone/driver.py:17
 msgid "Communicate with the Binatone Readme eBook reader."
@ -938,11 +948,11 @@ msgstr "Comunica-se com o leitor PocketBook 301"
 #: /home/kovid/work/calibre/src/calibre/devices/eb600/driver.py:233
 msgid "Communicate with the PocketBook 602/603/902/903 reader."
-msgstr ""
+msgstr "Comunicar-se com o PocketBook 602/603/902/903 reader."
 #: /home/kovid/work/calibre/src/calibre/devices/eb600/driver.py:252
 msgid "Communicate with the PocketBook 701"
-msgstr ""
+msgstr "Comunicar-se com o PocketBook 701"
 #: /home/kovid/work/calibre/src/calibre/devices/edge/driver.py:17
 msgid "Entourage Edge"
@ -1069,6 +1079,8 @@ msgid ""
 "The Kobo supports only one collection currently: the \"Im_Reading\" list.  "
 "Create a tag called \"Im_Reading\" "
 msgstr ""
 "O Kobo aceita apenas uma coleção atualmente: a lista \"Estou_Lendo\". Crie "
 "uma tag chamada \"Estou_Lendo\" "
 #: /home/kovid/work/calibre/src/calibre/devices/kobo/driver.py:446
 #: /home/kovid/work/calibre/src/calibre/gui2/actions/add.py:279
@ -1097,7 +1109,7 @@ msgstr "Comunicar com o Sweex MM300"
 #: /home/kovid/work/calibre/src/calibre/devices/misc.py:79
 msgid "Communicate with the Digma Q600"
-msgstr ""
+msgstr "Comunicar-se com o Digma Q600"
 #: /home/kovid/work/calibre/src/calibre/devices/misc.py:88
 msgid "Communicate with the Kogan"
@ -1110,7 +1122,7 @@ msgstr "Comunicar com o Pandigital Novel"
 #: /home/kovid/work/calibre/src/calibre/devices/misc.py:142
 msgid "Communicate with the VelocityMicro"
-msgstr ""
+msgstr "Comunicar-se com o VelocityMicro"
 #: /home/kovid/work/calibre/src/calibre/devices/misc.py:160
 msgid "Communicate with the GM2000"
@ -1118,23 +1130,23 @@ msgstr "Comunicar com o GM2000"
 #: /home/kovid/work/calibre/src/calibre/devices/misc.py:180
 msgid "Communicate with the Acer Lumiread"
-msgstr ""
+msgstr "Comunicar-se com o Acer Lumiread"
 #: /home/kovid/work/calibre/src/calibre/devices/misc.py:211
 msgid "Communicate with the Aluratek Color"
-msgstr ""
+msgstr "Comunicar-se com o Aluratek Color"
 #: /home/kovid/work/calibre/src/calibre/devices/misc.py:231
 msgid "Communicate with the Trekstor"
-msgstr ""
+msgstr "Comunicar-se com o Trekstor"
 #: /home/kovid/work/calibre/src/calibre/devices/misc.py:251
 msgid "Communicate with the EEE Reader"
-msgstr ""
+msgstr "Comunicar-se com o EEE Reader"
 #: /home/kovid/work/calibre/src/calibre/devices/misc.py:271
 msgid "Communicate with the Nextbook Reader"
-msgstr ""
+msgstr "Comunicar-se com o Nextbook Reader"
 #: /home/kovid/work/calibre/src/calibre/devices/nokia/driver.py:17
 msgid "Communicate with the Nokia 770 internet tablet."
@ -1142,7 +1154,7 @@ msgstr "Comunica-se com o Nokia 770 Internet Tablet."
 #: /home/kovid/work/calibre/src/calibre/devices/nokia/driver.py:40
 msgid "Communicate with the Nokia 810/900 internet tablet."
-msgstr ""
+msgstr "Comunicar-se com o internet tablet Nokia 810/900."
 #: /home/kovid/work/calibre/src/calibre/devices/nokia/driver.py:74
 msgid "Communicate with the Nokia E52"
@ -1158,11 +1170,11 @@ msgstr "Comunica-se com o leitor Nook."
 #: /home/kovid/work/calibre/src/calibre/devices/nook/driver.py:85
 msgid "Nook Color"
-msgstr ""
+msgstr "Nook Color"
 #: /home/kovid/work/calibre/src/calibre/devices/nook/driver.py:86
 msgid "Communicate with the Nook Color eBook reader."
-msgstr ""
+msgstr "Comunicar-se com o Nook Color."
 #: /home/kovid/work/calibre/src/calibre/devices/nuut2/driver.py:17
 msgid "Communicate with the Nuut2 eBook reader."
@ -1178,11 +1190,11 @@ msgstr "Comunica-se com todos os leitores da Sony."
 #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:61
 msgid "All by title"
-msgstr ""
+msgstr "Todos por título"
 #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:62
 msgid "All by author"
-msgstr ""
+msgstr "Todos por autor"
 #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:64
 msgid ""
@ -1198,6 +1210,9 @@ msgid ""
 "to the list to enable them. The collections will be given the name provided "
 "after the \":\" character."
 msgstr ""
 ". Duas coleções especiais estão disponíveis: %s:%s e %s:%s. Adicione estes "
 "valores à lista para habilitá-los. As coleções receberão o nome após os dois "
 "pontos (\":\")."
 #: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:190
 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/structure.py:68
@ -1218,7 +1233,7 @@ msgstr "Comunica-se com o leitor Newsmy."
 #: /home/kovid/work/calibre/src/calibre/devices/teclast/driver.py:47
 msgid "Communicate with the Pico reader."
-msgstr ""
+msgstr "Comunicar-se com o leitor Pico."
 #: /home/kovid/work/calibre/src/calibre/devices/teclast/driver.py:57
 msgid "Communicate with the iPapyrus reader."
@ -1230,7 +1245,7 @@ msgstr "Comunicar com o leitor Sovos."
 #: /home/kovid/work/calibre/src/calibre/devices/teclast/driver.py:78
 msgid "Communicate with the Sunstech EB700 reader."
-msgstr ""
+msgstr "Comunicar-se com o leitor Sunstech EB700."
 #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:258
 msgid "Unable to detect the %s disk drive. Try rebooting."
@ -1254,6 +1269,8 @@ msgid ""
 "Unable to detect the %s disk drive. Either the device has already been "
 "ejected, or your kernel is exporting a deprecated version of SYSFS."
 msgstr ""
 "Não foi possível detectar o disco %s. O dispositivo já foi ejetado, ou o seu "
 "kernel está exportando uma versão deprecada do SYSFS."
 #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:617
 msgid "Unable to mount main memory (Error code: %d)"
@ -1264,6 +1281,8 @@ msgid ""
 "The main memory of %s is read only. This usually happens because of file "
 "system errors."
 msgstr ""
 "A memória principal de %s é somente leitura. Isto normalmente acontece "
 "devido a erros no sistema de arquivos."
 #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:816
 #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:818
@ -1693,6 +1712,9 @@ msgid ""
 "is: %default. Links are only added to the TOC if less than the threshold "
 "number of chapters were detected."
 msgstr ""
 "Número máximo de links para inserir no sumário. Use 0 para desabilitar. O "
 "padrão é: %default. Links serão adicionados ao sumário somente se o número "
 "encontrado for menor que o limite máximo de capítulos."
 #: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:256
 msgid ""
@ -2076,7 +2098,7 @@ msgstr "Você deve especificar um arquivo do tipo epub"
 #: /home/kovid/work/calibre/src/calibre/ebooks/epub/fix/unmanifested.py:17
 msgid "Fix unmanifested files"
-msgstr ""
+msgstr "Corrigir arquivos não listados no manifesto"
 #: /home/kovid/work/calibre/src/calibre/ebooks/epub/fix/unmanifested.py:21
 msgid ""
--- a/src/calibre/translations/zh_CN.po
+++ b/src/calibre/translations/zh_CN.po
@ -12904,7 +12904,7 @@ msgstr "其它格式"
 #: /home/kovid/work/calibre/src/calibre/library/server/browse.py:643
 msgid "Read %s in the %s format"
-msgstr "用 %2$s 格式阅读 %1$s"
+msgstr "阅读 %s（%s 格式）"
 #: /home/kovid/work/calibre/src/calibre/library/server/browse.py:648
 msgid "Get"
--- a/src/calibre/utils/wordcount.py
+++ b/src/calibre/utils/wordcount.py
@ -0,0 +1,85 @@
 #!/usr/bin/python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 """
 Get word, character, and Asian character counts
 1. Get a word count as a dictionary:
    wc = get_wordcount(text)
    words = wc['words'] # etc.
 2. Get a word count as an object
    wc = get_wordcount_obj(text)
    words = wc.words # etc.
 properties counted:
    * characters
    * chars_no_spaces
    * asian_chars
    * non_asian_words
    * words
 Sourced from:
 http://ginstrom.com/scribbles/2008/05/17/counting-words-etc-in-an-html-file-with-python/
 http://ginstrom.com/scribbles/2007/10/06/counting-words-characters-and-asian-characters-with-python/
 """
 # Module metadata. Note that the version is stored as a float literal,
 # not as a string.
 __version__ = 0.1
 # Author credited for this code (see the "Sourced from" links in the
 # module docstring).
 __author__ = "Ryan Ginstrom"
 # Code point of U+3000 IDEOGRAPHIC SPACE (the double-byte space). is_asian()
 # uses it as the cut-off above which a character is counted as Asian.
 IDEOGRAPHIC_SPACE = 0x3000
 def is_asian(char):
    """Tell whether *char* is counted as an Asian character.

    The check is a plain code-point comparison: a character is treated as
    Asian when its ordinal is strictly greater than IDEOGRAPHIC_SPACE, so
    the ideographic space itself is not.

    @param char: A single character (anything accepted by ord())
    """
    code_point = ord(char)
    return IDEOGRAPHIC_SPACE < code_point
 def filter_jchars(c):
    """Replace an Asian character with a space; pass anything else through.

    @param c: A single character
    @return: ' ' when is_asian(c) is true, otherwise c unchanged
    """
    return ' ' if is_asian(c) else c
 def nonj_len(word):
    u"""Count the non-Asian words contained in {word}

    - 日本語AアジアンB -> 2
    - hello -> 1

    @param word: A word, possibly containing Asian characters
    """
    # Every Asian character is blanked out so that it acts as a separator:
    #   本spam日eggs  ->  ' spam eggs'
    # Splitting on whitespace then leaves only the non-Asian runs:
    #   ' spam eggs'  ->  ['spam', 'eggs']  ->  2
    spaced = u''.join(filter_jchars(ch) for ch in word)
    return len(spaced.split())
 def get_wordcount(text):
    """Compute the word and character statistics for text

    The returned dictionary has these keys:
      * characters      - len(text)
      * chars_no_spaces - characters for which isspace() is false
      * asian_chars     - characters for which is_asian() is true
      * non_asian_words - result of nonj_len(text)
      * words           - non_asian_words + asian_chars (each Asian
                          character is counted as one word)

    @param text: The text of the segment
    """
    total_chars = len(text)
    visible_chars = sum(1 for ch in text if not ch.isspace())
    asian_count = sum(1 for ch in text if is_asian(ch))
    plain_words = nonj_len(text)
    return {
        'characters': total_chars,
        'chars_no_spaces': visible_chars,
        'asian_chars': asian_count,
        'non_asian_words': plain_words,
        'words': plain_words + asian_count,
    }
 def dict2obj(dictionary):
    """Wrap a dictionary in an object whose attributes are its items

    Every key/value pair of the dictionary is copied into the instance
    namespace of a freshly created Obj, so d['key'] is readable as
    obj.key.

    @param dictionary: The mapping to expose as attributes
    """
    class Obj(object):
        def __init__(self, attrs):
            # Copy all entries straight into the instance namespace
            vars(self).update(attrs)
    wrapped = Obj(dictionary)
    return wrapped
 def get_wordcount_obj(text):
    """Return the counts from get_wordcount(text) as object attributes

    The result exposes the same values as the dictionary form, e.g.
    wc.words instead of wc['words'].

    @param text: The text of the segment
    """
    counts = get_wordcount(text)
    return dict2obj(counts)