Sync to trunk.

2025-07-09 03:04:10 -04:00 · 2011-04-24 13:17:18 -04:00 · 2011-04-24 13:17:18 -04:00 · 555382a567
commit 555382a567
parent 27b2b3c31d a9e1969712
23 changed files with 996 additions and 143 deletions
--- a/recipes/clarin.recipe
+++ b/recipes/clarin.recipe
@ -1,6 +1,6 @@

 __license__   = 'GPL v3'
-__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 clarin.com
 '''
@ -18,11 +18,18 @@ class Clarin(BasicNewsRecipe):
    use_embedded_content  = False
    no_stylesheets        = True
    encoding              = 'utf8'
+    delay                 = 1
    language              = 'es_AR'
    publication_type      = 'newspaper'
    INDEX                 = 'http://www.clarin.com'
    masthead_url          = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg'
-    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif} h2{font-family: Georgia,serif; font-size: xx-large} .hora{font-weight:bold} .hd p{font-size: small} .nombre-autor{color: #0F325A} '
+    extra_css             = """ 
+                               body{font-family: Arial,Helvetica,sans-serif} 
+                               h2{font-family: Georgia,serif; font-size: xx-large} 
+                               .hora{font-weight:bold} 
+                               .hd p{font-size: small} 
+                               .nombre-autor{color: #0F325A} 
+                            """

    conversion_options = {
                          'comment'  : description
@ -31,7 +38,9 @@ class Clarin(BasicNewsRecipe):
                        , 'language' : language
                        }

-    keep_only_tags = [dict(attrs={'class':['hd','mt']})]
+    keep_only_tags    = [dict(attrs={'class':['hd','mt']})]
+    remove_tags       = [dict(name=['meta','base','link'])]
+    remove_attributes = ['lang','_mce_bogus']

    feeds = [
               (u'Pagina principal', u'http://www.clarin.com/rss/'             )
@ -47,6 +56,10 @@ class Clarin(BasicNewsRecipe):
              ,(u'Ciudades'        , u'http://www.clarin.com/rss/ciudades/'    )
            ]

+    
+    def get_article_url(self, article):
+        return article.get('guid',  None)
+    
    def print_version(self, url):
        return url + '?print=1'

--- a/recipes/financial_times.recipe
+++ b/recipes/financial_times.recipe
@ -53,6 +53,7 @@ class FinancialTimes(BasicNewsRecipe):
    feeds = [
               (u'UK'         , u'http://www.ft.com/rss/home/uk'        )
              ,(u'US'         , u'http://www.ft.com/rss/home/us'        )
+              ,(u'Europe'     , u'http://www.ft.com/rss/home/europe'    )
              ,(u'Asia'       , u'http://www.ft.com/rss/home/asia'      )
              ,(u'Middle East', u'http://www.ft.com/rss/home/middleeast')
            ]
--- a/recipes/staradvertiser.recipe
+++ b/recipes/staradvertiser.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 staradvertiser.com
 '''
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class Starbulletin(BasicNewsRecipe):
    title                 = 'Honolulu Star Advertiser'
    __author__            = 'Darko Miletic'
-    description           = "Latest national and local Hawaii sports news"
+    description           = 'Latest national and local Hawaii sports news'
    publisher             = 'Honolulu Star-Advertiser'
    category              = 'news, Honolulu, Hawaii'
    oldest_article        = 2
@ -19,7 +19,13 @@ class Starbulletin(BasicNewsRecipe):
    use_embedded_content  = False
    encoding              = 'utf8'
    publication_type      = 'newspaper'
-    extra_css             = ' body{font-family: Verdana,Arial,Helvetica,sans-serif} h1,.brown,.postCredit{color: #663300} .storyDeck{font-size: 1.2em; font-weight: bold} '
+    masthead_url          = 'http://media.staradvertiser.com/designimages/star-advertiser-logo-small.gif'
+    extra_css             = """ 
+                                body{font-family: Verdana,Arial,Helvetica,sans-serif} 
+                                h1,.brown,.postCredit{color: #663300} 
+                                .storyDeck{font-size: 1.2em; font-weight: bold}
+                                img{display: block}
+                            """

    conversion_options = {
                          'comment'          : description
@ -28,14 +34,16 @@ class Starbulletin(BasicNewsRecipe):
                        , 'language'         : language
                        , 'linearize_tables' : True
                        }
-
-    remove_tags_before = dict(attrs={'id':'storyTitle'})
-    remove_tags_after  = dict(name='div',attrs={'class':'storytext'})
+    keep_only_tags = [
+                         dict(attrs={'id':'storyTitle'})
+                        ,dict(attrs={'class':['storyDeck','postCredit']})
+                        ,dict(name='span',attrs={'class':'brown'})
+                        ,dict(name='div',attrs={'class':'storytext'})
+                     ]
    remove_tags = [
-                     dict(name=['object','link','script','span'])
-                    ,dict(attrs={'class':'insideStoryImage'})
+                     dict(name=['object','link','script','span','meta','base','iframe'])
+                    ,dict(attrs={'class':['insideStoryImage','insideStoryAd']})
 					,dict(attrs={'name':'fb_share'})
-					,dict(name='div',attrs={'class':'storytext'})
                  ]

    feeds = [
@ -47,3 +55,24 @@ class Starbulletin(BasicNewsRecipe):
             ,(u'Business'  , u'http://www.staradvertiser.com/business/index.rss'           )
             ,(u'Travel'    , u'http://www.staradvertiser.com/travel/index.rss'             )
            ]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('a'):
+            limg = item.find('img')
+            if item.string is not None:
+               str = item.string
+               item.replaceWith(str)
+            else:
+               if limg:
+                  item.name = 'div'
+                  item.attrs = []
+               else:
+                   str = self.tag_to_string(item)
+                   item.replaceWith(str)
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+               item['alt'] = 'image'
+        return soup
+            
--- a/resources/quick_start.epub
+++ b/resources/quick_start.epub
--- a/src/calibre/ebooks/metadata/pdb.py
+++ b/src/calibre/ebooks/metadata/pdb.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 '''
-Read meta information from eReader pdb files.
+Read meta information from pdb files.
 '''

 __license__   = 'GPL v3'
@ -13,10 +13,12 @@ import re
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.pdb.header import PdbHeaderReader
 from calibre.ebooks.metadata.ereader import get_metadata as get_eReader
+from calibre.ebooks.metadata.plucker import get_metadata as get_plucker

 MREADER = {
    'PNPdPPrs' : get_eReader,
    'PNRdPPrs' : get_eReader,
+    'DataPlkr' : get_plucker,
 }

 from calibre.ebooks.metadata.ereader import set_metadata as set_eReader
--- a/src/calibre/ebooks/metadata/plucker.py
+++ b/src/calibre/ebooks/metadata/plucker.py
@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+'''
+Read meta information from Plucker pdb files.
+'''
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import struct
+from datetime import datetime
+
+from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.pdb.header import PdbHeaderReader
+from calibre.ebooks.pdb.plucker.reader import SectionHeader, DATATYPE_METADATA, \
+    MIBNUM_TO_NAME
+
+def get_metadata(stream, extract_cover=True):
+    '''
+    Return metadata as a L{MetaInfo} object
+    '''
+    mi = MetaInformation(_('Unknown'), [_('Unknown')])
+    stream.seek(0)
+
+    pheader = PdbHeaderReader(stream)
+    section_data = None
+    for i in range(1, pheader.num_sections):
+        raw_data = pheader.section_data(i)
+        section_header = SectionHeader(raw_data)
+        if section_header.type == DATATYPE_METADATA:
+            section_data = raw_data[8:]
+            break
+
+    if not section_data:
+        return mi
+
+    default_encoding = 'latin-1'
+    record_count, = struct.unpack('>H', section_data[0:2])
+    adv = 0
+    title = None
+    author = None
+    pubdate = 0
+    for i in xrange(record_count):
+        type, = struct.unpack('>H', section_data[2+adv:4+adv])
+        length, = struct.unpack('>H', section_data[4+adv:6+adv])
+
+        # CharSet
+        if type == 1:
+            val, = struct.unpack('>H', section_data[6+adv:8+adv])
+            default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
+        # Author
+        elif type == 4:
+            author = section_data[6+adv+(2*length)]
+        # Title
+        elif type == 5:
+            title = section_data[6+adv+(2*length)]
+        # Publication Date
+        elif type == 6:
+            pubdate, = struct.unpack('>I', section_data[6+adv:6+adv+4])
+
+        adv += 2*length
+
+    if title:
+        mi.title = title.replace('\0', '').decode(default_encoding, 'replace')
+    if author:
+        author = author.replace('\0', '').decode(default_encoding, 'replace')
+        mi.author = author.split(',')
+    mi.pubdate = datetime.fromtimestamp(pubdate)
+
+    return mi
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@ -412,7 +412,7 @@ def identify(log, abort, # {{{

    if msprefs['txt_comments']:
        for r in results:
-            if r.plugin.has_html_comments and r.comments:
+            if r.identify_plugin.has_html_comments and r.comments:
                r.comments = html2text(r.comments)

    max_tags = msprefs['max_tags']
--- a/src/calibre/ebooks/metadata/sources/overdrive.py
+++ b/src/calibre/ebooks/metadata/sources/overdrive.py
@ -206,6 +206,7 @@ class OverDrive(Source):
            xref_q = '+'.join(title_tokens)
        #log.error('Initial query is %s'%initial_q)
        #log.error('Cross reference query is %s'%xref_q)
+
        q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
        query = '{"szKeyword":"'+initial_q+'"}'

@ -229,34 +230,42 @@ class OverDrive(Source):
                if int(m.group('displayrecords')) >= 1:
                    results = True
                elif int(m.group('totalrecords')) >= 1:
+                    if int(m.group('totalrecords')) >= 100:
+                        if xref_q.find('+') != -1:
+                            xref_tokens = xref_q.split('+')
+                            xref_q = xref_tokens[0]
+                            #log.error('xref_q is '+xref_q)
+                    else:
+                        xref_q = ''
                    xref_q = ''
                    q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
                elif int(m.group('totalrecords')) == 0:
                    return ''

-        return self.sort_ovrdrv_results(raw, title, title_tokens, author, author_tokens)
+        return self.sort_ovrdrv_results(raw, log, title, title_tokens, author, author_tokens)


-    def sort_ovrdrv_results(self, raw, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
+    def sort_ovrdrv_results(self, raw, log, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
        close_matches = []
        raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
        results = json.loads(raw)
-        #print results
+        #log.error('raw results are:'+str(results))
        # The search results are either from a keyword search or a multi-format list from a single ID,
        # sort through the results for closest match/format
        if results:
            for reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \
                    thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \
                    availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 in results:
-                #print "this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series
+                #log.error("this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series)
                if ovrdrv_id is not None and int(formatid) in [1, 50, 410, 900]:
-                    #print "overdrive id is not None, searching based on format type priority"
+                    #log.error('overdrive id is not None, searching based on format type priority')
                    return self.format_results(reserveid, od_title, subtitle, series, publisher,
                            creators, thumbimage, worldcatlink, formatid)
                else:
-                    creators = creators.split(', ')
+                    if creators:
+                        creators = creators.split(', ')
                    # if an exact match in a preferred format occurs
-                    if (author and creators[0] == author[0]) and od_title == title and int(formatid) in [1, 50, 410, 900] and thumbimage:
+                    if ((author and creators[0] == author[0]) or (not author and not creators)) and od_title.lower() == title.lower() and int(formatid) in [1, 50, 410, 900] and thumbimage:
                        return self.format_results(reserveid, od_title, subtitle, series, publisher,
                                creators, thumbimage, worldcatlink, formatid)
                    else:
@ -282,6 +291,10 @@ class OverDrive(Source):
                                close_matches.insert(0, self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
                            else:
                                close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
+                                
+                        elif close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
+                            close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
+
            if close_matches:
                return close_matches[0]
            else:
@ -289,7 +302,7 @@ class OverDrive(Source):
        else:
            return ''

-    def overdrive_get_record(self, br, q, ovrdrv_id):
+    def overdrive_get_record(self, br, log, q, ovrdrv_id):
        search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}'
        results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc'

@ -311,7 +324,7 @@ class OverDrive(Source):
        raw = str(list(raw))
        clean_cj = mechanize.CookieJar()
        br.set_cookiejar(clean_cj)
-        return self.sort_ovrdrv_results(raw, None, None, None, ovrdrv_id)
+        return self.sort_ovrdrv_results(raw, log, None, None, None, ovrdrv_id)


    def find_ovrdrv_data(self, br, log, title, author, isbn, ovrdrv_id=None):
@ -319,7 +332,7 @@ class OverDrive(Source):
        if ovrdrv_id is None:
           return self.overdrive_search(br, log, q, title, author)
        else:
-           return self.overdrive_get_record(br, q, ovrdrv_id)
+           return self.overdrive_get_record(br, log, q, ovrdrv_id)



--- a/src/calibre/ebooks/oeb/profile.py
+++ b/src/calibre/ebooks/oeb/profile.py
@ -1,75 +0,0 @@
-'''
-Device profiles.
-'''
-
-__license__   = 'GPL v3'
-__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
-
-from itertools import izip
-
-FONT_SIZES = [('xx-small', 1),
-              ('x-small',  None),
-              ('small',    2),
-              ('medium',   3),
-              ('large',    4),
-              ('x-large',  5),
-              ('xx-large', 6),
-              (None,       7)]
-
-
-class Profile(object):
-    def __init__(self, width, height, dpi, fbase, fsizes):
-        self.width = (float(width) / dpi) * 72.
-        self.height = (float(height) / dpi) * 72.
-        self.dpi = float(dpi)
-        self.fbase = float(fbase)
-        self.fsizes = []
-        for (name, num), size in izip(FONT_SIZES, fsizes):
-            self.fsizes.append((name, num, float(size)))
-        self.fnames = dict((name, sz) for name, _, sz in self.fsizes if name)
-        self.fnums = dict((num, sz) for _, num, sz in self.fsizes if num)
-
-
-PROFILES = {
-    'PRS505':
-        Profile(width=584, height=754, dpi=168.451, fbase=12,
-                fsizes=[7.5, 9, 10, 12, 15.5, 20, 22, 24]),
-
-    'MSReader':
-        Profile(width=480, height=652, dpi=96, fbase=13,
-                fsizes=[10, 11, 13, 16, 18, 20, 22, 26]),
-
-    # Not really, but let's pretend
-    'Mobipocket':
-        Profile(width=600, height=800, dpi=96, fbase=18,
-                fsizes=[14, 14, 16, 18, 20, 22, 24, 26]),
-    
-    # No clue on usable screen size; DPI should be good
-    'HanlinV3':
-        Profile(width=584, height=754, dpi=168.451, fbase=16,
-                fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),
-
-    'CybookG3':
-        Profile(width=600, height=800, dpi=168.451, fbase=16,
-                fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),
-
-    'Kindle':
-        Profile(width=525, height=640, dpi=168.451, fbase=16,
-                fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),
-    
-    'Browser':
-        Profile(width=800, height=600, dpi=100.0, fbase=12,
-                fsizes=[5, 7, 9, 12, 13.5, 17, 20, 22, 24])
-    }
-
-
-class Context(object):
-    PROFILES = PROFILES
-    
-    def __init__(self, source, dest):
-        if source in PROFILES:
-            source = PROFILES[source]
-        if dest in PROFILES:
-            dest = PROFILES[dest]
-        self.source = source
-        self.dest = dest
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@ -21,7 +21,6 @@ from calibre import force_unicode
 from calibre.ebooks import unit_convert
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
 from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize
-from calibre.ebooks.oeb.profile import PROFILES

 cssutils.log.setLevel(logging.WARN)

@ -123,10 +122,10 @@ class CSSSelector(etree.XPath):
 class Stylizer(object):
    STYLESHEETS = WeakKeyDictionary()

-    def __init__(self, tree, path, oeb, opts, profile=PROFILES['PRS505'],
+    def __init__(self, tree, path, oeb, opts, profile=None,
            extra_css='', user_css=''):
        self.oeb, self.opts = oeb, opts
-        self.profile = profile
+        self.profile = opts.input_profile
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
--- a/src/calibre/ebooks/pdb/init.py
+++ b/src/calibre/ebooks/pdb/init.py
@ -12,6 +12,7 @@ from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
 from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
 from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
 from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader
+from calibre.ebooks.pdb.plucker.reader import Reader as plucker_reader

 FORMAT_READERS = {
    'PNPdPPrs': ereader_reader,
@ -19,6 +20,7 @@ FORMAT_READERS = {
    'zTXTGPlm': ztxt_reader,
    'TEXtREAd': palmdoc_reader,
    '.pdfADBE': pdf_reader,
+    'DataPlkr': plucker_reader,
 }

 from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
@ -37,6 +39,7 @@ IDENTITY_TO_NAME = {
    'zTXTGPlm': 'zTXT',
    'TEXtREAd': 'PalmDOC',
    '.pdfADBE': 'Adobe Reader',
+    'DataPlkr': 'Plucker',

    'BVokBDIC': 'BDicty',
    'DB99DBOS': 'DB (Database program)',
@ -50,7 +53,6 @@ IDENTITY_TO_NAME = {
    'DATALSdb': 'LIST',
    'Mdb1Mdb1': 'MobileDB',
    'BOOKMOBI': 'MobiPocket',
-    'DataPlkr': 'Plucker',
    'DataSprd': 'QuickSheet',
    'SM01SMem': 'SuperMemo',
    'TEXtTlDc': 'TealDoc',
--- a/src/calibre/ebooks/pdb/plucker/init.py
+++ b/src/calibre/ebooks/pdb/plucker/init.py
--- a/src/calibre/ebooks/pdb/plucker/reader.py
+++ b/src/calibre/ebooks/pdb/plucker/reader.py
@ -0,0 +1,764 @@
+# -*- coding: utf-8 -*-
+
+#from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '20011, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+import struct
+import zlib
+
+from collections import OrderedDict
+
+from calibre import CurrentDir
+from calibre.ebooks.pdb.formatreader import FormatReader
+from calibre.ptempfile import TemporaryFile
+from calibre.utils.magick import Image, create_canvas
+
+DATATYPE_PHTML = 0
+DATATYPE_PHTML_COMPRESSED = 1
+DATATYPE_TBMP = 2
+DATATYPE_TBMP_COMPRESSED = 3
+DATATYPE_MAILTO = 4
+DATATYPE_LINK_INDEX = 5
+DATATYPE_LINKS = 6
+DATATYPE_LINKS_COMPRESSED = 7
+DATATYPE_BOOKMARKS = 8
+DATATYPE_CATEGORY = 9
+DATATYPE_METADATA = 10
+DATATYPE_STYLE_SHEET = 11
+DATATYPE_FONT_PAGE = 12
+DATATYPE_TABLE = 13
+DATATYPE_TABLE_COMPRESSED = 14
+DATATYPE_COMPOSITE_IMAGE = 15
+DATATYPE_PAGELIST_METADATA = 16
+DATATYPE_SORTED_URL_INDEX = 17
+DATATYPE_SORTED_URL = 18
+DATATYPE_SORTED_URL_COMPRESSED = 19
+DATATYPE_EXT_ANCHOR_INDEX = 20
+DATATYPE_EXT_ANCHOR = 21
+DATATYPE_EXT_ANCHOR_COMPRESSED = 22
+
+# IETF IANA MIBenum value for the character set.
+# See the http://www.iana.org/assignments/character-sets for valid values.
+# Not all character sets are handled by Python. This is a small subset that
+# the MIBenum maps to Python standard encodings
+# from http://docs.python.org/library/codecs.html#standard-encodings
+MIBNUM_TO_NAME = {
+    3: 'ascii',
+    4: 'latin_1',
+    5: 'iso8859_2',
+    6: 'iso8859_3',
+    7: 'iso8859_4',
+    8: 'iso8859_5',
+    9: 'iso8859_6',
+    10: 'iso8859_7',
+    11: 'iso8859_8',
+    12: 'iso8859_9',
+    13: 'iso8859_10',
+    17: 'shift_jis',
+    18: 'euc_jp',
+    27: 'utf_7',
+    36: 'euc_kr',
+    37: 'iso2022_kr',
+    38: 'euc_kr',
+    39: 'iso2022_jp',
+    40: 'iso2022_jp_2',
+    106: 'utf-8',
+    109: 'iso8859_13',
+    110: 'iso8859_14',
+    111: 'iso8859_15',
+    112: 'iso8859_16',
+    1013: 'utf_16_be',
+    1014: 'utf_16_le',
+    1015: 'utf_16',
+    2009: 'cp850',
+    2010: 'cp852',
+    2011: 'cp437',
+    2013: 'cp862',
+    2025: 'gb2312',
+    2026: 'big5',
+    2028: 'cp037',
+    2043: 'cp424',
+    2044: 'cp500',
+    2046: 'cp855',
+    2047: 'cp857',
+    2048: 'cp860',
+    2049: 'cp861',
+    2050: 'cp863',
+    2051: 'cp864',
+    2052: 'cp865',
+    2054: 'cp869',
+    2063: 'cp1026',
+    2085: 'hz',
+    2086: 'cp866',
+    2087: 'cp775',
+    2089: 'cp858',
+    2091: 'cp1140',
+    2102: 'big5hkscs',
+    2250: 'cp1250',
+    2251: 'cp1251',
+    2252: 'cp1252',
+    2253: 'cp1253',
+    2254: 'cp1254',
+    2255: 'cp1255',
+    2256: 'cp1256',
+    2257: 'cp1257',
+    2258: 'cp1258',
+}
+
+def decompress_doc(data):
+    buffer = [ord(i) for i in data]
+    res = []
+    i = 0
+    while i < len(buffer):
+        c = buffer[i]
+        i += 1
+        if c >= 1 and c <= 8:
+            res.extend(buffer[i:i+c])
+            i += c
+        elif c <= 0x7f:
+            res.append(c)
+        elif c >= 0xc0:
+            res.extend( (ord(' '), c^0x80) )
+        else:
+            c = (c << 8) + buffer[i]
+            i += 1
+            di = (c & 0x3fff) >> 3
+            j = len(res)
+            num = (c & ((1 << 3) - 1)) + 3
+
+            for k in range( num ):
+                res.append(res[j - di+k])
+
+    return ''.join([chr(i) for i in res])
+
+class HeaderRecord(object):
+    '''
+    Plucker header. PDB record 0.
+    '''
+
+    def __init__(self, raw):
+        self.uid, = struct.unpack('>H', raw[0:2])
+        # This is labled version in the spec.
+        # 2 is ZLIB compressed,
+        # 1 is DOC compressed
+        self.compression, = struct.unpack('>H', raw[2:4])
+        self.records, = struct.unpack('>H', raw[4:6])
+        # uid of the first html file. This should link
+        # to other files which in turn may link to others.
+        self.home_html = None
+
+        self.reserved = {}
+        for i in xrange(self.records):
+            adv = 4*i
+            name, = struct.unpack('>H', raw[6+adv:8+adv])
+            id, = struct.unpack('>H', raw[8+adv:10+adv])
+            self.reserved[id] = name
+            if name == 0:
+                self.home_html = id
+
+
+class SectionHeader(object):
+    '''
+    Every sections (record) has this header. It gives
+    details about the section such as it's uid.
+    '''
+
+    def __init__(self, raw):
+        self.uid, = struct.unpack('>H', raw[0:2])
+        self.paragraphs, = struct.unpack('>H', raw[2:4])
+        self.size, = struct.unpack('>H', raw[4:6])
+        self.type, = struct.unpack('>B', raw[6])
+        self.flags, = struct.unpack('>B', raw[7])
+
+
+class SectionHeaderText(object):
+    '''
+    Sub header for text records.
+    '''
+
+    def __init__(self, section_header, raw):
+        # The uncompressed size of each paragraph.
+        self.sizes = []
+        # uncompressed offset of each paragraph starting
+        # at the beginning of the PHTML.
+        self.paragraph_offsets = []
+        # Paragraph attributes.
+        self.attributes = []
+
+        for i in xrange(section_header.paragraphs):
+            adv = 4*i
+            self.sizes.append(struct.unpack('>H', raw[adv:2+adv])[0])
+            self.attributes.append(struct.unpack('>H', raw[2+adv:4+adv])[0])
+
+        running_offset = 0
+        for size in self.sizes:
+            running_offset += size
+            self.paragraph_offsets.append(running_offset)
+
+
+class SectionMetadata(object):
+    '''
+    Metadata.
+
+    This does not store metadata such as title, or author.
+    That metadata would be best retrieved with the PDB (plucker)
+    metdata reader.
+
+    This stores document specific information such as the
+    text encoding.
+
+    Note: There is a default encoding but each text section
+    can be assigned a different encoding.
+    '''
+
+    def __init__(self, raw):
+        self.default_encoding = 'latin-1'
+        self.exceptional_uid_encodings = {}
+        self.owner_id = None
+
+        record_count, = struct.unpack('>H', raw[0:2])
+
+        adv = 0
+        for i in xrange(record_count):
+            type, = struct.unpack('>H', raw[2+adv:4+adv])
+            length, = struct.unpack('>H', raw[4+adv:6+adv])
+
+            # CharSet
+            if type == 1:
+                val, = struct.unpack('>H', raw[6+adv:8+adv])
+                self.default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
+            # ExceptionalCharSets
+            elif type == 2:
+                ii_adv = 0
+                for ii in xrange(length / 2):
+                    uid, = struct.unpack('>H', raw[6+adv+ii_adv:8+adv+ii_adv])
+                    mib, = struct.unpack('>H', raw[8+adv+ii_adv:10+adv+ii_adv])
+                    self.exceptional_uid_encodings[uid] = MIBNUM_TO_NAME.get(mib, 'latin-1')
+                    ii_adv += 4
+            # OwnerID
+            elif type == 3:
+                self.owner_id = struct.unpack('>I', raw[6+adv:10+adv])
+            # Author, Title, PubDate
+            # Ignored here. The metadata reader plugin
+            # will get this info because if it's missing
+            # the metadata reader plugin will use fall
+            # back data from elsewhere in the file.
+            elif type in (4, 5, 6):
+                pass
+            # Linked Documents
+            elif type == 7:
+                pass
+
+            adv += 2*length
+
+
+class SectionText(object):
+    '''
+    Text data. Stores a text section header and the PHTML.
+    '''
+
+    def __init__(self, section_header, raw):
+        self.header = SectionHeaderText(section_header, raw)
+        self.data = raw[section_header.paragraphs * 4:]
+
+
+class SectionCompositeImage(object):
+    '''
+    A composite image consists of a a 2D array
+    of rows and columns. The entries in the array
+    are uid's.
+    '''
+
+    def __init__(self, raw):
+        self.columns, = struct.unpack('>H', raw[0:2])
+        self.rows, = struct.unpack('>H', raw[2:4])
+
+        # [
+        #  [uid, uid, uid, ...],
+        #  [uid, uid, uid, ...],
+        #  ...
+        # ]
+        #
+        # Each item in the layout is in it's
+        # correct position in the final
+        # composite.
+        #
+        # Each item in the layout is a uid
+        # to an image record.
+        self.layout = []
+        offset = 4
+        for i in xrange(self.rows):
+            col = []
+            for j in xrange(self.columns):
+                col.append(struct.unpack('>H', raw[offset:offset+2])[0])
+                offset += 2
+            self.layout.append(col)
+
+
+class Reader(FormatReader):
+    '''
+    Convert a plucker archive into HTML.
+
+    TODO:
+          * UTF 16 and 32 characters.
+          * Margins.
+          * Alignment.
+          * Font color.
+          * DATATYPE_MAILTO
+          * DATATYPE_TABLE(_COMPRESSED)
+          * DATATYPE_EXT_ANCHOR_INDEX
+          * DATATYPE_EXT_ANCHOR(_COMPRESSED)
+    '''
+
+    def __init__(self, header, stream, log, options):
+        self.stream = stream
+        self.log = log
+        self.options = options
+
+        # Mapping of section uid to our internal
+        # list of sections.
+        self.uid_section_number = OrderedDict()
+        self.uid_text_secion_number = OrderedDict()
+        self.uid_text_secion_encoding = {}
+        self.uid_image_section_number = {}
+        self.uid_composite_image_section_number = {}
+        self.metadata_section_number = None
+        self.default_encoding = 'latin-1'
+        self.owner_id = None
+        self.sections = []
+
+        # The Plucker record0 header
+        self.header_record = HeaderRecord(header.section_data(0))
+
+        for i in range(1, header.num_sections):
+            section_number = len(self.sections)
+            # The length of the section header.
+            # Where the actual data in the section starts.
+            start = 8
+            section = None
+
+            raw_data = header.section_data(i)
+            # Every sections has a section header.
+            section_header = SectionHeader(raw_data)
+
+            # Store sections we care able.
+            if section_header.type in (DATATYPE_PHTML, DATATYPE_PHTML_COMPRESSED):
+                self.uid_text_secion_number[section_header.uid] = section_number
+                section = SectionText(section_header, raw_data[start:])
+            elif section_header.type in (DATATYPE_TBMP, DATATYPE_TBMP_COMPRESSED):
+                self.uid_image_section_number[section_header.uid] = section_number
+                section = raw_data[start:]
+            elif section_header.type == DATATYPE_METADATA:
+                self.metadata_section_number = section_number
+                section = SectionMetadata(raw_data[start:])
+            elif section_header.type == DATATYPE_COMPOSITE_IMAGE:
+                self.uid_composite_image_section_number[section_header.uid] = section_number
+                section = SectionCompositeImage(raw_data[start:])
+
+            # Store the section.
+            if section:
+                self.uid_section_number[section_header.uid] = section_number
+                self.sections.append((section_header, section))
+
+        # Store useful information from the metadata section locally
+        # to make access easier.
+        if self.metadata_section_number:
+            mdata_section = self.sections[self.metadata_section_number][1]
+            for k, v in mdata_section.exceptional_uid_encodings.items():
+                self.uid_text_secion_encoding[k] = v
+            self.default_encoding = mdata_section.default_encoding
+            self.owner_id = mdata_section.owner_id
+
+        # Get the metadata (tile, author, ...) with the metadata reader.
+        from calibre.ebooks.metadata.pdb import get_metadata
+        self.mi = get_metadata(stream, False)
+
+    def extract_content(self, output_dir):
+        # Each text record is independent (unless the continuation
+        # value is set in the previous record). Put each converted
+        # text recored into a separate file. We will reference the
+        # home.html file as the first file and let the HTML input
+        # plugin assemble the order based on hyperlinks.
+        with CurrentDir(output_dir):
+            for uid, num in self.uid_text_secion_number.items():
+                self.log.debug(_('Writing record with uid: %s as %s.html' % (uid, uid)))
+                with open('%s.html' % uid, 'wb') as htmlf:
+                    html = u'<html><body>'
+                    section_header, section_data = self.sections[num]
+                    if section_header.type == DATATYPE_PHTML:
+                        html += self.process_phtml(section_data.data, section_data.header.paragraph_offsets)
+                    elif section_header.type == DATATYPE_PHTML_COMPRESSED:
+                        d = self.decompress_phtml(section_data.data)
+                        html += self.process_phtml(d, section_data.header.paragraph_offsets).decode(self.get_text_uid_encoding(section_header.uid), 'replace')
+                    html += '</body></html>'
+                    htmlf.write(html.encode('utf-8'))
+
+        # Images.
+        # Cache the image sizes in case they are used by a composite image.
+        image_sizes = {}
+        if not os.path.exists(os.path.join(output_dir, 'images/')):
+            os.makedirs(os.path.join(output_dir, 'images/'))
+        with CurrentDir(os.path.join(output_dir, 'images/')):
+            # Single images.
+            for uid, num in self.uid_image_section_number.items():
+                section_header, section_data = self.sections[num]
+                if section_data:
+                    idata = None
+                    if section_header.type == DATATYPE_TBMP:
+                        idata = section_data
+                    elif section_header.type == DATATYPE_TBMP_COMPRESSED:
+                        if self.header_record.compression == 1:
+                            idata = decompress_doc(section_data)
+                        elif self.header_record.compression == 2:
+                            idata = zlib.decompress(section_data)
+                    try:
+                        with TemporaryFile(suffix='.palm') as itn:
+                            with open(itn, 'wb') as itf:
+                                itf.write(idata)
+                            im = Image()
+                            im.read(itn)
+                            image_sizes[uid] = im.size
+                            im.set_compression_quality(70)
+                            im.save('%s.jpg' % uid)
+                            self.log.debug('Wrote image with uid %s to images/%s.jpg' % (uid, uid))
+                    except Exception as e:
+                        self.log.error('Failed to write image with uid %s: %s' % (uid, e))
+                else:
+                    self.log.error('Failed to write image with uid %s: No data.' % uid)
+            # Composite images.
+            # We're going to use the already compressed .jpg images here.
+            for uid, num in self.uid_composite_image_section_number.items():
+                try:
+                    section_header, section_data = self.sections[num]
+                    # Get the final width and height.
+                    width = 0
+                    height = 0
+                    for row in section_data.layout:
+                        row_width = 0
+                        col_height = 0
+                        for col in row:
+                            if col not in image_sizes:
+                                raise Exception('Image with uid: %s missing.' % col)
+                            im = Image()
+                            im.read('%s.jpg' % col)
+                            w, h = im.size
+                            row_width += w
+                            if col_height < h:
+                                col_height = h
+                        if width < row_width:
+                            width = row_width
+                        height += col_height
+                    # Create a new image the total size of all image
+                    # parts. Put the parts into the new image.
+                    canvas = create_canvas(width, height)
+                    y_off = 0
+                    for row in section_data.layout:
+                        x_off = 0
+                        largest_height = 0
+                        for col in row:
+                            im = Image()
+                            im.read('%s.jpg' % col)
+                            canvas.compose(im, x_off, y_off)
+                            w, h = im.size
+                            x_off += w
+                            if largest_height < h:
+                                largest_height = h
+                        y_off += largest_height
+                    canvas.set_compression_quality(70)
+                    canvas.save('%s.jpg' % uid)
+                    self.log.debug('Wrote composite image with uid %s to images/%s.jpg' % (uid, uid))
+                except Exception as e:
+                    self.log.error('Failed to write composite image with uid %s: %s' % (uid, e))
+
+        # Run the HTML through the html processing plugin.
+        from calibre.customize.ui import plugin_for_input_format
+        html_input = plugin_for_input_format('html')
+        for opt in html_input.options:
+            setattr(self.options, opt.option.name, opt.recommended_value)
+        self.options.input_encoding = 'utf-8'
+        odi = self.options.debug_pipeline
+        self.options.debug_pipeline = None
+        # Determine the home.html record uid. This should be set in the
+        # reserved values in the metadata recored. home.html is the first
+        # text record (should have hyper link references to other records)
+        # in the document.
+        try:
+            home_html = self.header_record.home_html
+            if not home_html:
+                home_html = self.uid_text_secion_number.items()[0][0]
+        except:
+            raise Exception(_('Could not determine home.html'))
+        # Generate oeb from html conversion.
+        oeb = html_input.convert(open('%s.html' % home_html, 'rb'), self.options, 'html', self.log, {})
+        self.options.debug_pipeline = odi
+
+        return oeb
+
+    def decompress_phtml(self, data):
+        if self.header_record.compression == 2:
+            if self.owner_id:
+                raise NotImplementedError
+            return zlib.decompress(data)
+        elif self.header_record.compression == 1:
+            #from calibre.ebooks.compression.palmdoc import decompress_doc
+            return decompress_doc(data)
+
+    def process_phtml(self, d, paragraph_offsets=[]):
+        html = u'<p id="p0">'
+        offset = 0
+        paragraph_open = True
+        link_open = False
+        need_set_p_id = False
+        p_num = 1
+        font_specifier_close = ''
+
+        while offset < len(d):
+            if not paragraph_open:
+                if need_set_p_id:
+                    html += u'<p id="p%s">' % p_num
+                    p_num += 1
+                    need_set_p_id = False
+                else:
+                    html += u'<p>'
+                paragraph_open = True
+
+            c = ord(d[offset])
+            # PHTML "functions"
+            if c == 0x0:
+                offset += 1
+                c = ord(d[offset])
+                # Page link begins
+                # 2 Bytes
+                # record ID
+                if c == 0x0a:
+                    offset += 1
+                    id = struct.unpack('>H', d[offset:offset+2])[0]
+                    if id in self.uid_text_secion_number:
+                        html += '<a href="%s.html">' % id
+                        link_open = True
+                    offset += 1
+                # Targeted page link begins
+                # 3 Bytes
+                # record ID, target
+                elif c == 0x0b:
+                    offset += 3
+                # Paragraph link begins
+                # 4 Bytes
+                # record ID, paragraph number
+                elif c == 0x0c:
+                    offset += 1
+                    id = struct.unpack('>H', d[offset:offset+2])[0]
+                    offset += 2
+                    pid = struct.unpack('>H', d[offset:offset+2])[0]
+                    if id in self.uid_text_secion_number:
+                        html += '<a href="%s.html#p%s">' % (id, pid)
+                        link_open = True
+                    offset += 1
+                # Targeted paragraph link begins
+                # 5 Bytes
+                # record ID, paragraph number, target
+                elif c == 0x0d:
+                    offset += 5
+                # Link ends
+                # 0 Bytes
+                elif c == 0x08:
+                    if link_open:
+                        html += '</a>'
+                        link_open = False
+                # Set font
+                # 1 Bytes
+                # font specifier
+                elif c == 0x11:
+                    offset += 1
+                    specifier = d[offset]
+                    html += font_specifier_close
+                    # Regular text
+                    if specifier == 0:
+                        font_specifier_close = ''
+                    # h1
+                    elif specifier == 1:
+                        html += '<h1>'
+                        font_specifier_close = '</h1>'
+                    # h2
+                    elif specifier == 2:
+                        html += '<h2>'
+                        font_specifier_close = '</h2>'
+                    # h3
+                    elif specifier == 3:
+                        html += '<h13>'
+                        font_specifier_close = '</h3>'
+                    # h4
+                    elif specifier == 4:
+                        html += '<h4>'
+                        font_specifier_close = '</h4>'
+                    # h5
+                    elif specifier == 5:
+                        html += '<h5>'
+                        font_specifier_close = '</h5>'
+                    # h6
+                    elif specifier == 6:
+                        html += '<h6>'
+                        font_specifier_close = '</h6>'
+                    # Bold
+                    elif specifier == 7:
+                        html += '<b>'
+                        font_specifier_close = '</b>'
+                    # Fixed-width
+                    elif specifier == 8:
+                        html += '<tt>'
+                        font_specifier_close = '</tt>'
+                    # Small
+                    elif specifier == 9:
+                        html += '<small>'
+                        font_specifier_close = '</small>'
+                    # Subscript
+                    elif specifier == 10:
+                        html += '<sub>'
+                        font_specifier_close = '</sub>'
+                    # Superscript
+                    elif specifier == 11:
+                        html += '<sup>'
+                        font_specifier_close = '</sup>'
+                # Embedded image
+                # 2 Bytes
+                # image record ID
+                elif c == 0x1a:
+                    offset += 1
+                    uid = struct.unpack('>H', d[offset:offset+2])[0]
+                    html += '<img src="images/%s.jpg" />' % uid
+                    offset += 1
+                # Set margin
+                # 2 Bytes
+                # left margin, right margin
+                elif c == 0x22:
+                    offset += 2
+                # Alignment of text
+                # 1 Bytes
+                # alignment
+                elif c == 0x29:
+                    offset += 1
+                # Horizontal rule
+                # 3 Bytes
+                # 8-bit height, 8-bit width (pixels), 8-bit width (%, 1-100)
+                elif c == 0x33:
+                    offset += 3
+                    if paragraph_open:
+                        html += u'</p>'
+                        paragraph_open = False
+                    html += u'<hr />'
+                # New line
+                # 0 Bytes
+                elif c == 0x38:
+                    if paragraph_open:
+                        html += u'</p>\n'
+                        paragraph_open = False
+                # Italic text begins
+                # 0 Bytes
+                elif c == 0x40:
+                    html += u'<i>'
+                # Italic text ends
+                # 0 Bytes
+                elif c == 0x48:
+                    html += u'</i>'
+                # Set text color
+                # 3 Bytes
+                # 8-bit red, 8-bit green, 8-bit blue
+                elif c == 0x53:
+                    offset += 3
+                # Multiple embedded image
+                # 4 Bytes
+                # alternate image record ID, image record ID
+                elif c == 0x5c:
+                    offset += 3
+                    uid = struct.unpack('>H', d[offset:offset+2])[0]
+                    html += '<img src="images/%s.jpg" />' % uid
+                    offset += 1
+                # Underline text begins
+                # 0 Bytes
+                elif c == 0x60:
+                    html += u'<u>'
+                # Underline text ends
+                # 0 Bytes
+                elif c == 0x68:
+                    html += u'</u>'
+                # Strike-through text begins
+                # 0 Bytes
+                elif c == 0x70:
+                    html += u'<s>'
+                # Strike-through text ends
+                # 0 Bytes
+                elif c == 0x78:
+                    html += u'</s>'
+                # 16-bit Unicode character
+                # 3 Bytes
+                # alternate text length, 16-bit unicode character
+                elif c == 0x83:
+                    offset += 3
+                # 32-bit Unicode character
+                # 5 Bytes
+                # alternate text length, 32-bit unicode character
+                elif c == 0x85:
+                    offset += 5
+                # Begin custom font span
+                # 6 Bytes
+                # font page record ID, X page position, Y page position
+                elif c == 0x8e:
+                    offset += 6
+                # Adjust custom font glyph position
+                # 4 Bytes
+                # X page position, Y page position
+                elif c == 0x8c:
+                    offset += 4
+                # Change font page
+                # 2 Bytes
+                # font record ID
+                elif c == 0x8a:
+                    offset += 2
+                # End custom font span
+                # 0 Bytes
+                elif c == 0x88:
+                    pass
+                # Begin new table row
+                # 0 Bytes
+                elif c == 0x90:
+                    pass
+                # Insert table (or table link)
+                # 2 Bytes
+                # table record ID
+                elif c == 0x92:
+                    offset += 2
+                # Table cell data
+                # 7 Bytes
+                # 8-bit alignment, 16-bit image record ID, 8-bit columns, 8-bit rows, 16-bit text length
+                elif c == 0x97:
+                    offset += 7
+                # Exact link modifier
+                # 2 Bytes
+                # Paragraph Offset (The Exact Link Modifier modifies a Paragraph Link or Targeted Paragraph Link function to specify an exact byte offset within the paragraph. This function must be followed immediately by the function it modifies).
+                elif c == 0x9a:
+                    offset += 2
+            elif c == 0xa0:
+                html += '&nbsp;'
+            else:
+                html += unichr(c)
+            offset += 1
+            if offset in paragraph_offsets:
+                need_set_p_id = True
+                if paragraph_open:
+                    html += u'</p>\n'
+                    paragraph_open = False
+
+        if paragraph_open:
+            html += u'</p>'
+
+        return html
+
+    def get_text_uid_encoding(self, uid):
+        # Return the user sepcified input encoding,
+        # otherwise return the alternate encoding specified for the uid,
+        # otherwise retur the default encoding for the document.
+        return self.options.input_encoding if self.options.input_encoding else self.uid_text_secion_encoding.get(uid, self.default_encoding)
--- a/src/calibre/gui2/actions/edit_metadata.py
+++ b/src/calibre/gui2/actions/edit_metadata.py
@ -117,11 +117,11 @@ class EditMetadataAction(InterfaceAction):

        payload = (id_map, failed_ids, failed_covers)
        from calibre.gui2.dialogs.message_box import ProceedNotification
-        p = ProceedNotification(payload, job.html_details,
+        p = ProceedNotification(self.apply_downloaded_metadata,
+                payload, job.html_details,
                _('Download log'), _('Download complete'), msg,
                det_msg=det_msg, show_copy_button=show_copy_button,
                parent=self.gui)
-        p.proceed.connect(self.apply_downloaded_metadata)
        p.show()

    def apply_downloaded_metadata(self, payload):
@ -529,13 +529,17 @@ class EditMetadataAction(InterfaceAction):
            view.reset()

    # Apply bulk metadata changes {{{
-    def apply_metadata_changes(self, id_map, title=None, msg=''):
+    def apply_metadata_changes(self, id_map, title=None, msg='', callback=None):
        '''
        Apply the metadata changes in id_map to the database synchronously
        id_map must be a mapping of ids to Metadata objects. Set any fields you
        do not want updated in the Metadata object to null. An easy way to do
        that is to create a metadata object as Metadata(_('Unknown')) and then
        only set the fields you want changed on this object.
+
+        callback can be either None or a function accepting a single argument,
+        in which case it is called after applying is complete with the list of
+        changed ids.
        '''
        if title is None:
            title = _('Applying changed metadata')
@ -544,6 +548,7 @@ class EditMetadataAction(InterfaceAction):
        self.apply_failures = []
        self.applied_ids = []
        self.apply_pd = None
+        self.apply_callback = callback
        if len(self.apply_id_map) > 1:
            from calibre.gui2.dialogs.progress import ProgressDialog
            self.apply_pd = ProgressDialog(title, msg, min=0,
@ -611,6 +616,11 @@ class EditMetadataAction(InterfaceAction):

        self.apply_id_map = []
        self.apply_pd = None
+        try:
+            if callable(self.apply_callback):
+                self.apply_callback(self.applied_ids)
+        finally:
+            self.apply_callback = None

    # }}}

--- a/src/calibre/gui2/dialogs/message_box.py
+++ b/src/calibre/gui2/dialogs/message_box.py
@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'


 from PyQt4.Qt import (QDialog, QIcon, QApplication, QSize, QKeySequence,
-    QAction, Qt, pyqtSignal, QTextBrowser, QDialogButtonBox, QVBoxLayout)
+    QAction, Qt, QTextBrowser, QDialogButtonBox, QVBoxLayout)

 from calibre.constants import __version__
 from calibre.gui2.dialogs.message_box_ui import Ui_Dialog
@ -143,17 +143,20 @@ class ViewLog(QDialog): # {{{
        QApplication.clipboard().setText(txt)
 # }}}

+
+_proceed_memory = []
+
 class ProceedNotification(MessageBox): # {{{

-    proceed = pyqtSignal(object)
-
-    def __init__(self, payload, html_log, log_viewer_title, title, msg, det_msg='', show_copy_button=False, parent=None):
+    def __init__(self, callback, payload, html_log, log_viewer_title, title, msg,
+            det_msg='', show_copy_button=False, parent=None):
        '''
        A non modal popup that notifies the user that a background task has
-        been completed. If they user clicks yes, the proceed signal is emitted
-        with payload as its argument.
+        been completed.

-        :param payload: Arbitrary object, emitted in the proceed signal
+        :param callback: A callable that is called with payload if the user
+        asks to proceed. Note that this is always called in the GUI thread
+        :param payload: Arbitrary object, passed to callback
        :param html_log: An HTML or plain text log
        :param log_viewer_title: The title for the log viewer window
        :param title: The title fo rthis popup
@ -166,25 +169,31 @@ class ProceedNotification(MessageBox): # {{{
        self.payload = payload
        self.html_log = html_log
        self.log_viewer_title = log_viewer_title
-        self.finished.connect(self.do_proceed)
+        self.finished.connect(self.do_proceed, type=Qt.QueuedConnection)

        self.vlb = self.bb.addButton(_('View log'), self.bb.ActionRole)
        self.vlb.setIcon(QIcon(I('debug.png')))
        self.vlb.clicked.connect(self.show_log)
        self.det_msg_toggle.setVisible(bool(det_msg))
        self.setModal(False)
+        self.callback = callback
+        _proceed_memory.append(self)

    def show_log(self):
        self.log_viewer = ViewLog(self.log_viewer_title, self.html_log,
                parent=self)

    def do_proceed(self, result):
-        if result == self.Accepted:
-            self.proceed.emit(self.payload)
        try:
-            self.proceed.disconnect()
-        except:
-            pass
+            if result == self.Accepted:
+                self.callback(self.payload)
+        finally:
+            # Ensure this notification is garbage collected
+            self.callback = None
+            self.setParent(None)
+            self.finished.disconnect()
+            self.vlb.clicked.disconnect()
+            _proceed_memory.remove(self)
 # }}}

 if __name__ == '__main__':
--- a/src/calibre/gui2/preferences/behavior.ui
+++ b/src/calibre/gui2/preferences/behavior.ui
@ -6,7 +6,7 @@
   <rect>
    <x>0</x>
    <y>0</y>
-    <width>672</width>
+    <width>941</width>
    <height>563</height>
   </rect>
  </property>
@ -22,7 +22,7 @@
     <property name="sizeHint" stdset="0">
      <size>
       <width>10</width>
-       <height>00</height>
+       <height>0</height>
      </size>
     </property>
    </spacer>
@ -50,13 +50,13 @@
   </item>
   <item row="2" column="2">
    <widget class="QCheckBox" name="opt_bools_are_tristate">
-     <property name="text">
-      <string>Yes/No columns have three values (Requires restart)</string>
-     </property>
     <property name="toolTip">
      <string>If checked, Yes/No custom columns values can be Yes, No, or Unknown.
 If not checked, the values can be Yes or No.</string>
     </property>
+     <property name="text">
+      <string>Yes/No columns have three values (Requires restart)</string>
+     </property>
    </widget>
   </item>
   <item row="4" column="0">
@ -304,7 +304,7 @@ If not checked, the values can be Yes or No.</string>
     </layout>
    </widget>
   </item>
-   <item row="30" column="0" colspan="3">
+   <item row="9" column="2">
    <widget class="QPushButton" name="reset_confirmation_button">
     <property name="text">
      <string>Reset all disabled &amp;confirmation dialogs</string>
--- a/src/calibre/gui2/preferences/create_custom_column.py
+++ b/src/calibre/gui2/preferences/create_custom_column.py
@ -158,7 +158,7 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
                {
                    'isbn': '{identifiers:select(isbn)}',
                    'formats': '{formats}',
-                    'last_modified':'''{last_modified:'format_date($, "dd MMM yy")'}'''
+                    'last_modified':'''{last_modified:'format_date($, "dd MMM yyyy")'}'''
                    }[which])
            self.composite_sort_by.setCurrentIndex(2 if which == 'last_modified' else 0)

--- a/src/calibre/gui2/preferences/main.py
+++ b/src/calibre/gui2/preferences/main.py
@ -87,6 +87,8 @@ class Category(QWidget): # {{{
        self.plugins = plugins

        self.bar = QToolBar(self)
+        self.bar.setStyleSheet(
+                'QToolBar { border: none; background: none }')
        self.bar.setIconSize(QSize(48, 48))
        self.bar.setMovable(False)
        self.bar.setFloatable(False)
--- a/src/calibre/gui2/store/mobileread/cache_progress_dialog.py
+++ b/src/calibre/gui2/store/mobileread/cache_progress_dialog.py
@ -6,19 +6,19 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-from PyQt4.Qt import (QCoreApplication, QDialog, QTimer)
+from PyQt4.Qt import QDialog

 from calibre.gui2.store.mobileread.cache_progress_dialog_ui import Ui_Dialog

 class CacheProgressDialog(QDialog, Ui_Dialog):
-    
+
    def __init__(self, parent=None, total=None):
        QDialog.__init__(self, parent)
        self.setupUi(self)
-        
+
        self.completed = 0
-        self.canceled = False        
-        
+        self.canceled = False
+
        self.progress.setValue(0)
        self.progress.setMinimum(0)
        self.progress.setMaximum(total if total else 0)
@ -32,7 +32,7 @@ class CacheProgressDialog(QDialog, Ui_Dialog):
        self.completed = 0
        self.canceled = False
        QDialog.open(self)
-        
+
    def reject(self):
        self.canceled = True
        QDialog.reject(self)
@ -43,13 +43,13 @@ class CacheProgressDialog(QDialog, Ui_Dialog):
        records that have bee completed.
        '''
        self.set_progress(self.completed + 1)
-    
+
    def set_message(self, msg):
        self.message.setText(msg)
-    
+
    def set_details(self, msg):
        self.details.setText(msg)
-    
+
    def set_progress(self, completed):
        '''
        completed is an int from 0 to total representing the number
@ -57,6 +57,6 @@ class CacheProgressDialog(QDialog, Ui_Dialog):
        '''
        self.completed = completed
        self.progress.setValue(self.completed)
-    
+
    def set_total(self, total):
        self.progress.setMaximum(total)
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@ -534,6 +534,7 @@ class DocumentView(QWebView): # {{{
                _('&Lookup in dictionary'), self)
        self.dictionary_action.setShortcut(Qt.CTRL+Qt.Key_L)
        self.dictionary_action.triggered.connect(self.lookup)
+        self.addAction(self.dictionary_action)
        self.goto_location_action = QAction(_('Go to...'), self)
        self.goto_location_menu = m = QMenu(self)
        self.goto_location_actions = a = {
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@ -556,10 +556,14 @@ class ResultCache(SearchQueryParser): # {{{
        return matchkind, query

    def get_bool_matches(self, location, query, candidates):
-        bools_are_tristate = not self.db_prefs.get('bools_are_tristate')
+        bools_are_tristate = self.db_prefs.get('bools_are_tristate')
        loc = self.field_metadata[location]['rec_index']
        matches = set()
        query = icu_lower(query)
+        if query not in (_('no'), _('unchecked'), '_no', 'false',
+                         _('yes'), _('checked'), '_yes', 'true',
+                         _('empty'), _('blank'), '_empty'):
+            raise ParseException(_('Invalid boolean query "{0}"').format(query))
        for id_ in candidates:
            item = self._data[id_]
            if item is None:
@ -630,8 +634,11 @@ class ResultCache(SearchQueryParser): # {{{
                        terms.add(l)
                if terms:
                    for l in terms:
-                        matches |= self.get_matches(l, query,
-                            candidates=candidates, allow_recursion=allow_recursion)
+                        try:
+                            matches |= self.get_matches(l, query,
+                                candidates=candidates, allow_recursion=allow_recursion)
+                        except:
+                            pass
                    return matches

            if location in self.field_metadata:
@ -1005,9 +1012,9 @@ class SortKeyGenerator(object):
                if sb == 'date':
                    try:
                        val = parse_date(val)
-                        dt = 'datetime'
                    except:
-                        pass
+                        val = UNDEFINED_DATE
+                    dt = 'datetime'
                elif sb == 'number':
                    try:
                        val = float(val)
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -149,7 +149,8 @@ class PostInstall:
            if islinux or isfreebsd:
                for f in os.listdir('.'):
                    if os.stat(f).st_uid == 0:
-                        os.rmdir(f) if os.path.isdir(f) else os.unlink(f)
+                        import shutil
+                        shutil.rmtree(f) if os.path.isdir(f) else os.unlink(f)
                if os.stat(config_dir).st_uid == 0:
                    os.rmdir(config_dir)

--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@ -20,12 +20,14 @@ What formats does |app| support conversion to/from?
 |app| supports the conversion of many input formats to many output formats.
 It can convert every input format in the following list, to every output format.

-*Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC**, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
+*Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC**, PDB***, PML, RB, RTF, SNB, TCR, TXT, TXTZ

 *Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, SNB, TCR, TXT, TXTZ

 ** PRC is a generic format, |app| supports PRC files with TextRead and MOBIBook headers

+*** PDB is also a generic format. |app| supports eReder, Plucker, PML and zTxt PDB files.
+
 .. _best-source-formats:

 What are the best source formats to convert?