Merge from trunk

2025-11-24 15:25:01 -05:00 · 2011-05-05 20:05:58 +01:00 · 2011-05-05 20:05:58 +01:00 · 022925988a
commit 022925988a
parent ec776e3052 6dc8803ac5
21 changed files with 296 additions and 450 deletions
--- a/recipes/telepolis.recipe
+++ b/recipes/telepolis.recipe
@ -18,7 +18,7 @@ class TelepolisNews(BasicNewsRecipe):
    recursion = 0
    no_stylesheets = True
    encoding = "utf-8"
-    language = 'de_AT'
+    language = 'de'
    use_embedded_content =False
    remove_empty_feeds = True
--- a/recipes/usatoday.recipe
+++ b/recipes/usatoday.recipe
@ -7,13 +7,11 @@ usatoday.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString, Tag
 import re
 class USAToday(BasicNewsRecipe):
    title = 'USA Today'
-    __author__ = 'GRiker'
+    __author__ = 'Kovid Goyal'
    oldest_article = 1
    timefmt  = ''
    max_articles_per_feed = 20
@ -31,7 +29,6 @@ class USAToday(BasicNewsRecipe):
                                 margin-bottom: 0em;        \
                                 font-size:     smaller;}\n \
                 .articleBody   {text-align:    left;}\n    '
    conversion_options = { 'linearize_tables' : True }
    #simultaneous_downloads = 1
    feeds =  [
                ('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
@ -47,63 +44,26 @@ class USAToday(BasicNewsRecipe):
                ('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
                ('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'),
                ]
-    keep_only_tags = [dict(attrs={'class':[
+    keep_only_tags = [dict(attrs={'class':'story'})]
-                                           'byLine',
+    remove_tags = [
-                                           'inside-copy',
+            dict(attrs={'class':[
-                                           'inside-head',
+                                'share',
-                                           'inside-head2',
+                                'reprints',
-                                           'item',
+                                'inline-h3',
-                                           'item-block',
+                                'info-extras',
-                                           'photo-container',
+                                'ppy-outer',
-                                           ]}),
+                                'ppy-caption',
-                      dict(id=[
+                                'comments',
-                               'applyMainStoryPhoto',
+                                'jump',
-                               'permalink',
+                                'pagetools',
-                               ])]
+                                'post-attributes',
                                'tags',
                                'bottom-tools',
                                'sponsoredlinks',
                                ]}),
            dict(id=['pluck']),
                  ]
    remove_tags = [dict(attrs={'class':[
                                        'comments',
                                        'jump',
                                        'pagetools',
                                        'post-attributes',
                                        'tags',
                                        ]}),
                   dict(id=[])]
    #feeds =  [('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles')]
    def dump_hex(self, src, length=16):
        ''' Diagnostic '''
        FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)])
        N=0; result=''
        while src:
           s,src = src[:length],src[length:]
           hexa = ' '.join(["%02X"%ord(x) for x in s])
           s = s.translate(FILTER)
           result += "%04X   %-*s   %s\n" % (N, length*3, hexa, s)
           N+=length
        print result
    def fixChars(self,string):
        '''
        Return ``string`` with the characters \\x91-\\x94, \\x96 and \\x97
        replaced by the numeric HTML entities listed below.
        '''
        substitutions = (
            ("\x91", "&#8216;"),  # lsquo
            ("\x92", "&#8217;"),  # rsquo
            ("\x93", "&#8220;"),  # ldquo
            ("\x94", "&#8221;"),  # rdquo
            ("\x96", "&#8211;"),  # ndash
            ("\x97", "&#8212;"),  # mdash
        )
        fixed = string
        for raw, entity in substitutions:
            fixed = re.sub(raw, entity, fixed)
        return fixed
    def get_masthead_url(self):
        masthead = 'http://i.usatoday.net/mobile/_common/_images/565x73_usat_mobile.gif'
@ -115,321 +75,4 @@ class USAToday(BasicNewsRecipe):
            masthead = None
        return masthead
    def massageNCXText(self, description):
        '''
        Prepare ``description`` for the Kindle TOC, which won't render
        certain characters.

        A falsy ``description`` is returned unchanged. Otherwise the text is
        passed through BeautifulStoneSoup with HTML entity conversion, every
        '&' is rewritten as '&#38;', and the result goes through fixChars().
        '''
        if not description:
            return description
        decoded = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
        # Replace '&' with '&#38;'
        escaped = re.sub("&","&#38;", decoded)
        return self.fixChars(escaped)
    def parse_feeds(self, *args, **kwargs):
        '''
        Delegate to BasicNewsRecipe.parse_feeds(), log the total number of
        queued articles (for the progress dialog) and return the parsed feeds.
        '''
        feeds = BasicNewsRecipe.parse_feeds(self, *args, **kwargs)
        total = sum(len(feed) for feed in feeds)
        self.log( "Queued %d articles" % total)
        return feeds
    def preprocess_html(self, soup):
        '''Return ``soup`` after running it through strip_anchors().'''
        return self.strip_anchors(soup)
    def postprocess_html(self, soup, first_fetch):
        '''
        Restructure a downloaded article in place and return the soup.

        Steps, in order: drop two inline-styled elements, turn the headline
        into <h2 class="headline">, rename the byLine class and replace its
        commas, remove upper-case "jumpout" paragraphs, wrap the first image
        in a table with credit and caption and insert it after the headline,
        then extract every remaining image.

        :param soup: parsed article tree; it is modified in place
        :param first_fetch: not used by this method
        :return: the same ``soup`` object
        '''
        # Remove navLinks <div class="inside-copy" style="padding-bottom:3px">
        navLinks = soup.find(True,{'style':'padding-bottom:3px'})
        if navLinks:
            navLinks.extract()
        # Remove <div class="inside-copy" style="margin-bottom:10px">
        gibberish = soup.find(True,{'style':'margin-bottom:10px'})
        if gibberish:
            gibberish.extract()
        # Change <inside-head> to <h2>
        headline = soup.find(True, {'class':['inside-head','inside-head2']})
        if not headline:
            # Fall back to the first <h3> when no inside-head element exists
            headline = soup.find('h3')
        if headline:
            tag = Tag(soup, "h2")
            tag['class'] = "headline"
            # Only the first child of the old headline is carried over
            tag.insert(0, headline.contents[0])
            headline.replaceWith(tag)
        else:
            print "unable to find headline:\n%s\n" % soup
        # Change byLine to byline, change commas to middot
        # Kindle renders commas in byline as '&'
        byline = soup.find(True, {'class':'byLine'})
        if byline:
            byline['class'] = 'byline'
            # Replace comma with middot
            byline.contents[0].replaceWith(re.sub(","," &middot;", byline.renderContents()))
        jumpout_punc_list = [':','?']
        # Remove the inline jumpouts in <div class="inside-copy">
        paras = soup.findAll(True, {'class':'inside-copy'})
        for para in paras:
            if re.match("<b>[\w\W]+ ",para.renderContents()):
                p = para.find('b')
                for punc in jumpout_punc_list:
                    punc_offset = p.contents[0].find(punc)
                    if punc_offset == -1:
                        continue
                    if punc_offset > 1:
                        # A lead-in that is entirely upper case up to the
                        # punctuation mark is treated as a jumpout
                        if p.contents[0][:punc_offset] == p.contents[0][:punc_offset].upper():
                            #print "extracting \n%s\n" % para.prettify()
                            para.extract()
        # Reset class for remaining
        paras = soup.findAll(True, {'class':'inside-copy'})
        for para in paras:
            para['class'] = 'articleBody'
        # Remove inline jumpouts in <p>
        paras = soup.findAll(['p'])
        for p in paras:
            if hasattr(p,'contents') and len(p.contents):
                for punc in jumpout_punc_list:
                    punc_offset = p.contents[0].find(punc)
                    if punc_offset == -1:
                        continue
                    if punc_offset > 2 and hasattr(p,'a') and len(p.contents):
                        #print "evaluating %s\n" % p.contents[0][:punc_offset+1]
                        if p.contents[0][:punc_offset] == p.contents[0][:punc_offset].upper():
                            #print "extracting \n%s\n" % p.prettify()
                            p.extract()
        # Capture the first img, insert after headline
        imgs = soup.findAll('img')
        print "postprocess_html(): %d images" % len(imgs)
        if imgs:
            divTag = Tag(soup, 'div')
            divTag['class'] = 'image'
            body = soup.find('body')
            img = imgs[0]
            #print "img: \n%s\n" % img.prettify()
            # Table for photo and credit
            tableTag = Tag(soup,'table')
            # Photo
            trimgTag = Tag(soup, 'tr')
            tdimgTag = Tag(soup, 'td')
            tdimgTag.insert(0,img)
            trimgTag.insert(0,tdimgTag)
            tableTag.insert(0,trimgTag)
            # Credit
            trcreditTag = Tag(soup, 'tr')
            tdcreditTag = Tag(soup, 'td')
            tdcreditTag['class'] = 'credit'
            credit = soup.find('td',{'class':'photoCredit'})
            if credit:
                tdcreditTag.insert(0,NavigableString(credit.renderContents()))
            else:
                # NOTE(review): unlike the 'cutline' lookup further down, this
                # attribute access is not wrapped in try/except - confirm what
                # happens when the <img> has no 'credit' attribute.
                credit = img['credit']
                if credit:
                    tdcreditTag.insert(0,NavigableString(credit))
                else:
                    tdcreditTag.insert(0,NavigableString(''))
            trcreditTag.insert(0,tdcreditTag)
            tableTag.insert(1,trcreditTag)
            # dtc tracks the next insertion index inside divTag
            dtc = 0
            divTag.insert(dtc,tableTag)
            dtc += 1
            # Disabled alternative: the 'if False' branch below never runs
            if False:
                # Add the caption in the table
                tableCaptionTag = Tag(soup,'caption')
                tableCaptionTag.insert(0,soup.find('td',{'class':'photoCredit'}).renderContents())
                tableTag.insert(1,tableCaptionTag)
                divTag.insert(dtc,tableTag)
                dtc += 1
                body.insert(1,divTag)
            else:
                # Add the caption below the table
                #print "Looking for caption in this soup:\n%s" % img.prettify()
                captionTag = Tag(soup,'p')
                captionTag['class'] = 'caption'
                if hasattr(img,'alt') and img['alt']:
                    captionTag.insert(0,NavigableString('<blockquote>%s</blockquote>' % img['alt']))
                    divTag.insert(dtc, captionTag)
                    dtc += 1
                else:
                    # Best effort: no caption is added when 'cutline' is unavailable
                    try:
                        captionTag.insert(0,NavigableString('<blockquote>%s</blockquote>' % img['cutline']))
                        divTag.insert(dtc, captionTag)
                        dtc += 1
                    except:
                        pass
            hrTag = Tag(soup, 'hr')
            divTag.insert(dtc, hrTag)
            dtc += 1
            # Delete <div id="applyMainStoryPhoto"
            photoJunk = soup.find('div',{'id':'applyMainStoryPhoto'})
            if photoJunk:
                photoJunk.extract()
            # Insert img after headline
            tag = body.find(True)
            insertLoc = 0
            headline_found = False
            while True:
                # Scan the top-level tags
                insertLoc += 1
                if hasattr(tag,'class') and tag['class'] == 'headline':
                    headline_found = True
                    body.insert(insertLoc,divTag)
                    break
                tag = tag.nextSibling
                if not tag:
                    break
            if not headline_found:
                # Monolithic <div> - restructure
                # Second pass walks the tree with .next instead of .nextSibling;
                # insertLoc is not reset and keeps counting from the first pass
                tag = body.find(True)
                while True:
                    insertLoc += 1
                    try:
                        if hasattr(tag,'class') and tag['class'] == 'headline':
                            headline_found = True
                            tag.insert(insertLoc,divTag)
                            break
                    except:
                        pass
                    tag = tag.next
                    if not tag:
                        break
                # Yank out headline, img and caption
                headline = body.find('h2','headline')
                img = body.find('div','image')
                # NOTE(review): the adjacent literals 'p''class' concatenate to
                # the single string 'pclass', so this searches for a <pclass>
                # tag; presumably body.find('p','caption') was intended - confirm.
                caption = body.find('p''class')
                # body(0) is calibre_navbar
                # body(1) is <div class="item">
                btc = 1
                # NOTE(review): headline is used without a None check, unlike
                # img and caption below.
                headline.extract()
                body.insert(1, headline)
                btc += 1
                if img:
                    img.extract()
                    body.insert(btc, img)
                    btc += 1
                if caption:
                    caption.extract()
                    body.insert(btc, caption)
                    btc += 1
            if len(imgs) > 1:
                # Only the 'if True' branch runs: all images after the first
                # are extracted; the else branch is unfinished, unreachable code
                if True:
                    [img.extract() for img in imgs[1:]]
                else:
                    # Format the remaining images
                    # This doesn't work yet
                    for img in imgs[1:]:
                        print "img:\n%s\n" % img.prettify()
                        divTag = Tag(soup, 'div')
                        divTag['class'] = 'image'
                        # Table for photo and credit
                        tableTag = Tag(soup,'table')
                        # Photo
                        trimgTag = Tag(soup, 'tr')
                        tdimgTag = Tag(soup, 'td')
                        tdimgTag.insert(0,img)
                        trimgTag.insert(0,tdimgTag)
                        tableTag.insert(0,trimgTag)
                        # Credit
                        trcreditTag = Tag(soup, 'tr')
                        tdcreditTag = Tag(soup, 'td')
                        tdcreditTag['class'] = 'credit'
                        try:
                            tdcreditTag.insert(0,NavigableString(img['credit']))
                        except:
                            tdcreditTag.insert(0,NavigableString(''))
                        trcreditTag.insert(0,tdcreditTag)
                        tableTag.insert(1,trcreditTag)
                        divTag.insert(0,tableTag)
                        soup.img.replaceWith(divTag)
        return soup
    def postprocess_book(self, oeb, opts, log) :
        '''
        Fill in author and description for the articles in ``oeb.toc``.

        For a TOC of depth 2 the author and description are only set when
        they are None. For a TOC of depth 3 the author of every article is
        overwritten and the description is set only when it is None. Other
        depths are left untouched. ``opts`` and ``log`` are not used here.
        '''
        def extract_byline(href) :
            '''
            Return the byline text for the manifest item at ``href``.

            Uses the contents of <div class="byline"> (commas replaced by
            " &middot;") when present; otherwise the text following
            'Copyright xxxx ' up to the first '.' of the first text node that
            starts with "Copyright"; otherwise None.
            '''
            # <meta name="byline" content=
            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
            byline = soup.find('div',attrs={'class':'byline'})
            if byline:
                byline['class'] = 'byline'
                # Replace comma with middot
                byline.contents[0].replaceWith(re.sub(u",", u" &middot;",
                    byline.renderContents(encoding=None)))
                return byline.renderContents(encoding=None)
            else :
                paras = soup.findAll(text=True)
                for para in paras:
                    if para.startswith("Copyright"):
                        # Skip a prefix as long as 'Copyright xxxx ' and stop
                        # at the first '.' in the text node
                        return para[len('Copyright xxxx '):para.find('.')]
                return None
        def extract_description(href) :
            '''
            Return a TOC description for the manifest item at ``href``.

            Prefers the content of <meta name="description">; otherwise the
            first <p> with non-empty rendered contents inside the <div> whose
            id is 'articleBody' or 'item'. Returns None when neither yields
            a result.
            '''
            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
            description = soup.find('meta',attrs={'name':'description'})
            if description :
                return self.massageNCXText(description['content'])
            else:
                # Take first paragraph of article
                articleBody = soup.find('div',attrs={'id':['articleBody','item']})
                if articleBody:
                    paras = articleBody.findAll('p')
                    for p in paras:
                        if p.renderContents() > '' :
                            return self.massageNCXText(self.tag_to_string(p,use_alt=False))
                    # Falling out of the loop returns None implicitly
                else:
                    print "Didn't find <div id='articleBody'> in this soup:\n%s" % soup.prettify()
                    return None
        # Method entry point here
        # Single section toc looks different than multi-section tocs
        if oeb.toc.depth() == 2 :
            for article in oeb.toc :
                if article.author is None :
                    article.author = extract_byline(article.href)
                if article.description is None :
                    article.description = extract_description(article.href)
        elif oeb.toc.depth() == 3 :
            for section in oeb.toc :
                for article in section :
                    # Author is overwritten unconditionally in this branch; the
                    # string literal below is disabled code kept as a bare
                    # string statement
                    article.author = extract_byline(article.href)
                    '''
                    if article.author is None :
                        article.author = self.massageNCXText(extract_byline(article.href))
                    else:
                        article.author = self.massageNCXText(article.author)
                    '''
                    if article.description is None :
                        article.description = extract_description(article.href)
    def strip_anchors(self,soup):
        '''
        Replace each <a> tag whose ``.img`` lookup is None with its rendered
        contents, decoded as cp1252 with undecodable bytes replaced.

        Anchors for which ``.img`` finds an image are kept. The soup is
        modified in place and returned.
        '''
        for element in soup.findAll(True):
            for anchor in element.findAll('a'):
                if anchor.img is not None:
                    continue
                anchor.replaceWith(anchor.renderContents().decode('cp1252','replace'))
        return soup
--- a/src/calibre/devices/kindle/apnx.py
+++ b/src/calibre/devices/kindle/apnx.py
@ -164,7 +164,7 @@ class APNXBuilder(object):
                if c == '/':
                    closing = True
                    continue
-                elif c in ('d', 'p'):
+                elif c == 'p':
                    if closing:
                        in_p = False
                    else:
--- a/src/calibre/ebooks/htmlz/input.py
+++ b/src/calibre/ebooks/htmlz/input.py
@ -7,10 +7,12 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import os
 import posixpath
-from calibre import walk
+from calibre import guess_type, walk
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.utils.zipfile import ZipFile
 class HTMLZInput(InputFormatPlugin):
@ -27,7 +29,7 @@ class HTMLZInput(InputFormatPlugin):
        # Extract content from zip archive.
        zf = ZipFile(stream)
-        zf.extractall('.')
+        zf.extractall()
        for x in walk('.'):
            if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm'):
@ -70,5 +72,24 @@ class HTMLZInput(InputFormatPlugin):
        from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
        mi = get_file_type_metadata(stream, file_ext)
        meta_info_to_oeb_metadata(mi, oeb.metadata, log)
        # Get the cover path from the OPF.
        cover_href = None
        opf = None
        for x in walk('.'):
            if os.path.splitext(x)[1].lower() in ('.opf'):
                opf = x
                break
        if opf:
            opf = OPF(opf)
            cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
        # Set the cover.
        if cover_href:
            cdata = None
            with open(cover_href, 'rb') as cf:
                cdata = cf.read()
            id, href = oeb.manifest.generate('cover', cover_href)
            oeb.manifest.add(id, href, guess_type(cover_href)[0], data=cdata)
            oeb.guide.add('cover', 'Cover', href)
        return oeb
--- a/src/calibre/ebooks/htmlz/output.py
+++ b/src/calibre/ebooks/htmlz/output.py
@ -7,11 +7,13 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import os
 from cStringIO import StringIO
 from lxml import etree
 from calibre.customize.conversion import OutputFormatPlugin, \
    OptionRecommendation
 from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile
@ -79,10 +81,31 @@ class HTMLZOutput(OutputFormatPlugin):
                        fname = os.path.join(tdir, 'images', images[item.href])
                        with open(fname, 'wb') as img:
                            img.write(data)
            # Cover
            cover_path = None
            try:
                cover_data = None
                if oeb_book.metadata.cover:
                    term = oeb_book.metadata.cover[0].term
                    cover_data = oeb_book.guide[term].item.data
                if cover_data:
                    from calibre.utils.magick.draw import save_cover_data_to
                    cover_path = os.path.join(tdir, 'cover.jpg')
                    with open(cover_path, 'w') as cf:
                        cf.write('')
                    save_cover_data_to(cover_data, cover_path)
            except:
                import traceback
                traceback.print_exc()
            # Metadata
            with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
-                mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))
+                opf = OPF(StringIO(etree.tostring(oeb_book.metadata.to_opf1())))
                mi = opf.to_book_metadata()
                if cover_path:
                    mi.cover = 'cover.jpg'
                mdataf.write(metadata_to_opf(mi))
            htmlz = ZipFile(output_path, 'w')
            htmlz.add_dir(tdir)
--- a/src/calibre/ebooks/metadata/extz.py
+++ b/src/calibre/ebooks/metadata/extz.py
@ -13,7 +13,7 @@ import posixpath
 from cStringIO import StringIO
 from calibre.ebooks.metadata import MetaInformation
-from calibre.ebooks.metadata.opf2 import OPF
+from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.utils.zipfile import ZipFile, safe_replace
@ -31,9 +31,9 @@ def get_metadata(stream, extract_cover=True):
            opf = OPF(opf_stream)
            mi = opf.to_book_metadata()
            if extract_cover:
-                cover_name = opf.raster_cover
+                cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
-                if cover_name:
+                if cover_href:
-                    mi.cover_data = ('jpg', zf.read(cover_name))
+                    mi.cover_data = ('jpg', zf.read(cover_href))
    except:
        return mi
    return mi
@ -59,17 +59,20 @@ def set_metadata(stream, mi):
        except:
            pass
    if new_cdata:
-        raster_cover = opf.raster_cover
+        cover = opf.cover
-        if not raster_cover:
+        if not cover:
-            raster_cover = 'cover.jpg'
+            cover = 'cover.jpg'
-        cpath = posixpath.join(posixpath.dirname(opf_path), raster_cover)
+        cpath = posixpath.join(posixpath.dirname(opf_path), cover)
        new_cover = _write_new_cover(new_cdata, cpath)
        replacements[cpath] = open(new_cover.name, 'rb')
        mi.cover = cover
    # Update the metadata.
-    opf.smart_update(mi, replace_metadata=True)
+    old_mi = opf.to_book_metadata()
    old_mi.smart_update(mi)
    opf.smart_update(metadata_to_opf(old_mi), replace_metadata=True)
    newopf = StringIO(opf.render())
-    safe_replace(stream, opf_path, newopf, extra_replacements=replacements)
+    safe_replace(stream, opf_path, newopf, extra_replacements=replacements, add_missing=True)
    # Cleanup temporary files.
    try:
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@ -966,7 +966,9 @@ class OPF(object): # {{{
            cover_id = covers[0].get('content')
            for item in self.itermanifest():
                if item.get('id', None) == cover_id:
-                    return item.get('href', None)
+                    mt = item.get('media-type', '')
                    if 'xml' not in mt:
                        return item.get('href', None)
    @dynamic_property
    def cover(self):
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@ -13,6 +13,7 @@ from Queue import Queue, Empty
 from threading import Thread
 from io import BytesIO
 from operator import attrgetter
 from urlparse import urlparse
 from calibre.customize.ui import metadata_plugins, all_metadata_plugins
 from calibre.ebooks.metadata.sources.base import create_log, msprefs
@ -458,6 +459,14 @@ def urls_from_identifiers(identifiers): # {{{
    if oclc:
        ans.append(('OCLC', 'oclc', oclc,
            'http://www.worldcat.org/oclc/'+oclc))
    url = identifiers.get('uri', None)
    if url is None:
        url = identifiers.get('url', None)
    if url and url.startswith('http'):
        url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
        parts = urlparse(url)
        name = parts.netloc
        ans.append((name, 'url', url, url))
    return ans
 # }}}
--- a/src/calibre/ebooks/odt/input.py
+++ b/src/calibre/ebooks/odt/input.py
@ -7,6 +7,8 @@ __docformat__ = 'restructuredtext en'
 Convert an ODT file into a Open Ebook
 '''
 import os
 from lxml import etree
 from odf.odf2xhtml import ODF2XHTML
 from calibre import CurrentDir, walk
@ -23,7 +25,51 @@ class Extract(ODF2XHTML):
                with open(name, 'wb') as f:
                    f.write(data)
-    def __call__(self, stream, odir):
+    def filter_css(self, html, log):
        root = etree.fromstring(html)
        style = root.xpath('//*[local-name() = "style" and @type="text/css"]')
        if style:
            style = style[0]
            css = style.text
            if css:
                style.text, sel_map = self.do_filter_css(css)
                for x in root.xpath('//*[@class]'):
                    extra = []
                    orig = x.get('class')
                    for cls in orig.split():
                        extra.extend(sel_map.get(cls, []))
                    if extra:
                        x.set('class', orig + ' ' + ' '.join(extra))
                html = etree.tostring(root, encoding='utf-8',
                        xml_declaration=True)
        return html
    def do_filter_css(self, css):
        '''
        Collapse style rules that consist of several class-only selectors.

        Every style rule with more than one selector, all of whose selector
        texts start with '.', gets the single selector '.c_odtN' (N counts
        up from 0). Returns ``(css_text, sel_map)`` where ``sel_map`` maps
        each original selector text (minus its leading character) to the
        list of replacement class names it now needs.
        '''
        from cssutils import parseString
        from cssutils.css import CSSRule

        sheet = parseString(css)
        sel_map = {}
        counter = 0
        for rule in list(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)):
            # Selectors that are not plain class selectors disqualify the rule
            non_class = [sel for sel in rule.selectorList
                    if not sel.selectorText.startswith('.')]
            if non_class or len(rule.selectorList) <= 1:
                continue
            # Replace all the class selectors with a single class selector
            # This will be added to the class attribute of all elements
            # that have one of these selectors.
            merged = 'c_odt%d' % counter
            counter += 1
            for sel in rule.selectorList:
                sel_map.setdefault(sel.selectorText[1:], []).append(merged)
            rule.selectorText = '.' + merged
        return sheet.cssText, sel_map
    def __call__(self, stream, odir, log):
        from calibre.utils.zipfile import ZipFile
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator
@ -32,13 +78,17 @@ class Extract(ODF2XHTML):
        if not os.path.exists(odir):
            os.makedirs(odir)
        with CurrentDir(odir):
-            print 'Extracting ODT file...'
+            log('Extracting ODT file...')
            html = self.odf2xhtml(stream)
            # A blanket img specification like this causes problems
-            # with EPUB output as the contaiing element often has
+            # with EPUB output as the containing element often has
            # an absolute height and width set that is larger than
            # the available screen real estate
            html = html.replace('img { width: 100%; height: 100%; }', '')
            try:
                html = self.filter_css(html, log)
            except:
                log.exception('Failed to filter CSS, conversion may be slow')
            with open('index.xhtml', 'wb') as f:
                f.write(html.encode('utf-8'))
            zf = ZipFile(stream, 'r')
@ -67,7 +117,7 @@ class ODTInput(InputFormatPlugin):
    def convert(self, stream, options, file_ext, log,
                accelerators):
-        return Extract()(stream, '.')
+        return Extract()(stream, '.', log)
    def postprocess_book(self, oeb, opts, log):
        # Fix <p><div> constructs as the asinine epubchecker complains
--- a/src/calibre/ebooks/oeb/transforms/metadata.py
+++ b/src/calibre/ebooks/oeb/transforms/metadata.py
@ -36,7 +36,7 @@ def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
        m.clear('description')
        m.add('description', mi.comments)
    elif override_input_metadata:
-         m.clear('description')
+        m.clear('description')
    if not mi.is_null('publisher'):
        m.clear('publisher')
        m.add('publisher', mi.publisher)
--- a/src/calibre/ebooks/rtf/rtfml.py
+++ b/src/calibre/ebooks/rtf/rtfml.py
@ -15,7 +15,6 @@ import cStringIO
 from lxml import etree
 from calibre.ebooks.metadata import authors_to_string
 from calibre.utils.filenames import ascii_text
 from calibre.utils.magick.draw import save_cover_data_to, identify_data
 TAGS = {
@ -79,8 +78,7 @@ def txt2rtf(text):
        elif val <= 127:
            buf.write(x)
        else:
-            repl = ascii_text(x)
+            c = r'\u{0:d}?'.format(val)
            c = r'\uc{2}\u{0:d}{1}'.format(val, repl, len(repl))
            buf.write(c)
    return buf.getvalue()
--- a/src/calibre/gui2/actions/choose_library.py
+++ b/src/calibre/gui2/actions/choose_library.py
@ -246,7 +246,7 @@ class ChooseLibraryAction(InterfaceAction):
    def delete_requested(self, name, location):
        loc = location.replace('/', os.sep)
        if not question_dialog(self.gui, _('Are you sure?'), '<p>'+
-                _('All files from %s will be '
+                _('<b style="color: red">All files</b> from <br><br><b>%s</b><br><br> will be '
                '<b>permanently deleted</b>. Are you sure?') % loc,
                show_copy_button=False):
            return
--- a/src/calibre/gui2/dialogs/tweak_epub.py
+++ b/src/calibre/gui2/dialogs/tweak_epub.py
@ -7,16 +7,16 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import os, shutil
 from contextlib import closing
 from zipfile import ZipFile, ZIP_DEFLATED, ZIP_STORED
 from PyQt4.Qt import QDialog
 from calibre.constants import isosx
-from calibre.gui2 import open_local_file
+from calibre.gui2 import open_local_file, error_dialog
 from calibre.gui2.dialogs.tweak_epub_ui import Ui_Dialog
 from calibre.libunzip import extract as zipextract
-from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre.ptempfile import (PersistentTemporaryDirectory,
        PersistentTemporaryFile)
 class TweakEpub(QDialog, Ui_Dialog):
    '''
@ -37,11 +37,15 @@ class TweakEpub(QDialog, Ui_Dialog):
        self.cancel_button.clicked.connect(self.reject)
        self.explode_button.clicked.connect(self.explode)
        self.rebuild_button.clicked.connect(self.rebuild)
        self.preview_button.clicked.connect(self.preview)
        # Position update dialog overlaying top left of app window
        parent_loc = parent.pos()
        self.move(parent_loc.x(),parent_loc.y())
        self.gui = parent
        self._preview_files = []
    def cleanup(self):
        if isosx:
            try:
@ -55,6 +59,11 @@ class TweakEpub(QDialog, Ui_Dialog):
        # Delete directory containing exploded ePub
        if self._exploded is not None:
            shutil.rmtree(self._exploded, ignore_errors=True)
        for x in self._preview_files:
            try:
                os.remove(x)
            except:
                pass
    def display_exploded(self):
        '''
@ -71,9 +80,8 @@ class TweakEpub(QDialog, Ui_Dialog):
            self.rebuild_button.setEnabled(True)
            self.explode_button.setEnabled(False)
-    def rebuild(self, *args):
+    def do_rebuild(self, src):
-        self._output = os.path.join(self._exploded, 'rebuilt.epub')
+        with ZipFile(src, 'w', compression=ZIP_DEFLATED) as zf:
        with closing(ZipFile(self._output, 'w', compression=ZIP_DEFLATED)) as zf:
            # Write mimetype
            zf.write(os.path.join(self._exploded,'mimetype'), 'mimetype', compress_type=ZIP_STORED)
            # Write everything else
@ -86,5 +94,23 @@ class TweakEpub(QDialog, Ui_Dialog):
                    zfn = os.path.relpath(absfn,
                            self._exploded).replace(os.sep, '/')
                    zf.write(absfn, zfn)
    def preview(self):
        '''
        Rebuild the exploded ePub into a temporary .epub file and open that
        file through the GUI's 'View' action. The temporary file's path is
        appended to self._preview_files.

        If nothing has been exploded yet, an error dialog is shown and its
        result is returned instead.
        '''
        if self._exploded:
            preview_file = PersistentTemporaryFile('.epub')
            # Only the path is needed here; do_rebuild() opens and writes
            # the archive itself.
            preview_file.close()
            path = preview_file.name
            self._preview_files.append(path)
            self.do_rebuild(path)
            self.gui.iactions['View']._view_file(path)
            return None
        return error_dialog(self, _('Cannot preview'),
                _('You must first explode the epub before previewing.'),
                show=True)
    def rebuild(self, *args):
        '''
        Rebuild the ePub as 'rebuilt.epub' inside the exploded directory,
        remember that path in self._output, then accept the dialog.

        Any positional arguments are accepted and ignored.
        '''
        output = os.path.join(self._exploded, 'rebuilt.epub')
        self._output = output
        self.do_rebuild(output)
        return QDialog.accept(self)
--- a/src/calibre/gui2/dialogs/tweak_epub.ui
+++ b/src/calibre/gui2/dialogs/tweak_epub.ui
@ -23,6 +23,16 @@
   <bool>false</bool>
  </property>
  <layout class="QGridLayout" name="gridLayout">
   <item row="0" column="0" colspan="2">
    <widget class="QLabel" name="label">
     <property name="text">
      <string>&lt;p&gt;Explode the ePub to display contents in a file browser window. To tweak individual files, right-click, then 'Open with...' your editor of choice. When tweaks are complete, close the file browser window &lt;b&gt;and the editor windows you used to edit files in the epub&lt;/b&gt;.&lt;/p&gt;&lt;p&gt;Rebuild the ePub, updating your calibre library.&lt;/p&gt;</string>
     </property>
     <property name="wordWrap">
      <bool>true</bool>
     </property>
    </widget>
   </item>
   <item row="1" column="0">
    <widget class="QPushButton" name="explode_button">
     <property name="statusTip">
@ -37,23 +47,6 @@
     </property>
    </widget>
   </item>
   <item row="2" column="0">
    <widget class="QPushButton" name="rebuild_button">
     <property name="enabled">
      <bool>false</bool>
     </property>
     <property name="statusTip">
      <string>Rebuild ePub from exploded contents</string>
     </property>
     <property name="text">
      <string>&amp;Rebuild ePub</string>
     </property>
     <property name="icon">
      <iconset resource="../../../../resources/images.qrc">
       <normaloff>:/images/exec.png</normaloff>:/images/exec.png</iconset>
     </property>
    </widget>
   </item>
   <item row="3" column="0">
    <widget class="QPushButton" name="cancel_button">
     <property name="statusTip">
@ -68,13 +61,31 @@
     </property>
    </widget>
   </item>
-   <item row="0" column="0">
+   <item row="3" column="1">
-    <widget class="QLabel" name="label">
+    <widget class="QPushButton" name="rebuild_button">
-     <property name="text">
+     <property name="enabled">
-      <string>&lt;p&gt;Explode the ePub to display contents in a file browser window. To tweak individual files, right-click, then 'Open with...' your editor of choice. When tweaks are complete, close the file browser window &lt;b&gt;and the editor windows you used to edit files in the epub&lt;/b&gt;.&lt;/p&gt;&lt;p&gt;Rebuild the ePub, updating your calibre library.&lt;/p&gt;</string>
+      <bool>false</bool>
     </property>
-     <property name="wordWrap">
+     <property name="statusTip">
-      <bool>true</bool>
+      <string>Rebuild ePub from exploded contents</string>
     </property>
     <property name="text">
      <string>&amp;Rebuild ePub</string>
     </property>
     <property name="icon">
      <iconset resource="../../../../resources/images.qrc">
       <normaloff>:/images/exec.png</normaloff>:/images/exec.png</iconset>
     </property>
    </widget>
   </item>
   <item row="1" column="1">
    <widget class="QPushButton" name="preview_button">
     <property name="text">
      <string>&amp;Preview ePub</string>
     </property>
     <property name="icon">
      <iconset resource="../../../../resources/images.qrc">
       <normaloff>:/images/view.png</normaloff>:/images/view.png</iconset>
     </property>
    </widget>
   </item>
--- a/src/calibre/gui2/layout.py
+++ b/src/calibre/gui2/layout.py
@ -44,18 +44,19 @@ class LocationManager(QObject): # {{{
            receiver = partial(self._location_selected, name)
            ac.triggered.connect(receiver)
            self.tooltips[name] = tooltip
            m = QMenu(parent)
            self._mem.append(m)
            a = m.addAction(icon, tooltip)
            a.triggered.connect(receiver)
            if name != 'library':
                m = QMenu(parent)
                self._mem.append(m)
                a = m.addAction(icon, tooltip)
                a.triggered.connect(receiver)
                self._mem.append(a)
                a = m.addAction(QIcon(I('eject.png')), _('Eject this device'))
                a.triggered.connect(self._eject_requested)
                ac.setMenu(m)
                self._mem.append(a)
            else:
                ac.setToolTip(tooltip)
            ac.setMenu(m)
            ac.calibre_name = name
            return ac
@ -71,7 +72,12 @@ class LocationManager(QObject): # {{{
    def set_switch_actions(self, quick_actions, rename_actions, delete_actions,
            switch_actions, choose_action):
-        self.switch_menu = QMenu()
+        self.switch_menu = self.library_action.menu()
        if self.switch_menu:
            self.switch_menu.addSeparator()
        else:
            self.switch_menu = QMenu()
        self.switch_menu.addAction(choose_action)
        self.cs_menus = []
        for t, acs in [(_('Quick switch'), quick_actions),
@ -85,7 +91,9 @@ class LocationManager(QObject): # {{{
        self.switch_menu.addSeparator()
        for ac in switch_actions:
            self.switch_menu.addAction(ac)
-        self.library_action.setMenu(self.switch_menu)
+        
        if self.switch_menu != self.library_action.menu():
            self.library_action.setMenu(self.switch_menu)
    def _location_selected(self, location, *args):
        if location != self.current_location and hasattr(self,
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@ -439,10 +439,16 @@ class BooksView(QTableView): # {{{
        if tweaks['sort_columns_at_startup'] is not None:
            sh = []
-            for c,d in tweaks['sort_columns_at_startup']:
+            try:
-                if not isinstance(d, bool):
+                for c,d in tweaks['sort_columns_at_startup']:
-                    d = True if d == 0 else False
+                    if not isinstance(d, bool):
-                sh.append((c, d))
+                        d = True if d == 0 else False
                    sh.append((c, d))
            except:
                # Ignore invalid tweak values as users seem to often get them
                # wrong
                import traceback
                traceback.print_exc()
            old_state['sort_history'] = sh
        self.apply_state(old_state)
--- a/src/calibre/gui2/preferences/metadata_sources.py
+++ b/src/calibre/gui2/preferences/metadata_sources.py
@ -190,7 +190,15 @@ class FieldsModel(QAbstractListModel): # {{{
        return ans | Qt.ItemIsUserCheckable
    def restore_defaults(self):
-        self.overrides = dict([(f, self.state(f, True)) for f in self.fields])
+        self.overrides = dict([(f, self.state(f, Qt.Checked)) for f in self.fields])
        self.reset()
    def select_all(self):
        '''Override every field to Qt.Checked, then call reset().'''
        self.overrides = dict.fromkeys(self.fields, Qt.Checked)
        self.reset()
    def clear_all(self):
        '''Override every field to Qt.Unchecked, then call reset().'''
        self.overrides = dict.fromkeys(self.fields, Qt.Unchecked)
        self.reset()
    def setData(self, index, val, role):
@ -273,6 +281,9 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
        self.fields_view.setModel(self.fields_model)
        self.fields_model.dataChanged.connect(self.changed_signal)
        self.select_all_button.clicked.connect(self.fields_model.select_all)
        self.clear_all_button.clicked.connect(self.fields_model.clear_all)
    def configure_plugin(self):
        for index in self.sources_view.selectionModel().selectedRows():
            plugin = self.sources_model.data(index, Qt.UserRole)
--- a/src/calibre/gui2/preferences/metadata_sources.ui
+++ b/src/calibre/gui2/preferences/metadata_sources.ui
@ -77,8 +77,8 @@
         <property name="title">
          <string>Downloaded metadata fields</string>
         </property>
-         <layout class="QVBoxLayout" name="verticalLayout_2">
+         <layout class="QGridLayout" name="gridLayout_2">
-          <item>
+          <item row="0" column="0" colspan="2">
           <widget class="QListView" name="fields_view">
            <property name="toolTip">
             <string>If you uncheck any fields, metadata for those fields will not be downloaded</string>
@ -88,6 +88,20 @@
            </property>
           </widget>
          </item>
          <item row="1" column="0">
           <widget class="QPushButton" name="select_all_button">
            <property name="text">
             <string>&amp;Select all</string>
            </property>
           </widget>
          </item>
          <item row="1" column="1">
           <widget class="QPushButton" name="clear_all_button">
            <property name="text">
             <string>&amp;Clear all</string>
            </property>
           </widget>
          </item>
         </layout>
        </widget>
       </item>
--- a/src/calibre/gui2/store/search/search.py
+++ b/src/calibre/gui2/store/search/search.py
@ -155,6 +155,7 @@ class SearchDialog(QDialog, Ui_Dialog):
        self.config['results_view_column_width'] = [self.results_view.columnWidth(i) for i in range(self.results_view.model().columnCount())]
        self.config['sort_col'] = self.results_view.model().sort_col
        self.config['sort_order'] = self.results_view.model().sort_order
        self.config['open_external'] = self.open_external.isChecked()
        store_check = {}
        for n in self.store_plugins:
@ -179,6 +180,8 @@ class SearchDialog(QDialog, Ui_Dialog):
        else:
            self.resize_columns()
        self.open_external.setChecked(self.config.get('open_external', False))
        store_check = self.config.get('store_checked', None)
        if store_check:
            for n in store_check:
@ -212,7 +215,7 @@ class SearchDialog(QDialog, Ui_Dialog):
    def open_store(self, index):
        result = self.results_view.model().get_result(index)
-        self.store_plugins[result.store_name].open(self, result.detail_item)
+        self.store_plugins[result.store_name].open(self, result.detail_item, self.open_external.isChecked())
    def check_progress(self):
        if not self.search_pool.threads_running() and not self.results_view.model().cover_pool.threads_running() and not self.results_view.model().details_pool.threads_running(): 
--- a/src/calibre/gui2/store/search/search.ui
+++ b/src/calibre/gui2/store/search/search.ui
@ -70,7 +70,7 @@
            <x>0</x>
            <y>0</y>
            <width>215</width>
-            <height>116</height>
+            <height>93</height>
           </rect>
          </property>
         </widget>
@ -101,6 +101,16 @@
         </item>
        </layout>
       </item>
       <item>
        <widget class="QCheckBox" name="open_external">
         <property name="toolTip">
          <string>Open a selected book in the system's web browser</string>
         </property>
         <property name="text">
          <string>Open in &amp;external browser</string>
         </property>
        </widget>
       </item>
      </layout>
     </widget>
     <widget class="QSplitter" name="splitter_2">
--- a/src/odf/odf2xhtml.py
+++ b/src/odf/odf2xhtml.py
@ -841,11 +841,19 @@ ol, ul { padding-left: 2em; }
            self.styledict[name] = styles
        # Write the styles to HTML
        self.writeout(self.default_styles)
        # Changed by Kovid to not write out endless copies of the same style
        css_styles = {}
        for name in self.stylestack:
            styles = self.styledict.get(name)
-            css2 = self.cs.convert_styles(styles)
+            css2 = tuple(self.cs.convert_styles(styles).iteritems())
-            self.writeout("%s {\n" % name)
+            if css2 in css_styles:
-            for style, val in css2.items():
+                css_styles[css2].append(name)
            else:
                css_styles[css2] = [name]
        for css2, names in css_styles.iteritems():
            self.writeout("%s {\n" % ', '.join(names))
            for style, val in css2:
                self.writeout("\t%s: %s;\n" % (style, val) )
            self.writeout("}\n")