Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

commit 0dac0ef3a0 — Merge from trunk
@@ -4,6 +4,7 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 www.mainichi.jp
 '''
 
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class MainichiDailyNews(BasicNewsRecipe):
@@ -22,3 +23,18 @@ class MainichiDailyNews(BasicNewsRecipe):
     remove_tags = [{'class':"RelatedArticle"}]
     remove_tags_after = {'class':"Credit"}
 
+    def parse_feeds(self):
+
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'pheedo.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+
+        return feeds
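
The parse_feeds override above strips advertising entries whose URLs go through the pheedo.jp redirector. A minimal standalone sketch of the same filtering step (the feed and article objects stand in for calibre's Feed/Article classes); it is equivalent in effect to the delList loop in the hunk:

    import re

    def remove_pheedo_articles(feeds):
        # Drop any article whose URL points at the pheedo.jp ad redirector.
        for feed in feeds:
            feed.articles = [a for a in feed.articles
                             if not re.search(r'pheedo\.jp', a.url)]
        return feeds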
@@ -14,5 +14,19 @@ class MainichiDailyITNews(BasicNewsRecipe):
 
     remove_tags_before = {'class':"NewsTitle"}
     remove_tags = [{'class':"RelatedArticle"}]
-    remove_tags_after = {'class':"Credit"}
+
+    def parse_feeds(self):
+
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'pheedo.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+
+        return feeds
+    remove_tags_after = {'class':"Credit"}
@@ -32,12 +32,9 @@ class NikkeiNet_sub_life(BasicNewsRecipe):
     remove_tags_after = {'class':"cmn-pr_list"}
 
     feeds = [ (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
-              (u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
-              (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
               (u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
               (u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
-              (u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
-              (u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking')
+              (u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special')
             ]
 
     def get_browser(self):
resources/recipes/nikkei_sub_shakai.recipe (new file, 102 lines)
@@ -0,0 +1,102 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+www.nikkei.com
+'''
+
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+import mechanize
+from calibre.ptempfile import PersistentTemporaryFile
+
+
+class NikkeiNet_sub_life(BasicNewsRecipe):
+    title          = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)'
+    __author__     = 'Hiroshi Miura'
+    description    = 'News and current market affairs from Japan'
+    cover_url      = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
+    masthead_url   = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
+    needs_subscription = True
+    oldest_article = 2
+    max_articles_per_feed = 20
+    language       = 'ja'
+    remove_javascript = False
+    temp_files     = []
+
+    remove_tags_before = {'class':"cmn-section cmn-indent"}
+    remove_tags = [
+        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
+        {'class':"cmn-article_keyword cmn-clearfix"},
+        {'class':"cmn-print_headline cmn-clearfix"},
+    ]
+    remove_tags_after = {'class':"cmn-pr_list"}
+
+    feeds = [
+        (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai')
+    ]
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+
+        cj = mechanize.LWPCookieJar()
+        br.set_cookiejar(cj)
+
+        #br.set_debug_http(True)
+        #br.set_debug_redirects(True)
+        #br.set_debug_responses(True)
+
+        if self.username is not None and self.password is not None:
+            #print "----------------------------get login form--------------------------------------------"
+            # open login form
+            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
+            response = br.response()
+            #print "----------------------------get login form---------------------------------------------"
+            #print "----------------------------set login form---------------------------------------------"
+            # remove disabled input which brings error on mechanize
+            response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
+            response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
+            br.set_response(response)
+            br.select_form(name='LA0010Form01')
+            br['LA0010Form01:LA0010Email']    = self.username
+            br['LA0010Form01:LA0010Password'] = self.password
+            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
+            br.submit()
+            br.response()
+            #print "----------------------------send login form---------------------------------------------"
+            #print "----------------------------open news main page-----------------------------------------"
+            # open news site
+            br.open('http://www.nikkei.com/')
+            br.response()
+            #print "----------------------------www.nikkei.com BODY --------------------------------------"
+            #print response2.get_data()
+            #print "-------------------------^^-got auto redirect form----^^--------------------------------"
+            # forced redirect in default
+            br.select_form(nr=0)
+            br.submit()
+            response3 = br.response()
+            # return some cookie which should be set by Javascript
+            #print response3.geturl()
+            raw = response3.get_data()
+            #print "---------------------------response to form --------------------------------------------"
+            # grab cookie from JS and set it
+            redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
+            br.select_form(nr=0)
+
+            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
+            self.temp_files[-1].write("#LWP-Cookies-2.0\n")
+
+            self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
+            self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
+            self.temp_files[-1].close()
+            cj.load(self.temp_files[-1].name)
+
+            br.submit()
+
+        #br.set_debug_http(False)
+        #br.set_debug_redirects(False)
+        #br.set_debug_responses(False)
+        return br
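
The login dance in get_browser ends by writing an LWP-Cookies-2.0 file by hand and loading it into the jar, because the redirectFlag cookie is normally set by JavaScript that mechanize never executes. A minimal sketch of just that trick; set_js_cookie is an illustrative helper name, not part of the recipe:

    import mechanize
    from calibre.ptempfile import PersistentTemporaryFile

    def set_js_cookie(cj, name, value, domain=".nikkei.com"):
        # Persist one cookie in LWP-Cookies-2.0 format, then load it into
        # the jar, emulating a cookie the site would set via Javascript.
        f = PersistentTemporaryFile('_cookie.txt')
        f.write("#LWP-Cookies-2.0\n")
        f.write('Set-Cookie3: %s=%s; domain="%s"; path="/"; path_spec; secure; '
                'expires="2029-12-21 05:07:59Z"; version=0\n' % (name, value, domain))
        f.close()
        cj.load(f.name)

    cj = mechanize.LWPCookieJar()
    set_js_cookie(cj, 'redirectFlag', '1')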
@@ -21,7 +21,7 @@ class YOLNews(BasicNewsRecipe):
     remove_javascript = True
     masthead_title = u'YOMIURI ONLINE'
 
-    remove_tags_before = {'class':"article-def"}
+    keep_only_tags = [{'class':"article-def"}]
     remove_tags = [{'class':"RelatedArticle"},
                    {'class':"sbtns"}
                   ]
@@ -21,7 +21,7 @@ class YOLNews(BasicNewsRecipe):
     remove_javascript = True
     masthead_title = u"YOMIURI ONLINE"
 
-    remove_tags_before = {'class':"article-def"}
+    keep_only_tags = [{'class':"article-def"}]
     remove_tags = [{'class':"RelatedArticle"},
                    {'class':"sbtns"}
                   ]
@@ -21,7 +21,7 @@ class ANDROID(USBMS):
             # HTC
             0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226], 0x0c01 : [0x100, 0x0227], 0x0ff9
                 : [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226],
-                0xc92 : [0x100]},
+                0xc92 : [0x100], 0xc97: [0x226]},
 
             # Eken
             0x040d : { 0x8510 : [0x0001] },

@@ -63,7 +63,7 @@ class ANDROID(USBMS):
     WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
             '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
             'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
-            'SCH-I500_CARD']
+            'SCH-I500_CARD', 'SPH-D700_CARD']
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID']
 
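
Both hunks widen the Android driver's USB matching tables. The nested dict maps USB vendor id to product id to a list of accepted BCD device revisions; the Windows lists match by device name string instead. A rough sketch of how such a table is consulted (matches_device is a hypothetical name, not the driver's actual method):

    def matches_device(table, vendor_id, product_id, bcd):
        # table: {vendor_id: {product_id: [accepted BCD revisions]}}
        products = table.get(vendor_id, {})
        return bcd in products.get(product_id, [])

    HTC = {0x0bb4: {0xc92: [0x100], 0xc97: [0x226]}}
    assert matches_device(HTC, 0x0bb4, 0xc97, 0x226)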
@@ -11,9 +11,9 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.devices.mime import mime_type_ext
 from calibre.devices.interface import BookList as _BookList
 from calibre.constants import preferred_encoding
-from calibre import isbytestring
+from calibre import isbytestring, force_unicode
 from calibre.utils.config import prefs, tweaks
-from calibre.utils.icu import sort_key, strcmp as icu_strcmp
+from calibre.utils.icu import strcmp
 
 class Book(Metadata):
     def __init__(self, prefix, lpath, size=None, other=None):

@@ -241,7 +241,7 @@ class CollectionsBookList(BookList):
         if y is None:
             return -1
         if isinstance(x, (unicode, str)):
-            c = strcmp(x, y)
+            c = strcmp(force_unicode(x), force_unicode(y))
         else:
             c = cmp(x, y)
         if c != 0:
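
The strcmp change matters because the isinstance check still admits bytestrings via str, while the ICU-backed comparison wants unicode; normalizing both operands first avoids miscompares or crashes on non-ASCII collection names. A small sketch of the guarded comparison, assuming force_unicode decodes bytestrings with a sensible fallback encoding (safe_strcmp is an illustrative name):

    from calibre import force_unicode
    from calibre.utils.icu import strcmp

    def safe_strcmp(x, y):
        # Normalize both operands to unicode before the locale-aware comparison.
        return strcmp(force_unicode(x), force_unicode(y))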
src/calibre/ebooks/metadata/amazonfr.py (new file, 516 lines)
@@ -0,0 +1,516 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2010, sengian <sengian1@gmail.com>'
+
+import sys, textwrap, re, traceback
+from urllib import urlencode
+from math import ceil
+
+from lxml import html
+from lxml.html import soupparser
+
+from calibre.utils.date import parse_date, utcnow, replace_months
+from calibre.utils.cleantext import clean_ascii_chars
+from calibre import browser, preferred_encoding
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.metadata import MetaInformation, check_isbn, \
+    authors_to_sort_string
+from calibre.ebooks.metadata.fetch import MetadataSource
+from calibre.utils.config import OptionParser
+from calibre.library.comments import sanitize_comments_html
+
+
+class AmazonFr(MetadataSource):
+
+    name = 'Amazon French'
+    description = _('Downloads metadata from amazon.fr')
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Sengian'
+    version = (1, 0, 0)
+    has_html_comments = True
+
+    def fetch(self):
+        try:
+            self.results = search(self.title, self.book_author, self.publisher,
+                self.isbn, max_results=10, verbose=self.verbose, lang='fr')
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+class AmazonEs(MetadataSource):
+
+    name = 'Amazon Spanish'
+    description = _('Downloads metadata from amazon.com in spanish')
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Sengian'
+    version = (1, 0, 0)
+    has_html_comments = True
+
+    def fetch(self):
+        try:
+            self.results = search(self.title, self.book_author, self.publisher,
+                self.isbn, max_results=10, verbose=self.verbose, lang='es')
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+class AmazonEn(MetadataSource):
+
+    name = 'Amazon English'
+    description = _('Downloads metadata from amazon.com in english')
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Sengian'
+    version = (1, 0, 0)
+    has_html_comments = True
+
+    def fetch(self):
+        try:
+            self.results = search(self.title, self.book_author, self.publisher,
+                self.isbn, max_results=10, verbose=self.verbose, lang='en')
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+class AmazonDe(MetadataSource):
+
+    name = 'Amazon German'
+    description = _('Downloads metadata from amazon.de')
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Sengian'
+    version = (1, 0, 0)
+    has_html_comments = True
+
+    def fetch(self):
+        try:
+            self.results = search(self.title, self.book_author, self.publisher,
+                self.isbn, max_results=10, verbose=self.verbose, lang='de')
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+class Amazon(MetadataSource):
+
+    name = 'Amazon'
+    description = _('Downloads metadata from amazon.com')
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Kovid Goyal & Sengian'
+    version = (1, 1, 0)
+    has_html_comments = True
+
+    def fetch(self):
+        # if not self.site_customization:
+        #     return
+        try:
+            self.results = search(self.title, self.book_author, self.publisher,
+                self.isbn, max_results=10, verbose=self.verbose, lang='all')
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+    # @property
+    # def string_customization_help(self):
+    #     return _('You can select here the language for metadata search with amazon.com')
+
+
+def report(verbose):
+    if verbose:
+        traceback.print_exc()
+
+
+class Query(object):
+
+    BASE_URL_ALL = 'http://www.amazon.com'
+    BASE_URL_FR = 'http://www.amazon.fr'
+    BASE_URL_DE = 'http://www.amazon.de'
+
+    def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
+            max_results=20, rlang='all'):
+        assert not(title is None and author is None and publisher is None \
+            and isbn is None and keywords is None)
+        assert (max_results < 21)
+
+        self.max_results = int(max_results)
+        self.renbres = re.compile(u'\s*(\d+)\s*')
+
+        q = {   'search-alias' : 'stripbooks' ,
+                'unfiltered' : '1',
+                'field-keywords' : '',
+                'field-author' : '',
+                'field-title' : '',
+                'field-isbn' : '',
+                'field-publisher' : ''
+                #get to amazon detailed search page to get all options
+                # 'node' : '',
+                # 'field-binding' : '',
+                #before, during, after
+                # 'field-dateop' : '',
+                #month as number
+                # 'field-datemod' : '',
+                # 'field-dateyear' : '',
+                #french only
+                # 'field-collection' : '',
+                #many options available
+            }
+
+        if rlang =='all':
+            q['sort'] = 'relevanceexprank'
+            self.urldata = self.BASE_URL_ALL
+        elif rlang =='es':
+            q['sort'] = 'relevanceexprank'
+            q['field-language'] = 'Spanish'
+            self.urldata = self.BASE_URL_ALL
+        elif rlang =='en':
+            q['sort'] = 'relevanceexprank'
+            q['field-language'] = 'English'
+            self.urldata = self.BASE_URL_ALL
+        elif rlang =='fr':
+            q['sort'] = 'relevancerank'
+            self.urldata = self.BASE_URL_FR
+        elif rlang =='de':
+            q['sort'] = 'relevancerank'
+            self.urldata = self.BASE_URL_DE
+        self.baseurl = self.urldata
+
+        if isbn is not None:
+            q['field-isbn'] = isbn.replace('-', '')
+        else:
+            if title is not None:
+                q['field-title'] = title
+            if author is not None:
+                q['field-author'] = author
+            if publisher is not None:
+                q['field-publisher'] = publisher
+            if keywords is not None:
+                q['field-keywords'] = keywords
+
+        if isinstance(q, unicode):
+            q = q.encode('utf-8')
+        self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q)
+
+    def __call__(self, browser, verbose, timeout = 5.):
+        if verbose:
+            print 'Query:', self.urldata
+
+        try:
+            raw = browser.open_novisit(self.urldata, timeout=timeout).read()
+        except Exception, e:
+            report(verbose)
+            if callable(getattr(e, 'getcode', None)) and \
+                    e.getcode() == 404:
+                return
+            raise
+        if '<title>404 - ' in raw:
+            return
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                resolve_entities=True)[0]
+
+        try:
+            feed = soupparser.fromstring(raw)
+        except:
+            try:
+                #remove ASCII invalid chars
+                feed = soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                return None, self.urldata
+
+        #nb of page
+        try:
+            nbresults = self.renbres.findall(feed.xpath("//*[@class='resultCount']")[0].text)
+        except:
+            return None, self.urldata
+
+        pages =[feed]
+        if len(nbresults) > 1:
+            nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1])))
+            for i in xrange(2, nbpagetoquery + 1):
+                try:
+                    urldata = self.urldata + '&page=' + str(i)
+                    raw = browser.open_novisit(urldata, timeout=timeout).read()
+                except Exception, e:
+                    continue
+                if '<title>404 - ' in raw:
+                    continue
+                raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                        resolve_entities=True)[0]
+                try:
+                    feed = soupparser.fromstring(raw)
+                except:
+                    try:
+                        #remove ASCII invalid chars
+                        feed = soupparser.fromstring(clean_ascii_chars(raw))
+                    except:
+                        continue
+                pages.append(feed)
+
+        results = []
+        for x in pages:
+            results.extend([i.getparent().get('href') \
+                for i in x.xpath("//a/span[@class='srTitle']")])
+        return results[:self.max_results], self.baseurl
+
+
+class ResultList(list):
+
+    def __init__(self, baseurl, lang = 'all'):
+        self.baseurl = baseurl
+        self.lang = lang
+        self.repub = re.compile(u'\((.*)\)')
+        self.rerat = re.compile(u'([0-9.]+)')
+        self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
+        self.reoutp = re.compile(r'(?s)<em>--This text ref.*?</em>')
+        self.recom = re.compile(r'(?s)<!--.*?-->')
+        self.republi = re.compile(u'(Editeur|Publisher|Verlag)', re.I)
+        self.reisbn = re.compile(u'(ISBN-10|ISBN-10|ASIN)', re.I)
+        self.relang = re.compile(u'(Language|Langue|Sprache)', re.I)
+        self.reratelt = re.compile(u'(Average\s*Customer\s*Review|Moyenne\s*des\s*commentaires\s*client|Durchschnittliche\s*Kundenbewertung)', re.I)
+        self.reprod = re.compile(u'(Product\s*Details|D.tails\s*sur\s*le\s*produit|Produktinformation)', re.I)
+
+    def strip_tags_etree(self, etreeobj, invalid_tags):
+        for (itag, rmv) in invalid_tags.iteritems():
+            if rmv:
+                for elts in etreeobj.getiterator(itag):
+                    elts.drop_tree()
+            else:
+                for elts in etreeobj.getiterator(itag):
+                    elts.drop_tag()
+
+    def clean_entry(self, entry, invalid_tags = {'script': True},
+            invalid_id = (), invalid_class=()):
+        #invalid_tags: remove tag and keep content if False else remove
+        #remove tags
+        if invalid_tags:
+            self.strip_tags_etree(entry, invalid_tags)
+        #remove id
+        if invalid_id:
+            for eltid in invalid_id:
+                elt = entry.get_element_by_id(eltid)
+                if elt is not None:
+                    elt.drop_tree()
+        #remove class
+        if invalid_class:
+            for eltclass in invalid_class:
+                elts = entry.find_class(eltclass)
+                if elts is not None:
+                    for elt in elts:
+                        elt.drop_tree()
+
+    def get_title(self, entry):
+        title = entry.get_element_by_id('btAsinTitle')
+        if title is not None:
+            title = title.text
+        return unicode(title.replace('\n', '').strip())
+
+    def get_authors(self, entry):
+        author = entry.get_element_by_id('btAsinTitle')
+        while author.getparent().tag != 'div':
+            author = author.getparent()
+        author = author.getparent()
+        authortext = []
+        for x in author.getiterator('a'):
+            authortext.append(unicode(x.text_content().strip()))
+        return authortext
+
+    def get_description(self, entry, verbose):
+        try:
+            description = entry.get_element_by_id("productDescription").find("div[@class='content']")
+            inv_class = ('seeAll', 'emptyClear')
+            inv_tags ={'img': True, 'a': False}
+            self.clean_entry(description, invalid_tags=inv_tags, invalid_class=inv_class)
+            description = html.tostring(description, method='html', encoding=unicode).strip()
+            # remove all attributes from tags
+            description = self.reattr.sub(r'<\1>', description)
+            # Remove the notice about text referring to out of print editions
+            description = self.reoutp.sub('', description)
+            # Remove comments
+            description = self.recom.sub('', description)
+            return unicode(sanitize_comments_html(description))
+        except:
+            report(verbose)
+            return None
+
+    def get_tags(self, entry, browser, verbose):
+        try:
+            tags = entry.get_element_by_id('tagContentHolder')
+            testptag = tags.find_class('see-all')
+            if testptag:
+                for x in testptag:
+                    alink = x.xpath('descendant-or-self::a')
+                    if alink:
+                        if alink[0].get('class') == 'tgJsActive':
+                            continue
+                        link = self.baseurl + alink[0].get('href')
+                        entry = self.get_individual_metadata(browser, link, verbose)
+                        tags = entry.get_element_by_id('tagContentHolder')
+                        break
+            tags = [a.text for a in tags.getiterator('a') if a.get('rel') == 'tag']
+        except:
+            report(verbose)
+            tags = []
+        return tags
+
+    def get_book_info(self, entry, mi, verbose):
+        try:
+            entry = entry.get_element_by_id('SalesRank').getparent()
+        except:
+            try:
+                for z in entry.getiterator('h2'):
+                    if self.reprod.search(z.text_content()):
+                        entry = z.getparent().find("div[@class='content']/ul")
+                        break
+            except:
+                report(verbose)
+                return mi
+        elts = entry.findall('li')
+        #pub & date
+        elt = filter(lambda x: self.republi.search(x.find('b').text), elts)
+        if elt:
+            pub = elt[0].find('b').tail
+            mi.publisher = unicode(self.repub.sub('', pub).strip())
+            d = self.repub.search(pub)
+            if d is not None:
+                d = d.group(1)
+                try:
+                    default = utcnow().replace(day=15)
+                    if self.lang != 'all':
+                        d = replace_months(d, self.lang)
+                    d = parse_date(d, assume_utc=True, default=default)
+                    mi.pubdate = d
+                except:
+                    report(verbose)
+        #ISBN
+        elt = filter(lambda x: self.reisbn.search(x.find('b').text), elts)
+        if elt:
+            isbn = elt[0].find('b').tail.replace('-', '').strip()
+            if check_isbn(isbn):
+                mi.isbn = unicode(isbn)
+            elif len(elt) > 1:
+                isbn = elt[1].find('b').tail.replace('-', '').strip()
+                if check_isbn(isbn):
+                    mi.isbn = unicode(isbn)
+        #Langue
+        elt = filter(lambda x: self.relang.search(x.find('b').text), elts)
+        if elt:
+            langue = elt[0].find('b').tail.strip()
+            if langue:
+                mi.language = unicode(langue)
+        #ratings
+        elt = filter(lambda x: self.reratelt.search(x.find('b').text), elts)
+        if elt:
+            ratings = elt[0].find_class('swSprite')
+            if ratings:
+                ratings = self.rerat.findall(ratings[0].get('title'))
+                if len(ratings) == 2:
+                    mi.rating = float(ratings[0])/float(ratings[1]) * 5
+        return mi
+
+    def fill_MI(self, entry, title, authors, browser, verbose):
+        mi = MetaInformation(title, authors)
+        mi.author_sort = authors_to_sort_string(authors)
+        mi.comments = self.get_description(entry, verbose)
+        mi = self.get_book_info(entry, mi, verbose)
+        mi.tags = self.get_tags(entry, browser, verbose)
+        return mi
+
+    def get_individual_metadata(self, browser, linkdata, verbose):
+        try:
+            raw = browser.open_novisit(linkdata).read()
+        except Exception, e:
+            report(verbose)
+            if callable(getattr(e, 'getcode', None)) and \
+                    e.getcode() == 404:
+                return
+            raise
+        if '<title>404 - ' in raw:
+            report(verbose)
+            return
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                resolve_entities=True)[0]
+        try:
+            return soupparser.fromstring(raw)
+        except:
+            try:
+                #remove ASCII invalid chars
+                return soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                report(verbose)
+                return
+
+    def populate(self, entries, browser, verbose=False):
+        for x in entries:
+            try:
+                entry = self.get_individual_metadata(browser, x, verbose)
+                # clean results
+                # inv_ids = ('divsinglecolumnminwidth', 'sims.purchase', 'AutoBuyXGetY', 'A9AdsMiddleBoxTop')
+                # inv_class = ('buyingDetailsGrid', 'productImageGrid')
+                # inv_tags ={'script': True, 'style': True, 'form': False}
+                # self.clean_entry(entry, invalid_id=inv_ids)
+                title = self.get_title(entry)
+                authors = self.get_authors(entry)
+            except Exception, e:
+                if verbose:
+                    print 'Failed to get all details for an entry'
+                    print e
+                    print 'URL who failed:', x
+                    report(verbose)
+                continue
+            self.append(self.fill_MI(entry, title, authors, browser, verbose))
+
+
+def search(title=None, author=None, publisher=None, isbn=None,
+        max_results=5, verbose=False, keywords=None, lang='all'):
+    br = browser()
+    entries, baseurl = Query(title=title, author=author, isbn=isbn, publisher=publisher,
+        keywords=keywords, max_results=max_results,rlang=lang)(br, verbose)
+
+    if entries is None or len(entries) == 0:
+        return
+
+    #List of entry
+    ans = ResultList(baseurl, lang)
+    ans.populate(entries, br, verbose)
+    return ans
+
+
+def option_parser():
+    parser = OptionParser(textwrap.dedent(\
+    _('''\
+        %prog [options]
+
+        Fetch book metadata from Amazon. You must specify one of title, author,
+        ISBN, publisher or keywords. Will fetch a maximum of 10 matches,
+        so you should make your query as specific as possible.
+        You can chose the language for metadata search with amazon.com:
+        All & english & french & german & spanish
+    '''
+    )))
+    parser.add_option('-t', '--title', help='Book title')
+    parser.add_option('-a', '--author', help='Book author(s)')
+    parser.add_option('-p', '--publisher', help='Book publisher')
+    parser.add_option('-i', '--isbn', help='Book ISBN')
+    parser.add_option('-k', '--keywords', help='Keywords')
+    parser.add_option('-m', '--max-results', default=10,
+        help='Maximum number of results to fetch')
+    parser.add_option('-l', '--lang', default='all',
+        help='Chosen language for metadata search (all, en, fr, es, de)')
+    parser.add_option('-v', '--verbose', default=0, action='count',
+        help='Be more verbose about errors')
+    return parser
+
+
+def main(args=sys.argv):
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+    try:
+        results = search(opts.title, opts.author, isbn=opts.isbn, publisher=opts.publisher,
+            keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results,
+            lang=opts.lang)
+    except AssertionError:
+        report(True)
+        parser.print_help()
+        return 1
+    if results is None or len(results) == 0:
+        print 'No result found for this search!'
+        return 0
+    for result in results:
+        print unicode(result).encode(preferred_encoding, 'replace')
+        print
+
+if __name__ == '__main__':
+    sys.exit(main())
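
Besides the per-locale MetadataSource plugins, the module is runnable on its own through main(); programmatically the entry point is its search() function, which returns a ResultList of MetaInformation objects. A short usage sketch (the title string is just an example):

    from calibre.ebooks.metadata.amazonfr import search

    results = search(title='Le Petit Prince', max_results=5, verbose=True, lang='fr')
    if results:
        for mi in results:
            # Each result is a MetaInformation object.
            print mi.title, mi.authors, mi.isbn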
src/calibre/ebooks/metadata/fictionwise.py (new file, 390 lines)
@@ -0,0 +1,390 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2010, sengian <sengian1@gmail.com>'
+__docformat__ = 'restructuredtext en'
+
+import sys, textwrap, re, traceback, socket
+from urllib import urlencode
+
+from lxml.html import soupparser, tostring
+
+from calibre import browser, preferred_encoding
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.metadata import MetaInformation, check_isbn, \
+    authors_to_sort_string
+from calibre.library.comments import sanitize_comments_html
+from calibre.ebooks.metadata.fetch import MetadataSource
+from calibre.utils.config import OptionParser
+from calibre.utils.date import parse_date, utcnow
+from calibre.utils.cleantext import clean_ascii_chars
+
+class Fictionwise(MetadataSource): # {{{
+
+    author = 'Sengian'
+    name = 'Fictionwise'
+    description = _('Downloads metadata from Fictionwise')
+
+    has_html_comments = True
+
+    def fetch(self):
+        try:
+            self.results = search(self.title, self.book_author, self.publisher,
+                self.isbn, max_results=10, verbose=self.verbose)
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+    # }}}
+
+class FictionwiseError(Exception):
+    pass
+
+def report(verbose):
+    if verbose:
+        traceback.print_exc()
+
+class Query(object):
+
+    BASE_URL = 'http://www.fictionwise.com/servlet/mw'
+
+    def __init__(self, title=None, author=None, publisher=None, keywords=None, max_results=20):
+        assert not(title is None and author is None and publisher is None and keywords is None)
+        assert (max_results < 21)
+
+        self.max_results = int(max_results)
+        q = {   'template' : 'searchresults_adv.htm' ,
+                'searchtitle' : '',
+                'searchauthor' : '',
+                'searchpublisher' : '',
+                'searchkeyword' : '',
+                #possibilities startoflast, fullname, lastfirst
+                'searchauthortype' : 'startoflast',
+                'searchcategory' : '',
+                'searchcategory2' : '',
+                'searchprice_s' : '0',
+                'searchprice_e' : 'ANY',
+                'searchformat' : '',
+                'searchgeo' : 'US',
+                'searchfwdatetype' : '',
+                #maybe use dates fields if needed?
+                #'sortorder' : 'DESC',
+                #many options available: b.SortTitle, a.SortName,
+                #b.DateFirstPublished, b.FWPublishDate
+                'sortby' : 'b.SortTitle'
+            }
+        if title is not None:
+            q['searchtitle'] = title
+        if author is not None:
+            q['searchauthor'] = author
+        if publisher is not None:
+            q['searchpublisher'] = publisher
+        if keywords is not None:
+            q['searchkeyword'] = keywords
+
+        if isinstance(q, unicode):
+            q = q.encode('utf-8')
+        self.urldata = urlencode(q)
+
+    def __call__(self, browser, verbose, timeout = 5.):
+        if verbose:
+            print _('Query: %s') % self.BASE_URL+self.urldata
+
+        try:
+            raw = browser.open_novisit(self.BASE_URL, self.urldata, timeout=timeout).read()
+        except Exception, e:
+            report(verbose)
+            if callable(getattr(e, 'getcode', None)) and \
+                    e.getcode() == 404:
+                return
+            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+                raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
+            raise FictionwiseError(_('Fictionwise encountered an error.'))
+        if '<title>404 - ' in raw:
+            return
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                resolve_entities=True)[0]
+        try:
+            feed = soupparser.fromstring(raw)
+        except:
+            try:
+                #remove ASCII invalid chars
+                feed = soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                return None
+
+        # get list of results as links
+        results = feed.xpath("//table[3]/tr/td[2]/table/tr/td/p/table[2]/tr[@valign]")
+        results = results[:self.max_results]
+        results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results]
+        #return feed if no links ie normally a single book or nothing
+        if not results:
+            results = [feed]
+        return results
+
+class ResultList(list):
+
+    BASE_URL = 'http://www.fictionwise.com'
+    COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}
+
+    def __init__(self):
+        self.retitle = re.compile(r'\[[^\[\]]+\]')
+        self.rechkauth = re.compile(r'.*book\s*by', re.I)
+        self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
+        self.repub = re.compile(r'.*publisher\s*:\s*', re.I)
+        self.redate = re.compile(r'.*release\s*date\s*:\s*', re.I)
+        self.retag = re.compile(r'.*book\s*category\s*:\s*', re.I)
+        self.resplitbr = re.compile(r'<br[^>]*>', re.I)
+        self.recomment = re.compile(r'(?s)<!--.*?-->')
+        self.reimg = re.compile(r'<img[^>]*>', re.I)
+        self.resanitize = re.compile(r'\[HTML_REMOVED\]\s*', re.I)
+        self.renbcom = re.compile('(?P<nbcom>\d+)\s*Reader Ratings:')
+        self.recolor = re.compile('(?P<ncolor>[^/]+).gif')
+        self.resplitbrdiv = re.compile(r'(<br[^>]+>|</?div[^>]*>)', re.I)
+        self.reisbn = re.compile(r'.*ISBN\s*:\s*', re.I)
+
+    def strip_tags_etree(self, etreeobj, invalid_tags):
+        for (itag, rmv) in invalid_tags.iteritems():
+            if rmv:
+                for elts in etreeobj.getiterator(itag):
+                    elts.drop_tree()
+            else:
+                for elts in etreeobj.getiterator(itag):
+                    elts.drop_tag()
+
+    def clean_entry(self, entry, invalid_tags = {'script': True},
+            invalid_id = (), invalid_class=(), invalid_xpath = ()):
+        #invalid_tags: remove tag and keep content if False else remove
+        #remove tags
+        if invalid_tags:
+            self.strip_tags_etree(entry, invalid_tags)
+        #remove xpath
+        if invalid_xpath:
+            for eltid in invalid_xpath:
+                elt = entry.xpath(eltid)
+                for el in elt:
+                    el.drop_tree()
+        #remove id
+        if invalid_id:
+            for eltid in invalid_id:
+                elt = entry.get_element_by_id(eltid)
+                if elt is not None:
+                    elt.drop_tree()
+        #remove class
+        if invalid_class:
+            for eltclass in invalid_class:
+                elts = entry.find_class(eltclass)
+                if elts is not None:
+                    for elt in elts:
+                        elt.drop_tree()
+
+    def output_entry(self, entry, prettyout = True, htmlrm="\d+"):
+        out = tostring(entry, pretty_print=prettyout)
+        #try to work around tostring to remove this encoding for exemle
+        reclean = re.compile('(\n+|\t+|\r+|&#'+htmlrm+';)')
+        return reclean.sub('', out)
+
+    def get_title(self, entry):
+        title = entry.findtext('./')
+        return self.retitle.sub('', title).strip()
+
+    def get_authors(self, entry):
+        authortext = entry.find('./br').tail
+        if not self.rechkauth.search(authortext):
+            return []
+        authortext = self.rechkauth.sub('', authortext)
+        return [a.strip() for a in authortext.split('&')]
+
+    def get_rating(self, entrytable, verbose):
+        nbcomment = tostring(entrytable.getprevious())
+        try:
+            nbcomment = self.renbcom.search(nbcomment).group("nbcom")
+        except:
+            report(verbose)
+            return None
+        hval = dict((self.COLOR_VALUES[self.recolor.search(image.get('src', default='NA.gif')).group("ncolor")],
+            float(image.get('height', default=0))) \
+                for image in entrytable.getiterator('img'))
+        #ratings as x/5
+        return float(1.25*sum(k*v for (k, v) in hval.iteritems())/sum(hval.itervalues()))
+
+    def get_description(self, entry):
+        description = self.output_entry(entry.xpath('./p')[1],htmlrm="")
+        description = self.redesc.search(description)
+        if not description or not description.group("desc"):
+            return None
+        #remove invalid tags
+        description = self.reimg.sub('', description.group("desc"))
+        description = self.recomment.sub('', description)
+        description = self.resanitize.sub('', sanitize_comments_html(description))
+        return _('SUMMARY:\n %s') % re.sub(r'\n\s+</p>','\n</p>', description)
+
+    def get_publisher(self, entry):
+        publisher = self.output_entry(entry.xpath('./p')[1])
+        publisher = filter(lambda x: self.repub.search(x) is not None,
+            self.resplitbr.split(publisher))
+        if not len(publisher):
+            return None
+        publisher = self.repub.sub('', publisher[0])
+        return publisher.split(',')[0].strip()
+
+    def get_tags(self, entry):
+        tag = self.output_entry(entry.xpath('./p')[1])
+        tag = filter(lambda x: self.retag.search(x) is not None,
+            self.resplitbr.split(tag))
+        if not len(tag):
+            return []
+        return map(lambda x: x.strip(), self.retag.sub('', tag[0]).split('/'))
+
+    def get_date(self, entry, verbose):
+        date = self.output_entry(entry.xpath('./p')[1])
+        date = filter(lambda x: self.redate.search(x) is not None,
+            self.resplitbr.split(date))
+        if not len(date):
+            return None
+        try:
+            d = self.redate.sub('', date[0])
+            if d:
+                default = utcnow().replace(day=15)
+                d = parse_date(d, assume_utc=True, default=default)
+            else:
+                d = None
+        except:
+            report(verbose)
+            d = None
+        return d
+
+    def get_ISBN(self, entry):
+        isbns = self.output_entry(entry.xpath('./p')[2])
+        isbns = filter(lambda x: self.reisbn.search(x) is not None,
+            self.resplitbrdiv.split(isbns))
+        if not len(isbns):
+            return None
+        isbns = [self.reisbn.sub('', x) for x in isbns if check_isbn(self.reisbn.sub('', x))]
+        return sorted(isbns, cmp=lambda x,y:cmp(len(x), len(y)))[-1]
+
+    def fill_MI(self, entry, title, authors, ratings, verbose):
+        mi = MetaInformation(title, authors)
+        mi.rating = ratings
+        mi.comments = self.get_description(entry)
+        mi.publisher = self.get_publisher(entry)
+        mi.tags = self.get_tags(entry)
+        mi.pubdate = self.get_date(entry, verbose)
+        mi.isbn = self.get_ISBN(entry)
+        mi.author_sort = authors_to_sort_string(authors)
+        return mi
+
+    def get_individual_metadata(self, browser, linkdata, verbose):
+        try:
+            raw = browser.open_novisit(self.BASE_URL + linkdata).read()
+        except Exception, e:
+            report(verbose)
+            if callable(getattr(e, 'getcode', None)) and \
+                    e.getcode() == 404:
+                return
+            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+                raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
+            raise FictionwiseError(_('Fictionwise encountered an error.'))
+        if '<title>404 - ' in raw:
+            report(verbose)
+            return
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                resolve_entities=True)[0]
+        try:
+            return soupparser.fromstring(raw)
+        except:
+            try:
+                #remove ASCII invalid chars
+                return soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                return None
+
+    def populate(self, entries, browser, verbose=False):
+        inv_tags ={'script': True, 'a': False, 'font': False, 'strong': False, 'b': False,
+            'ul': False, 'span': False}
+        inv_xpath =('./table',)
+        #single entry
+        if len(entries) == 1 and not isinstance(entries[0], str):
+            try:
+                entry = entries[0].xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")[0]
+                self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
+                title = self.get_title(entry)
+                #maybe strenghten the search
+                ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
+                authors = self.get_authors(entry)
+            except Exception, e:
+                if verbose:
+                    print _('Failed to get all details for an entry')
+                    print e
+                return
+            self.append(self.fill_MI(entry, title, authors, ratings, verbose))
+        else:
+            #multiple entries
+            for x in entries:
+                try:
+                    entry = self.get_individual_metadata(browser, x, verbose)
+                    entry = entry.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")[0]
+                    self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
+                    title = self.get_title(entry)
+                    #maybe strenghten the search
+                    ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
+                    authors = self.get_authors(entry)
+                except Exception, e:
+                    if verbose:
+                        print _('Failed to get all details for an entry')
+                        print e
+                    continue
+                self.append(self.fill_MI(entry, title, authors, ratings, verbose))
+
+
+def search(title=None, author=None, publisher=None, isbn=None,
+        min_viewability='none', verbose=False, max_results=5,
+        keywords=None):
+    br = browser()
+    entries = Query(title=title, author=author, publisher=publisher,
+        keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)
+
+    #List of entry
+    ans = ResultList()
+    ans.populate(entries, br, verbose)
+    return ans
+
+
+def option_parser():
+    parser = OptionParser(textwrap.dedent(\
+    _('''\
+        %prog [options]
+
+        Fetch book metadata from Fictionwise. You must specify one of title, author,
+        or keywords. No ISBN specification possible. Will fetch a maximum of 20 matches,
+        so you should make your query as specific as possible.
+    ''')
+    ))
+    parser.add_option('-t', '--title', help=_('Book title'))
+    parser.add_option('-a', '--author', help=_('Book author(s)'))
+    parser.add_option('-p', '--publisher', help=_('Book publisher'))
+    parser.add_option('-k', '--keywords', help=_('Keywords'))
+    parser.add_option('-m', '--max-results', default=20,
+        help=_('Maximum number of results to fetch'))
+    parser.add_option('-v', '--verbose', default=0, action='count',
+        help=_('Be more verbose about errors'))
+    return parser
+
+def main(args=sys.argv):
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+    try:
+        results = search(opts.title, opts.author, publisher=opts.publisher,
+            keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
+    except AssertionError:
+        report(True)
+        parser.print_help()
+        return 1
+    if results is None or len(results) == 0:
+        print _('No result found for this search!')
+        return 0
+    for result in results:
+        print unicode(result).encode(preferred_encoding, 'replace')
+        print
+
+if __name__ == '__main__':
+    sys.exit(main())
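
The least obvious part of the new file is get_rating: Fictionwise renders reader ratings as colored bar images, so the code buckets each image by color (COLOR_VALUES, 0-4) weighted by the image height, then rescales the weighted average to a 5-point scale with the 1.25 factor. A worked example under those assumptions:

    COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}

    # Say the page shows a 40px blue bar and a 10px green bar:
    hval = {4: 40.0, 3: 10.0}
    rating = 1.25 * sum(k * v for k, v in hval.items()) / sum(hval.values())
    # (4*40 + 3*10) / 50 = 3.8 on the 0-4 color scale; * 1.25 -> 4.75 out of 5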
@@ -10,7 +10,8 @@ from copy import deepcopy
 
 from lxml.html import soupparser
 
-from calibre.utils.date import parse_date, utcnow
+from calibre.utils.date import parse_date, utcnow, replace_months
+from calibre.utils.cleantext import clean_ascii_chars
 from calibre import browser, preferred_encoding
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.metadata import MetaInformation, check_isbn, \
@@ -71,31 +72,16 @@ class NiceBooksCovers(CoverDownload):
                 traceback.format_exc(), self.name))
 
 
+class NiceBooksError(Exception):
+    pass
+
+class ISBNNotFound(NiceBooksError):
+    pass
+
 def report(verbose):
     if verbose:
-        import traceback
         traceback.print_exc()
 
-def replace_monthsfr(datefr):
-    # Replace french months by english equivalent for parse_date
-    frtoen = {
-        u'[jJ]anvier': u'jan',
-        u'[fF].vrier': u'feb',
-        u'[mM]ars': u'mar',
-        u'[aA]vril': u'apr',
-        u'[mM]ai': u'may',
-        u'[jJ]uin': u'jun',
-        u'[jJ]uillet': u'jul',
-        u'[aA]o.t': u'aug',
-        u'[sS]eptembre': u'sep',
-        u'[Oo]ctobre': u'oct',
-        u'[nN]ovembre': u'nov',
-        u'[dD].cembre': u'dec' }
-    for k in frtoen.iterkeys():
-        tmp = re.sub(k, frtoen[k], datefr)
-        if tmp <> datefr: break
-    return tmp
-
 class Query(object):
 
     BASE_URL = 'http://fr.nicebooks.com/'
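
This hunk drops the module-private replace_monthsfr in favour of the shared replace_months helper now imported from calibre.utils.date (amazonfr.py uses it too), which maps localized month names to English so parse_date can handle them. An illustrative call, assuming the signature shown in these diffs; the exact output string is an expectation, not verified:

    from calibre.utils.date import parse_date, replace_months

    d = replace_months(u'15 janvier 2010', 'fr')   # expected: u'15 jan 2010'
    pubdate = parse_date(d, assume_utc=True)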
@@ -119,7 +105,7 @@ class Query(object):
 
     def __call__(self, browser, verbose, timeout = 5.):
         if verbose:
-            print 'Query:', self.BASE_URL+self.urldata
+            print _('Query: %s') % self.BASE_URL+self.urldata
 
         try:
             raw = browser.open_novisit(self.BASE_URL+self.urldata, timeout=timeout).read()
@@ -128,7 +114,9 @@ class Query(object):
             if callable(getattr(e, 'getcode', None)) and \
                     e.getcode() == 404:
                 return
-            raise
+            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+                raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
+            raise NiceBooksError(_('Nicebooks encountered an error.'))
         if '<title>404 - ' in raw:
             return
         raw = xml_to_unicode(raw, strip_encoding_pats=True,
@@ -136,7 +124,11 @@ class Query(object):
         try:
             feed = soupparser.fromstring(raw)
         except:
-            return
+            try:
+                #remove ASCII invalid chars
+                feed = soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                return None
 
         #nb of page to call
         try:
@@ -161,7 +153,11 @@ class Query(object):
             try:
                 feed = soupparser.fromstring(raw)
             except:
-                continue
+                try:
+                    #remove ASCII invalid chars
+                    feed = soupparser.fromstring(clean_ascii_chars(raw))
+                except:
+                    continue
             pages.append(feed)
 
         results = []
@@ -180,14 +176,12 @@ class ResultList(list):
         self.reautclean = re.compile(u'\s*\(.*\)\s*')
 
     def get_title(self, entry):
-        # title = deepcopy(entry.find("div[@id='book-info']"))
         title = deepcopy(entry)
         title.remove(title.find("dl[@title='Informations sur le livre']"))
         title = ' '.join([i.text_content() for i in title.iterchildren()])
         return unicode(title.replace('\n', ''))
 
     def get_authors(self, entry):
-        # author = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
         author = entry.find("dl[@title='Informations sur le livre']")
         authortext = []
         for x in author.getiterator('dt'):
@@ -223,7 +217,7 @@ class ResultList(list):
             d = x.getnext().text_content()
             try:
                 default = utcnow().replace(day=15)
-                d = replace_monthsfr(d)
+                d = replace_months(d, 'fr')
                 d = parse_date(d, assume_utc=True, default=default)
                 mi.pubdate = d
             except:
@@ -234,11 +228,6 @@ class ResultList(list):
         mi = MetaInformation(title, authors)
         mi.author_sort = authors_to_sort_string(authors)
         mi.comments = self.get_description(entry, verbose)
-        # entry = entry.find("dl[@title='Informations sur le livre']")
-        # mi.publisher = self.get_publisher(entry)
-        # mi.pubdate = self.get_date(entry, verbose)
-        # mi.isbn = self.get_ISBN(entry)
-        # mi.language = self.get_language(entry)
         return self.get_book_info(entry, mi, verbose)
 
     def get_individual_metadata(self, browser, linkdata, verbose):
@@ -249,7 +238,9 @@ class ResultList(list):
             if callable(getattr(e, 'getcode', None)) and \
                     e.getcode() == 404:
                 return
-            raise
+            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+                raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
+            raise NiceBooksError(_('Nicebooks encountered an error.'))
         if '<title>404 - ' in raw:
             report(verbose)
             return
@@ -258,7 +249,11 @@ class ResultList(list):
         try:
             feed = soupparser.fromstring(raw)
         except:
-            return
+            try:
+                #remove ASCII invalid chars
+                feed = soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                return None
 
         # get results
         return feed.xpath("//div[@id='container']")[0]
@@ -292,13 +287,6 @@ class ResultList(list):
                 continue
             self.append(self.fill_MI(entry, title, authors, verbose))
 
-
-class NiceBooksError(Exception):
-    pass
-
-class ISBNNotFound(NiceBooksError):
-    pass
-
 class Covers(object):
 
     def __init__(self, isbn = None):
@ -329,11 +317,10 @@ class Covers(object):
|
|||||||
return cover, ext if ext else 'jpg'
|
return cover, ext if ext else 'jpg'
|
||||||
except Exception, err:
|
except Exception, err:
|
||||||
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
|
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
|
||||||
err = NiceBooksError(_('Nicebooks timed out. Try again later.'))
|
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
|
||||||
raise err
|
|
||||||
if not len(self.urlimg):
|
if not len(self.urlimg):
|
||||||
if not self.isbnf:
|
if not self.isbnf:
|
||||||
raise ISBNNotFound('ISBN: '+self.isbn+_(' not found.'))
|
raise ISBNNotFound(_('ISBN: %s not found.') % self.isbn)
|
||||||
raise NiceBooksError(_('An errror occured with Nicebooks cover fetcher'))
|
raise NiceBooksError(_('An errror occured with Nicebooks cover fetcher'))
|
||||||
|
|
||||||
|
|
||||||
@ -341,10 +328,10 @@ def search(title=None, author=None, publisher=None, isbn=None,
|
|||||||
max_results=5, verbose=False, keywords=None):
|
max_results=5, verbose=False, keywords=None):
|
||||||
br = browser()
|
br = browser()
|
||||||
entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
|
entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
|
||||||
keywords=keywords, max_results=max_results)(br, verbose)
|
keywords=keywords, max_results=max_results)(br, verbose,timeout = 10.)
|
||||||
|
|
||||||
if entries is None or len(entries) == 0:
|
if entries is None or len(entries) == 0:
|
||||||
return
|
return None
|
||||||
|
|
||||||
#List of entry
|
#List of entry
|
||||||
ans = ResultList()
|
ans = ResultList()
|
||||||
@ -364,28 +351,28 @@ def cover_from_isbn(isbn, timeout = 5.):
|
|||||||
|
|
||||||
def option_parser():
|
def option_parser():
|
||||||
parser = OptionParser(textwrap.dedent(\
|
parser = OptionParser(textwrap.dedent(\
|
||||||
'''\
|
_('''\
|
||||||
%prog [options]
|
%prog [options]
|
||||||
|
|
||||||
Fetch book metadata from Nicebooks. You must specify one of title, author,
|
Fetch book metadata from Nicebooks. You must specify one of title, author,
|
||||||
ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
|
ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
|
||||||
so you should make your query as specific as possible.
|
so you should make your query as specific as possible.
|
||||||
It can also get covers if the option is activated.
|
It can also get covers if the option is activated.
|
||||||
'''
|
''')
|
||||||
))
|
))
|
||||||
parser.add_option('-t', '--title', help='Book title')
|
parser.add_option('-t', '--title', help=_('Book title'))
|
||||||
parser.add_option('-a', '--author', help='Book author(s)')
|
parser.add_option('-a', '--author', help=_('Book author(s)'))
|
||||||
parser.add_option('-p', '--publisher', help='Book publisher')
|
parser.add_option('-p', '--publisher', help=_('Book publisher'))
|
||||||
parser.add_option('-i', '--isbn', help='Book ISBN')
|
parser.add_option('-i', '--isbn', help=_('Book ISBN'))
|
||||||
parser.add_option('-k', '--keywords', help='Keywords')
|
parser.add_option('-k', '--keywords', help=_('Keywords'))
|
||||||
parser.add_option('-c', '--covers', default=0,
|
parser.add_option('-c', '--covers', default=0,
|
||||||
help='Covers: 1-Check/ 2-Download')
|
help=_('Covers: 1-Check/ 2-Download'))
|
||||||
parser.add_option('-p', '--coverspath', default='',
|
parser.add_option('-p', '--coverspath', default='',
|
||||||
help='Covers files path')
|
help=_('Covers files path'))
|
||||||
parser.add_option('-m', '--max-results', default=20,
|
parser.add_option('-m', '--max-results', default=20,
|
||||||
help='Maximum number of results to fetch')
|
help=_('Maximum number of results to fetch'))
|
||||||
parser.add_option('-v', '--verbose', default=0, action='count',
|
parser.add_option('-v', '--verbose', default=0, action='count',
|
||||||
help='Be more verbose about errors')
|
help=_('Be more verbose about errors'))
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
def main(args=sys.argv):
|
def main(args=sys.argv):
|
||||||
@ -400,15 +387,15 @@ def main(args=sys.argv):
|
|||||||
parser.print_help()
|
parser.print_help()
|
||||||
return 1
|
return 1
|
||||||
if results is None or len(results) == 0:
|
if results is None or len(results) == 0:
|
||||||
print 'No result found for this search!'
|
print _('No result found for this search!')
|
||||||
return 0
|
return 0
|
||||||
for result in results:
|
for result in results:
|
||||||
print unicode(result).encode(preferred_encoding, 'replace')
|
print unicode(result).encode(preferred_encoding, 'replace')
|
||||||
covact = int(opts.covers)
|
covact = int(opts.covers)
|
||||||
if covact == 1:
|
if covact == 1:
|
||||||
textcover = 'No cover found!'
|
textcover = _('No cover found!')
|
||||||
if check_for_cover(result.isbn):
|
if check_for_cover(result.isbn):
|
||||||
textcover = 'A cover was found for this book'
|
textcover = _('A cover was found for this book')
|
||||||
print textcover
|
print textcover
|
||||||
elif covact == 2:
|
elif covact == 2:
|
||||||
cover_data, ext = cover_from_isbn(result.isbn)
|
cover_data, ext = cover_from_isbn(result.isbn)
|
||||||
@ -417,7 +404,7 @@ def main(args=sys.argv):
|
|||||||
cpath = os.path.normpath(opts.coverspath + '/' + result.isbn)
|
cpath = os.path.normpath(opts.coverspath + '/' + result.isbn)
|
||||||
oname = os.path.abspath(cpath+'.'+ext)
|
oname = os.path.abspath(cpath+'.'+ext)
|
||||||
open(oname, 'wb').write(cover_data)
|
open(oname, 'wb').write(cover_data)
|
||||||
print 'Cover saved to file ', oname
|
print _('Cover saved to file '), oname
|
||||||
print
|
print
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
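The bulk of the hunks above are mechanical: every user-visible string gains the gettext `_()` wrapper so the CLI output can be translated. For readers unfamiliar with the convention, a minimal sketch (the domain name here is made up; calibre installs `_` globally through its own startup code):

    import gettext

    # gettext.install() puts _() into builtins; translations are looked
    # up by domain, and untranslated strings pass through unchanged.
    gettext.install('example')

    print _('No result found for this search!')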
@@ -8,12 +8,12 @@ __docformat__ = 'restructuredtext en'

 from threading import Thread
 from Queue import Empty
-import os, time, sys, shutil
+import os, time, sys, shutil, json

 from calibre.utils.ipc.job import ParallelJob
 from calibre.utils.ipc.server import Server
 from calibre.ptempfile import PersistentTemporaryDirectory, TemporaryDirectory
-from calibre import prints
+from calibre import prints, isbytestring
 from calibre.constants import filesystem_encoding


@@ -194,14 +194,42 @@ class SaveWorker(Thread):
         self.daemon = True
         self.path, self.opts = path, opts
         self.ids = ids
-        self.library_path = db.library_path
+        self.db = db
         self.canceled = False
         self.result_queue = result_queue
         self.error = None
         self.spare_server = spare_server
         self.start()

+    def collect_data(self, ids):
+        from calibre.ebooks.metadata.opf2 import metadata_to_opf
+        data = {}
+        for i in set(ids):
+            mi = self.db.get_metadata(i, index_is_id=True, get_cover=True)
+            opf = metadata_to_opf(mi)
+            if isbytestring(opf):
+                opf = opf.decode('utf-8')
+            cpath = None
+            if mi.cover:
+                cpath = mi.cover
+                if isbytestring(cpath):
+                    cpath = cpath.decode(filesystem_encoding)
+            formats = {}
+            if mi.formats:
+                for fmt in mi.formats:
+                    fpath = self.db.format_abspath(i, fmt, index_is_id=True)
+                    if fpath is not None:
+                        if isbytestring(fpath):
+                            fpath = fpath.decode(filesystem_encoding)
+                        formats[fmt.lower()] = fpath
+            data[i] = [opf, cpath, formats]
+        return data
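collect_data runs in the GUI process, where the database is available, and reduces each book to plain serializable values: the OPF metadata as a unicode XML string, the cover path (or None), and a format-to-file-path map. A sketch of the shape of one entry, with hypothetical paths:

    # What collect_data produces for a single book id (paths are made up):
    data = {
        42: [
            u'<?xml version="1.0"?>...OPF XML...',         # metadata_to_opf(mi)
            u'/home/user/library/Author/Title/cover.jpg',  # cover path, or None
            {u'epub': u'/home/user/library/Author/Title/Title.epub'},
        ],
    }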
     def run(self):
+        with TemporaryDirectory('save_to_disk_data') as tdir:
+            self._run(tdir)

+    def _run(self, tdir):
         from calibre.library.save_to_disk import config
         server = Server() if self.spare_server is None else self.spare_server
         ids = set(self.ids)
@@ -212,12 +240,19 @@ class SaveWorker(Thread):
         for pref in c.preferences:
             recs[pref.name] = getattr(self.opts, pref.name)

+        plugboards = self.db.prefs.get('plugboards', {})

         for i, task in enumerate(tasks):
             tids = [x[-1] for x in task]
+            data = self.collect_data(tids)
+            dpath = os.path.join(tdir, '%d.json'%i)
+            with open(dpath, 'wb') as f:
+                f.write(json.dumps(data, ensure_ascii=False).encode('utf-8'))

             job = ParallelJob('save_book',
                     'Save books (%d of %d)'%(i, len(tasks)),
                     lambda x,y:x,
-                    args=[tids, self.library_path, self.path, recs])
+                    args=[tids, dpath, plugboards, self.path, recs])
             jobs.add(job)
             server.add_job(job)
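One subtlety in the dump above: with ensure_ascii=False, json.dumps returns a unicode object in Python 2 whenever the input contains non-ASCII text, so the result has to be encoded by hand before being written to a binary file. A minimal sketch (the path is illustrative):

    import json

    data = {1: [u'caf\xe9', None, {}]}
    # ensure_ascii=False keeps non-ASCII characters as-is, but then the
    # result is a unicode object and must be encoded explicitly.
    serialized = json.dumps(data, ensure_ascii=False)
    with open('/tmp/0.json', 'wb') as f:
        f.write(serialized.encode('utf-8'))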
@@ -226,21 +261,21 @@ class SaveWorker(Thread):
             time.sleep(0.2)
             running = False
             for job in jobs:
-                job.update(consume_notifications=False)
-                while True:
-                    try:
-                        id, title, ok, tb = job.notifications.get_nowait()[0]
-                        if id in ids:
-                            self.result_queue.put((id, title, ok, tb))
-                            ids.remove(id)
-                    except Empty:
-                        break
+                self.get_notifications(job, ids)
                 if not job.is_finished:
                     running = True

             if not running:
                 break

+        for job in jobs:
+            if not job.result:
+                continue
+            for id_, title, ok, tb in job.result:
+                if id_ in ids:
+                    self.result_queue.put((id_, title, ok, tb))
+                    ids.remove(id_)

         server.close()
         time.sleep(1)
@@ -257,21 +292,39 @@ class SaveWorker(Thread):
             except:
                 pass

+    def get_notifications(self, job, ids):
+        job.update(consume_notifications=False)
+        while True:
+            try:
+                id, title, ok, tb = job.notifications.get_nowait()[0]
+                if id in ids:
+                    self.result_queue.put((id, title, ok, tb))
+                    ids.remove(id)
+            except Empty:
+                break

-def save_book(task, library_path, path, recs, notification=lambda x,y:x):
-    from calibre.library.database2 import LibraryDatabase2
-    db = LibraryDatabase2(library_path)
-    from calibre.library.save_to_disk import config, save_to_disk
+def save_book(ids, dpath, plugboards, path, recs, notification=lambda x,y:x):
+    from calibre.library.save_to_disk import config, save_serialized_to_disk
     from calibre.customize.ui import apply_null_metadata
     opts = config().parse()
     for name in recs:
         setattr(opts, name, recs[name])

+    results = []

     def callback(id, title, failed, tb):
+        results.append((id, title, not failed, tb))
         notification((id, title, not failed, tb))
         return True

-    with apply_null_metadata:
-        save_to_disk(db, task, path, opts, callback)
+    data_ = json.loads(open(dpath, 'rb').read().decode('utf-8'))
+    data = {}
+    for k, v in data_.iteritems():
+        data[int(k)] = v

+    with apply_null_metadata:
+        save_serialized_to_disk(ids, data, plugboards, path, opts, callback)

+    return results
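The worker reloads the JSON file before using it; because JSON object keys are always strings, the integer book ids have to be restored by hand, which is what the `data[int(k)] = v` loop does. A minimal sketch of the round trip:

    import json

    payload = {42: [u'<opf/>', None, {u'epub': u'/tmp/book.epub'}]}
    raw = json.dumps(payload, ensure_ascii=False).encode('utf-8')

    decoded = json.loads(raw.decode('utf-8'))
    assert decoded.keys() == [u'42']    # keys came back as strings
    data = dict((int(k), v) for k, v in decoded.iteritems())
    assert 42 in data                   # integer ids restored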
@@ -123,6 +123,8 @@ def _config():
               help=_('Download social metadata (tags/rating/etc.)'))
     c.add_opt('overwrite_author_title_metadata', default=True,
               help=_('Overwrite author and title with new metadata'))
+    c.add_opt('auto_download_cover', default=False,
+              help=_('Automatically download the cover, if available'))
     c.add_opt('enforce_cpu_limit', default=True,
               help=_('Limit max simultaneous jobs to number of CPUs'))
     c.add_opt('tag_browser_hidden_categories', default=set(),

@@ -427,11 +427,27 @@ class Saver(QObject): # {{{
         if not self.ids or not self.worker.is_alive():
             self.timer.stop()
             self.pd.hide()
+            while self.ids:
+                before = len(self.ids)
+                self.get_result()
+                if before == len(self.ids):
+                    for i in list(self.ids):
+                        self.failures.add(('id:%d'%i, 'Unknown error'))
+                        self.ids.remove(i)
+                    break
             if not self.callback_called:
+                try:
+                    self.worker.join(1.5)
+                except:
+                    pass # The worker was not yet started
                 self.callback(self.worker.path, self.failures, self.worker.error)
                 self.callback_called = True
             return

+        self.get_result()

+    def get_result(self):
         try:
             id, title, ok, tb = self.rq.get_nowait()
         except Empty:
@@ -441,6 +457,7 @@ class Saver(QObject): # {{{
         if not isinstance(title, unicode):
             title = str(title).decode(preferred_encoding, 'replace')
         self.pd.set_msg(_('Saved')+' '+title)

         if not ok:
             self.failures.add((title, tb))
 # }}}
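The `while self.ids` loop above is a drain-with-stall-detection pattern: keep pulling results off the queue, and if a full pass removes nothing, mark whatever is left as failed rather than spin forever. The same idea in isolation (the queue and id set here are illustrative):

    from Queue import Queue, Empty

    rq = Queue()
    pending = set([1, 2, 3])
    failures = set()

    while pending:
        before = len(pending)
        try:
            id_, title, ok, tb = rq.get_nowait()
            pending.discard(id_)
        except Empty:
            pass
        if before == len(pending):
            # No progress in a full pass: record the stragglers as
            # unknown failures and stop instead of looping forever.
            for i in list(pending):
                failures.add(('id:%d' % i, 'Unknown error'))
                pending.remove(i)
            break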
@@ -9,7 +9,7 @@ from threading import Thread

 from PyQt4.QtCore import Qt, QObject, SIGNAL, QVariant, pyqtSignal, \
                          QAbstractTableModel, QCoreApplication, QTimer
-from PyQt4.QtGui import QDialog, QItemSelectionModel
+from PyQt4.QtGui import QDialog, QItemSelectionModel, QIcon

 from calibre.gui2.dialogs.fetch_metadata_ui import Ui_FetchMetadata
 from calibre.gui2 import error_dialog, NONE, info_dialog, config
@@ -42,13 +42,14 @@ class Matches(QAbstractTableModel):

     def __init__(self, matches):
         self.matches = matches
+        self.yes_icon = QVariant(QIcon(I('ok.png')))
         QAbstractTableModel.__init__(self)

     def rowCount(self, *args):
         return len(self.matches)

     def columnCount(self, *args):
-        return 6
+        return 8

     def headerData(self, section, orientation, role):
         if role != Qt.DisplayRole:
@@ -61,6 +62,8 @@ class Matches(QAbstractTableModel):
             elif section == 3: text = _("Publisher")
             elif section == 4: text = _("ISBN")
             elif section == 5: text = _("Published")
+            elif section == 6: text = _("Has Cover")
+            elif section == 7: text = _("Has Summary")

             return QVariant(text)
         else:
@@ -71,8 +74,8 @@ class Matches(QAbstractTableModel):

     def data(self, index, role):
         row, col = index.row(), index.column()
+        book = self.matches[row]
         if role == Qt.DisplayRole:
-            book = self.matches[row]
             res = None
             if col == 0:
                 res = book.title
@@ -90,6 +93,11 @@ class Matches(QAbstractTableModel):
             if not res:
                 return NONE
             return QVariant(res)
+        elif role == Qt.DecorationRole:
+            if col == 6 and book.has_cover:
+                return self.yes_icon
+            if col == 7 and book.comments:
+                return self.yes_icon
         return NONE
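The two new columns carry no text at all: the model answers Qt.DecorationRole with an icon, and the view draws it in the cell. A stripped-down sketch of the same idiom (PyQt4; the model fields are illustrative):

    from PyQt4.QtCore import Qt, QVariant, QAbstractTableModel
    from PyQt4.QtGui import QIcon

    class FlagModel(QAbstractTableModel):

        def __init__(self, flags, icon_path):
            QAbstractTableModel.__init__(self)
            self.flags = flags                        # list of booleans
            self.yes_icon = QVariant(QIcon(icon_path))

        def rowCount(self, *args):
            return len(self.flags)

        def columnCount(self, *args):
            return 1

        def data(self, index, role):
            # No DisplayRole text: the cell is either decorated or empty.
            if role == Qt.DecorationRole and self.flags[index.row()]:
                return self.yes_icon
            return QVariant()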
 class FetchMetadata(QDialog, Ui_FetchMetadata):
@@ -131,7 +139,7 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
         self.fetch_metadata()
         self.opt_get_social_metadata.setChecked(config['get_social_metadata'])
         self.opt_overwrite_author_title_metadata.setChecked(config['overwrite_author_title_metadata'])
+        self.opt_auto_download_cover.setChecked(config['auto_download_cover'])

     def show_summary(self, current, *args):
         row = current.row()
@@ -213,6 +221,12 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
         _hung_fetchers.add(self.fetcher)
         if hasattr(self, '_hangcheck') and self._hangcheck.isActive():
             self._hangcheck.stop()
+        # Save value of auto_download_cover, since this is the only place it can
+        # be set. The values of the other options can be set in
+        # Preferences->Behavior and should not be set here as they affect bulk
+        # downloading as well.
+        if self.opt_auto_download_cover.isChecked() != config['auto_download_cover']:
+            config.set('auto_download_cover', self.opt_auto_download_cover.isChecked())

     def __enter__(self, *args):
         return self

@@ -1,172 +1,179 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <ui version="4.0">
  <class>FetchMetadata</class>
  <widget class="QDialog" name="FetchMetadata">
   <property name="windowModality">
    <enum>Qt::WindowModal</enum>
   </property>
   <property name="geometry">
    <rect>
     <x>0</x>
     <y>0</y>
-    <width>830</width>
+    <width>890</width>
     <height>642</height>
    </rect>
   </property>
   <property name="windowTitle">
    <string>Fetch metadata</string>
   </property>
   <property name="windowIcon">
    <iconset resource="../../../../resources/images.qrc">
     <normaloff>:/images/metadata.png</normaloff>:/images/metadata.png</iconset>
   </property>
   <layout class="QVBoxLayout">
    <item>
     <widget class="QLabel" name="tlabel">
      <property name="text">
       <string>&lt;p&gt;calibre can find metadata for your books from two locations: &lt;b&gt;Google Books&lt;/b&gt; and &lt;b&gt;isbndb.com&lt;/b&gt;. &lt;p&gt;To use isbndb.com you must sign up for a &lt;a href="http://www.isbndb.com"&gt;free account&lt;/a&gt; and enter your access key below.</string>
      </property>
      <property name="alignment">
       <set>Qt::AlignCenter</set>
      </property>
      <property name="wordWrap">
       <bool>true</bool>
      </property>
      <property name="openExternalLinks">
       <bool>true</bool>
      </property>
     </widget>
    </item>
    <item>
     <layout class="QHBoxLayout">
      <item>
       <widget class="QLabel" name="label_2">
        <property name="text">
         <string>&amp;Access Key:</string>
        </property>
        <property name="buddy">
         <cstring>key</cstring>
        </property>
       </widget>
      </item>
      <item>
       <widget class="QLineEdit" name="key"/>
      </item>
      <item>
       <widget class="QPushButton" name="fetch">
        <property name="text">
         <string>Fetch</string>
        </property>
       </widget>
      </item>
     </layout>
    </item>
    <item>
     <widget class="QLabel" name="warning">
      <property name="text">
       <string/>
      </property>
      <property name="wordWrap">
       <bool>true</bool>
      </property>
     </widget>
    </item>
    <item>
     <widget class="QGroupBox" name="groupBox">
      <property name="title">
       <string>Matches</string>
      </property>
      <layout class="QVBoxLayout">
       <item>
        <widget class="QLabel" name="label_3">
         <property name="text">
          <string>Select the book that most closely matches your copy from the list below</string>
         </property>
        </widget>
       </item>
       <item>
        <widget class="QTableView" name="matches">
         <property name="sizePolicy">
          <sizepolicy hsizetype="Expanding" vsizetype="Expanding">
           <horstretch>0</horstretch>
           <verstretch>1</verstretch>
          </sizepolicy>
         </property>
         <property name="alternatingRowColors">
          <bool>true</bool>
         </property>
         <property name="selectionMode">
          <enum>QAbstractItemView::SingleSelection</enum>
         </property>
         <property name="selectionBehavior">
          <enum>QAbstractItemView::SelectRows</enum>
         </property>
        </widget>
       </item>
       <item>
        <widget class="QTextBrowser" name="summary"/>
       </item>
      </layout>
     </widget>
    </item>
    <item>
-    <widget class="QCheckBox" name="opt_get_social_metadata">
+    <widget class="QCheckBox" name="opt_overwrite_author_title_metadata">
      <property name="text">
-      <string>Download &amp;social metadata (tags/rating/etc.) for the selected book</string>
+      <string>Overwrite author and title with author and title of selected book</string>
      </property>
     </widget>
    </item>
    <item>
-    <widget class="QCheckBox" name="opt_overwrite_author_title_metadata">
+    <widget class="QCheckBox" name="opt_get_social_metadata">
      <property name="text">
-      <string>Overwrite author and title with author and title of selected book</string>
+      <string>Download &amp;social metadata (tags/rating/etc.) for the selected book</string>
      </property>
     </widget>
    </item>
    <item>
-    <widget class="QDialogButtonBox" name="buttonBox">
-     <property name="standardButtons">
-      <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
+    <widget class="QCheckBox" name="opt_auto_download_cover">
+     <property name="text">
+      <string>Automatically download the cover, if available</string>
      </property>
     </widget>
    </item>
+   <item>
+    <widget class="QDialogButtonBox" name="buttonBox">
+     <property name="standardButtons">
+      <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
+     </property>
+    </widget>
+   </item>
  </layout>
 </widget>
 <resources>
  <include location="../../../../resources/images.qrc"/>
 </resources>
 <connections>
  <connection>
   <sender>buttonBox</sender>
   <signal>accepted()</signal>
   <receiver>FetchMetadata</receiver>
   <slot>accept()</slot>
   <hints>
    <hint type="sourcelabel">
     <x>460</x>
     <y>599</y>
    </hint>
    <hint type="destinationlabel">
     <x>657</x>
     <y>530</y>
    </hint>
   </hints>
  </connection>
  <connection>
   <sender>buttonBox</sender>
   <signal>rejected()</signal>
   <receiver>FetchMetadata</receiver>
   <slot>reject()</slot>
   <hints>
    <hint type="sourcelabel">
     <x>417</x>
     <y>599</y>
    </hint>
    <hint type="destinationlabel">
     <x>0</x>
     <y>491</y>
    </hint>
   </hints>
  </connection>
 </connections>
</ui>
@@ -760,8 +760,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
             if book.publisher: self.publisher.setEditText(book.publisher)
             if book.isbn: self.isbn.setText(book.isbn)
             if book.pubdate:
-                d = book.pubdate
-                self.pubdate.setDate(QDate(d.year, d.month, d.day))
+                dt = book.pubdate
+                self.pubdate.setDate(QDate(dt.year, dt.month, dt.day))
             summ = book.comments
             if summ:
                 prefix = unicode(self.comments.toPlainText())
@@ -777,8 +777,11 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                 self.series.setText(book.series)
             if book.series_index is not None:
                 self.series_index.setValue(book.series_index)
-            # Needed because of Qt focus bug on OS X
-            self.fetch_cover_button.setFocus(Qt.OtherFocusReason)
+            if book.has_cover:
+                if d.opt_auto_download_cover.isChecked() and book.has_cover:
+                    self.fetch_cover()
+                else:
+                    self.fetch_cover_button.setFocus(Qt.OtherFocusReason)
         else:
             error_dialog(self, _('Cannot fetch metadata'),
                     _('You must specify at least one of ISBN, Title, '
@@ -6,7 +6,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os, traceback, cStringIO, re
+import os, traceback, cStringIO, re, shutil

 from calibre.constants import DEBUG
 from calibre.utils.config import Config, StringConfig, tweaks
@@ -203,31 +203,49 @@ def get_components(template, mi, id, timefmt='%b %Y', length=250,
     return shorten_components_to(length, components)


-def save_book_to_disk(id, db, root, opts, length):
-    mi = db.get_metadata(id, index_is_id=True)
+def save_book_to_disk(id_, db, root, opts, length):
+    mi = db.get_metadata(id_, index_is_id=True)
+    cover = db.cover(id_, index_is_id=True, as_path=True)
+    plugboards = db.prefs.get('plugboards', {})

-    available_formats = db.formats(id, index_is_id=True)
+    available_formats = db.formats(id_, index_is_id=True)
     if not available_formats:
         available_formats = []
     else:
         available_formats = [x.lower().strip() for x in
                 available_formats.split(',')]
+    formats = {}
+    fmts = db.formats(id_, index_is_id=True, verify_formats=False)
+    if fmts:
+        fmts = fmts.split(',')
+        for fmt in fmts:
+            fpath = db.format_abspath(id_, fmt, index_is_id=True)
+            if fpath is not None:
+                formats[fmt.lower()] = fpath

+    return do_save_book_to_disk(id_, mi, cover, plugboards,
+            formats, root, opts, length)


+def do_save_book_to_disk(id_, mi, cover, plugboards,
+        format_map, root, opts, length):
+    available_formats = [x.lower().strip() for x in format_map.keys()]
     if opts.formats == 'all':
         asked_formats = available_formats
     else:
         asked_formats = [x.lower().strip() for x in opts.formats.split(',')]
     formats = set(available_formats).intersection(set(asked_formats))
     if not formats:
-        return True, id, mi.title
+        return True, id_, mi.title

-    components = get_components(opts.template, mi, id, opts.timefmt, length,
+    components = get_components(opts.template, mi, id_, opts.timefmt, length,
             ascii_filename if opts.asciiize else sanitize_file_name,
             to_lowercase=opts.to_lowercase,
             replace_whitespace=opts.replace_whitespace)
     base_path = os.path.join(root, *components)
     base_name = os.path.basename(base_path)
     dirpath = os.path.dirname(base_path)
-    # Don't test for existence first are the test could fail but
+    # Don't test for existence first as the test could fail but
     # another worker process could create the directory before
     # the call to makedirs
     try:
@@ -236,29 +254,23 @@ def save_book_to_disk(id, db, root, opts, length):
         if not os.path.exists(dirpath):
             raise

-    cdata = db.cover(id, index_is_id=True)
-    if opts.save_cover:
-        if cdata is not None:
-            with open(base_path+'.jpg', 'wb') as f:
-                f.write(cdata)
-            mi.cover = base_name+'.jpg'
-        else:
-            mi.cover = None
+    if opts.save_cover and cover and os.access(cover, os.R_OK):
+        with open(base_path+'.jpg', 'wb') as f:
+            with open(cover, 'rb') as s:
+                shutil.copyfileobj(s, f)
+        mi.cover = base_name+'.jpg'
+    else:
+        mi.cover = None

     if opts.write_opf:
         opf = metadata_to_opf(mi)
         with open(base_path+'.opf', 'wb') as f:
             f.write(opf)

-    if cdata is not None:
-        mi.cover_data = ('jpg', cdata)
-        mi.cover = None

     written = False
     for fmt in formats:
         global plugboard_save_to_disk_value, plugboard_any_format_value
         dev_name = plugboard_save_to_disk_value
-        plugboards = db.prefs.get('plugboards', {})
         cpb = None
         if fmt in plugboards:
             cpb = plugboards[fmt]
@@ -275,11 +287,12 @@ def save_book_to_disk(id, db, root, opts, length):
         # Leave this here for a while, in case problems arise.
         if cpb is not None:
             prints('Save-to-disk using plugboard:', fmt, cpb)
-        data = db.format(id, fmt, index_is_id=True)
-        if data is None:
+        fp = format_map.get(fmt, None)
+        if fp is None:
             continue
-        else:
-            written = True
+        with open(fp, 'rb') as f:
+            data = f.read()
+        written = True
         if opts.update_metadata:
             stream = cStringIO.StringIO()
             stream.write(data)
@@ -300,9 +313,21 @@ def save_book_to_disk(id, db, root, opts, length):
         with open(fmt_path, 'wb') as f:
             f.write(data)

-    return not written, id, mi.title
+    return not written, id_, mi.title

+def _sanitize_args(root, opts):
+    if opts is None:
+        opts = config().parse()
+    if isinstance(root, unicode):
+        root = root.encode(filesystem_encoding)
+    root = os.path.abspath(root)

+    opts.template = preprocess_template(opts.template)
+    length = 1000 if supports_long_names(root) else 250
+    length -= len(root)
+    if length < 5:
+        raise ValueError('%r is too long.'%root)
+    return root, opts, length

 def save_to_disk(db, ids, root, opts=None, callback=None):
     '''
@@ -316,17 +341,7 @@ def save_to_disk(db, ids, root, opts=None, callback=None):
     :return: A list of failures. Each element of the list is a tuple
     (id, title, traceback)
     '''
-    if opts is None:
-        opts = config().parse()
-    if isinstance(root, unicode):
-        root = root.encode(filesystem_encoding)
-    root = os.path.abspath(root)

-    opts.template = preprocess_template(opts.template)
-    length = 1000 if supports_long_names(root) else 250
-    length -= len(root)
-    if length < 5:
-        raise ValueError('%r is too long.'%root)
+    root, opts, length = _sanitize_args(root, opts)
     failures = []
     for x in ids:
         tb = ''
@@ -343,4 +358,28 @@ def save_to_disk(db, ids, root, opts=None, callback=None):
             break
     return failures

+def save_serialized_to_disk(ids, data, plugboards, root, opts, callback):
+    from calibre.ebooks.metadata.opf2 import OPF
+    root, opts, length = _sanitize_args(root, opts)
+    failures = []
+    for x in ids:
+        opf, cover, format_map = data[x]
+        if isinstance(opf, unicode):
+            opf = opf.encode('utf-8')
+        mi = OPF(cStringIO.StringIO(opf)).to_book_metadata()
+        tb = ''
+        try:
+            failed, id, title = do_save_book_to_disk(x, mi, cover, plugboards,
+                    format_map, root, opts, length)
+            tb = _('Requested formats not available')
+        except:
+            failed, id, title = True, x, mi.title
+            tb = traceback.format_exc()
+        if failed:
+            failures.append((id, title, tb))
+        if callable(callback):
+            if not callback(int(id), title, failed, tb):
+                break

+    return failures
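save_serialized_to_disk reports progress through the callback: it is invoked once per book with (id, title, failed, tb), and returning False aborts the rest of the batch. A minimal consumer sketch:

    def callback(id_, title, failed, tb):
        # Called once per saved book; tb carries the traceback (or a
        # 'formats not available' message) when failed is True.
        if failed:
            print 'FAILED', id_, title
            print tb
        else:
            print 'saved', id_, title
        return True   # return False to abort the remaining books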
src/calibre/utils/cleantext.py (new file, 23 lines)
@@ -0,0 +1,23 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2010, sengian <sengian1@gmail.com>'
+__docformat__ = 'restructuredtext en'
+
+import re
+
+_ascii_pat = None
+
+def clean_ascii_chars(txt, charlist=None):
+    'remove ASCII invalid chars : 0 to 8 and 11-14 to 24-26-27 by default'
+    global _ascii_pat
+    if _ascii_pat is None:
+        chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19)) \
+                + [0x1A, 0x1B]
+        _ascii_pat = re.compile(u'|'.join(map(unichr, chars)))
+
+    if charlist is None:
+        pat = _ascii_pat
+    else:
+        pat = re.compile(u'|'.join(map(unichr, charlist)))
+    return pat.sub('', txt)
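A quick usage sketch of the new helper: the default pattern strips most C0 control characters (which XML and lxml reject) while leaving tab, newline and carriage return alone, and charlist lets callers supply their own codepoints:

    from calibre.utils.cleantext import clean_ascii_chars

    raw = u'bro\x00ken \x1bfeed'
    print clean_ascii_chars(raw)              # -> u'broken feed'
    print clean_ascii_chars(u'a\tb', [0x09])  # custom list: strips the tab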
@@ -151,3 +151,45 @@ def format_date(dt, format, assume_utc=False, as_utc=False):
     format = re.sub('d{1,4}', format_day, format)
     format = re.sub('M{1,4}', format_month, format)
     return re.sub('yyyy|yy', format_year, format)

+def replace_months(datestr, clang):
+    # Replace months by english equivalent for parse_date
+    frtoen = {
+        u'[jJ]anvier': u'jan',
+        u'[fF].vrier': u'feb',
+        u'[mM]ars': u'mar',
+        u'[aA]vril': u'apr',
+        u'[mM]ai': u'may',
+        u'[jJ]uin': u'jun',
+        u'[jJ]uillet': u'jul',
+        u'[aA]o.t': u'aug',
+        u'[sS]eptembre': u'sep',
+        u'[Oo]ctobre': u'oct',
+        u'[nN]ovembre': u'nov',
+        u'[dD].cembre': u'dec' }
+    detoen = {
+        u'[jJ]anuar': u'jan',
+        u'[fF]ebruar': u'feb',
+        u'[mM].rz': u'mar',
+        u'[aA]pril': u'apr',
+        u'[mM]ai': u'may',
+        u'[jJ]uni': u'jun',
+        u'[jJ]uli': u'jul',
+        u'[aA]ugust': u'aug',
+        u'[sS]eptember': u'sep',
+        u'[Oo]ktober': u'oct',
+        u'[nN]ovember': u'nov',
+        u'[dD]ezember': u'dec' }
+
+    if clang == 'fr':
+        dictoen = frtoen
+    elif clang == 'de':
+        dictoen = detoen
+    else:
+        return datestr
+
+    for k in dictoen.iterkeys():
+        tmp = re.sub(k, dictoen[k], datestr)
+        if tmp != datestr: break
+    return tmp
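Note that the month patterns use `.` wildcards (f.vrier, ao.t, m.rz) so they match the accented character regardless of how it was encoded. Typical use, as in the Nicebooks fetcher above, is to normalize the string just before handing it to parse_date:

    from calibre.utils.date import parse_date, replace_months

    d = replace_months(u'15 aout 2010', 'fr')   # -> u'15 aug 2010'
    pub = parse_date(d, assume_utc=True)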
@@ -237,8 +237,6 @@ static PyTypeObject icu_CollatorType = { // {{{
 // }}


-// }}}
-
 // }}}

 // Module initialization {{{
@@ -286,7 +284,7 @@ icu_upper(PyObject *self, PyObject *args) {
     PyMem_Free(input);

     return ret;
-}
+} // }}}

 // lower {{{
 static PyObject *
@@ -56,7 +56,7 @@ def py_sort_key(obj):
 def icu_sort_key(collator, obj):
     if not obj:
         return _none2
-    return collator.sort_key(obj.lower())
+    return collator.sort_key(lower(obj))

 def py_case_sensitive_sort_key(obj):
     if not obj:
@@ -1227,7 +1227,7 @@ class ZipFile:
             self.fp.flush()
         if zinfo.flag_bits & 0x08:
             # Write CRC and file sizes after the file data
-            self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
+            self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                   zinfo.file_size))
         self.filelist.append(zinfo)
         self.NameToInfo[zinfo.filename] = zinfo
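The one-character fix matters: `l` packs a signed 32-bit value, so any CRC with the high bit set (roughly half of all CRC-32 values) raises struct.error, while `L` packs the same four bytes unsigned. A quick demonstration:

    import struct

    crc = 0xDEADBEEF  # a CRC-32 value with the high bit set

    struct.pack("<LLL", crc, 100, 200)   # fine: unsigned 32-bit fields
    try:
        struct.pack("<lLL", crc, 100, 200)
    except struct.error, e:
        print 'signed pack failed:', e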