Merge from trunk

Charles Haley 2011-02-21 09:37:08 +00:00
commit ff912773cf
15 changed files with 302 additions and 222 deletions

View File

@@ -1,7 +1,9 @@
 __license__ = 'GPL v3'
-__copyright__ = '2010, Eddie Lau'
+__copyright__ = '2010-2011, Eddie Lau'
 '''
 Change Log:
+2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
+            clean up the indentation
 2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
             (to avoid wrong date display in case the user generates the ebook in a time zone different from HKT)
 2010/11/22: add English section, remove eco-news section which is not updated daily, correct
@@ -18,21 +20,19 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
 from contextlib import nested
-from calibre import __appname__
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.metadata import MetaInformation

 class MPHKRecipe(BasicNewsRecipe):
-    IsKindleUsed = True # to avoid generating periodical in which CJK characters can't be displayed in section/article view
+    IsCJKWellSupported = True # Set to False to avoid generating periodical in which CJK characters can't be displayed in section/article view

     title = 'Ming Pao - Hong Kong'
     oldest_article = 1
     max_articles_per_feed = 100
     __author__ = 'Eddie Lau'
-    description = 'Hong Kong Chinese Newspaper'
-    publisher = 'news.mingpao.com'
+    description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
+    publisher = 'MingPao'
     category = 'Chinese, News, Hong Kong'
     remove_javascript = True
     use_embedded_content = False
@@ -46,9 +46,10 @@ class MPHKRecipe(BasicNewsRecipe):
     masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
     keep_only_tags = [dict(name='h1'),
                       dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
-                      dict(attrs={'class':['photo']}),
                       dict(attrs={'id':['newscontent']}), # entertainment page content
-                      dict(attrs={'id':['newscontent01','newscontent02']})]
+                      dict(attrs={'id':['newscontent01','newscontent02']}),
+                      dict(attrs={'class':['photo']})
+                      ]
     remove_tags = [dict(name='style'),
                    dict(attrs={'id':['newscontent135']})] # for the finance page
     remove_attributes = ['width']
@@ -107,6 +108,9 @@ class MPHKRecipe(BasicNewsRecipe):
     def get_fetchdate(self):
         return self.get_dtlocal().strftime("%Y%m%d")

+    def get_fetchformatteddate(self):
+        return self.get_dtlocal().strftime("%Y-%m-%d")
+
     def get_fetchday(self):
         # convert UTC to local hk time - at around HKT 6.00am, all news are available
         return self.get_dtlocal().strftime("%d")
@@ -124,13 +128,13 @@ class MPHKRecipe(BasicNewsRecipe):
         feeds = []
         dateStr = self.get_fetchdate()
         for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
-                           (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
                            (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
-                           (u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'),
+                           (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'),
                            (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                            (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
                            (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'),
                            ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
+                           (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
                            (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'),
                            (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                            (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
@@ -141,14 +145,10 @@ class MPHKRecipe(BasicNewsRecipe):
         fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
         if fin_articles:
             feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
-        # special - eco-friendly
-        # eco_articles = self.parse_eco_section('http://tssl.mingpao.com/htm/marketing/eco/cfm/Eco1.cfm')
-        # if eco_articles:
-        #     feeds.append((u'\u74b0\u4fdd Eco News', eco_articles))
         # special - entertainment
         ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
         if ent_articles:
-            feeds.append((u'\u5f71\u8996 Entertainment', ent_articles))
+            feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
         return feeds

     def parse_section(self, url):
@@ -174,31 +174,17 @@ class MPHKRecipe(BasicNewsRecipe):
         soup = self.index_to_soup(url)
         a = soup.findAll('a', href= True)
         current_articles = []
-        for i in a:
-            url = i.get('href', False)
-            if not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
-                title = self.tag_to_string(i)
-                url = 'http://www.mpfinance.com/cfm/' +url
-                current_articles.append({'title': title, 'url': url, 'description':''})
-        return current_articles
-
-    def parse_eco_section(self, url):
-        dateStr = self.get_fetchdate()
-        soup = self.index_to_soup(url)
-        divs = soup.findAll(attrs={'class': ['bullet']})
-        current_articles = []
         included_urls = []
-        for i in divs:
-            a = i.find('a', href = True)
-            title = self.tag_to_string(a)
-            url = a.get('href', False)
-            url = 'http://tssl.mingpao.com/htm/marketing/eco/cfm/' +url
-            if url not in included_urls and url.rfind('Redirect') == -1 and not url.rfind('.txt') == -1 and not url.rfind(dateStr) == -1:
+        for i in a:
+            url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
+            if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
+                title = self.tag_to_string(i)
                 current_articles.append({'title': title, 'url': url, 'description':''})
                 included_urls.append(url)
         return current_articles

     def parse_ent_section(self, url):
-        self.get_fetchdate()
         soup = self.index_to_soup(url)
         a = soup.findAll('a', href=True)
         a.reverse()
@@ -223,18 +209,22 @@ class MPHKRecipe(BasicNewsRecipe):
         return soup

     def create_opf(self, feeds, dir=None):
-        if self.IsKindleUsed == False:
-            super(MPHKRecipe,self).create_opf(feeds, dir)
-            return
         if dir is None:
             dir = self.output_dir
-        title = self.short_title()
-        title += ' ' + self.get_fetchdate()
-        #if self.output_profile.periodical_date_in_title:
+        if self.IsCJKWellSupported == True:
+            # use Chinese title
+            title = u'\u660e\u5831 (\u9999\u6e2f) ' + self.get_fetchformatteddate()
+        else:
+            # use English title
+            title = self.short_title() + ' ' + self.get_fetchformatteddate()
+        if True: # force date in title
         #    title += strftime(self.timefmt)
-        mi = MetaInformation(title, [__appname__])
-        mi.publisher = __appname__
-        mi.author_sort = __appname__
-        mi.publication_type = self.publication_type+':'+self.short_title()
+        mi = MetaInformation(title, [self.publisher])
+        mi.publisher = self.publisher
+        mi.author_sort = self.publisher
+        if self.IsCJKWellSupported == True:
+            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
+        else:
+            mi.publication_type = self.publication_type+':'+self.short_title()
         #mi.timestamp = nowf()
         mi.timestamp = self.get_dtlocal()
@@ -321,7 +311,7 @@ class MPHKRecipe(BasicNewsRecipe):
                     prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
                 templ = self.navbar.generate(True, num, j, len(f),
                                 not self.has_single_feed,
-                                a.orig_url, __appname__, prefix=prefix,
+                                a.orig_url, self.publisher, prefix=prefix,
                                 center=self.center_navbar)
                 elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                 body.insert(len(body.contents), elem)
@@ -357,4 +347,3 @@ class MPHKRecipe(BasicNewsRecipe):
         with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
             opf.render(opf_file, ncx_file)
-
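
Note on the finance-section change above: the changelog's "skip duplicated links" fix amounts to remembering every URL already accepted and rejecting repeats. A standalone sketch of the same pattern (the sample links are invented; the recipe's rfind() tests are equivalent to the in/not in checks used here):

    def dedupe_articles(links, date_str):
        current_articles, included_urls = [], []
        for title, url in links:
            # keep only today's articles, and only the first copy of each URL
            if url not in included_urls and date_str in url and 'index' not in url:
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        return current_articles

    links = [('A', 'http://www.mpfinance.com/cfm/20110221/ea1.htm'),
             ('A again', 'http://www.mpfinance.com/cfm/20110221/ea1.htm')]
    print dedupe_articles(links, '20110221') # the duplicate link is dropped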

View File

@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+
+'''
+OSNews.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class OSNewsRecipe(BasicNewsRecipe):
+    __author__ = u'Mori & Tomasz D\u0142ugosz'
+    language = 'pl'
+    title = u'OSnews.pl'
+    publisher = u'OSnews.pl'
+    description = u'OSnews.pl jest spo\u0142eczno\u015bciowym serwisem informacyjnym po\u015bwi\u0119conym oprogramowaniu, systemom operacyjnym i \u015bwiatowi IT'
+
+    no_stylesheets = True
+    remove_javascript = True
+    encoding = 'utf-8'
+    use_embedded_content = False
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+
+    extra_css = '''
+        .news-heading {font-size:150%}
+        .newsinformations li {display:inline;}
+        blockquote {border:2px solid #000; padding:5px;}
+    '''
+
+    feeds = [
+        (u'OSNews.pl', u'http://feeds.feedburner.com/OSnewspl')
+    ]
+
+    keep_only_tags = [
+        dict(name = 'a', attrs = {'class' : 'news-heading'}),
+        dict(name = 'div', attrs = {'class' : 'newsinformations'}),
+        dict(name = 'div', attrs = {'id' : 'news-content'})
+    ]
+
+    remove_tags = [
+        dict(name = 'div', attrs = {'class' : 'sociable'}),
+        dict(name = 'div', attrs = {'class' : 'post_prev'}),
+        dict(name = 'div', attrs = {'class' : 'post_next'}),
+        dict(name = 'div', attrs = {'class' : 'clr'})
+    ]
+
+    preprocess_regexps = [(re.compile(u'</span>Komentarze: \(?[0-9]+\)? ?<span'), lambda match: '</span><span')]
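
For reference, each preprocess_regexps entry is a (compiled regex, replacement callable) pair that calibre applies to the raw HTML of every article before parsing. A minimal standalone sketch of what the entry above does (the sample HTML is invented):

    # -*- coding: utf-8 -*-
    import re

    pattern = re.compile(u'</span>Komentarze: \(?[0-9]+\)? ?<span')
    raw = u'<span>tekst</span>Komentarze: (17) <span>dalej</span>'
    # strips the comment counter while keeping the surrounding spans intact
    print pattern.sub('</span><span', raw) # -> <span>tekst</span><span>dalej</span>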

View File

@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Tomasz Dlugosz <tomek3d@gmail.com>'
+
+'''
+swiatkindle.pl
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class swiatkindle(BasicNewsRecipe):
+    title = u'Swiat Kindle'
+    description = u'Blog o czytniku Amazon Kindle. Wersje, ksi\u0105\u017cki, kupowanie i korzystanie w Polsce'
+    language = 'pl'
+    __author__ = u'Tomasz D\u0142ugosz'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    feeds = [(u'\u015awiat Kindle - wpisy', u'http://swiatkindle.pl/feed')]
+
+    remove_tags = [dict(name = 'ul', attrs = {'class' : 'similar-posts'})]
+
+    preprocess_regexps = [(re.compile(u'<h3>Czytaj dalej:</h3>'), lambda match: '')]

View File

@@ -573,8 +573,8 @@ from calibre.devices.edge.driver import EDGE
 from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \
         SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH
 from calibre.devices.sne.driver import SNE
-from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
-        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, Q600, LUMIREAD, ALURATEK_COLOR, \
+from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, \
+        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR, \
         TREKSTOR, EEEREADER, NEXTBOOK
 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO
@@ -691,8 +691,6 @@ plugins += [
     AVANT,
     MENTOR,
     SWEEX,
-    Q600,
-    KOGAN,
     PDNOVEL,
     SPECTRA,
     GEMEI,

View File

@@ -121,7 +121,8 @@ def enable_plugin(plugin_or_name):
     config['enabled_plugins'] = ep

 default_disabled_plugins = set([
-    'Douban Books', 'Douban.com covers', 'Nicebooks', 'Nicebooks covers'
+    'Douban Books', 'Douban.com covers', 'Nicebooks', 'Nicebooks covers',
+    'Kent District Library'
 ])

 def is_disabled(plugin):

View File

@@ -54,41 +54,24 @@ class AVANT(USBMS):
 class SWEEX(USBMS):
     # Identical to the Promedia
     name = 'Sweex Device Interface'
-    gui_name = 'Sweex'
-    description = _('Communicate with the Sweex MM300')
+    gui_name = 'Sweex/Kogan/Q600/Wink'
+    description = _('Communicate with the Sweex/Kogan/Q600/Wink')
     author = 'Kovid Goyal'
     supported_platforms = ['windows', 'osx', 'linux']

     # Ordered list of supported formats
-    FORMATS = ['epub', 'prc', 'fb2', 'html', 'rtf', 'chm', 'pdf', 'txt']
+    FORMATS = ['epub', 'mobi', 'prc', 'fb2', 'html', 'rtf', 'chm', 'pdf', 'txt']

     VENDOR_ID = [0x0525, 0x177f]
     PRODUCT_ID = [0xa4a5, 0x300]
-    BCD = [0x0319, 0x110]
+    BCD = [0x0319, 0x110, 0x325]

-    VENDOR_NAME = 'SWEEX'
-    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EBOOKREADER'
+    VENDOR_NAME = ['SWEEX', 'LINUX']
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['EBOOKREADER', 'FILE-STOR_GADGET']

     EBOOK_DIR_MAIN = ''
     SUPPORTS_SUB_DIRS = True

-class Q600(SWEEX):
-    name = 'Digma Q600 Device interface'
-    gui_name = 'Q600'
-    description = _('Communicate with the Digma Q600')
-    BCD = [0x325]
-    FORMATS = ['epub', 'fb2', 'mobi', 'prc', 'html', 'rtf', 'chm', 'pdf', 'txt']
-
-class KOGAN(SWEEX):
-    name = 'Kogan Device Interface'
-    gui_name = 'Kogan'
-    description = _('Communicate with the Kogan')
-    VENDOR_NAME = 'LINUX'
-    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'FILE-STOR_GADGET'
-    EBOOK_DIR_MAIN = 'Kogan eBooks'
-
 class PDNOVEL(USBMS):
     name = 'Pandigital Novel device interface'

View File

@@ -350,6 +350,8 @@ class FB2MLizer(object):
             # Number of blank lines above tag
             try:
                 ems = int(round((float(style.marginTop) / style.fontSize) - 1))
+                if ems < 0:
+                    ems = 0
             except:
                 ems = 0
@@ -397,7 +399,7 @@ class FB2MLizer(object):
                         fb2_out += p_txt
                         tags += p_tag
                 fb2_out.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem_tree.attrib['src'])])
-            if tag in ('br', 'hr') or ems:
+            if tag in ('br', 'hr') or ems >= 1:
                 if ems < 1:
                     multiplier = 1
                 else:
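
The two hunks above cooperate: the first clamps the computed blank-line count at zero, the second emits spacing only for a genuinely positive count. A worked check of the arithmetic (the margin and font-size values are invented):

    def blank_lines_above(margin_top, font_size):
        ems = int(round((float(margin_top) / font_size) - 1))
        if ems < 0:
            ems = 0
        return ems

    print blank_lines_above(0, 16)  # 0; the old formula gave -1, which the
                                    # previous "or ems" truth test treated as true
    print blank_lines_above(32, 16) # 1 -> one blank line is emitted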

View File

@@ -205,7 +205,10 @@ def main(args=sys.argv):
             open(cpath, 'wb').write(br.open_novisit(curl).read())
             print 'Cover for', title, 'saved to', cpath

+    #import time
+    #st = time.time()
     print get_social_metadata(title, None, None, isbn)
+    #print '\n\n', time.time() - st, '\n\n'

     return 0

View File

@@ -106,6 +106,9 @@ class MetadataSource(Plugin): # {{{
     def join(self):
         return self.worker.join()

+    def is_alive(self):
+        return self.worker.is_alive()
+
     def is_customizable(self):
         return True
@@ -251,7 +254,9 @@ class KentDistrictLibrary(MetadataSource): # {{{
     name = 'Kent District Library'
     metadata_type = 'social'
-    description = _('Downloads series information from ww2.kdl.org')
+    description = _('Downloads series information from ww2.kdl.org. '
+                    'This website cannot handle large numbers of queries, '
+                    'so the plugin is disabled by default.')

     def fetch(self):
         if not self.title or not self.book_author:

View File

@@ -5,7 +5,9 @@ __license__ = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import re, urllib, urlparse
+import re, urllib, urlparse, socket
+
+from mechanize import URLError

 from calibre.ebooks.metadata.book.base import Metadata
 from calibre import browser
@@ -17,7 +19,7 @@ URL = \
 _ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])

-def get_series(title, authors):
+def get_series(title, authors, timeout=60):
     mi = Metadata(title, authors)
     if title and title[0] in _ignore_starts:
         title = title[1:]
@@ -39,7 +41,12 @@ def get_series(title, authors, timeout=60):
     url = URL.format(author, title)
     br = browser()
-    raw = br.open(url).read()
+    try:
+        raw = br.open_novisit(url, timeout=timeout).read()
+    except URLError, e:
+        if isinstance(e.reason, socket.timeout):
+            raise Exception('KDL Server busy, try again later')
+        raise
     if 'see the full results' not in raw:
         return mi
     raw = xml_to_unicode(raw)[0]
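
The pattern above distinguishes a timed-out connection from other network failures: mechanize, like urllib2, wraps the underlying socket.timeout in a URLError whose reason attribute carries the original exception. A minimal standalone sketch of the same idea with plain urllib2 (the URL is a placeholder):

    import socket, urllib2

    def fetch(url, timeout=60):
        try:
            return urllib2.urlopen(url, timeout=timeout).read()
        except urllib2.URLError, e:
            # a connect timeout surfaces as URLError(socket.timeout)
            if isinstance(e.reason, socket.timeout):
                raise Exception('Server busy, try again later')
            raise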

View File

@@ -85,7 +85,8 @@ class Source(Plugin):
     # Metadata API {{{

-    def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}):
+    def identify(self, log, result_queue, abort, title=None, authors=None,
+            identifiers={}, timeout=5):
         '''
         Identify a book by its title/author/isbn/etc.
@@ -98,6 +99,8 @@ class Source(Plugin):
         :param authors: A list of authors of the book, can be None
         :param identifiers: A dictionary of other identifiers, most commonly
                             {'isbn':'1234...'}
+        :param timeout: Timeout in seconds, no network request should hang for
+                        longer than timeout.
         :return: None if no errors occurred, otherwise a unicode representation
                  of the error suitable for showing to the user
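
A hedged sketch of what a concrete implementation of this contract can look like (fetch_candidates is an invented stand-in for a real network lookup, and a real plugin needs the usual registration around it):

    from calibre import as_unicode
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.ebooks.metadata.sources.base import Source

    def fetch_candidates(title, authors, timeout=5):
        # stand-in for a network query; every real call should honour timeout
        return [{'title': title or 'Unknown', 'authors': authors or []}]

    class ExampleSource(Source):

        name = 'Example'

        def identify(self, log, result_queue, abort, title=None, authors=None,
                identifiers={}, timeout=5):
            if abort.is_set():
                return None
            try:
                records = fetch_candidates(title, authors, timeout=timeout)
            except Exception, e:
                log.exception('Failed to make identify query')
                return as_unicode(e)
            for rec in records:
                if abort.is_set():
                    break
                result_queue.put(Metadata(rec['title'], rec['authors']))
            return None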

View File

@@ -10,7 +10,6 @@ __docformat__ = 'restructuredtext en'
 import time
 from urllib import urlencode
 from functools import partial
-from threading import Thread

 from lxml import etree
@@ -18,6 +17,7 @@ from calibre.ebooks.metadata.sources.base import Source
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.utils.date import parse_date, utcnow
+from calibre.utils.cleantext import clean_ascii_chars
 from calibre import browser, as_unicode

 NAMESPACES = {
@@ -41,20 +41,20 @@ subject = XPath('descendant::dc:subject')
 description = XPath('descendant::dc:description')
 language = XPath('descendant::dc:language')

-def get_details(browser, url):
+def get_details(browser, url, timeout):
     try:
-        raw = browser.open_novisit(url).read()
+        raw = browser.open_novisit(url, timeout=timeout).read()
     except Exception as e:
         gc = getattr(e, 'getcode', lambda : -1)
         if gc() != 403:
             raise
         # Google is throttling us, wait a little
-        time.sleep(2)
-        raw = browser.open_novisit(url).read()
+        time.sleep(1)
+        raw = browser.open_novisit(url, timeout=timeout).read()
     return raw

-def to_metadata(browser, log, entry_):
+def to_metadata(browser, log, entry_, timeout):

     def get_text(extra, x):
         try:
@@ -79,8 +79,9 @@ def to_metadata(browser, log, entry_, timeout):
     mi = Metadata(title_, authors)
     try:
-        raw = get_details(browser, id_url)
-        feed = etree.fromstring(xml_to_unicode(raw, strip_encoding_pats=True)[0])
+        raw = get_details(browser, id_url, timeout)
+        feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
+            strip_encoding_pats=True)[0])
         extra = entry(feed)[0]
     except:
         log.exception('Failed to get additional details for', mi.title)
@@ -131,25 +132,18 @@ def to_metadata(browser, log, entry_, timeout):
     return mi

-class Worker(Thread):
-
-    def __init__(self, log, entries, abort, result_queue):
-        self.browser, self.log, self.entries = browser(), log, entries
-        self.abort, self.result_queue = abort, result_queue
-        Thread.__init__(self)
-        self.daemon = True
-
-    def run(self):
-        for i in self.entries:
-            try:
-                ans = to_metadata(self.browser, self.log, i)
-                if isinstance(ans, Metadata):
-                    self.result_queue.put(ans)
-            except:
-                self.log.exception(
-                    'Failed to get metadata for identify entry:',
-                    etree.tostring(i))
-            if self.abort.is_set():
-                break
+def get_all_details(br, log, entries, abort, result_queue, timeout):
+    for i in entries:
+        try:
+            ans = to_metadata(br, log, i, timeout)
+            if isinstance(ans, Metadata):
+                result_queue.put(ans)
+        except:
+            log.exception(
+                'Failed to get metadata for identify entry:',
+                etree.tostring(i))
+        if abort.is_set():
+            break
@@ -192,54 +186,40 @@ class GoogleBooks(Source):
         })

-    def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}):
+    def identify(self, log, result_queue, abort, title=None, authors=None,
+            identifiers={}, timeout=5):
         query = self.create_query(log, title=title, authors=authors,
                 identifiers=identifiers)
+        br = browser()
         try:
-            raw = browser().open_novisit(query).read()
+            raw = br.open_novisit(query, timeout=timeout).read()
         except Exception, e:
             log.exception('Failed to make identify query: %r'%query)
             return as_unicode(e)

         try:
             parser = etree.XMLParser(recover=True, no_network=True)
-            feed = etree.fromstring(xml_to_unicode(raw,
+            feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
                 strip_encoding_pats=True)[0], parser=parser)
             entries = entry(feed)
         except Exception, e:
             log.exception('Failed to parse identify results')
             return as_unicode(e)

-        groups = self.split_jobs(entries, 5) # At most 5 threads
-        if not groups:
-            return None
-        workers = [Worker(log, entries, abort, result_queue) for entries in
-                groups]
-
-        if abort.is_set():
-            return None
-
-        for worker in workers: worker.start()
-
-        has_alive_worker = True
-        while has_alive_worker and not abort.is_set():
-            time.sleep(0.1)
-            has_alive_worker = False
-            for worker in workers:
-                if worker.is_alive():
-                    has_alive_worker = True
+        # There is no point running these queries in threads as google
+        # throttles requests returning Forbidden errors
+        get_all_details(br, log, entries, abort, result_queue, timeout)

         return None

 if __name__ == '__main__':
     # To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
     from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
-            isbn_test)
+            title_test)
     test_identify_plugin(GoogleBooks.name,
         [
             (
                 {'title': 'Great Expectations', 'authors':['Charles Dickens']},
-                [isbn_test('9781607541592')]
+                [title_test('Great Expectations', exact=True)]
             ),
     ])
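
The serialized fetching above still tolerates throttling because get_details retries after a short sleep on HTTP 403. The same idea as a reusable sketch (the retry count and delay are invented parameters; br is a calibre browser object as used above):

    import time

    def fetch_with_retry(br, url, timeout, retries=2, delay=1):
        for attempt in range(retries + 1):
            try:
                return br.open_novisit(url, timeout=timeout).read()
            except Exception, e:
                gc = getattr(e, 'getcode', lambda : -1)
                # re-raise anything that is not throttling (HTTP 403),
                # or if we are out of attempts
                if gc() != 403 or attempt == retries:
                    raise
                time.sleep(delay)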

View File

@@ -7,7 +7,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os, tempfile
+import os, tempfile, time
 from Queue import Queue, Empty
 from threading import Event
@@ -26,6 +26,17 @@ def isbn_test(isbn):
     return test

+def title_test(title, exact=False):
+
+    title = title.lower()
+
+    def test(mi):
+        mt = mi.title.lower()
+        return (exact and mt == title) or \
+                (not exact and title in mt)
+
+    return test
+
 def test_identify_plugin(name, tests):
     '''
     :param name: Plugin name
@@ -48,11 +59,15 @@ def test_identify_plugin(name, tests):
     abort = Event()
     prints('Log saved to', lf)

+    times = []
     for kwargs, test_funcs in tests:
         prints('Running test with:', kwargs)
         rq = Queue()
         args = (log, rq, abort)
+        start_time = time.time()
         err = plugin.identify(*args, **kwargs)
+        total_time = time.time() - start_time
+        times.append(total_time)
         if err is not None:
             prints('identify returned an error for args', args)
             prints(err)
@@ -87,6 +102,8 @@ def test_identify_plugin(name, tests):
             prints('Log saved to', lf)
             raise SystemExit(1)

+    prints('Average time per query', sum(times)/len(times))
+
     if os.stat(lf).st_size > 10:
         prints('There were some errors, see log', lf)
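
Usage of the new helper mirrors the __main__ block of google.py above ('MySource' is a placeholder plugin name):

    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
            title_test)

    test_identify_plugin('MySource',
        [
            (
                {'title': 'Great Expectations', 'authors':['Charles Dickens']},
                # passes when the returned title contains (or, with
                # exact=True, equals) the expected string, case-insensitively
                [title_test('Great Expectations', exact=True)]
            ),
        ])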

View File

@@ -11,6 +11,12 @@ from calibre import browser

 class xISBN(object):

+    '''
+    This class is used to find the ISBN numbers of "related" editions of a
+    book, given its ISBN. Useful when querying services for metadata by ISBN,
+    in case they do not have the ISBN for the particular edition.
+    '''
+
     QUERY = 'http://xisbn.worldcat.org/webservices/xid/isbn/%s?method=getEditions&format=json&fl=form,year,lang,ed'

     def __init__(self):
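
What a raw query against the QUERY endpoint above returns, sketched standalone (the ISBN is an arbitrary example; WorldCat's xISBN service may reject heavy use or be unavailable):

    import json, urllib2

    QUERY = 'http://xisbn.worldcat.org/webservices/xid/isbn/%s?method=getEditions&format=json&fl=form,year,lang,ed'

    def related_isbns(isbn, timeout=30):
        raw = urllib2.urlopen(QUERY % isbn, timeout=timeout).read()
        data = json.loads(raw)
        # each entry in 'list' describes one edition of the book
        return [i for entry in data.get('list', []) for i in entry.get('isbn', [])]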

View File

@@ -259,6 +259,7 @@ class AddAction(InterfaceAction):
         if hasattr(self.gui, 'db_images'):
             self.gui.db_images.reset()
         self.gui.tags_view.recount()
+
         if getattr(self._adder, 'merged_books', False):
             books = u'\n'.join([x if isinstance(x, unicode) else
                 x.decode(preferred_encoding, 'replace') for x in
@@ -266,6 +267,17 @@ class AddAction(InterfaceAction):
             info_dialog(self.gui, _('Merged some books'),
                 _('The following duplicate books were found and incoming book formats were '
                     'processed and merged into your Calibre database according to your automerge settings:'), det_msg=books, show=True)
+
+        if getattr(self._adder, 'number_of_books_added', 0) > 0 or \
+                getattr(self._adder, 'merged_books', False):
+            # The formats of the current book could have changed if
+            # automerge is enabled
+            current_idx = self.gui.library_view.currentIndex()
+            if current_idx.isValid():
+                self.gui.library_view.model().current_changed(current_idx,
+                        current_idx)
+
         if getattr(self._adder, 'critical', None):
             det_msg = []
             for name, log in self._adder.critical.items():