Mirror of https://github.com/kovidgoyal/calibre.git, synced 2025-07-09 03:04:10 -04:00

commit ff912773cf
Merge from trunk
@@ -1,7 +1,9 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Eddie Lau'
__copyright__ = '2010-2011, Eddie Lau'
'''
Change Log:
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
            clean up the indentation
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
            (to avoid wrong date display in case the user generates the ebook in a time zone different from HKT)
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
@@ -18,21 +20,19 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested


from calibre import __appname__
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation

class MPHKRecipe(BasicNewsRecipe):
    IsKindleUsed = True # to avoid generating periodical in which CJK characters can't be displayed in section/article view

    IsCJKWellSupported = True # Set to False to avoid generating periodical in which CJK characters can't be displayed in section/article view
    title = 'Ming Pao - Hong Kong'
    oldest_article = 1
    max_articles_per_feed = 100
    __author__ = 'Eddie Lau'
    description = 'Hong Kong Chinese Newspaper'
    publisher = 'news.mingpao.com'
    description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
    publisher = 'MingPao'
    category = 'Chinese, News, Hong Kong'
    remove_javascript = True
    use_embedded_content = False
@@ -46,9 +46,10 @@ class MPHKRecipe(BasicNewsRecipe):
    masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
    keep_only_tags = [dict(name='h1'),
                      dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
                      dict(attrs={'class':['photo']}),
                      dict(attrs={'id':['newscontent']}), # entertainment page content
                      dict(attrs={'id':['newscontent01','newscontent02']})]
                      dict(attrs={'id':['newscontent01','newscontent02']}),
                      dict(attrs={'class':['photo']})
                      ]
    remove_tags = [dict(name='style'),
                   dict(attrs={'id':['newscontent135']})] # for the finance page
    remove_attributes = ['width']
@@ -107,6 +108,9 @@ class MPHKRecipe(BasicNewsRecipe):
    def get_fetchdate(self):
        return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
        return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchday(self):
        # convert UTC to local hk time - at around HKT 6.00am, all news are available
        return self.get_dtlocal().strftime("%d")
@@ -124,13 +128,13 @@ class MPHKRecipe(BasicNewsRecipe):
        feeds = []
        dateStr = self.get_fetchdate()
        for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
                           (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
                           (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
                           (u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'),
                           (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'),
                           (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                           (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
                           (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'),
                           ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
                           (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
                           (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'),
                           (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                           (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
@@ -141,14 +145,10 @@ class MPHKRecipe(BasicNewsRecipe):
        fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
        if fin_articles:
            feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
        # special - eco-friendly
        # eco_articles = self.parse_eco_section('http://tssl.mingpao.com/htm/marketing/eco/cfm/Eco1.cfm')
        # if eco_articles:
        #     feeds.append((u'\u74b0\u4fdd Eco News', eco_articles))
        # special - entertainment
        ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
        if ent_articles:
            feeds.append((u'\u5f71\u8996 Entertainment', ent_articles))
            feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
        return feeds

    def parse_section(self, url):
@@ -174,31 +174,17 @@ class MPHKRecipe(BasicNewsRecipe):
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href= True)
        current_articles = []
        for i in a:
            url = i.get('href', False)
            if not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
                title = self.tag_to_string(i)
                url = 'http://www.mpfinance.com/cfm/' +url
                current_articles.append({'title': title, 'url': url, 'description':''})
        return current_articles

    def parse_eco_section(self, url):
        dateStr = self.get_fetchdate()
        soup = self.index_to_soup(url)
        divs = soup.findAll(attrs={'class': ['bullet']})
        current_articles = []
        included_urls = []
        for i in divs:
            a = i.find('a', href = True)
            title = self.tag_to_string(a)
            url = a.get('href', False)
            url = 'http://tssl.mingpao.com/htm/marketing/eco/cfm/' +url
            if url not in included_urls and url.rfind('Redirect') == -1 and not url.rfind('.txt') == -1 and not url.rfind(dateStr) == -1:
        for i in a:
            url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
            if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
                title = self.tag_to_string(i)
                current_articles.append({'title': title, 'url': url, 'description':''})
                included_urls.append(url)
        return current_articles

    def parse_ent_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
@@ -223,18 +209,22 @@ class MPHKRecipe(BasicNewsRecipe):
        return soup

    def create_opf(self, feeds, dir=None):
        if self.IsKindleUsed == False:
            super(MPHKRecipe,self).create_opf(feeds, dir)
            return
        if dir is None:
            dir = self.output_dir
        title = self.short_title()
        title += ' ' + self.get_fetchdate()
        #if self.output_profile.periodical_date_in_title:
        if self.IsCJKWellSupported == True:
            # use Chinese title
            title = u'\u660e\u5831 (\u9999\u6e2f) ' + self.get_fetchformatteddate()
        else:
            # use English title
            title = self.short_title() + ' ' + self.get_fetchformatteddate()
        if True: # force date in title
            # title += strftime(self.timefmt)
        mi = MetaInformation(title, [__appname__])
        mi.publisher = __appname__
        mi.author_sort = __appname__
        mi = MetaInformation(title, [self.publisher])
        mi.publisher = self.publisher
        mi.author_sort = self.publisher
        if self.IsCJKWellSupported == True:
            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        else:
            mi.publication_type = self.publication_type+':'+self.short_title()
        #mi.timestamp = nowf()
        mi.timestamp = self.get_dtlocal()
@@ -321,7 +311,7 @@ class MPHKRecipe(BasicNewsRecipe):
            prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
            templ = self.navbar.generate(True, num, j, len(f),
                                         not self.has_single_feed,
                                         a.orig_url, __appname__, prefix=prefix,
                                         a.orig_url, self.publisher, prefix=prefix,
                                         center=self.center_navbar)
            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
            body.insert(len(body.contents), elem)
@@ -357,4 +347,3 @@ class MPHKRecipe(BasicNewsRecipe):

        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)
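The get_fetchdate()/get_fetchformatteddate() helpers above both go through get_dtlocal(), whose body lies outside the hunks shown. A minimal sketch of the UTC-to-HKT shift described by the comment in get_fetchday() (the offset arithmetic here is an assumption, not the committed code):

import datetime

def get_dtlocal():
    # HKT is UTC+8, but the day's news is only complete at around 6.00am HKT;
    # shifting by 8 - 6 = 2 hours makes the date roll over at that moment rather
    # than at midnight UTC. This body is assumed; the real method is not shown.
    return datetime.datetime.utcnow() + datetime.timedelta(hours=2)

print get_dtlocal().strftime("%Y%m%d")  # the format get_fetchdate() returns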
resources/recipes/osnews_pl.recipe (new file, 50 lines)
@@ -0,0 +1,50 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
'''
OSNews.pl
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re

class OSNewsRecipe(BasicNewsRecipe):
    __author__ = u'Mori & Tomasz D\u0142ugosz'
    language = 'pl'

    title = u'OSnews.pl'
    publisher = u'OSnews.pl'
    description = u'OSnews.pl jest spo\u0142eczno\u015bciowym serwisem informacyjnym po\u015bwi\u0119conym oprogramowaniu, systemom operacyjnym i \u015bwiatowi IT'

    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'
    use_embedded_content = False;

    oldest_article = 7
    max_articles_per_feed = 100

    extra_css = '''
        .news-heading {font-size:150%}
        .newsinformations li {display:inline;}
        blockquote {border:2px solid #000; padding:5px;}
    '''

    feeds = [
        (u'OSNews.pl', u'http://feeds.feedburner.com/OSnewspl')
    ]

    keep_only_tags = [
        dict(name = 'a', attrs = {'class' : 'news-heading'}),
        dict(name = 'div', attrs = {'class' : 'newsinformations'}),
        dict(name = 'div', attrs = {'id' : 'news-content'})
    ]

    remove_tags = [
        dict(name = 'div', attrs = {'class' : 'sociable'}),
        dict(name = 'div', attrs = {'class' : 'post_prev'}),
        dict(name = 'div', attrs = {'class' : 'post_next'}),
        dict(name = 'div', attrs = {'class' : 'clr'})
    ]

    preprocess_regexps = [(re.compile(u'</span>Komentarze: \(?[0-9]+\)? ?<span'), lambda match: '</span><span')]
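The preprocess_regexps entry above is a (compiled pattern, replacement function) pair that calibre applies to the raw page source before parsing; this one collapses the comment counter sitting between two spans. A standalone illustration against assumed sample HTML:

import re

pattern = re.compile(u'</span>Komentarze: \(?[0-9]+\)? ?<span')
html = u'<span>title</span>Komentarze: (12) <span>body</span>'
print pattern.sub(lambda match: '</span><span', html)
# -> <span>title</span><span>body</span>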
resources/recipes/swiatkindle.recipe (new file, 24 lines)
@@ -0,0 +1,24 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = u'2011, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
swiatkindle.pl
'''

import re

class swiatkindle(BasicNewsRecipe):
    title = u'Swiat Kindle'
    description = u'Blog o czytniku Amazon Kindle. Wersje, ksi\u0105\u017cki, kupowanie i korzystanie w Polsce'
    language = 'pl'
    __author__ = u'Tomasz D\u0142ugosz'
    oldest_article = 7
    max_articles_per_feed = 100

    feeds = [(u'\u015awiat Kindle - wpisy', u'http://swiatkindle.pl/feed')]

    remove_tags = [dict(name = 'ul', attrs = {'class' : 'similar-posts'})]

    preprocess_regexps = [(re.compile(u'<h3>Czytaj dalej:</h3>'), lambda match: '')]
@@ -573,8 +573,8 @@ from calibre.devices.edge.driver import EDGE
from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \
        SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH
from calibre.devices.sne.driver import SNE
from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, Q600, LUMIREAD, ALURATEK_COLOR, \
from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, \
        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR, \
        TREKSTOR, EEEREADER, NEXTBOOK
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
@@ -691,8 +691,6 @@ plugins += [
    AVANT,
    MENTOR,
    SWEEX,
    Q600,
    KOGAN,
    PDNOVEL,
    SPECTRA,
    GEMEI,
@@ -121,7 +121,8 @@ def enable_plugin(plugin_or_name):
    config['enabled_plugins'] = ep

default_disabled_plugins = set([
    'Douban Books', 'Douban.com covers', 'Nicebooks', 'Nicebooks covers'
    'Douban Books', 'Douban.com covers', 'Nicebooks', 'Nicebooks covers',
    'Kent District Library'
])

def is_disabled(plugin):
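Note the trailing comma added to the existing set entry: adjacent string literals concatenate in Python, so appending 'Kent District Library' on the next line without that comma would silently fuse two plugin names into one. A tiny demonstration:

broken = set(['Nicebooks covers'
              'Kent District Library'])
print broken  # set(['Nicebooks coversKent District Library']) -- one bogus name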
@@ -54,41 +54,24 @@ class AVANT(USBMS):
class SWEEX(USBMS):
    # Identical to the Promedia
    name = 'Sweex Device Interface'
    gui_name = 'Sweex'
    description = _('Communicate with the Sweex MM300')
    gui_name = 'Sweex/Kogan/Q600/Wink'
    description = _('Communicate with the Sweex/Kogan/Q600/Wink')
    author = 'Kovid Goyal'
    supported_platforms = ['windows', 'osx', 'linux']

    # Ordered list of supported formats
    FORMATS = ['epub', 'prc', 'fb2', 'html', 'rtf', 'chm', 'pdf', 'txt']
    FORMATS = ['epub', 'mobi', 'prc', 'fb2', 'html', 'rtf', 'chm', 'pdf', 'txt']

    VENDOR_ID = [0x0525, 0x177f]
    PRODUCT_ID = [0xa4a5, 0x300]
    BCD = [0x0319, 0x110]
    BCD = [0x0319, 0x110, 0x325]

    VENDOR_NAME = 'SWEEX'
    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EBOOKREADER'
    VENDOR_NAME = ['SWEEX', 'LINUX']
    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['EBOOKREADER', 'FILE-STOR_GADGET']

    EBOOK_DIR_MAIN = ''
    SUPPORTS_SUB_DIRS = True

class Q600(SWEEX):

    name = 'Digma Q600 Device interface'
    gui_name = 'Q600'
    description = _('Communicate with the Digma Q600')

    BCD = [0x325]
    FORMATS = ['epub', 'fb2', 'mobi', 'prc', 'html', 'rtf', 'chm', 'pdf', 'txt']

class KOGAN(SWEEX):

    name = 'Kogan Device Interface'
    gui_name = 'Kogan'
    description = _('Communicate with the Kogan')
    VENDOR_NAME = 'LINUX'
    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'FILE-STOR_GADGET'
    EBOOK_DIR_MAIN = 'Kogan eBooks'

class PDNOVEL(USBMS):
    name = 'Pandigital Novel device interface'
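The hunk above folds the separate Q600 and KOGAN subclasses into the one SWEEX driver by widening its USB id lists. Roughly how those lists are consulted when a device appears (illustrative only; the real matching lives in calibre's device scanner):

class SWEEXIds(object):  # values copied from the diff above
    VENDOR_ID  = [0x0525, 0x177f]
    PRODUCT_ID = [0xa4a5, 0x300]
    BCD        = [0x0319, 0x110, 0x325]

def claims_device(dev, vendor_id, product_id, bcd):
    return (vendor_id in dev.VENDOR_ID and product_id in dev.PRODUCT_ID
            and bcd in dev.BCD)

# The extra BCD entry 0x325 is what lets the single driver now claim the
# Q600/Kogan/Wink variants:
print claims_device(SWEEXIds, 0x177f, 0x300, 0x325)  # True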
@@ -350,6 +350,8 @@ class FB2MLizer(object):
        # Number of blank lines above tag
        try:
            ems = int(round((float(style.marginTop) / style.fontSize) - 1))
            if ems < 0:
                ems = 0
        except:
            ems = 0

@@ -397,7 +399,7 @@ class FB2MLizer(object):
            fb2_out += p_txt
            tags += p_tag
            fb2_out.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem_tree.attrib['src'])])
        if tag in ('br', 'hr') or ems:
        if tag in ('br', 'hr') or ems >= 1:
            if ems < 1:
                multiplier = 1
            else:
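A worked instance of the blank-line arithmetic above (margin and font-size values assumed): a 24px top margin on 12px text yields one blank line, while a zero margin would compute -1, which the newly added clamp resets to 0:

print int(round((24.0 / 12) - 1))  # -> 1 blank line
ems = int(round((0.0 / 12) - 1))   # -> -1 before the clamp
if ems < 0:
    ems = 0
print ems                          # -> 0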
@@ -205,7 +205,10 @@ def main(args=sys.argv):
        open(cpath, 'wb').write(br.open_novisit(curl).read())
        print 'Cover for', title, 'saved to', cpath

    #import time
    #st = time.time()
    print get_social_metadata(title, None, None, isbn)
    #print '\n\n', time.time() - st, '\n\n'

    return 0
@@ -106,6 +106,9 @@ class MetadataSource(Plugin): # {{{
    def join(self):
        return self.worker.join()

    def is_alive(self):
        return self.worker.is_alive()

    def is_customizable(self):
        return True

@@ -251,7 +254,9 @@ class KentDistrictLibrary(MetadataSource): # {{{

    name = 'Kent District Library'
    metadata_type = 'social'
    description = _('Downloads series information from ww2.kdl.org')
    description = _('Downloads series information from ww2.kdl.org. '
                    'This website cannot handle large numbers of queries, '
                    'so the plugin is disabled by default.')

    def fetch(self):
        if not self.title or not self.book_author:
@@ -5,7 +5,9 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re, urllib, urlparse
import re, urllib, urlparse, socket

from mechanize import URLError

from calibre.ebooks.metadata.book.base import Metadata
from calibre import browser
@@ -17,7 +19,7 @@ URL = \

_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])

def get_series(title, authors):
def get_series(title, authors, timeout=60):
    mi = Metadata(title, authors)
    if title and title[0] in _ignore_starts:
        title = title[1:]
@@ -39,7 +41,12 @@ def get_series(title, authors):

    url = URL.format(author, title)
    br = browser()
    raw = br.open(url).read()
    try:
        raw = br.open_novisit(url, timeout=timeout).read()
    except URLError, e:
        if isinstance(e.reason, socket.timeout):
            raise Exception('KDL Server busy, try again later')
        raise
    if 'see the full results' not in raw:
        return mi
    raw = xml_to_unicode(raw)[0]
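Assumed usage of the patched helper (module path assumed to be calibre.ebooks.metadata.kdl; requires network access and a responsive KDL server). The new timeout flows into open_novisit(), and a socket-level timeout now surfaces as the friendlier "busy" error:

from calibre.ebooks.metadata.kdl import get_series

try:
    mi = get_series('A Game of Thrones', ['George R. R. Martin'], timeout=30)
    print mi.series, mi.series_index
except Exception, e:
    print e  # 'KDL Server busy, try again later' on a socket timeout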
@@ -85,7 +85,8 @@ class Source(Plugin):

    # Metadata API {{{

    def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}):
    def identify(self, log, result_queue, abort, title=None, authors=None,
            identifiers={}, timeout=5):
        '''
        Identify a book by its title/author/isbn/etc.

@@ -98,6 +99,8 @@ class Source(Plugin):
        :param authors: A list of authors of the book, can be None
        :param identifiers: A dictionary of other identifiers, most commonly
                            {'isbn':'1234...'}
        :param timeout: Timeout in seconds, no network request should hang for
                        longer than timeout.
        :return: None if no errors occurred, otherwise a unicode representation
                 of the error suitable for showing to the user
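A minimal sketch (class name and URL hypothetical, not part of the diff) of a Source subclass honouring the new timeout contract by passing it straight through to its network call:

from calibre import browser, as_unicode
from calibre.ebooks.metadata.sources.base import Source

class ExampleSource(Source):  # hypothetical plugin for illustration
    name = 'Example'

    def identify(self, log, result_queue, abort, title=None, authors=None,
            identifiers={}, timeout=5):
        br = browser()
        try:
            # every network request honours the caller's timeout
            raw = br.open_novisit('http://example.com/q', timeout=timeout).read()
        except Exception, e:
            log.exception('Failed to make identify query')
            return as_unicode(e)
        # ...parse raw and put Metadata objects on result_queue...
        return None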
@@ -10,7 +10,6 @@ __docformat__ = 'restructuredtext en'
import time
from urllib import urlencode
from functools import partial
from threading import Thread

from lxml import etree

@@ -18,6 +17,7 @@ from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars
from calibre import browser, as_unicode

NAMESPACES = {
@@ -41,20 +41,20 @@ subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language')

def get_details(browser, url):
def get_details(browser, url, timeout):
    try:
        raw = browser.open_novisit(url).read()
        raw = browser.open_novisit(url, timeout=timeout).read()
    except Exception as e:
        gc = getattr(e, 'getcode', lambda : -1)
        if gc() != 403:
            raise
        # Google is throttling us, wait a little
        time.sleep(2)
        raw = browser.open_novisit(url).read()
        time.sleep(1)
        raw = browser.open_novisit(url, timeout=timeout).read()

    return raw

def to_metadata(browser, log, entry_):
def to_metadata(browser, log, entry_, timeout):

    def get_text(extra, x):
        try:
@@ -79,8 +79,9 @@ def to_metadata(browser, log, entry_):

    mi = Metadata(title_, authors)
    try:
        raw = get_details(browser, id_url)
        feed = etree.fromstring(xml_to_unicode(raw, strip_encoding_pats=True)[0])
        raw = get_details(browser, id_url, timeout)
        feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
            strip_encoding_pats=True)[0])
        extra = entry(feed)[0]
    except:
        log.exception('Failed to get additional details for', mi.title)
@@ -131,25 +132,18 @@ def to_metadata(browser, log, entry_):

    return mi

class Worker(Thread):

    def __init__(self, log, entries, abort, result_queue):
        self.browser, self.log, self.entries = browser(), log, entries
        self.abort, self.result_queue = abort, result_queue
        Thread.__init__(self)
        self.daemon = True

    def run(self):
        for i in self.entries:
def get_all_details(br, log, entries, abort, result_queue, timeout):
    for i in entries:
        try:
            ans = to_metadata(self.browser, self.log, i)
            ans = to_metadata(br, log, i, timeout)
            if isinstance(ans, Metadata):
                self.result_queue.put(ans)
                result_queue.put(ans)
        except:
            self.log.exception(
            log.exception(
                'Failed to get metadata for identify entry:',
                etree.tostring(i))
        if self.abort.is_set():
        if abort.is_set():
            break


@@ -192,54 +186,40 @@ class GoogleBooks(Source):
        })


    def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}):
    def identify(self, log, result_queue, abort, title=None, authors=None,
            identifiers={}, timeout=5):
        query = self.create_query(log, title=title, authors=authors,
                identifiers=identifiers)
        br = browser()
        try:
            raw = browser().open_novisit(query).read()
            raw = br.open_novisit(query, timeout=timeout).read()
        except Exception, e:
            log.exception('Failed to make identify query: %r'%query)
            return as_unicode(e)

        try:
            parser = etree.XMLParser(recover=True, no_network=True)
            feed = etree.fromstring(xml_to_unicode(raw,
            feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
                strip_encoding_pats=True)[0], parser=parser)
            entries = entry(feed)
        except Exception, e:
            log.exception('Failed to parse identify results')
            return as_unicode(e)


        groups = self.split_jobs(entries, 5) # At most 5 threads
        if not groups:
            return None
        workers = [Worker(log, entries, abort, result_queue) for entries in
                groups]

        if abort.is_set():
            return None

        for worker in workers: worker.start()

        has_alive_worker = True
        while has_alive_worker and not abort.is_set():
            time.sleep(0.1)
            has_alive_worker = False
            for worker in workers:
                if worker.is_alive():
                    has_alive_worker = True
        # There is no point running these queries in threads as google
        # throttles requests returning Forbidden errors
        get_all_details(br, log, entries, abort, result_queue, timeout)

        return None

if __name__ == '__main__':
    # To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
        isbn_test)
        title_test)
    test_identify_plugin(GoogleBooks.name,
        [
            (
                {'title': 'Great Expectations', 'authors':['Charles Dickens']},
                [isbn_test('9781607541592')]
                [title_test('Great Expectations', exact=True)]
            ),
        ])
@@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, tempfile
import os, tempfile, time
from Queue import Queue, Empty
from threading import Event

@@ -26,6 +26,17 @@ def isbn_test(isbn):

    return test

def title_test(title, exact=False):

    title = title.lower()

    def test(mi):
        mt = mi.title.lower()
        return (exact and mt == title) or \
               (not exact and title in mt)

    return test

def test_identify_plugin(name, tests):
    '''
    :param name: Plugin name
@@ -48,11 +59,15 @@ def test_identify_plugin(name, tests):
    abort = Event()
    prints('Log saved to', lf)

    times = []
    for kwargs, test_funcs in tests:
        prints('Running test with:', kwargs)
        rq = Queue()
        args = (log, rq, abort)
        start_time = time.time()
        err = plugin.identify(*args, **kwargs)
        total_time = time.time() - start_time
        times.append(total_time)
        if err is not None:
            prints('identify returned an error for args', args)
            prints(err)
@@ -87,6 +102,8 @@ def test_identify_plugin(name, tests):
            prints('Log saved to', lf)
            raise SystemExit(1)

    prints('Average time per query', sum(times)/len(times))

    if os.stat(lf).st_size > 10:
        prints('There were some errors, see log', lf)
|
||||
|
||||
class xISBN(object):
|
||||
|
||||
'''
|
||||
This class is used to find the ISBN numbers of "related" editions of a
|
||||
book, given its ISBN. Useful when querying services for metadata by ISBN,
|
||||
in case they do not have the ISBN for the particular edition.
|
||||
'''
|
||||
|
||||
QUERY = 'http://xisbn.worldcat.org/webservices/xid/isbn/%s?method=getEditions&format=json&fl=form,year,lang,ed'
|
||||
|
||||
def __init__(self):
|
||||
|
@ -259,6 +259,7 @@ class AddAction(InterfaceAction):
|
||||
if hasattr(self.gui, 'db_images'):
|
||||
self.gui.db_images.reset()
|
||||
self.gui.tags_view.recount()
|
||||
|
||||
if getattr(self._adder, 'merged_books', False):
|
||||
books = u'\n'.join([x if isinstance(x, unicode) else
|
||||
x.decode(preferred_encoding, 'replace') for x in
|
||||
@ -266,6 +267,17 @@ class AddAction(InterfaceAction):
|
||||
info_dialog(self.gui, _('Merged some books'),
|
||||
_('The following duplicate books were found and incoming book formats were '
|
||||
'processed and merged into your Calibre database according to your automerge settings:'), det_msg=books, show=True)
|
||||
|
||||
if getattr(self._adder, 'number_of_books_added', 0) > 0 or \
|
||||
getattr(self._adder, 'merged_books', False):
|
||||
# The formats of the current book could have changed if
|
||||
# automerge is enabled
|
||||
current_idx = self.gui.library_view.currentIndex()
|
||||
if current_idx.isValid():
|
||||
self.gui.library_view.model().current_changed(current_idx,
|
||||
current_idx)
|
||||
|
||||
|
||||
if getattr(self._adder, 'critical', None):
|
||||
det_msg = []
|
||||
for name, log in self._adder.critical.items():
|
||||
|