From ca5703b250753a728c120099fb3053d6b940e64a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 16 Mar 2011 10:44:56 -0600 Subject: [PATCH 01/15] ... --- src/calibre/gui2/main_window.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/main_window.py b/src/calibre/gui2/main_window.py index ec58dd3856..134aae3ad1 100644 --- a/src/calibre/gui2/main_window.py +++ b/src/calibre/gui2/main_window.py @@ -20,7 +20,8 @@ Usage: %prog [options] Launch the Graphical User Interface '''): parser = OptionParser(usage) - parser.add_option('--redirect-console-output', default=False, action='store_true', dest='redirect', + # The b is required because of a regression in optparse.py in python 2.7.0 + parser.add_option(b'--redirect-console-output', default=False, action='store_true', dest='redirect', help=_('Redirect console output to a dialog window (both stdout and stderr). Useful on windows where GUI apps do not have a output streams.')) return parser From 1edc4f5a800473d80196139b502ce63bb7d70c1a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 16 Mar 2011 10:49:10 -0600 Subject: [PATCH 02/15] More work on the new Amazon metadata download plugin --- src/calibre/ebooks/metadata/sources/amazon.py | 120 +++++++++++++++++- src/calibre/manual/faq.rst | 4 +- 2 files changed, 120 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index a62a9683cb..33ea24c421 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -7,7 +7,7 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import socket, time +import socket, time, re from urllib import urlencode from threading import Thread @@ -18,9 +18,15 @@ from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata.sources.base import Source from calibre.utils.cleantext import clean_ascii_chars from 
calibre.ebooks.chardet import xml_to_unicode +from calibre.ebooks.metadata.book.base import Metadata +from calibre.library.comments import sanitize_comments_html class Worker(Thread): + ''' + Get book details from amazons book page in a separate thread + ''' + def __init__(self, url, result_queue, browser, log, timeout=20): self.url, self.result_queue = url, result_queue self.log, self.timeout = log, timeout @@ -75,7 +81,117 @@ class Worker(Thread): self.parse_details(root) def parse_details(self, root): - pass + try: + asin = self.parse_asin(root) + except: + self.log.exception('Error parsing asin for url: %r'%self.url) + asin = None + + try: + title = self.parse_title(root) + except: + self.log.exception('Error parsing title for url: %r'%self.url) + title = None + + try: + authors = self.parse_authors(root) + except: + self.log.exception('Error parsing authors for url: %r'%self.url) + authors = [] + + + if not title or not authors or not asin: + self.log.error('Could not find title/authors/asin for %r'%self.url) + self.log.error('ASIN: %r Title: %r Authors: %r'%(asin, title, + authors)) + return + + mi = Metadata(title, authors) + mi.set_identifier('amazon', asin) + self.amazon_id = asin + + try: + mi.rating = self.parse_ratings(root) + except: + self.log.exception('Error parsing ratings for url: %r'%self.url) + + try: + mi.comments = self.parse_comments(root) + except: + self.log.exception('Error parsing comments for url: %r'%self.url) + + try: + self.cover_url = self.parse_cover(root) + except: + self.log.exception('Error parsing cover for url: %r'%self.url) + + self.result_queue.put(mi) + + def parse_asin(self, root): + link = root.xpath('//link[@rel="canonical" and @href]') + for l in link: + return l.get('href').rpartition('/')[-1] + + def parse_title(self, root): + tdiv = root.xpath('//h1[@class="parseasinTitle"]')[0] + actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]') + if actual_title: + title = tostring(actual_title[0], encoding=unicode, + 
method='text').strip() + else: + title = tostring(tdiv, encoding=unicode, method='text').strip() + return re.sub(r'[([].*[)]]', '', title).strip() + + def parse_authors(self, root): + bdiv = root.xpath('//div[@class="buying"]')[0] + aname = bdiv.xpath('descendant::span[@class="contributorNameTrigger"]') + authors = [tostring(x, encoding=unicode, method='text').strip() for x + in aname] + return authors + + def parse_ratings(self, root): + ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]') + pat = re.compile(r'([0-9.]+) out of (\d+) stars') + if ratings: + for elem in ratings[0].xpath('descendant::*[@title]'): + t = elem.get('title') + m = pat.match(t) + if m is not None: + try: + return float(m.group(1))/float(m.group(2)) * 5 + except: + pass + + def parse_comments(self, root): + desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]') + if desc: + desc = desc[0] + for c in desc.xpath('descendant::*[@class="seeAll" or' + ' @class="emptyClear" or @href]'): + c.getparent().remove(c) + desc = tostring(desc, method='html', encoding=unicode).strip() + # remove all attributes from tags + desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc) + # Collapse whitespace + #desc = re.sub('\n+', '\n', desc) + #desc = re.sub(' +', ' ', desc) + # Remove the notice about text referring to out of print editions + desc = re.sub(r'(?s)--This text ref.*?', '', desc) + # Remove comments + desc = re.sub(r'(?s)', '', desc) + return sanitize_comments_html(desc) + + def parse_cover(self, root): + imgs = root.xpath('//img[@id="prodImage" and @src]') + if imgs: + src = imgs[0].get('src') + parts = src.split('/') + if len(parts) > 3: + bn = parts[-1] + sparts = bn.split('_') + if len(sparts) > 2: + bn = sparts[0] + sparts[-1] + return ('/'.join(parts[:-1]))+'/'+bn class Amazon(Source): diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index a3d4332fd0..948611f775 100644 --- a/src/calibre/manual/faq.rst +++ 
b/src/calibre/manual/faq.rst @@ -508,9 +508,9 @@ You have two choices: 1. Create a patch by hacking on |app| and send it to me for review and inclusion. See `Development `_. 2. `Open a ticket `_ (you have to register and login first). Remember that |app| development is done by volunteers, so if you get no response to your feature request, it means no one feels like implementing it. -Can I include |app| on a CD to be distributed with my product/magazine? +How is |app| licensed? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|app| is licensed under the GNU General Public License v3 (an open source license). This means that you are free to redistribute |app| as long as you make the source code available. So if you want to put |app| on a CD with your product, you must also put the |app| source code on the CD. The source code is available for download `from googlecode `_. +|app| is licensed under the GNU General Public License v3 (an open source license). This means that you are free to redistribute |app| as long as you make the source code available. So if you want to put |app| on a CD with your product, you must also put the |app| source code on the CD. The source code is available for download `from googlecode `_. You are free to use the results of conversions from |app| however you want. You cannot use code, libraries from |app| in your software without making your software open source. For details, see `The GNU GPL v3 <http://www.gnu.org/licenses/gpl.html>`_. How do I run calibre from my USB stick? 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 103d994f176762c715c6fc1a88402356a44672bc Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 16 Mar 2011 10:52:06 -0600 Subject: [PATCH 03/15] Fix #9418 (Calibre 0.7.49 not detecting Verizon Motorola Droid 2) --- src/calibre/devices/android/driver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 0491f34d78..1ddc14bd1f 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -98,7 +98,7 @@ class ANDROID(USBMS): 'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE', 'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H', 'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD', - '7', 'A956'] + '7', 'A956', 'A955'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'A70S', 'A101IT', '7'] From 2ea99bfdaf3076360fe6fccb3f8ad02d2fd42680 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 16 Mar 2011 11:37:52 -0600 Subject: [PATCH 04/15] Content server: Add workaround for Internet Explorer not supporting the ' entity. 
Fixes #9413 (Internet Explorer 8, Apostrophes in Book Titles in http://myhostname:8080/browse become ') --- src/calibre/library/server/browse.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py index 97bfc30f14..fd015f5848 100644 --- a/src/calibre/library/server/browse.py +++ b/src/calibre/library/server/browse.py @@ -12,7 +12,7 @@ import cherrypy from calibre.constants import filesystem_encoding from calibre import isbytestring, force_unicode, fit_image, \ - prepare_string_for_xml as xml + prepare_string_for_xml from calibre.utils.ordered_dict import OrderedDict from calibre.utils.filenames import ascii_filename from calibre.utils.config import prefs, tweaks @@ -23,6 +23,10 @@ from calibre.library.server import custom_fields_to_display from calibre.library.field_metadata import category_icon_map from calibre.library.server.utils import quote, unquote +def xml(*args, **kwargs): + ans = prepare_string_for_xml(*args, **kwargs) + return ans.replace(''', ''') + def render_book_list(ids, prefix, suffix=''): # {{{ pages = [] num = len(ids) From 487e28f8697f04084086fc734fc43efef9428da4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 16 Mar 2011 11:53:25 -0600 Subject: [PATCH 05/15] Fix #9419 (Enable CTRL-F keyboard shortcut in ebook viewer) --- src/calibre/gui2/viewer/main.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py index 964616ab48..13e7066806 100644 --- a/src/calibre/gui2/viewer/main.py +++ b/src/calibre/gui2/viewer/main.py @@ -225,6 +225,12 @@ class EbookViewer(MainWindow, Ui_EbookViewer): self.action_quit.setShortcuts(qs) self.connect(self.action_quit, SIGNAL('triggered(bool)'), lambda x:QApplication.instance().quit()) + self.action_focus_search = QAction(self) + self.addAction(self.action_focus_search) + self.action_focus_search.setShortcuts([Qt.Key_Slash, + 
QKeySequence(QKeySequence.Find)]) + self.action_focus_search.triggered.connect(lambda x: + self.search.setFocus(Qt.OtherFocusReason)) self.action_copy.setDisabled(True) self.action_metadata.setCheckable(True) self.action_metadata.setShortcut(Qt.CTRL+Qt.Key_I) @@ -494,12 +500,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer): if self.view.search(text, backwards=backwards): self.scrolled(self.view.scroll_fraction) - def keyPressEvent(self, event): - if event.key() == Qt.Key_Slash: - self.search.setFocus(Qt.OtherFocusReason) - else: - return MainWindow.keyPressEvent(self, event) - def internal_link_clicked(self, frac): self.history.add(self.pos.value()) From 412fbc6fbc9bceda3c35843fcf0cfe311eccad97 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 16 Mar 2011 12:02:01 -0600 Subject: [PATCH 06/15] ... --- src/calibre/gui2/dialogs/scheduler.ui | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/dialogs/scheduler.ui b/src/calibre/gui2/dialogs/scheduler.ui index f295703b33..f26bfc7285 100644 --- a/src/calibre/gui2/dialogs/scheduler.ui +++ b/src/calibre/gui2/dialogs/scheduler.ui @@ -6,7 +6,7 @@ 0 0 - 767 + 792 575 @@ -44,7 +44,7 @@ 0 0 - 469 + 486 504 From be945ddda012020ed896391cc28bc4362c782e93 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 16 Mar 2011 12:07:41 -0600 Subject: [PATCH 07/15] Fix #9405 (Clearing last viewed book(s) history) --- src/calibre/gui2/viewer/main.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py index 13e7066806..c704b98dc9 100644 --- a/src/calibre/gui2/viewer/main.py +++ b/src/calibre/gui2/viewer/main.py @@ -299,6 +299,9 @@ class EbookViewer(MainWindow, Ui_EbookViewer): ca.setShortcut(QKeySequence.Copy) self.addAction(ca) self.open_history_menu = QMenu() + self.clear_recent_history_action = QAction( + _('Clear list of recently opened books'), self) + 
self.clear_recent_history_action.triggered.connect(self.clear_recent_history) self.build_recent_menu() self.action_open_ebook.setMenu(self.open_history_menu) self.open_history_menu.triggered[QAction].connect(self.open_recent) @@ -307,11 +310,19 @@ class EbookViewer(MainWindow, Ui_EbookViewer): self.restore_state() + def clear_recent_history(self, *args): + vprefs.set('viewer_open_history', []) + self.build_recent_menu() + def build_recent_menu(self): m = self.open_history_menu m.clear() + recent = vprefs.get('viewer_open_history', []) + if recent: + m.addAction(self.clear_recent_history_action) + m.addSeparator() count = 0 - for path in vprefs.get('viewer_open_history', []): + for path in recent: if count > 9: break if os.path.exists(path): From d05f8b79a7fe84b484c76801ad20acd0227015bf Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 16 Mar 2011 13:08:05 -0600 Subject: [PATCH 08/15] Updated Sports Illustrated --- resources/recipes/sportsillustrated.recipe | 88 ++++++++++------------ 1 file changed, 38 insertions(+), 50 deletions(-) diff --git a/resources/recipes/sportsillustrated.recipe b/resources/recipes/sportsillustrated.recipe index f5a7b4c32b..bec63f74ef 100644 --- a/resources/recipes/sportsillustrated.recipe +++ b/resources/recipes/sportsillustrated.recipe @@ -1,6 +1,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe #from calibre.ebooks.BeautifulSoup import BeautifulSoup from urllib import quote +import re class SportsIllustratedRecipe(BasicNewsRecipe) : __author__ = 'kwetal' @@ -15,65 +16,52 @@ class SportsIllustratedRecipe(BasicNewsRecipe) : remove_javascript = True use_embedded_content = False - INDEX = 'http://sportsillustrated.cnn.com/' + INDEX = 'http://sportsillustrated.cnn.com/vault/cover/home/index.htm' def parse_index(self): answer = [] soup = self.index_to_soup(self.INDEX) - # Find the link to the current issue on the front page. 
SI Cover - cover = soup.find('img', attrs = {'alt' : 'Read All Articles', 'style' : 'vertical-align:bottom;'}) - if cover: - currentIssue = cover.parent['href'] - if currentIssue: - # Open the index of current issue - index = self.index_to_soup(currentIssue) - self.log('\tLooking for current issue in: ' + currentIssue) - # Now let us see if they updated their frontpage - nav = index.find('div', attrs = {'class': 'siv_trav_top'}) - if nav: - img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_next_v2.jpg'}) - if img: - parent = img.parent - if parent.name == 'a': - # They didn't update their frontpage; Load the next issue from here - href = self.INDEX + parent['href'] - index = self.index_to_soup(href) - self.log('\tLooking for current issue in: ' + href) + #Loop through all of the "latest" covers until we find one that actually has articles + for item in soup.findAll('div', attrs={'id': re.compile("ecomthumb_latest_*")}): + regex = re.compile('ecomthumb_latest_(\d*)') + result = regex.search(str(item)) + current_issue_number = str(result.group(1)) + current_issue_link = 'http://sportsillustrated.cnn.com/vault/cover/toc/' + current_issue_number + '/index.htm' + self.log('Checking this link for a TOC: ', current_issue_link) + index = self.index_to_soup(current_issue_link) + if index: if index.find('div', 'siv_noArticleMessage'): - nav = index.find('div', attrs = {'class': 'siv_trav_top'}) - if nav: - # Their frontpage points to an issue without any articles; Use the previous issue - img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_previous_v2.jpg'}) - if img: - parent = img.parent - if parent.name == 'a': - href = self.INDEX + parent['href'] - index = self.index_to_soup(href) - self.log('\tLooking for current issue in: ' + href) + self.log('No TOC for this one. Skipping...') + else: + self.log('Found a TOC... Using this link') + break + # Find all articles. 
+ list = index.find('div', attrs = {'class' : 'siv_artList'}) + if list: + self.log ('found siv_artList') + articles = [] + # Get all the artcles ready for calibre. + counter = 0 + for headline in list.findAll('div', attrs = {'class' : 'headline'}): + counter = counter + 1 + title = self.tag_to_string(headline.a) + '\n' + self.tag_to_string(headline.findNextSibling('div', attrs = {'class' : 'info'})) + url = self.INDEX + headline.a['href'] + description = self.tag_to_string(headline.findNextSibling('a').div) + article = {'title' : title, 'date' : u'', 'url' : url, 'description' : description} + articles.append(article) + if counter > 5: + break - # Find all articles. - list = index.find('div', attrs = {'class' : 'siv_artList'}) - if list: - articles = [] - # Get all the artcles ready for calibre. - for headline in list.findAll('div', attrs = {'class' : 'headline'}): - title = self.tag_to_string(headline.a) + '\n' + self.tag_to_string(headline.findNextSibling('div', attrs = {'class' : 'info'})) - url = self.INDEX + headline.a['href'] - description = self.tag_to_string(headline.findNextSibling('a').div) - article = {'title' : title, 'date' : u'', 'url' : url, 'description' : description} + # See if we can find a meaningfull title + feedTitle = 'Current Issue' + hasTitle = index.find('div', attrs = {'class' : 'siv_imageText_head'}) + if hasTitle : + feedTitle = self.tag_to_string(hasTitle.h1) - articles.append(article) - - # See if we can find a meaningfull title - feedTitle = 'Current Issue' - hasTitle = index.find('div', attrs = {'class' : 'siv_imageText_head'}) - if hasTitle : - feedTitle = self.tag_to_string(hasTitle.h1) - - answer.append([feedTitle, articles]) + answer.append([feedTitle, articles]) return answer @@ -82,6 +70,7 @@ class SportsIllustratedRecipe(BasicNewsRecipe) : # This is the url and the parameters that work to get the print version. 
printUrl = 'http://si.printthis.clickability.com/pt/printThis?clickMap=printThis' printUrl += '&fb=Y&partnerID=2356&url=' + quote(url) + self.log('PrintURL: ' , printUrl) return printUrl @@ -116,4 +105,3 @@ class SportsIllustratedRecipe(BasicNewsRecipe) : return homeMadeSoup ''' - From 0fbf30487e9b1b174c704d6c8a2651ddfade3318 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 16 Mar 2011 16:05:30 -0600 Subject: [PATCH 09/15] Print out identifiers when printing a Metadata object --- src/calibre/ebooks/metadata/book/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index c5a8a82db1..6818126699 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -651,6 +651,9 @@ class Metadata(object): fmt('Published', isoformat(self.pubdate)) if self.rights is not None: fmt('Rights', unicode(self.rights)) + if self.identifiers: + fmt('Identifiers', u', '.join(['%s:%s'%(k, v) for k, v in + self.identifiers.iteritems()])) for key in self.custom_field_keys(): val = self.get(key, None) if val: From 68f63e807a8b60a5c582b78707779c1d01b3cec2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 16 Mar 2011 16:07:09 -0600 Subject: [PATCH 10/15] Fix ratings not being downloaded from Amazon --- src/calibre/ebooks/metadata/amazon.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py index 91e6919837..8e4dd1dd27 100644 --- a/src/calibre/ebooks/metadata/amazon.py +++ b/src/calibre/ebooks/metadata/amazon.py @@ -154,17 +154,16 @@ def get_metadata(br, asin, mi): return False if root.xpath('//*[@id="errorMessage"]'): return False - ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]') + + ratings = root.xpath('//div[@class="jumpBar"]/descendant::span[@class="asinReviewsSummary"]') + pat = re.compile(r'([0-9.]+) 
out of (\d+) stars') if ratings: - pat = re.compile(r'([0-9.]+) out of (\d+) stars') - r = ratings[0] - for elem in r.xpath('descendant::*[@title]'): - t = elem.get('title') + for elem in ratings[0].xpath('descendant::*[@title]'): + t = elem.get('title').strip() m = pat.match(t) if m is not None: try: mi.rating = float(m.group(1))/float(m.group(2)) * 5 - break except: pass @@ -216,6 +215,7 @@ def main(args=sys.argv): print 'Failed to downlaod social metadata for', title return 1 #print '\n\n', time.time() - st, '\n\n' + print mi print '\n' return 0 From abfab2fce09d0893407187085ffd8d1cc1cc53c1 Mon Sep 17 00:00:00 2001 From: John Schember Date: Wed, 16 Mar 2011 19:38:51 -0400 Subject: [PATCH 11/15] TXT Input: Textile, escaped styling submitted by Perkin --- src/calibre/ebooks/textile/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/textile/functions.py b/src/calibre/ebooks/textile/functions.py index eca4bcecff..ec675b9b62 100644 --- a/src/calibre/ebooks/textile/functions.py +++ b/src/calibre/ebooks/textile/functions.py @@ -807,7 +807,7 @@ class Textile(object): for qtag in qtags: pattern = re.compile(r""" - (?:^|(?<=[\s>%(pnct)s])|([\]}])) + (?:^|(?<=[\s>%(pnct)s])|\[|([\]}])) (%(qtag)s)(?!%(qtag)s) (%(c)s) (?::(\S+))? From cc14a6a657072491cd0745220c6ccf95b7e505a6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 16 Mar 2011 17:42:42 -0600 Subject: [PATCH 12/15] ... 
--- src/calibre/ebooks/metadata/book/base.py | 7 +- src/calibre/ebooks/metadata/sources/amazon.py | 156 +++++++++++++----- src/calibre/ebooks/metadata/sources/base.py | 9 + src/calibre/ebooks/metadata/sources/google.py | 5 +- 4 files changed, 134 insertions(+), 43 deletions(-) diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 6818126699..db0c278340 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -633,10 +633,6 @@ class Metadata(object): fmt('Publisher', self.publisher) if getattr(self, 'book_producer', False): fmt('Book Producer', self.book_producer) - if self.comments: - fmt('Comments', self.comments) - if self.isbn: - fmt('ISBN', self.isbn) if self.tags: fmt('Tags', u', '.join([unicode(t) for t in self.tags])) if self.series: @@ -654,6 +650,9 @@ class Metadata(object): if self.identifiers: fmt('Identifiers', u', '.join(['%s:%s'%(k, v) for k, v in self.identifiers.iteritems()])) + if self.comments: + fmt('Comments', self.comments) + for key in self.custom_field_keys(): val = self.get(key, None) if val: diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index 33ea24c421..30b95950ea 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -20,6 +20,7 @@ from calibre.utils.cleantext import clean_ascii_chars from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.metadata.book.base import Metadata from calibre.library.comments import sanitize_comments_html +from calibre.utils.date import parse_date class Worker(Thread): @@ -28,10 +29,12 @@ class Worker(Thread): ''' def __init__(self, url, result_queue, browser, log, timeout=20): + Thread.__init__(self) + self.daemon = True self.url, self.result_queue = url, result_queue self.log, self.timeout = log, timeout self.browser = browser.clone_browser() - self.cover_url = self.amazon_id = None 
+ self.cover_url = self.amazon_id = self.isbn = None def run(self): try: @@ -111,7 +114,7 @@ class Worker(Thread): self.amazon_id = asin try: - mi.rating = self.parse_ratings(root) + mi.rating = self.parse_rating(root) except: self.log.exception('Error parsing ratings for url: %r'%self.url) @@ -125,6 +128,37 @@ class Worker(Thread): except: self.log.exception('Error parsing cover for url: %r'%self.url) + pd = root.xpath('//h2[text()="Product Details"]/../div[@class="content"]') + if pd: + pd = pd[0] + + try: + isbn = self.parse_isbn(pd) + if isbn: + self.isbn = mi.isbn = isbn + except: + self.log.exception('Error parsing ISBN for url: %r'%self.url) + + try: + mi.publisher = self.parse_publisher(pd) + except: + self.log.exception('Error parsing publisher for url: %r'%self.url) + + try: + mi.pubdate = self.parse_pubdate(pd) + except: + self.log.exception('Error parsing publish date for url: %r'%self.url) + + try: + lang = self.parse_language(pd) + if lang: + mi.language = lang + except: + self.log.exception('Error parsing language for url: %r'%self.url) + + else: + self.log.warning('Failed to find product description for url: %r'%self.url) + self.result_queue.put(mi) def parse_asin(self, root): @@ -140,27 +174,23 @@ class Worker(Thread): method='text').strip() else: title = tostring(tdiv, encoding=unicode, method='text').strip() - return re.sub(r'[([].*[)]]', '', title).strip() + return re.sub(r'[(\[].*[)\]]', '', title).strip() def parse_authors(self, root): - bdiv = root.xpath('//div[@class="buying"]')[0] - aname = bdiv.xpath('descendant::span[@class="contributorNameTrigger"]') + aname = root.xpath('//span[@class="contributorNameTrigger"]') authors = [tostring(x, encoding=unicode, method='text').strip() for x in aname] return authors - def parse_ratings(self, root): - ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]') + def parse_rating(self, root): + ratings = 
root.xpath('//div[@class="jumpBar"]/descendant::span[@class="asinReviewsSummary"]') pat = re.compile(r'([0-9.]+) out of (\d+) stars') if ratings: for elem in ratings[0].xpath('descendant::*[@title]'): - t = elem.get('title') + t = elem.get('title').strip() m = pat.match(t) if m is not None: - try: - return float(m.group(1))/float(m.group(2)) * 5 - except: - pass + return float(m.group(1))/float(m.group(2)) * 5 def parse_comments(self, root): desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]') @@ -193,6 +223,37 @@ class Worker(Thread): bn = sparts[0] + sparts[-1] return ('/'.join(parts[:-1]))+'/'+bn + def parse_isbn(self, pd): + for x in reversed(pd.xpath( + 'descendant::*[starts-with(text(), "ISBN")]')): + if x.tail: + ans = check_isbn(x.tail.strip()) + if ans: + return ans + + def parse_publisher(self, pd): + for x in reversed(pd.xpath( + 'descendant::*[starts-with(text(), "Publisher:")]')): + if x.tail: + ans = x.tail.partition(';')[0] + return ans.partition('(')[0].strip() + + def parse_pubdate(self, pd): + for x in reversed(pd.xpath( + 'descendant::*[starts-with(text(), "Publisher:")]')): + if x.tail: + ans = x.tail + date = ans.partition('(')[-1].replace(')', '').strip() + return parse_date(date, assume_utc=True) + + def parse_language(self, pd): + for x in reversed(pd.xpath( + 'descendant::*[starts-with(text(), "Language:")]')): + if x.tail: + ans = x.tail.strip() + if ans == 'English': + return 'en' + class Amazon(Source): @@ -200,7 +261,8 @@ class Amazon(Source): description = _('Downloads metadata from Amazon') capabilities = frozenset(['identify']) - touched_fields = frozenset(['title', 'authors', 'isbn', 'pubdate', 'comments']) + touched_fields = frozenset(['title', 'authors', 'identifier:amazon', + 'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate']) AMAZON_DOMAINS = { 'com': _('US'), @@ -254,6 +316,10 @@ class Amazon(Source): def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}, 
timeout=20): + ''' + Note this method will retry without identifiers automatically if no + match is found with identifiers. + ''' query = self.create_query(log, title=title, authors=authors, identifiers=identifiers) if query is None: @@ -281,37 +347,45 @@ class Amazon(Source): raw = xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True)[0] - if '404 - ' in raw: - log.error('No matches found for query: %r'%query) - return - - try: - root = soupparser.fromstring(clean_ascii_chars(raw)) - except: - msg = 'Failed to parse amazon page for query: %r'%query - log.exception(msg) - return msg - - errmsg = root.xpath('//*[@id="errorMessage"]') - if errmsg: - msg = tostring(errmsg, method='text', encoding=unicode).strip() - log.error(msg) - # The error is almost always a not found error - return - matches = [] - for div in root.xpath(r'//div[starts-with(@id, "result_")]'): - for a in div.xpath(r'descendant::a[@class="title" and @href]'): - title = tostring(a, method='text', encoding=unicode).lower() - if 'bulk pack' not in title: - matches.append(a.get('href')) - break + found = '<title>404 - ' not in raw + + if found: + try: + root = soupparser.fromstring(clean_ascii_chars(raw)) + except: + msg = 'Failed to parse amazon page for query: %r'%query + log.exception(msg) + return msg + + errmsg = root.xpath('//*[@id="errorMessage"]') + if errmsg: + msg = tostring(errmsg, method='text', encoding=unicode).strip() + log.error(msg) + # The error is almost always a not found error + found = False + + if found: + for div in root.xpath(r'//div[starts-with(@id, "result_")]'): + for a in div.xpath(r'descendant::a[@class="title" and @href]'): + title = tostring(a, method='text', encoding=unicode).lower() + if 'bulk pack' not in title: + matches.append(a.get('href')) + break # Keep only the top 5 matches as the matches are sorted by relevance by # Amazon so lower matches are not likely to be very relevant matches = matches[:5] + if abort.is_set(): + return + if not matches: + 
if identifiers and title and authors: + self.log('No matches found with identifiers, retrying using only' + ' title and authors') + return self.identify(log, result_queue, abort, title=title, + authors=authors, timeout=timeout) log.error('No matches found with query: %r'%query) return @@ -333,6 +407,14 @@ class Amazon(Source): if not a_worker_is_alive: break + for w in workers: + if w.amazon_id: + if w.isbn: + self.cache_isbn_to_identifier(w.isbn, w.amazon_id) + if w.cover_url: + self.cache_identifier_to_cover_url(w.amazon_id, + w.cover_url) + return None diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index 523d012cd5..3c320d14b6 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -35,6 +35,7 @@ class Source(Plugin): def __init__(self, *args, **kwargs): Plugin.__init__(self, *args, **kwargs) self._isbn_to_identifier_cache = {} + self._identifier_to_cover_url_cache = {} self.cache_lock = threading.RLock() self._config_obj = None self._browser = None @@ -68,6 +69,14 @@ class Source(Plugin): with self.cache_lock: return self._isbn_to_identifier_cache.get(isbn, None) + def cache_identifier_to_cover_url(self, id_, url): + with self.cache_lock: + self._identifier_to_cover_url_cache[id_] = url + + def cached_identifier_to_cover_url(self, id_): + with self.cache_lock: + return self._identifier_to_cover_url_cache.get(id_, None) + def get_author_tokens(self, authors, only_first_author=True): ''' Take a list of authors and return a list of tokens useful for an diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py index 923062379e..8a7fc8e540 100644 --- a/src/calibre/ebooks/metadata/sources/google.py +++ b/src/calibre/ebooks/metadata/sources/google.py @@ -145,8 +145,9 @@ class GoogleBooks(Source): description = _('Downloads metadata from Google Books') capabilities = frozenset(['identify']) - touched_fields = 
frozenset(['title', 'authors', 'isbn', 'tags', 'pubdate', - 'comments', 'publisher', 'author_sort']) # language currently disabled + touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate', + 'comments', 'publisher', 'author_sort', 'identifier:isbn', + 'identifier:google']) # language currently disabled def create_query(self, log, title=None, authors=None, identifiers={}): BASE_URL = 'http://books.google.com/books/feeds/volumes?' From c53f66f7521f25f27493baed3200919a0da8f4d7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Wed, 16 Mar 2011 22:00:45 -0600 Subject: [PATCH 13/15] New google and amazon metadata plugins finished --- src/calibre/ebooks/metadata/book/base.py | 5 ++++ src/calibre/ebooks/metadata/sources/amazon.py | 10 ++++---- src/calibre/ebooks/metadata/sources/google.py | 23 ++++++++----------- src/calibre/ebooks/metadata/sources/test.py | 10 ++++++++ 4 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index db0c278340..2bf23e4b82 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -227,6 +227,11 @@ class Metadata(object): if val: identifiers[typ] = val + def has_identifier(self, typ): + identifiers = object.__getattribute__(self, + '_data')['identifiers'] + return typ in identifiers + # field-oriented interface. 
Intended to be the same as in LibraryDatabase def standard_field_keys(self): diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index 30b95950ea..e8b7bf2e2c 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -22,7 +22,7 @@ from calibre.ebooks.metadata.book.base import Metadata from calibre.library.comments import sanitize_comments_html from calibre.utils.date import parse_date -class Worker(Thread): +class Worker(Thread): # {{{ ''' Get book details from amazons book page in a separate thread @@ -253,7 +253,7 @@ class Worker(Thread): ans = x.tail.strip() if ans == 'English': return 'en' - +# }}} class Amazon(Source): @@ -270,7 +270,7 @@ class Amazon(Source): 'de' : _('Germany'), } - def create_query(self, log, title=None, authors=None, identifiers={}): + def create_query(self, log, title=None, authors=None, identifiers={}): # {{{ domain = self.prefs.get('domain', 'com') # See the amazon detailed search page to get all options @@ -313,8 +313,9 @@ class Amazon(Source): url = 'http://www.amazon.%s/s/?'%domain + urlencode(utf8q) return url + # }}} - def identify(self, log, result_queue, abort, title=None, authors=None, + def identify(self, log, result_queue, abort, title=None, authors=None, # {{{ identifiers={}, timeout=20): ''' Note this method will retry without identifiers automatically if no @@ -416,6 +417,7 @@ class Amazon(Source): w.cover_url) return None + # }}} if __name__ == '__main__': diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py index 8a7fc8e540..8dffd3f053 100644 --- a/src/calibre/ebooks/metadata/sources/google.py +++ b/src/calibre/ebooks/metadata/sources/google.py @@ -42,7 +42,7 @@ subject = XPath('descendant::dc:subject') description = XPath('descendant::dc:description') language = XPath('descendant::dc:language') -def get_details(browser, url, timeout): +def 
get_details(browser, url, timeout): # {{{ try: raw = browser.open_novisit(url, timeout=timeout).read() except Exception as e: @@ -54,8 +54,9 @@ def get_details(browser, url, timeout): raw = browser.open_novisit(url, timeout=timeout).read() return raw +# }}} -def to_metadata(browser, log, entry_, timeout): +def to_metadata(browser, log, entry_, timeout): # {{{ def get_text(extra, x): try: @@ -94,12 +95,6 @@ def to_metadata(browser, log, entry_, timeout): #mi.language = get_text(extra, language) mi.publisher = get_text(extra, publisher) - # Author sort - for x in creator(extra): - for key, val in x.attrib.items(): - if key.endswith('file-as') and val and val.strip(): - mi.author_sort = val - break # ISBN isbns = [] for x in identifier(extra): @@ -137,7 +132,7 @@ def to_metadata(browser, log, entry_, timeout): return mi - +# }}} class GoogleBooks(Source): @@ -146,10 +141,10 @@ class GoogleBooks(Source): capabilities = frozenset(['identify']) touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate', - 'comments', 'publisher', 'author_sort', 'identifier:isbn', + 'comments', 'publisher', 'identifier:isbn', 'identifier:google']) # language currently disabled - def create_query(self, log, title=None, authors=None, identifiers={}): + def create_query(self, log, title=None, authors=None, identifiers={}): # {{{ BASE_URL = 'http://books.google.com/books/feeds/volumes?' 
isbn = check_isbn(identifiers.get('isbn', None)) q = '' @@ -177,6 +172,7 @@ class GoogleBooks(Source): 'start-index':1, 'min-viewability':'none', }) + # }}} def cover_url_from_identifiers(self, identifiers): goog = identifiers.get('google', None) @@ -209,11 +205,11 @@ class GoogleBooks(Source): if abort.is_set(): break - def identify(self, log, result_queue, abort, title=None, authors=None, + def identify(self, log, result_queue, abort, title=None, authors=None, # {{{ identifiers={}, timeout=20): query = self.create_query(log, title=title, authors=authors, identifiers=identifiers) - br = self.browser() + br = self.browser try: raw = br.open_novisit(query, timeout=timeout).read() except Exception, e: @@ -234,6 +230,7 @@ class GoogleBooks(Source): self.get_all_details(br, log, entries, abort, result_queue, timeout) return None + # }}} if __name__ == '__main__': # To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py diff --git a/src/calibre/ebooks/metadata/sources/test.py b/src/calibre/ebooks/metadata/sources/test.py index 3b41e69d40..e877eabd83 100644 --- a/src/calibre/ebooks/metadata/sources/test.py +++ b/src/calibre/ebooks/metadata/sources/test.py @@ -102,6 +102,16 @@ def test_identify_plugin(name, tests): prints('Log saved to', lf) raise SystemExit(1) + for key in plugin.touched_fields: + if key.startswith('identifier:'): + key = key.partition(':')[-1] + if not match_found.has_identifier(key): + prints('Failed to find identifier:', key) + raise SystemExit(1) + elif match_found.is_null(key): + prints('Failed to find', key) + raise SystemExit(1) + prints('Average time per query', sum(times)/len(times)) if os.stat(lf).st_size > 10: From 2aee9ec3217c57df2540fe3d6ec18710155bbd57 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Wed, 16 Mar 2011 22:58:31 -0600 Subject: [PATCH 14/15] ... 
--- src/calibre/ebooks/metadata/sources/amazon.py | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index e8b7bf2e2c..2e3180a739 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -383,7 +383,7 @@ class Amazon(Source): if not matches: if identifiers and title and authors: - self.log('No matches found with identifiers, retrying using only' + log('No matches found with identifiers, retrying using only' ' title and authors') return self.identify(log, result_queue, abort, title=title, authors=authors, timeout=timeout) @@ -428,10 +428,29 @@ if __name__ == '__main__': test_identify_plugin(Amazon.name, [ - ( + ( # This isbn not on amazon + {'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python', + 'authors':['Lutz']}, + [title_test('Learning Python: Powerful Object-Oriented Programming', exact=True)] + + ), + + ( # Sophisticated comment formatting + {'identifiers':{'isbn': '9781416580829'}}, + [title_test('Angels & Demons - Movie Tie-In: A Novel', exact=True)] + ), + + ( # No specific problems {'identifiers':{'isbn': '0743273567'}}, [title_test('The great gatsby', exact=True)] ), + + ( # A newer book + {'identifiers':{'isbn': '9780316044981'}}, + [title_test('The Heroes', exact=True)] + + ), + ]) From 49e4f2cf28c09625a6ebe6476199d62958398637 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Wed, 16 Mar 2011 23:44:48 -0600 Subject: [PATCH 15/15] Amazon plugin: Handle the case of authors unknown to amazon and dont set the cover URL to the no cover available image --- src/calibre/ebooks/metadata/sources/amazon.py | 46 +++++++++++++------ src/calibre/ebooks/metadata/sources/test.py | 9 ++++ 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index 
2e3180a739..335a43ebb0 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -62,6 +62,7 @@ class Worker(Thread): # {{{ raw = xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True)[0] + # open('/t/t.html', 'wb').write(raw) if '<title>404 - ' in raw: self.log.error('URL malformed: %r'%self.url) @@ -127,6 +128,7 @@ class Worker(Thread): # {{{ self.cover_url = self.parse_cover(root) except: self.log.exception('Error parsing cover for url: %r'%self.url) + mi.has_cover = bool(self.cover_url) pd = root.xpath('//h2[text()="Product Details"]/../div[@class="content"]') if pd: @@ -177,7 +179,10 @@ class Worker(Thread): # {{{ return re.sub(r'[(\[].*[)\]]', '', title).strip() def parse_authors(self, root): - aname = root.xpath('//span[@class="contributorNameTrigger"]') + x = '//h1[@class="parseasinTitle"]/following-sibling::span/*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]' + aname = root.xpath(x) + for x in aname: + x.tail = '' authors = [tostring(x, encoding=unicode, method='text').strip() for x in aname] return authors @@ -215,13 +220,14 @@ class Worker(Thread): # {{{ imgs = root.xpath('//img[@id="prodImage" and @src]') if imgs: src = imgs[0].get('src') - parts = src.split('/') - if len(parts) > 3: - bn = parts[-1] - sparts = bn.split('_') - if len(sparts) > 2: - bn = sparts[0] + sparts[-1] - return ('/'.join(parts[:-1]))+'/'+bn + if '/no-image-avail' not in src: + parts = src.split('/') + if len(parts) > 3: + bn = parts[-1] + sparts = bn.split('_') + if len(sparts) > 2: + bn = sparts[0] + sparts[-1] + return ('/'.join(parts[:-1]))+'/'+bn def parse_isbn(self, pd): for x in reversed(pd.xpath( @@ -424,30 +430,44 @@ if __name__ == '__main__': # To run these test use: calibre-debug -e # src/calibre/ebooks/metadata/sources/amazon.py from calibre.ebooks.metadata.sources.test import (test_identify_plugin, - title_test) + title_test, authors_test) 
test_identify_plugin(Amazon.name, [ + ( # An e-book ISBN not on Amazon, one of the authors is + # unknown to Amazon, so no popup wrapper + {'identifiers':{'isbn': '0307459671'}, + 'title':'Invisible Gorilla', 'authors':['Christopher Chabris']}, + [title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us', + exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])] + + ), + ( # This isbn not on amazon {'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python', 'authors':['Lutz']}, - [title_test('Learning Python: Powerful Object-Oriented Programming', exact=True)] + [title_test('Learning Python: Powerful Object-Oriented Programming', + exact=True), authors_test(['Mark Lutz']) + ] ), ( # Sophisticated comment formatting {'identifiers':{'isbn': '9781416580829'}}, - [title_test('Angels & Demons - Movie Tie-In: A Novel', exact=True)] + [title_test('Angels & Demons - Movie Tie-In: A Novel', + exact=True), authors_test(['Dan Brown'])] ), ( # No specific problems {'identifiers':{'isbn': '0743273567'}}, - [title_test('The great gatsby', exact=True)] + [title_test('The great gatsby', exact=True), + authors_test(['F. Scott Fitzgerald'])] ), ( # A newer book {'identifiers':{'isbn': '9780316044981'}}, - [title_test('The Heroes', exact=True)] + [title_test('The Heroes', exact=True), + authors_test(['Joe Abercrombie'])] ), diff --git a/src/calibre/ebooks/metadata/sources/test.py b/src/calibre/ebooks/metadata/sources/test.py index e877eabd83..69e0c32846 100644 --- a/src/calibre/ebooks/metadata/sources/test.py +++ b/src/calibre/ebooks/metadata/sources/test.py @@ -37,6 +37,15 @@ def title_test(title, exact=False): return test +def authors_test(authors): + authors = set([x.lower() for x in authors]) + + def test(mi): + au = set([x.lower() for x in mi.authors]) + return au == authors + + return test + def test_identify_plugin(name, tests): ''' :param name: Plugin name