From 0a9abf4ab63429cdede020d9eb75e45b376cc9a2 Mon Sep 17 00:00:00 2001 From: Lee Date: Thu, 21 Apr 2011 12:03:34 +0800 Subject: [PATCH 1/3] tweaks to overdrive, added text2int --- .../ebooks/metadata/sources/overdrive.py | 8 ++- src/calibre/utils/text2int.py | 63 +++++++++++++++++++ 2 files changed, 68 insertions(+), 3 deletions(-) create mode 100755 src/calibre/utils/text2int.py diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py index 39fa2bc6ea..c8a2dc6d8b 100755 --- a/src/calibre/ebooks/metadata/sources/overdrive.py +++ b/src/calibre/ebooks/metadata/sources/overdrive.py @@ -17,7 +17,7 @@ from lxml import html from lxml.html import soupparser from calibre.ebooks.metadata import check_isbn -from calibre.ebooks.metadata.sources.base import Source +from calibre.ebooks.metadata.sources.base import Source, Option from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.chardet import xml_to_unicode from calibre.library.comments import sanitize_comments_html @@ -58,11 +58,13 @@ class OverDrive(Source): self.parse_search_results(ovrdrv_data, mi) if ovrdrv_id is None: ovrdrv_id = ovrdrv_data[7] + + if get_full_metadata(): + self.get_book_detail(br, ovrdrv_data[1], mi, ovrdrv_id, log) + if isbn is not None: self.cache_isbn_to_identifier(isbn, ovrdrv_id) - self.get_book_detail(br, ovrdrv_data[1], mi, ovrdrv_id, log) - result_queue.put(mi) return None diff --git a/src/calibre/utils/text2int.py b/src/calibre/utils/text2int.py new file mode 100755 index 0000000000..059e166141 --- /dev/null +++ b/src/calibre/utils/text2int.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python +__author__ = "stackoverflow community" +__docformat__ = 'restructuredtext en' +""" +Takes english numeric words and converts them to integers. +Returns False if the word isn't a number. 
+ +implementation courtesy of the stackoverflow community: +http://stackoverflow.com/questions/493174/is-there-a-way-to-convert-number-words-to-integers-python +""" + +import re + +numwords = {} + +def text2int(textnum): + if not numwords: + + units = [ "zero", "one", "two", "three", "four", "five", "six", + "seven", "eight", "nine", "ten", "eleven", "twelve", + "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", + "eighteen", "nineteen"] + + tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", + "seventy", "eighty", "ninety"] + + scales = ["hundred", "thousand", "million", "billion", "trillion", + 'quadrillion', 'quintillion', 'sextillion', 'septillion', + 'octillion', 'nonillion', 'decillion' ] + + numwords["and"] = (1, 0) + for idx, word in enumerate(units): numwords[word] = (1, idx) + for idx, word in enumerate(tens): numwords[word] = (1, idx * 10) + for idx, word in enumerate(scales): numwords[word] = (10 ** (idx * 3 or 2), 0) + + ordinal_words = {'first':1, 'second':2, 'third':3, 'fifth':5, + 'eighth':8, 'ninth':9, 'twelfth':12} + ordinal_endings = [('ieth', 'y'), ('th', '')] + current = result = 0 + tokens = re.split(r"[\s-]+", textnum) + for word in tokens: + if word in ordinal_words: + scale, increment = (1, ordinal_words[word]) + else: + for ending, replacement in ordinal_endings: + if word.endswith(ending): + word = "%s%s" % (word[:-len(ending)], replacement) + + if word not in numwords: + #raise Exception("Illegal word: " + word) + return False + + scale, increment = numwords[word] + + if scale > 1: + current = max(1, current) + + current = current * scale + increment + if scale > 100: + result += current + current = 0 + + return result + current \ No newline at end of file From bc1299055390c83485f4935b5d7b62c1555ee6e8 Mon Sep 17 00:00:00 2001 From: Lee Date: Thu, 21 Apr 2011 15:55:22 +0800 Subject: [PATCH 2/3] added a pref to enable/disable scraping the book metadata page --- .../ebooks/metadata/sources/overdrive.py | 24 
++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py index dc28a6ced5..e9b327d7b7 100755 --- a/src/calibre/ebooks/metadata/sources/overdrive.py +++ b/src/calibre/ebooks/metadata/sources/overdrive.py @@ -40,6 +40,29 @@ class OverDrive(Source): supports_gzip_transfer_encoding = False cached_cover_url_is_reliable = True + def __init__(self, *args, **kwargs): + Source.__init__(self, *args, **kwargs) + + options = ( + Option('get_full_metadata', 'bool', False, _('Gather all Metadata:'), + _('Enable this option to gather all metadata available from Overdrive.')), + ) + + config_help_message = '<p>'+_('Additional metadata can be taken from Overdrive\'s book detail' + ' page. This includes a limited set of tags used by libraries, comments, language,' + ' and the ebook ISBN. Collecting this data is disabled by default due to the extra' + ' time required.') + + def __init__(self, *args, **kwargs): + Source.__init__(self, *args, **kwargs) + + prefs = self.prefs + prefs.defaults['get_full_metadata'] = False + + @property + def get_full_metadata(self): + return self.prefs['get_full_metadata'] + def identify(self, log, result_queue, abort, title=None, authors=None, # {{{ identifiers={}, timeout=30): ovrdrv_id = identifiers.get('overdrive', None) @@ -440,4 +463,3 @@ if __name__ == '__main__': authors_test(['Agatha Christie'])] ), ]) - From c50251a3f6b65b3cde511ea8789be03f983d7a13 Mon Sep 17 00:00:00 2001 From: Lee Date: Fri, 22 Apr 2011 01:06:09 +0800 Subject: [PATCH 3/3] made extra metadata downloading optional --- src/calibre/ebooks/metadata/sources/overdrive.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py index e9b327d7b7..cce35d5b74 100755 --- a/src/calibre/ebooks/metadata/sources/overdrive.py +++ b/src/calibre/ebooks/metadata/sources/overdrive.py @@ -3,7 +3,7 @@ from __future__ import (unicode_literals, division, absolute_import, print_function) __license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' +__copyright__ = '2011, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' ''' @@ -44,7 +44,7 @@ class OverDrive(Source): Source.__init__(self, *args, **kwargs) options = ( - Option('get_full_metadata', 'bool', False, _('Gather all Metadata:'), + Option('get_full_metadata', 'bool', None, _('Gather all Metadata:'), _('Enable this option to gather all metadata available from Overdrive.')), ) @@ -59,10 +59,6 @@ class OverDrive(Source): prefs = self.prefs prefs.defaults['get_full_metadata'] = 
False - @property - def get_full_metadata(self): - return self.prefs['get_full_metadata'] - def identify(self, log, result_queue, abort, title=None, authors=None, # {{{ identifiers={}, timeout=30): ovrdrv_id = identifiers.get('overdrive', None) @@ -78,7 +74,7 @@ class OverDrive(Source): if ovrdrv_id is None: ovrdrv_id = ovrdrv_data[7] - if get_full_metadata(): + if self.prefs['get_full_metadata']: self.get_book_detail(br, ovrdrv_data[1], mi, ovrdrv_id, log) if isbn is not None: