From 0a9abf4ab63429cdede020d9eb75e45b376cc9a2 Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Thu, 21 Apr 2011 12:03:34 +0800
Subject: [PATCH 1/3] tweaks to overdrive, added text2int

---
 .../ebooks/metadata/sources/overdrive.py      |  8 ++-
 src/calibre/utils/text2int.py                 | 63 +++++++++++++++++++
 2 files changed, 68 insertions(+), 3 deletions(-)
 create mode 100755 src/calibre/utils/text2int.py

diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py
index 39fa2bc6ea..c8a2dc6d8b 100755
--- a/src/calibre/ebooks/metadata/sources/overdrive.py
+++ b/src/calibre/ebooks/metadata/sources/overdrive.py
@@ -17,7 +17,7 @@ from lxml import html
 from lxml.html import soupparser
 
 from calibre.ebooks.metadata import check_isbn
-from calibre.ebooks.metadata.sources.base import Source
+from calibre.ebooks.metadata.sources.base import Source, Option
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.library.comments import sanitize_comments_html
@@ -58,11 +58,13 @@ class OverDrive(Source):
             self.parse_search_results(ovrdrv_data, mi)
             if ovrdrv_id is None:
                 ovrdrv_id = ovrdrv_data[7]
+
+            if get_full_metadata():
+                self.get_book_detail(br, ovrdrv_data[1], mi, ovrdrv_id, log)
+
             if isbn is not None:
                 self.cache_isbn_to_identifier(isbn, ovrdrv_id)
 
-            self.get_book_detail(br, ovrdrv_data[1], mi, ovrdrv_id, log)
-
             result_queue.put(mi)
 
         return None
diff --git a/src/calibre/utils/text2int.py b/src/calibre/utils/text2int.py
new file mode 100755
index 0000000000..059e166141
--- /dev/null
+++ b/src/calibre/utils/text2int.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env  python
+__author__ = "stackoverflow community"
+__docformat__ = 'restructuredtext en'
+"""
+Takes English number words and converts them to integers.
+Returns False if any word isn't a number word.
+
+implementation courtesy of the stackoverflow community:
+http://stackoverflow.com/questions/493174/is-there-a-way-to-convert-number-words-to-integers-python
+"""
+
+import re
+
+numwords = {}
+
+def text2int(textnum):
+    """Convert English number words (cardinal or ordinal) to an integer.
+
+    Words are split on whitespace/hyphens and matched case-insensitively.
+    Returns False if there are no words or any word is not a number word;
+    'zero' gives 0, so test the result with ``is False``, not truthiness.
+    """
+    if not numwords:
+        units = ["zero", "one", "two", "three", "four", "five", "six",
+                "seven", "eight", "nine", "ten", "eleven", "twelve",
+                "thirteen", "fourteen", "fifteen", "sixteen", "seventeen",
+                "eighteen", "nineteen"]
+        tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty",
+                "seventy", "eighty", "ninety"]
+        scales = ["hundred", "thousand", "million", "billion", "trillion",
+                'quadrillion', 'quintillion', 'sextillion', 'septillion',
+                'octillion', 'nonillion', 'decillion']
+        # Each entry is (scale, increment); "and" is a filler adding nothing.
+        numwords["and"] = (1, 0)
+        for idx, word in enumerate(units): numwords[word] = (1, idx)
+        # Skip the two "" placeholders: as a key, "" made empty text parse as 10.
+        for idx, word in enumerate(tens[2:], 2): numwords[word] = (1, idx * 10)
+        for idx, word in enumerate(scales): numwords[word] = (10 ** (idx * 3 or 2), 0)
+
+    ordinal_words = {'first':1, 'second':2, 'third':3, 'fifth':5,
+            'eighth':8, 'ninth':9, 'twelfth':12}
+    ordinal_endings = [('ieth', 'y'), ('th', '')]
+    current = result = 0
+    tokens = [t.lower() for t in re.split(r"[\s-]+", textnum) if t]
+    if not tokens:
+        return False
+    for word in tokens:
+        if word in ordinal_words:
+            scale, increment = (1, ordinal_words[word])
+        else:
+            for ending, replacement in ordinal_endings:
+                if word.endswith(ending):
+                    word = "%s%s" % (word[:-len(ending)], replacement)
+            if word not in numwords:
+                return False
+            scale, increment = numwords[word]
+        if scale > 1:
+            current = max(1, current)
+        current = current * scale + increment
+        if scale > 100:
+            result += current
+            current = 0
+    return result + current
\ No newline at end of file

From bc1299055390c83485f4935b5d7b62c1555ee6e8 Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Thu, 21 Apr 2011 15:55:22 +0800
Subject: [PATCH 2/3] added a pref to enable/disable scraping the book metadata
 page

---
 .../ebooks/metadata/sources/overdrive.py      | 24 ++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py
index dc28a6ced5..e9b327d7b7 100755
--- a/src/calibre/ebooks/metadata/sources/overdrive.py
+++ b/src/calibre/ebooks/metadata/sources/overdrive.py
@@ -40,6 +40,29 @@ class OverDrive(Source):
     supports_gzip_transfer_encoding = False
     cached_cover_url_is_reliable = True
 
+    def __init__(self, *args, **kwargs):
+       Source.__init__(self, *args, **kwargs)
+
+    options = (
+            Option('get_full_metadata', 'bool', False, _('Gather all Metadata:'),
+                _('Enable this option to gather all metadata available from Overdrive.')),
+            )
+
+    config_help_message = '<p>'+_('Additional metadata can be taken from Overdrive\'s book detail'
+            ' page.  This includes a limited set of tags used by libraries, comments, language,'
+            ' and the ebook ISBN. Collecting this data is disabled by default due to the extra'
+            ' time required.')
+
+    def __init__(self, *args, **kwargs):
+        Source.__init__(self, *args, **kwargs)
+
+        prefs = self.prefs
+        prefs.defaults['get_full_metadata'] = False
+
+    @property
+    def get_full_metadata(self):
+        return self.prefs['get_full_metadata']
+
     def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
             identifiers={}, timeout=30):
         ovrdrv_id = identifiers.get('overdrive', None)
@@ -440,4 +463,3 @@ if __name__ == '__main__':
                     authors_test(['Agatha Christie'])]
             ),
     ])
-

From c50251a3f6b65b3cde511ea8789be03f983d7a13 Mon Sep 17 00:00:00 2001
From: Lee <ldolse@yahoo.com>
Date: Fri, 22 Apr 2011 01:06:09 +0800
Subject: [PATCH 3/3] made extra metadata downloading optional

---
 src/calibre/ebooks/metadata/sources/overdrive.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py
index e9b327d7b7..cce35d5b74 100755
--- a/src/calibre/ebooks/metadata/sources/overdrive.py
+++ b/src/calibre/ebooks/metadata/sources/overdrive.py
@@ -3,7 +3,7 @@ from __future__ import (unicode_literals, division, absolute_import,
                         print_function)
 
 __license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__copyright__ = '2011, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 
 '''
@@ -44,7 +44,7 @@ class OverDrive(Source):
        Source.__init__(self, *args, **kwargs)
 
     options = (
-            Option('get_full_metadata', 'bool', False, _('Gather all Metadata:'),
+            Option('get_full_metadata', 'bool', None, _('Gather all Metadata:'),
                 _('Enable this option to gather all metadata available from Overdrive.')),
             )
 
@@ -59,10 +59,6 @@ class OverDrive(Source):
         prefs = self.prefs
         prefs.defaults['get_full_metadata'] = False
 
-    @property
-    def get_full_metadata(self):
-        return self.prefs['get_full_metadata']
-
     def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
             identifiers={}, timeout=30):
         ovrdrv_id = identifiers.get('overdrive', None)
@@ -78,7 +74,7 @@ class OverDrive(Source):
             if ovrdrv_id is None:
                 ovrdrv_id = ovrdrv_data[7]
 
-            if get_full_metadata():
+            if self.prefs['get_full_metadata']:
                 self.get_book_detail(br, ovrdrv_data[1], mi, ovrdrv_id, log)
 
             if isbn is not None: