tweaks to overdrive, added text2int

2025-11-24 23:35:01 -05:00 · 2011-04-21 12:03:34 +08:00 · 2011-04-21 12:03:34 +08:00 · 0a9abf4ab6
commit 0a9abf4ab6
parent 112168f3f6
2 changed files with 68 additions and 3 deletions
--- a/src/calibre/ebooks/metadata/sources/overdrive.py
+++ b/src/calibre/ebooks/metadata/sources/overdrive.py
@ -17,7 +17,7 @@ from lxml import html
 from lxml.html import soupparser
 from calibre.ebooks.metadata import check_isbn
-from calibre.ebooks.metadata.sources.base import Source
+from calibre.ebooks.metadata.sources.base import Source, Option
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.library.comments import sanitize_comments_html
@ -58,11 +58,13 @@ class OverDrive(Source):
            self.parse_search_results(ovrdrv_data, mi)
            if ovrdrv_id is None:
                ovrdrv_id = ovrdrv_data[7]
            if get_full_metadata():
                self.get_book_detail(br, ovrdrv_data[1], mi, ovrdrv_id, log)
            if isbn is not None:
                self.cache_isbn_to_identifier(isbn, ovrdrv_id)
            self.get_book_detail(br, ovrdrv_data[1], mi, ovrdrv_id, log)
            result_queue.put(mi)
        return None
--- a/src/calibre/utils/text2int.py
+++ b/src/calibre/utils/text2int.py
@ -0,0 +1,63 @@
 #!/usr/bin/env  python
 __author__ = "stackoverflow community"
 __docformat__ = 'restructuredtext en'
 """
 Takes english numeric words and converts them to integers.
 Returns False if the word isn't a number.
 implementation courtesy of the stackoverflow community:
 http://stackoverflow.com/questions/493174/is-there-a-way-to-convert-number-words-to-integers-python
 """
 import re
 # Lookup table mapping a number word to a (scale, increment) pair.
 # It is filled lazily on the first call to text2int().
 numwords = {}
 def text2int(textnum):
     """Convert English number words to an integer.

     Both cardinal ("twenty one") and ordinal ("twenty-first", "twentieth")
     forms are understood.  Words may be separated by whitespace and/or
     hyphens, and the filler word "and" is accepted.  Matching is
     case-sensitive: only lower-case words are recognised.

     :param textnum: the text to convert, e.g. ``"one hundred and two"``.
     :return: the integer value, or ``False`` if the text is empty or
         contains a word that is not a number word.  Because ``0 == False``
         in Python, callers that need to tell "zero" apart from a failure
         must test the result with ``is False``.
     """
     if not numwords:
         units = ["zero", "one", "two", "three", "four", "five", "six",
                  "seven", "eight", "nine", "ten", "eleven", "twelve",
                  "thirteen", "fourteen", "fifteen", "sixteen", "seventeen",
                  "eighteen", "nineteen"]
         # The two leading blanks only keep "twenty" at index 2 so that
         # idx * 10 gives the right value; they must never become keys.
         tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty",
                 "seventy", "eighty", "ninety"]
         scales = ["hundred", "thousand", "million", "billion", "trillion",
                   "quadrillion", "quintillion", "sextillion", "septillion",
                   "octillion", "nonillion", "decillion"]
         table = {"and": (1, 0)}
         for idx, word in enumerate(units):
             table[word] = (1, idx)
         for idx, word in enumerate(tens):
             if word:
                 table[word] = (1, idx * 10)
         for idx, word in enumerate(scales):
             # idx 0 is "hundred" (10**2); every later scale is 10**(3*idx).
             table[word] = (10 ** (idx * 3 or 2), 0)
         # Earlier versions spelled "sextillion" as "sexillion"; keep the
         # old spelling working for any caller that relied on it.
         table["sexillion"] = table["sextillion"]
         # Build in a local dict and publish in one step so the
         # "if not numwords" test never sees a half-filled table.
         numwords.update(table)

     # Ordinals that cannot be derived by simply stripping a suffix.
     ordinal_words = {'first': 1, 'second': 2, 'third': 3, 'fifth': 5,
                      'eighth': 8, 'ninth': 9, 'twelfth': 12}
     # (suffix, replacement): "twentieth" -> "twenty", "fourth" -> "four".
     ordinal_endings = [('ieth', 'y'), ('th', '')]

     # re.split yields empty strings when the text is empty or starts/ends
     # with a separator; drop them so they are not treated as words.
     tokens = [tok for tok in re.split(r"[\s-]+", textnum) if tok]
     if not tokens:
         return False

     current = result = 0
     for word in tokens:
         if word in ordinal_words:
             scale, increment = (1, ordinal_words[word])
         else:
             for ending, replacement in ordinal_endings:
                 if word.endswith(ending):
                     word = "%s%s" % (word[:-len(ending)], replacement)
                     break
             if word not in numwords:
                 return False
             scale, increment = numwords[word]
         if scale > 1:
             # A bare scale word ("hundred") means one of that scale.
             current = max(1, current)
         current = current * scale + increment
         if scale > 100:
             # Thousands and above close the current group.
             result += current
             current = 0
     return result + current