Sync to trunk.

2025-07-09 03:04:10 -04:00 · 2011-01-08 22:28:19 -05:00 · 2011-01-08 22:28:19 -05:00 · 04cf7a5e67
commit 04cf7a5e67
parent 12cbaa2304 5b8ea64321
5 changed files with 118 additions and 59 deletions
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -29,7 +29,7 @@ class ANDROID(USBMS):
            # Motorola
            0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100],
                       0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
-                       0x4286 : [0x216] },
+                       0x4286 : [0x216], 0x42b3 : [0x216] },

            # Sony Ericsson
            0xfce : { 0xd12e : [0x0100]},
--- a/src/calibre/ebooks/txt/heuristicprocessor.py
+++ b/src/calibre/ebooks/txt/heuristicprocessor.py
@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
 import re

 from calibre import prepare_string_for_xml
-from calibre.ebooks.unidecode.unidecoder import Unidecoder

 class TXTHeuristicProcessor(object):

@ -16,7 +15,7 @@ class TXTHeuristicProcessor(object):
            'Etc.', 'etc.', 'viz.', 'ie.', 'i.e.', 'Ie.', 'I.e.', 'eg.',
            'e.g.', 'Eg.', 'E.g.', 'et al.', 'et cetra', 'n.b.', 'N.b.',
            'nota bene', 'Nota bene', 'Ste.', 'Mme.', 'Mdme.',
-            'Mlle.', 'Mons.', 'PS.', 'PPS.', 
+            'Mlle.', 'Mons.', 'PS.', 'PPS.',
        ]
        self.ITALICIZE_STYLE_PATS = [
            r'(?msu)_(?P<words>.+?)_',
@ -43,7 +42,7 @@ class TXTHeuristicProcessor(object):
        from calibre.ebooks.txt.processor import clean_txt, split_txt, HTML_TEMPLATE
        txt = clean_txt(txt)
        txt = split_txt(txt, epub_split_size_kb)
-        
+
        processed = []
        for line in txt.split('\n\n'):
            processed.append(u'<p>%s</p>' % self.process_paragraph(prepare_string_for_xml(line.replace('\n', ' '))))
@ -51,7 +50,7 @@ class TXTHeuristicProcessor(object):
        txt = u'\n'.join(processed)
        txt = re.sub('[ ]{2,}', ' ', txt)
        html = HTML_TEMPLATE % (title, txt)
-        
+
        from calibre.ebooks.conversion.utils import PreProcessor
        pp = PreProcessor()
        html = pp.markup_chapters(html, pp.get_word_count(html), False)
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@ -181,7 +181,7 @@ class ResultCache(SearchQueryParser): # {{{
        self.search_restriction = ''
        self.field_metadata = field_metadata
        self.all_search_locations = field_metadata.get_search_terms()
-        SearchQueryParser.__init__(self, self.all_search_locations)
+        SearchQueryParser.__init__(self, self.all_search_locations, optimize=True)
        self.build_date_relop_dict()
        self.build_numeric_relop_dict()

@ -264,7 +264,7 @@ class ResultCache(SearchQueryParser): # {{{
                            '<=':[2, relop_le]
                        }

-    def get_dates_matches(self, location, query):
+    def get_dates_matches(self, location, query, candidates):
        matches = set([])
        if len(query) < 2:
            return matches
@ -274,13 +274,15 @@ class ResultCache(SearchQueryParser): # {{{
        loc = self.field_metadata[location]['rec_index']

        if query == 'false':
-            for item in self._data:
+            for id_ in candidates:
+                item = self._data[id_]
                if item is None: continue
                if item[loc] is None or item[loc] <= UNDEFINED_DATE:
                    matches.add(item[0])
            return matches
        if query == 'true':
-            for item in self._data:
+            for id_ in candidates:
+                item = self._data[id_]
                if item is None: continue
                if item[loc] is not None and item[loc] > UNDEFINED_DATE:
                    matches.add(item[0])
@ -319,7 +321,8 @@ class ResultCache(SearchQueryParser): # {{{
                field_count = query.count('-') + 1
            else:
                field_count = query.count('/') + 1
-        for item in self._data:
+        for id_ in candidates:
+            item = self._data[id_]
            if item is None or item[loc] is None: continue
            if relop(item[loc], qd, field_count):
                matches.add(item[0])
@ -335,7 +338,7 @@ class ResultCache(SearchQueryParser): # {{{
                        '<=':[2, lambda r, q: r <= q]
                    }

-    def get_numeric_matches(self, location, query, val_func = None):
+    def get_numeric_matches(self, location, query, candidates, val_func = None):
        matches = set([])
        if len(query) == 0:
            return matches
@ -381,7 +384,8 @@ class ResultCache(SearchQueryParser): # {{{
        except:
            return matches

-        for item in self._data:
+        for id_ in candidates:
+            item = self._data[id_]
            if item is None:
                continue
            v = val_func(item)
@ -393,8 +397,13 @@ class ResultCache(SearchQueryParser): # {{{
                matches.add(item[0])
        return matches

-    def get_matches(self, location, query, allow_recursion=True):
+    def get_matches(self, location, query, allow_recursion=True, candidates=None):
        matches = set([])
+        if candidates is None:
+            candidates = self.universal_set()
+        if len(candidates) == 0:
+            return matches
+
        if query and query.strip():
            # get metadata key associated with the search term. Eliminates
            # dealing with plurals and other aliases
@ -476,7 +485,8 @@ class ResultCache(SearchQueryParser): # {{{
                else:
                    q = query

-                for item in self._data:
+                for id_ in candidates:
+                    item = self._data[id_]
                    if item is None: continue

                    if col_datatype[loc] == 'bool': # complexity caused by the two-/three-value tweak
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
@ -5,8 +5,8 @@
 msgid ""
 msgstr ""
 "Project-Id-Version: calibre 0.7.38\n"
-"POT-Creation-Date: 2011-01-07 13:12+MST\n"
-"PO-Revision-Date: 2011-01-07 13:12+MST\n"
+"POT-Creation-Date: 2011-01-08 18:40+MST\n"
+"PO-Revision-Date: 2011-01-08 18:40+MST\n"
 "Last-Translator: Automatically generated\n"
 "Language-Team: LANGUAGE\n"
 "MIME-Version: 1.0\n"
@ -2905,28 +2905,29 @@ msgstr ""
 msgid " (Preface)"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:26
+#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:27
 msgid ""
 "Paragraph structure.\n"
-"choices are ['auto', 'block', 'single', 'print', 'markdown']\n"
+"choices are ['auto', 'block', 'single', 'print', 'unformatted']\n"
 "* auto: Try to auto detect paragraph type.\n"
 "* block: Treat a blank line as a paragraph break.\n"
 "* single: Assume every line is a paragraph.\n"
-"* print:  Assume every line starting with 2+ spaces or a tab starts a paragraph."
+"* print:  Assume every line starting with 2+ spaces or a tab starts a paragraph.* unformatted: Most lines have hard line breaks, few/no spaces or indents."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:35
+#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:37
 msgid ""
-"Formatting used within the document.* auto: Try to auto detect the document formatting.\n"
-"* none: Do not modify the paragraph formatting. Everything is a paragraph.\n"
-"* markdown: Run the input though the markdown pre-processor. To learn more about markdown see"
+"Formatting used within the document.* auto: Automatically decide which formatting processor to use.\n"
+"* none: Do not process the document formatting. Everything is a paragraph and no styling is applied.\n"
+"* heuristic: Process using heuristics to determine formatting such as chapter headings and italic text.\n"
+"* markdown: Processing using markdown formatting. To learn more about markdown see"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:41
+#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:46
 msgid "Normally extra spaces are condensed into a single space. With this option all spaces will be displayed."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:44
+#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:49
 msgid "Do not insert a Table of Contents into the output text."
 msgstr ""

@ -7225,7 +7226,7 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/password_ui.py:65
 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/scheduler_ui.py:219
 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/server_ui.py:130
-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:169
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:172
 msgid "&Show password"
 msgstr ""

@ -10621,48 +10622,56 @@ msgstr ""
 msgid "Mail successfully sent"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:136
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:95
+msgid "OK to proceed?"
+msgstr ""
+
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:96
+msgid "This will display your email password on the screen. Is it OK to proceed?"
+msgstr ""
+
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:139
 msgid "If you are setting up a new hotmail account, you must log in to it  once before you will be able to send mails."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:147
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:150
 msgid "Setup sending email using"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:149
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:152
 msgid "If you don't have an account, you can sign up for a free {name} email account at <a href=\"http://{url}\">http://{url}</a>. {extra}"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:156
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:159
 msgid "Your %s &email address:"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:157
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:160
 msgid "Your %s &username:"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:158
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:161
 msgid "Your %s &password:"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:176
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:179
 msgid "If you plan to use email to send books to your Kindle, remember to add the your %s email address to the allowed email addresses in your Amazon.com Kindle management page."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:183
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:186
 msgid "Setup"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:198
-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:205
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:201
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:208
 msgid "Bad configuration"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:199
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:202
 msgid "You must set the From email address"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:206
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:209
 msgid "You must set the username and password for the mail server."
 msgstr ""

--- a/src/calibre/utils/search_query_parser.py
+++ b/src/calibre/utils/search_query_parser.py
@ -118,8 +118,9 @@ class SearchQueryParser(object):
                failed.append(test[0])
        return failed

-    def __init__(self, locations, test=False):
+    def __init__(self, locations, test=False, optimize=False):
        self._tests_failed = False
+        self.optimize = optimize
        # Define a token
        standard_locations = map(lambda x : CaselessLiteral(x)+Suppress(':'),
                locations)
@ -182,38 +183,52 @@ class SearchQueryParser(object):
        # empty the list of searches used for recursion testing
        self.recurse_level = 0
        self.searches_seen = set([])
-        return self._parse(query)
+        candidates = self.universal_set()
+        return self._parse(query, candidates)

    # this parse is used internally because it doesn't clear the
    # recursive search test list. However, we permit seeing the
    # same search a few times because the search might appear within
    # another search.
-    def _parse(self, query):
+    def _parse(self, query, candidates=None):
        self.recurse_level += 1
        res = self._parser.parseString(query)[0]
-        t = self.evaluate(res)
+        if candidates is None:
+            candidates = self.universal_set()
+        t = self.evaluate(res, candidates)
        self.recurse_level -= 1
        return t

    def method(self, group_name):
        return getattr(self, 'evaluate_'+group_name)

-    def evaluate(self, parse_result):
-        return self.method(parse_result.getName())(parse_result)
+    def evaluate(self, parse_result, candidates):
+        return self.method(parse_result.getName())(parse_result, candidates)

-    def evaluate_and(self, argument):
-        return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
+    def evaluate_and(self, argument, candidates):
+        # RHS checks only those items matched by LHS
+        # returns result of RHS check: RHmatches(LHmatches(c))
+        #  return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
+        l = self.evaluate(argument[0], candidates)
+        return l.intersection(self.evaluate(argument[1], l))

-    def evaluate_or(self, argument):
-        return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
+    def evaluate_or(self, argument, candidates):
+        # RHS checks only those elements not matched by LHS
+        # returns LHS union RHS: LHmatches(c) + RHmatches(c-LHmatches(c))
+        #  return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
+        l = self.evaluate(argument[0], candidates)
+        return l.union(self.evaluate(argument[1], candidates.difference(l)))

-    def evaluate_not(self, argument):
-        return self.universal_set().difference(self.evaluate(argument[0]))
+    def evaluate_not(self, argument, candidates):
+        # unary op checks only candidates. Result: list of items matching
+        # returns: c - matches(c)
+        #  return self.universal_set().difference(self.evaluate(argument[0]))
+        return candidates.difference(self.evaluate(argument[0], candidates))

-    def evaluate_parenthesis(self, argument):
-        return self.evaluate(argument[0])
+    def evaluate_parenthesis(self, argument, candidates):
+        return self.evaluate(argument[0], candidates)

-    def evaluate_token(self, argument):
+    def evaluate_token(self, argument, candidates):
        location = argument[0]
        query = argument[1]
        if location.lower() == 'search':
@ -224,17 +239,27 @@ class SearchQueryParser(object):
                    raise ParseException(query, len(query), 'undefined saved search', self)
                if self.recurse_level > 5:
                    self.searches_seen.add(query)
-                return self._parse(saved_searches().lookup(query))
+                return self._parse(saved_searches().lookup(query), candidates)
            except: # convert all exceptions (e.g., missing key) to a parse error
                raise ParseException(query, len(query), 'undefined saved search', self)
-        return self.get_matches(location, query)
+        return self._get_matches(location, query, candidates)

-    def get_matches(self, location, query):
+    def _get_matches(self, location, query, candidates):
+        if self.optimize:
+            return self.get_matches(location, query, candidates=candidates)
+        else:
+            return self.get_matches(location, query)
+
+    def get_matches(self, location, query, candidates=None):
        '''
        Should return the set of matches for :param:'location` and :param:`query`.

+        The search must be performed over all entries if :param:`candidates` is
+        None, otherwise only over the items in candidates.
+
        :param:`location` is one of the items in :member:`SearchQueryParser.DEFAULT_LOCATIONS`.
        :param:`query` is a string literal.
+        :param:`candidates` is None or a subset of the set returned by :meth:`universal_set`.
        '''
        return set([])

@ -561,7 +586,7 @@ class Tester(SearchQueryParser):
    def universal_set(self):
        return self._universal_set

-    def get_matches(self, location, query):
+    def get_matches(self, location, query, candidates=None):
        location = location.lower()
        if location in self.fields.keys():
            getter = operator.itemgetter(self.fields[location])
@ -573,8 +598,13 @@ class Tester(SearchQueryParser):
        if not query:
            return set([])
        query = query.lower()
-        return set(key for key, val in self.texts.items() \
-            if query and query in getattr(getter(val), 'lower', lambda : '')())
+        if candidates is not None:
+            return set(key for key, val in self.texts.items() \
+                if key in candidates and query and query
+                        in getattr(getter(val), 'lower', lambda : '')())
+        else:
+            return set(key for key, val in self.texts.items() \
+                if query and query in getattr(getter(val), 'lower', lambda : '')())



@ -592,6 +622,7 @@ class Tester(SearchQueryParser):


 def main(args=sys.argv):
+    print 'testing unoptimized'
    tester = Tester(['authors', 'author', 'series', 'formats', 'format',
        'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
        'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',
@ -601,6 +632,16 @@ def main(args=sys.argv):
        print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
        return 1

+    print '\n\ntesting optimized'
+    tester = Tester(['authors', 'author', 'series', 'formats', 'format',
+        'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
+        'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',
+        'all', 'search'], test=True, optimize=True)
+    failed = tester.run_tests()
+    if tester._tests_failed or failed:
+        print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
+        return 1
+
    return 0

 if __name__ == '__main__':