mirror of https://github.com/kovidgoyal/calibre.git

commit 04bbda5e5a
merge from trunk
@@ -29,7 +29,7 @@ class ANDROID(USBMS):
     # Motorola
     0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100],
                0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
-               0x4286 : [0x216] },
+               0x4286 : [0x216], 0x42b3 : [0x216] },

     # Sony Ericsson
     0xfce : { 0xd12e : [0x0100]},

@@ -324,14 +324,16 @@ class Metadata(object):
         if metadata is None:
             traceback.print_stack()
             return
-        metadata = copy.deepcopy(metadata)
-        if '#value#' not in metadata:
-            if metadata['datatype'] == 'text' and metadata['is_multiple']:
-                metadata['#value#'] = []
+        m = {}
+        for k in metadata:
+            m[k] = copy.copy(metadata[k])
+        if '#value#' not in m:
+            if m['datatype'] == 'text' and m['is_multiple']:
+                m['#value#'] = []
             else:
-                metadata['#value#'] = None
+                m['#value#'] = None
         _data = object.__getattribute__(self, '_data')
-        _data['user_metadata'][field] = metadata
+        _data['user_metadata'][field] = m

     def template_to_attribute(self, other, ops):
         '''

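Why the deepcopy was dropped (an illustrative sketch, not part of the commit; the dict below is invented): copy.deepcopy walks every nested object on each call, while the per-key copy.copy above copies only one level down, which is enough when callers never mutate anything deeper in place.

    import copy

    # A made-up stand-in for calibre's per-field metadata dict.
    meta = {'datatype': 'text', 'is_multiple': ',', 'display': {'sort_alpha': True}}

    # Per-key shallow copy: each top-level value is copied once.
    m = {}
    for k in meta:
        m[k] = copy.copy(meta[k])

    assert m is not meta                        # independent outer dict
    assert m['display'] is not meta['display']  # one level down is copied too
    m['#value#'] = []                           # safe: the original is untouched
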
@@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'
 Transform OEB content into plain text
 '''

-import os
 import re

 from lxml import etree

@@ -33,6 +32,15 @@ BLOCK_STYLES = [
     'block',
 ]

+HEADING_TAGS = [
+    'h1',
+    'h2',
+    'h3',
+    'h4',
+    'h5',
+    'h6',
+]
+
 SPACE_TAGS = [
     'td',
     'br',

@@ -47,6 +55,10 @@ class TXTMLizer(object):
         self.log.info('Converting XHTML to TXT...')
         self.oeb_book = oeb_book
         self.opts = opts
+        self.toc_ids = []
+        self.last_was_heading = False
+
+        self.create_flat_toc(self.oeb_book.toc)

         return self.mlize_spine()

@@ -58,8 +70,11 @@ class TXTMLizer(object):
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
             content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
             content = self.remove_newlines(content)
-            output += self.dump_text(etree.fromstring(content), stylizer)
-        output = self.cleanup_text(u''.join(output))
+            output += self.dump_text(etree.fromstring(content), stylizer, item)
+            output += '\n\n\n\n\n\n'
+        output = u''.join(output)
+        output = u'\n'.join(l.rstrip() for l in output.splitlines())
+        output = self.cleanup_text(output)

         return output

@@ -68,6 +83,8 @@ class TXTMLizer(object):
         text = text.replace('\r\n', ' ')
         text = text.replace('\n', ' ')
         text = text.replace('\r', ' ')
+        # Condense redundant spaces created by replacing newlines with spaces.
+        text = re.sub(r'[ ]{2,}', ' ', text)

         return text

@@ -80,6 +97,14 @@ class TXTMLizer(object):
             toc.append(u'* %s\n\n' % item.title)
         return ''.join(toc)

+    def create_flat_toc(self, nodes):
+        '''
+        Turns a hierarchical list of TOC href's into a flat list.
+        '''
+        for item in nodes:
+            self.toc_ids.append(item.href)
+            self.create_flat_toc(item.nodes)
+
     def cleanup_text(self, text):
         self.log.debug('\tClean up text...')
         # Replace bad characters.

@@ -92,7 +117,7 @@ class TXTMLizer(object):
         text = text.replace('\f+', ' ')

         # Single line paragraph.
-        text = re.sub('(?<=.)%s(?=.)' % os.linesep, ' ', text)
+        text = re.sub('(?<=.)\n(?=.)', ' ', text)

         # Remove multiple spaces.
         text = re.sub('[ ]{2,}', ' ', text)

@@ -101,14 +126,20 @@ class TXTMLizer(object):
         text = re.sub('\n[ ]+\n', '\n\n', text)
         if self.opts.remove_paragraph_spacing:
             text = re.sub('\n{2,}', '\n', text)
-            text = re.sub('(?imu)^(?=.)', '\t', text)
+            text = re.sub(r'(?msu)^(?P<t>[^\t\n]+?)$', lambda mo: u'%s\n\n' % mo.group('t'), text)
+            text = re.sub(r'(?msu)(?P<b>[^\n])\n+(?P<t>[^\t\n]+?)(?=\n)', lambda mo: '%s\n\n\n\n\n\n%s' % (mo.group('b'), mo.group('t')), text)
         else:
-            text = re.sub('\n{3,}', '\n\n', text)
+            text = re.sub('\n{7,}', '\n\n\n\n\n\n', text)

         # Replace spaces at the beginning and end of lines
+        # We don't replace tabs because those are only added
+        # when remove paragraph spacing is enabled.
         text = re.sub('(?imu)^[ ]+', '', text)
         text = re.sub('(?imu)[ ]+$', '', text)

+        # Remove empty space and newlines at the beginning of the document.
+        text = re.sub(r'(?u)^[ \n]+', '', text)
+
         if self.opts.max_line_length:
             max_length = self.opts.max_line_length
             if self.opts.max_line_length < 25 and not self.opts.force_max_line_length:

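The blank-line handling above is easier to see on a toy string (a sketch; the sample text is invented, not from calibre's tests):

    import re

    sample = 'One.\n\n\n\n\n\n\n\n\nTwo.\nThree.'

    # With remove_paragraph_spacing off, runs of newlines are capped at six,
    # so a deliberate six-newline section break survives but nothing longer does.
    print(repr(re.sub('\n{7,}', '\n\n\n\n\n\n', sample)))

    # Per-line space stripping, as in cleanup_text.
    s = '  indented \nline  '
    s = re.sub('(?imu)^[ ]+', '', s)
    s = re.sub('(?imu)[ ]+$', '', s)
    print(repr(s))  # 'indented\nline'
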
@@ -145,13 +176,11 @@ class TXTMLizer(object):

         return text

-    def dump_text(self, elem, stylizer, end=''):
+    def dump_text(self, elem, stylizer, page):
         '''
         @elem: The element in the etree that we are working on.
         @stylizer: The style information attached to the element.
-        @end: The last two characters of the text from the previous element.
-              This is used to determine if a blank line is needed when starting
-              a new block element.
+        @page: OEB page used to determine absolute urls.
         '''

         if not isinstance(elem.tag, basestring) \

@@ -170,13 +199,22 @@ class TXTMLizer(object):
             return ['']

         tag = barename(elem.tag)
+        tag_id = elem.attrib.get('id', None)
         in_block = False
+        in_heading = False
+
+        # Are we in a heading?
+        # This can either be a heading tag or a TOC item.
+        if tag in HEADING_TAGS or '%s#%s' % (page.href, tag_id) in self.toc_ids:
+            in_heading = True
+            if not self.last_was_heading:
+                text.append('\n\n\n\n\n\n')

         # Are we in a paragraph block?
         if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
+            if self.opts.remove_paragraph_spacing and not in_heading:
+                text.append(u'\t')
             in_block = True
-            if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text:
-                text.append(u'\n\n')

         if tag in SPACE_TAGS:
             text.append(u' ')

@@ -185,14 +223,17 @@ class TXTMLizer(object):
         if hasattr(elem, 'text') and elem.text:
             text.append(elem.text)

         # Recurse down into tags within the tag we are in.
         for item in elem:
-            en = u''
-            if len(text) >= 2:
-                en = text[-1][-2:]
-            text += self.dump_text(item, stylizer, en)
+            text += self.dump_text(item, stylizer, page)

         if in_block:
             text.append(u'\n\n')
+        if in_heading:
+            text.append(u'\n')
+            self.last_was_heading = True
+        else:
+            self.last_was_heading = False
+
         if hasattr(elem, 'tail') and elem.tail:
             text.append(elem.tail)

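How the new heading detection hangs together (a toy walkthrough with invented TOC objects): create_flat_toc records every TOC href up front, and dump_text then treats an element as a heading when page.href + '#' + tag_id appears in that list.

    class Node(object):
        def __init__(self, href, nodes=()):
            self.href, self.nodes = href, list(nodes)

    toc_ids = []

    def create_flat_toc(nodes):
        # Depth-first walk: record each href, then recurse into children.
        for item in nodes:
            toc_ids.append(item.href)
            create_flat_toc(item.nodes)

    toc = [Node('ch1.html#start', [Node('ch1.html#sec1')]), Node('ch2.html#start')]
    create_flat_toc(toc)

    page_href, tag_id = 'ch1.html', 'sec1'
    print('%s#%s' % (page_href, tag_id) in toc_ids)  # True -> rendered as a heading
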
@@ -1266,8 +1266,8 @@ class DeviceMixin(object): # {{{
         # Force a reset if the caches are not initialized
         if reset or not hasattr(self, 'db_book_title_cache'):
             # Build a cache (map) of the library, so the search isn't On**2
-            self.db_book_title_cache = {}
-            self.db_book_uuid_cache = {}
+            db_book_title_cache = {}
+            db_book_uuid_cache = {}
             # It might be possible to get here without having initialized the
             # library view. In this case, simply give up
             try:

@@ -1278,8 +1278,8 @@ class DeviceMixin(object): # {{{
             for id in db.data.iterallids():
                 mi = db.get_metadata(id, index_is_id=True)
                 title = clean_string(mi.title)
-                if title not in self.db_book_title_cache:
-                    self.db_book_title_cache[title] = \
+                if title not in db_book_title_cache:
+                    db_book_title_cache[title] = \
                         {'authors':{}, 'author_sort':{}, 'db_ids':{}}
                 # If there are multiple books in the library with the same title
                 # and author, then remember the last one. That is OK, because as

@@ -1287,12 +1287,14 @@ class DeviceMixin(object): # {{{
                 # as another.
                 if mi.authors:
                     authors = clean_string(authors_to_string(mi.authors))
-                    self.db_book_title_cache[title]['authors'][authors] = mi
+                    db_book_title_cache[title]['authors'][authors] = mi
                 if mi.author_sort:
                     aus = clean_string(mi.author_sort)
-                    self.db_book_title_cache[title]['author_sort'][aus] = mi
-                self.db_book_title_cache[title]['db_ids'][mi.application_id] = mi
-                self.db_book_uuid_cache[mi.uuid] = mi
+                    db_book_title_cache[title]['author_sort'][aus] = mi
+                db_book_title_cache[title]['db_ids'][mi.application_id] = mi
+                db_book_uuid_cache[mi.uuid] = mi
+            self.db_book_title_cache = db_book_title_cache
+            self.db_book_uuid_cache = db_book_uuid_cache

         # Now iterate through all the books on the device, setting the
         # in_library field. If the UUID matches a book in the library, then

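The point of switching to local names: the maps are built completely in locals and only assigned to self at the end, so an exception partway through (or a reader on another thread) can never observe a half-built cache. A stripped-down sketch with invented book dicts:

    def build_caches(books):
        # Build into locals first; nothing is published until both are complete.
        title_cache, uuid_cache = {}, {}
        for b in books:
            entry = title_cache.setdefault(
                b['title'], {'authors': {}, 'author_sort': {}, 'db_ids': {}})
            entry['db_ids'][b['id']] = b
            uuid_cache[b['uuid']] = b
        return title_cache, uuid_cache

    books = [{'title': 'Dune', 'id': 1, 'uuid': 'u1'}]
    title_cache, uuid_cache = build_caches(books)   # publish atomically here
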
@@ -181,7 +181,7 @@ class ResultCache(SearchQueryParser): # {{{
         self.search_restriction = ''
         self.field_metadata = field_metadata
         self.all_search_locations = field_metadata.get_search_terms()
-        SearchQueryParser.__init__(self, self.all_search_locations)
+        SearchQueryParser.__init__(self, self.all_search_locations, optimize=True)
         self.build_date_relop_dict()
         self.build_numeric_relop_dict()

@@ -264,7 +264,7 @@ class ResultCache(SearchQueryParser): # {{{
                     '<=':[2, relop_le]
                 }

-    def get_dates_matches(self, location, query):
+    def get_dates_matches(self, location, query, candidates):
        matches = set([])
        if len(query) < 2:
            return matches

@@ -274,13 +274,15 @@ class ResultCache(SearchQueryParser): # {{{
         loc = self.field_metadata[location]['rec_index']

         if query == 'false':
-            for item in self._data:
+            for id_ in candidates:
+                item = self._data[id_]
                 if item is None: continue
                 if item[loc] is None or item[loc] <= UNDEFINED_DATE:
                     matches.add(item[0])
             return matches
         if query == 'true':
-            for item in self._data:
+            for id_ in candidates:
+                item = self._data[id_]
                 if item is None: continue
                 if item[loc] is not None and item[loc] > UNDEFINED_DATE:
                     matches.add(item[0])

@@ -319,7 +321,8 @@ class ResultCache(SearchQueryParser): # {{{
             field_count = query.count('-') + 1
         else:
             field_count = query.count('/') + 1
-        for item in self._data:
+        for id_ in candidates:
+            item = self._data[id_]
             if item is None or item[loc] is None: continue
             if relop(item[loc], qd, field_count):
                 matches.add(item[0])

@@ -335,7 +338,7 @@ class ResultCache(SearchQueryParser): # {{{
                     '<=':[2, lambda r, q: r <= q]
                 }

-    def get_numeric_matches(self, location, query, val_func = None):
+    def get_numeric_matches(self, location, query, candidates, val_func = None):
        matches = set([])
        if len(query) == 0:
            return matches

@@ -381,7 +384,8 @@ class ResultCache(SearchQueryParser): # {{{
         except:
             return matches

-        for item in self._data:
+        for id_ in candidates:
+            item = self._data[id_]
             if item is None:
                 continue
             v = val_func(item)

@@ -393,8 +397,13 @@ class ResultCache(SearchQueryParser): # {{{
                 matches.add(item[0])
         return matches

-    def get_matches(self, location, query, allow_recursion=True):
+    def get_matches(self, location, query, allow_recursion=True, candidates=None):
         matches = set([])
+        if candidates is None:
+            candidates = self.universal_set()
+        if len(candidates) == 0:
+            return matches
+
         if query and query.strip():
             # get metadata key associated with the search term. Eliminates
             # dealing with plurals and other aliases

@@ -476,7 +485,8 @@ class ResultCache(SearchQueryParser): # {{{
             else:
                 q = query

-            for item in self._data:
+            for id_ in candidates:
+                item = self._data[id_]
                 if item is None: continue

                 if col_datatype[loc] == 'bool': # complexity caused by the two-/three-value tweak

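Every matcher above changes from scanning all of self._data to scanning only the ids in candidates; distilled (rows are invented, and None marks a deleted row, which is why each loop keeps the `if item is None: continue` guard):

    _data = {1: [1, 'abc'], 2: None, 3: [3, 'abd'], 4: [4, 'xyz']}

    def get_matches(query, candidates):
        matches = set()
        for id_ in candidates:          # only candidate rows are examined
            item = _data[id_]
            if item is None:            # deleted row
                continue
            if query in item[1]:
                matches.add(item[0])
        return matches

    print(sorted(get_matches('ab', {1, 2, 3})))  # [1, 3]; row 4 is never touched
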
@@ -195,8 +195,8 @@ class CustomColumns(object):
         data = self.custom_column_num_map[num]
         row = self.data._data[idx] if index_is_id else self.data[idx]
         ans = row[self.FIELD_MAP[data['num']]]
-        if data['is_multiple'] and data['datatype'] == 'text':
-            ans = ans.split('|') if ans else []
+        if ans and data['is_multiple'] and data['datatype'] == 'text':
+            ans = ans.split('|')
             if data['display'].get('sort_alpha', False):
                 ans.sort(cmp=lambda x,y:cmp(x.lower(), y.lower()))
         return ans

@@ -256,7 +256,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             'pubdate',
             'flags',
             'uuid',
-            'has_cover'
+            'has_cover',
+            ('au_map', 'authors', 'author', 'aum_sortconcat(link.id, authors.name, authors.sort)')
         ]
         lines = []
         for col in columns:

@@ -273,9 +274,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

         self.FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'timestamp':3,
                 'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8,
-                'publisher':9, 'series_index':10,
-                'sort':11, 'author_sort':12, 'formats':13, 'isbn':14, 'path':15,
-                'lccn':16, 'pubdate':17, 'flags':18, 'uuid':19, 'cover':20}
+                'publisher':9, 'series_index':10, 'sort':11, 'author_sort':12,
+                'formats':13, 'isbn':14, 'path':15, 'lccn':16, 'pubdate':17,
+                'flags':18, 'uuid':19, 'cover':20, 'au_map':21}

         for k,v in self.FIELD_MAP.iteritems():
             self.field_metadata.set_field_record_index(k, v, prefer_custom=False)

@@ -687,9 +688,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         Convenience method to return metadata as a :class:`Metadata` object.
         Note that the list of formats is not verified.
         '''
+        row = self.data._data[idx] if index_is_id else self.data[idx]
+        fm = self.FIELD_MAP
+
         self.gm_count += 1
-        mi = self.data.get(idx, self.FIELD_MAP['all_metadata'],
-                row_is_id = index_is_id)
+        mi = row[self.FIELD_MAP['all_metadata']]
         if mi is not None:
             if get_cover:
                 # Always get the cover, because the value can be wrong if the

@@ -699,49 +702,46 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

         self.gm_missed += 1
         mi = Metadata(None)
-        self.data.set(idx, self.FIELD_MAP['all_metadata'], mi,
-                row_is_id = index_is_id)
+        self.data.set(idx, fm['all_metadata'], mi, row_is_id = index_is_id)

-        aut_list = self.authors_with_sort_strings(idx, index_is_id=index_is_id)
+        aut_list = row[fm['au_map']]
+        aut_list = [p.split(':::') for p in aut_list.split(':#:')]
         aum = []
         aus = {}
         for (author, author_sort) in aut_list:
             aum.append(author)
-            aus[author] = author_sort
-        mi.title = self.title(idx, index_is_id=index_is_id)
+            aus[author] = author_sort.replace('|', ',')
+        mi.title = row[fm['title']]
         mi.authors = aum
-        mi.author_sort = self.author_sort(idx, index_is_id=index_is_id)
+        mi.author_sort = row[fm['author_sort']]
         mi.author_sort_map = aus
-        mi.comments = self.comments(idx, index_is_id=index_is_id)
-        mi.publisher = self.publisher(idx, index_is_id=index_is_id)
-        mi.timestamp = self.timestamp(idx, index_is_id=index_is_id)
-        mi.pubdate = self.pubdate(idx, index_is_id=index_is_id)
-        mi.uuid = self.uuid(idx, index_is_id=index_is_id)
-        mi.title_sort = self.title_sort(idx, index_is_id=index_is_id)
-        mi.formats = self.formats(idx, index_is_id=index_is_id,
-                verify_formats=False)
-        if hasattr(mi.formats, 'split'):
-            mi.formats = mi.formats.split(',')
-        else:
-            mi.formats = None
-        tags = self.tags(idx, index_is_id=index_is_id)
+        mi.comments = row[fm['comments']]
+        mi.publisher = row[fm['publisher']]
+        mi.timestamp = row[fm['timestamp']]
+        mi.pubdate = row[fm['pubdate']]
+        mi.uuid = row[fm['uuid']]
+        mi.title_sort = row[fm['sort']]
+        formats = row[fm['formats']]
+        if not formats:
+            formats = None
+        mi.formats = formats
+        tags = row[fm['tags']]
         if tags:
             mi.tags = [i.strip() for i in tags.split(',')]
-        mi.series = self.series(idx, index_is_id=index_is_id)
+        mi.series = row[fm['series']]
         if mi.series:
-            mi.series_index = self.series_index(idx, index_is_id=index_is_id)
-        mi.rating = self.rating(idx, index_is_id=index_is_id)
-        mi.isbn = self.isbn(idx, index_is_id=index_is_id)
+            mi.series_index = row[fm['series_index']]
+        mi.rating = row[fm['rating']]
+        mi.isbn = row[fm['isbn']]
         id = idx if index_is_id else self.id(idx)
         mi.application_id = id
         mi.id = id
-        for key,meta in self.field_metadata.iteritems():
-            if meta['is_custom']:
-                mi.set_user_metadata(key, meta)
-                mi.set(key, val=self.get_custom(idx, label=meta['label'],
-                                                index_is_id=index_is_id),
-                       extra=self.get_custom_extra(idx, label=meta['label'],
-                                                   index_is_id=index_is_id))
+        for key, meta in self.field_metadata.custom_iteritems():
+            mi.set_user_metadata(key, meta)
+            mi.set(key, val=self.get_custom(idx, label=meta['label'],
+                                            index_is_id=index_is_id),
+                   extra=self.get_custom_extra(idx, label=meta['label'],
+                                               index_is_id=index_is_id))
         if get_cover:
             mi.cover = self.cover(id, index_is_id=True, as_path=True)
         return mi

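The rewrite trades one accessor call (and hence one query or cache lookup) per field for direct indexing into the row that is already in memory, via FIELD_MAP. In miniature (row contents and the map are invented):

    # A cached library row is a plain sequence; FIELD_MAP names the indices.
    FIELD_MAP = {'id': 0, 'title': 1, 'author_sort': 2, 'formats': 3}
    row = (7, 'Dune', 'Herbert, Frank', 'EPUB,MOBI')

    fm = FIELD_MAP
    title = row[fm['title']]       # one tuple lookup, no SQL round trip
    formats = row[fm['formats']]
    formats = formats.split(',') if formats else None
    print('%s %s' % (title, formats))  # Dune ['EPUB', 'MOBI']
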
@@ -877,18 +877,17 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

     def formats(self, index, index_is_id=False, verify_formats=True):
         ''' Return available formats as a comma separated list or None if there are no available formats '''
-        id = index if index_is_id else self.id(index)
-        try:
-            formats = self.conn.get('SELECT format FROM data WHERE book=?', (id,))
-            formats = map(lambda x:x[0], formats)
-        except:
+        id_ = index if index_is_id else self.id(index)
+        formats = self.data.get(id_, self.FIELD_MAP['formats'], row_is_id=True)
+        if not formats:
             return None
         if not verify_formats:
-            return ','.join(formats)
+            return formats
+        formats = formats.split(',')
         ans = []
-        for format in formats:
-            if self.format_abspath(id, format, index_is_id=True) is not None:
-                ans.append(format)
+        for fmt in formats:
+            if self.format_abspath(id_, fmt, index_is_id=True) is not None:
+                ans.append(fmt)
         if not ans:
             return None
         return ','.join(ans)

@@ -1607,6 +1606,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                       ','.join([a.replace(',', '|') for a in authors]),
                       row_is_id=True)
         self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True)
+        aum = self.authors_with_sort_strings(id, index_is_id=True)
+        self.data.set(id, self.FIELD_MAP['au_map'],
+            ':#:'.join([':::'.join((au.replace(',', '|'), aus)) for (au, aus) in aum]),
+            row_is_id=True)

     def set_authors(self, id, authors, notify=True, commit=True):
         '''

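The au_map column packs all (author, author_sort) pairs for a book into a single string: ':::' separates the two halves of a pair, ':#:' separates pairs, and commas inside author names are stored as '|'. A round-trip sketch (names invented; calibre's exact comma handling differs slightly between the two sides):

    aum = [('Herbert, Frank', 'Herbert, Frank'), ('Asimov, Isaac', 'Asimov, Isaac')]

    # Pack, as the au_map update above does.
    packed = ':#:'.join([':::'.join((au.replace(',', '|'), aus)) for (au, aus) in aum])

    # Unpack, as get_metadata does.
    aut_list = [p.split(':::') for p in packed.split(':#:')]
    for author, author_sort in aut_list:
        print('%s -> %s' % (author.replace('|', ','), author_sort))
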
@@ -180,6 +180,15 @@ class FieldMetadata(dict):
                        'search_terms':['author_sort'],
                        'is_custom':False,
                        'is_category':False}),
+            ('au_map', {'table':None,
+                        'column':None,
+                        'datatype':'text',
+                        'is_multiple':',',
+                        'kind':'field',
+                        'name':None,
+                        'search_terms':[],
+                        'is_custom':False,
+                        'is_category':False}),
             ('comments', {'table':None,
                           'column':None,
                           'datatype':'text',

@@ -400,6 +409,12 @@ class FieldMetadata(dict):
         for key in self._tb_cats:
             yield (key, self._tb_cats[key])

+    def custom_iteritems(self):
+        for key in self._tb_cats:
+            fm = self._tb_cats[key]
+            if fm['is_custom']:
+                yield (key, self._tb_cats[key])
+
     def items(self):
         return list(self.iteritems())

@@ -87,6 +87,23 @@ class SortedConcatenate(object):
 class SafeSortedConcatenate(SortedConcatenate):
     sep = '|'

+class AumSortedConcatenate(object):
+    '''String concatenation aggregator for the author sort map'''
+    def __init__(self):
+        self.ans = {}
+
+    def step(self, ndx, author, sort):
+        if author is not None:
+            self.ans[ndx] = author + ':::' + sort
+
+    def finalize(self):
+        keys = self.ans.keys()
+        if len(keys) == 0:
+            return None
+        if len(keys) == 1:
+            return self.ans[keys[0]]
+        return ':#:'.join([self.ans[v] for v in sorted(keys)])
+
 class Connection(sqlite.Connection):

     def get(self, *args, **kw):

@@ -155,6 +172,7 @@ class DBThread(Thread):
             c_ext_loaded = load_c_extensions(self.conn)
             self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row)
             self.conn.create_aggregate('concat', 1, Concatenate)
+            self.conn.create_aggregate('aum_sortconcat', 3, AumSortedConcatenate)
             if not c_ext_loaded:
                 self.conn.create_aggregate('sortconcat', 2, SortedConcatenate)
                 self.conn.create_aggregate('sort_concat', 2, SafeSortedConcatenate)

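AumSortedConcatenate follows the standard sqlite3 aggregate protocol: register the class with create_aggregate(name, n_args, cls); SQLite instantiates it once per group, calls step() for each row, and finalize() for the result. A self-contained demo against a throwaway in-memory table (table and data invented):

    import sqlite3

    class AumSortedConcatenate(object):
        '''Concatenate (author, sort) pairs, ordered by link-table id.'''
        def __init__(self):
            self.ans = {}
        def step(self, ndx, author, sort):
            if author is not None:
                self.ans[ndx] = author + ':::' + sort
        def finalize(self):
            keys = sorted(self.ans)
            return ':#:'.join(self.ans[k] for k in keys) if keys else None

    conn = sqlite3.connect(':memory:')
    conn.create_aggregate('aum_sortconcat', 3, AumSortedConcatenate)
    conn.execute('CREATE TABLE link (id INTEGER, name TEXT, sort TEXT)')
    conn.executemany('INSERT INTO link VALUES (?,?,?)',
                     [(2, 'Asimov', 'Asimov, Isaac'), (1, 'Herbert', 'Herbert, Frank')])
    print(conn.execute('SELECT aum_sortconcat(id, name, sort) FROM link').fetchone()[0])
    # Herbert:::Herbert, Frank:#:Asimov:::Asimov, Isaac
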
@@ -118,8 +118,9 @@ class SearchQueryParser(object):
             failed.append(test[0])
         return failed

-    def __init__(self, locations, test=False):
+    def __init__(self, locations, test=False, optimize=False):
         self._tests_failed = False
+        self.optimize = optimize
         # Define a token
         standard_locations = map(lambda x : CaselessLiteral(x)+Suppress(':'),
                                  locations)

@@ -182,38 +183,52 @@ class SearchQueryParser(object):
         # empty the list of searches used for recursion testing
         self.recurse_level = 0
         self.searches_seen = set([])
-        return self._parse(query)
+        candidates = self.universal_set()
+        return self._parse(query, candidates)

     # this parse is used internally because it doesn't clear the
     # recursive search test list. However, we permit seeing the
     # same search a few times because the search might appear within
     # another search.
-    def _parse(self, query):
+    def _parse(self, query, candidates=None):
         self.recurse_level += 1
         res = self._parser.parseString(query)[0]
-        t = self.evaluate(res)
+        if candidates is None:
+            candidates = self.universal_set()
+        t = self.evaluate(res, candidates)
         self.recurse_level -= 1
         return t

     def method(self, group_name):
         return getattr(self, 'evaluate_'+group_name)

-    def evaluate(self, parse_result):
-        return self.method(parse_result.getName())(parse_result)
+    def evaluate(self, parse_result, candidates):
+        return self.method(parse_result.getName())(parse_result, candidates)

-    def evaluate_and(self, argument):
-        return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
+    def evaluate_and(self, argument, candidates):
+        # RHS checks only those items matched by LHS
+        # returns result of RHS check: RHmatches(LHmatches(c))
+        # return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
+        l = self.evaluate(argument[0], candidates)
+        return l.intersection(self.evaluate(argument[1], l))

-    def evaluate_or(self, argument):
-        return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
+    def evaluate_or(self, argument, candidates):
+        # RHS checks only those elements not matched by LHS
+        # returns LHS union RHS: LHmatches(c) + RHmatches(c-LHmatches(c))
+        # return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
+        l = self.evaluate(argument[0], candidates)
+        return l.union(self.evaluate(argument[1], candidates.difference(l)))

-    def evaluate_not(self, argument):
-        return self.universal_set().difference(self.evaluate(argument[0]))
+    def evaluate_not(self, argument, candidates):
+        # unary op checks only candidates. Result: list of items matching
+        # returns: c - matches(c)
+        # return self.universal_set().difference(self.evaluate(argument[0]))
+        return candidates.difference(self.evaluate(argument[0], candidates))

-    def evaluate_parenthesis(self, argument):
-        return self.evaluate(argument[0])
+    def evaluate_parenthesis(self, argument, candidates):
+        return self.evaluate(argument[0], candidates)

-    def evaluate_token(self, argument):
+    def evaluate_token(self, argument, candidates):
         location = argument[0]
         query = argument[1]
         if location.lower() == 'search':

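The whole optimization in one picture: every evaluator now receives the candidate set it must search, so AND runs its right side only over the left side's matches, OR only over what the left side missed, and NOT subtracts from candidates rather than from the universal set. A toy evaluator over invented data:

    universe = set(range(10))
    data = dict((i, str(i)) for i in universe)

    def matches(q, candidates):
        # Leaf lookup: scans only the candidate ids, never the whole universe.
        return set(i for i in candidates if q in data[i])

    def AND(a, b, candidates):
        l = matches(a, candidates)
        return l.intersection(matches(b, l))                   # RHS sees only LHS hits

    def OR(a, b, candidates):
        l = matches(a, candidates)
        return l.union(matches(b, candidates.difference(l)))   # RHS skips LHS hits

    def NOT(a, candidates):
        return candidates.difference(matches(a, candidates))

    print(sorted(OR('2', '3', universe)))  # [2, 3]
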
@@ -224,17 +239,27 @@ class SearchQueryParser(object):
                     raise ParseException(query, len(query), 'undefined saved search', self)
                 if self.recurse_level > 5:
                     self.searches_seen.add(query)
-                return self._parse(saved_searches().lookup(query))
+                return self._parse(saved_searches().lookup(query), candidates)
             except: # convert all exceptions (e.g., missing key) to a parse error
                 raise ParseException(query, len(query), 'undefined saved search', self)
-        return self.get_matches(location, query)
+        return self._get_matches(location, query, candidates)

-    def get_matches(self, location, query):
+    def _get_matches(self, location, query, candidates):
+        if self.optimize:
+            return self.get_matches(location, query, candidates=candidates)
+        else:
+            return self.get_matches(location, query)
+
+    def get_matches(self, location, query, candidates=None):
         '''
         Should return the set of matches for :param:`location` and :param:`query`.

+        The search must be performed over all entries if :param:`candidates` is
+        None, otherwise only over the items in candidates.
+
         :param:`location` is one of the items in :member:`SearchQueryParser.DEFAULT_LOCATIONS`.
         :param:`query` is a string literal.
+        :param:`candidates` is None or a subset of the set returned by :meth:`universal_set`.
         '''
         return set([])

@@ -561,7 +586,7 @@ class Tester(SearchQueryParser):
     def universal_set(self):
         return self._universal_set

-    def get_matches(self, location, query):
+    def get_matches(self, location, query, candidates=None):
         location = location.lower()
         if location in self.fields.keys():
             getter = operator.itemgetter(self.fields[location])

@@ -573,8 +598,13 @@ class Tester(SearchQueryParser):
         if not query:
             return set([])
         query = query.lower()
-        return set(key for key, val in self.texts.items() \
-                if query and query in getattr(getter(val), 'lower', lambda : '')())
+        if candidates:
+            return set(key for key, val in self.texts.items() \
+                if key in candidates and query and query
+                in getattr(getter(val), 'lower', lambda : '')())
+        else:
+            return set(key for key, val in self.texts.items() \
+                if query and query in getattr(getter(val), 'lower', lambda : '')())


@@ -592,6 +622,7 @@ class Tester(SearchQueryParser):


 def main(args=sys.argv):
+    print 'testing unoptimized'
     tester = Tester(['authors', 'author', 'series', 'formats', 'format',
         'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
         'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',

@@ -601,6 +632,16 @@ def main(args=sys.argv):
         print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
         return 1

+    print '\n\ntesting optimized'
+    tester = Tester(['authors', 'author', 'series', 'formats', 'format',
+        'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
+        'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',
+        'all', 'search'], test=True, optimize=True)
+    failed = tester.run_tests()
+    if tester._tests_failed or failed:
+        print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
+        return 1
+
     return 0

 if __name__ == '__main__':