commit 04bbda5e5a
Author: ldolse
Date:   2011-01-09 20:12:19 +08:00

    merge from trunk

10 changed files with 241 additions and 109 deletions


@@ -29,7 +29,7 @@ class ANDROID(USBMS):
             # Motorola
             0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100],
                 0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
-                0x4286 : [0x216] },
+                0x4286 : [0x216], 0x42b3 : [0x216] },

             # Sony Ericsson
             0xfce : { 0xd12e : [0x0100]},
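
This hunk adds product id 0x42b3 to the Motorola entry. The table maps USB vendor id to product id to the list of acceptable BCD (device revision) values. A minimal sketch of how such a triple is looked up; the matches() helper is hypothetical, the real matching lives in calibre's USB device scanner:

    VENDOR_PRODUCT_BCD = {
        0x22b8: {0x42b3: [0x216]},   # Motorola (one entry shown)
        0xfce:  {0xd12e: [0x0100]},  # Sony Ericsson
    }

    def matches(vid, pid, bcd):
        # A (vendor, product, revision) triple is accepted if the revision
        # appears in the list for that vendor/product pair.
        return bcd in VENDOR_PRODUCT_BCD.get(vid, {}).get(pid, [])

    assert matches(0x22b8, 0x42b3, 0x216)  # the id added by this hunk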


@@ -324,14 +324,16 @@ class Metadata(object):
         if metadata is None:
             traceback.print_stack()
             return
-        metadata = copy.deepcopy(metadata)
-        if '#value#' not in metadata:
-            if metadata['datatype'] == 'text' and metadata['is_multiple']:
-                metadata['#value#'] = []
+        m = {}
+        for k in metadata:
+            m[k] = copy.copy(metadata[k])
+        if '#value#' not in m:
+            if m['datatype'] == 'text' and m['is_multiple']:
+                m['#value#'] = []
             else:
-                metadata['#value#'] = None
+                m['#value#'] = None
         _data = object.__getattribute__(self, '_data')
-        _data['user_metadata'][field] = metadata
+        _data['user_metadata'][field] = m

     def template_to_attribute(self, other, ops):
         '''
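
The point of this hunk is to avoid copy.deepcopy, which recursively clones every nested object, in favour of a per-key copy.copy that clones only one level down. A minimal sketch of the trade-off, using an illustrative dict rather than calibre's actual metadata:

    import copy

    metadata = {'datatype': 'text', 'display': {'tags': ['a', 'b']}}

    # deepcopy clones everything, recursively:
    deep = copy.deepcopy(metadata)
    deep['display']['tags'].append('x')
    assert metadata['display']['tags'] == ['a', 'b']

    # The per-key copy clones the top-level values only; 'display' is a new
    # dict, but the list inside it is still shared with the source:
    m = {}
    for k in metadata:
        m[k] = copy.copy(metadata[k])
    m['display']['tags'].append('x')
    assert metadata['display']['tags'] == ['a', 'b', 'x']

The per-key copy is much cheaper, and evidently deep enough for this caller.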


@@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'
 Transform OEB content into plain text
 '''

-import os
 import re

 from lxml import etree
@@ -33,6 +32,15 @@ BLOCK_STYLES = [
     'block',
 ]

+HEADING_TAGS = [
+    'h1',
+    'h2',
+    'h3',
+    'h4',
+    'h5',
+    'h6',
+]
+
 SPACE_TAGS = [
     'td',
     'br',
@@ -47,6 +55,10 @@ class TXTMLizer(object):
         self.log.info('Converting XHTML to TXT...')
         self.oeb_book = oeb_book
         self.opts = opts
+        self.toc_ids = []
+        self.last_was_heading = False
+        self.create_flat_toc(self.oeb_book.toc)

         return self.mlize_spine()
@@ -58,8 +70,11 @@ class TXTMLizer(object):
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
             content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
             content = self.remove_newlines(content)
-            output += self.dump_text(etree.fromstring(content), stylizer)
-        output = self.cleanup_text(u''.join(output))
+            output += self.dump_text(etree.fromstring(content), stylizer, item)
+            output += '\n\n\n\n\n\n'
+        output = u''.join(output)
+        output = u'\n'.join(l.rstrip() for l in output.splitlines())
+        output = self.cleanup_text(output)

         return output
@@ -68,6 +83,8 @@ class TXTMLizer(object):
         text = text.replace('\r\n', ' ')
         text = text.replace('\n', ' ')
         text = text.replace('\r', ' ')
+        # Condense redundant spaces created by replacing newlines with spaces.
+        text = re.sub(r'[ ]{2,}', ' ', text)

         return text
@@ -80,6 +97,14 @@ class TXTMLizer(object):
             toc.append(u'* %s\n\n' % item.title)
         return ''.join(toc)

+    def create_flat_toc(self, nodes):
+        '''
+        Turns a hierarchical list of TOC hrefs into a flat list.
+        '''
+        for item in nodes:
+            self.toc_ids.append(item.href)
+            self.create_flat_toc(item.nodes)
+
     def cleanup_text(self, text):
         self.log.debug('\tClean up text...')
         # Replace bad characters.
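
create_flat_toc recursively collects every TOC node's href into self.toc_ids; dump_text below checks '%s#%s' % (page.href, tag_id) against that list to decide whether an element starts a heading. A stand-alone sketch of the walk, with a hypothetical Node class standing in for calibre's TOC objects:

    class Node(object):
        def __init__(self, href, nodes=()):
            self.href, self.nodes = href, list(nodes)

    def flatten(nodes, out):
        # Depth-first walk: record this node's href, then descend.
        for item in nodes:
            out.append(item.href)
            flatten(item.nodes, out)

    toc_ids = []
    flatten([Node('ch1.html#t', [Node('ch1.html#s1'), Node('ch1.html#s2')]),
             Node('ch2.html#t')], toc_ids)
    assert toc_ids == ['ch1.html#t', 'ch1.html#s1', 'ch1.html#s2', 'ch2.html#t']
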
@@ -92,7 +117,7 @@ class TXTMLizer(object):
         text = text.replace('\f+', ' ')

         # Single line paragraph.
-        text = re.sub('(?<=.)%s(?=.)' % os.linesep, ' ', text)
+        text = re.sub('(?<=.)\n(?=.)', ' ', text)

         # Remove multiple spaces.
         text = re.sub('[ ]{2,}', ' ', text)
@@ -101,13 +126,19 @@ class TXTMLizer(object):
         text = re.sub('\n[ ]+\n', '\n\n', text)

         if self.opts.remove_paragraph_spacing:
             text = re.sub('\n{2,}', '\n', text)
-            text = re.sub('(?imu)^(?=.)', '\t', text)
+            text = re.sub(r'(?msu)^(?P<t>[^\t\n]+?)$', lambda mo: u'%s\n\n' % mo.group('t'), text)
+            text = re.sub(r'(?msu)(?P<b>[^\n])\n+(?P<t>[^\t\n]+?)(?=\n)', lambda mo: '%s\n\n\n\n\n\n%s' % (mo.group('b'), mo.group('t')), text)
         else:
-            text = re.sub('\n{3,}', '\n\n', text)
+            text = re.sub('\n{7,}', '\n\n\n\n\n\n', text)

         # Replace spaces at the beginning and end of lines
+        # We don't replace tabs because those are only added
+        # when remove paragraph spacing is enabled.
         text = re.sub('(?imu)^[ ]+', '', text)
         text = re.sub('(?imu)[ ]+$', '', text)

+        # Remove empty space and newlines at the beginning of the document.
+        text = re.sub(r'(?u)^[ \n]+', '', text)
+
         if self.opts.max_line_length:
             max_length = self.opts.max_line_length
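
A run of six newlines is this patch's internal marker for a heading or section break (see the '\n\n\n\n\n\n' appends elsewhere in the file); the cleanup above normalizes any longer run back down to six. That rule demonstrated in isolation:

    import re

    text = 'end of chapter' + '\n' * 12 + 'next heading'
    text = re.sub('\n{7,}', '\n\n\n\n\n\n', text)
    assert text == 'end of chapter' + '\n' * 6 + 'next heading'
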
@@ -145,13 +176,11 @@ class TXTMLizer(object):
         return text

-    def dump_text(self, elem, stylizer, end=''):
+    def dump_text(self, elem, stylizer, page):
         '''
         @elem: The element in the etree that we are working on.
         @stylizer: The style information attached to the element.
-        @end: The last two characters of the text from the previous element.
-              This is used to determine if a blank line is needed when starting
-              a new block element.
+        @page: OEB page used to determine absolute urls.
         '''

         if not isinstance(elem.tag, basestring) \
@@ -170,13 +199,22 @@ class TXTMLizer(object):
             return ['']

         tag = barename(elem.tag)
+        tag_id = elem.attrib.get('id', None)
         in_block = False
+        in_heading = False
+
+        # Are we in a heading?
+        # This can either be a heading tag or a TOC item.
+        if tag in HEADING_TAGS or '%s#%s' % (page.href, tag_id) in self.toc_ids:
+            in_heading = True
+            if not self.last_was_heading:
+                text.append('\n\n\n\n\n\n')

         # Are we in a paragraph block?
         if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
+            if self.opts.remove_paragraph_spacing and not in_heading:
+                text.append(u'\t')
             in_block = True
-            if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text:
-                text.append(u'\n\n')

         if tag in SPACE_TAGS:
             text.append(u' ')
@@ -185,14 +223,17 @@ class TXTMLizer(object):
         if hasattr(elem, 'text') and elem.text:
             text.append(elem.text)

+        # Recurse down into tags within the tag we are in.
         for item in elem:
-            en = u''
-            if len(text) >= 2:
-                en = text[-1][-2:]
-            text += self.dump_text(item, stylizer, en)
+            text += self.dump_text(item, stylizer, page)

         if in_block:
             text.append(u'\n\n')

+        if in_heading:
+            text.append(u'\n')
+            self.last_was_heading = True
+        else:
+            self.last_was_heading = False
+
         if hasattr(elem, 'tail') and elem.tail:
             text.append(elem.tail)


@@ -637,7 +637,7 @@ class DeviceMixin(object): # {{{
         self.device_manager.mount_device(kls=FOLDER_DEVICE, kind='folder', path=dir)

     def connect_to_bambook(self):
         self.device_manager.mount_device(kls=BAMBOOKWifi, kind='bambook',
                                          path=BAMBOOK.settings().extra_customization)

     def connect_to_itunes(self):
def connect_to_itunes(self): def connect_to_itunes(self):
@@ -1266,8 +1266,8 @@ class DeviceMixin(object): # {{{
         # Force a reset if the caches are not initialized
         if reset or not hasattr(self, 'db_book_title_cache'):
             # Build a cache (map) of the library, so the search isn't On**2
-            self.db_book_title_cache = {}
-            self.db_book_uuid_cache = {}
+            db_book_title_cache = {}
+            db_book_uuid_cache = {}
             # It might be possible to get here without having initialized the
             # library view. In this case, simply give up
             try:
@@ -1278,8 +1278,8 @@ class DeviceMixin(object): # {{{
             for id in db.data.iterallids():
                 mi = db.get_metadata(id, index_is_id=True)
                 title = clean_string(mi.title)
-                if title not in self.db_book_title_cache:
-                    self.db_book_title_cache[title] = \
+                if title not in db_book_title_cache:
+                    db_book_title_cache[title] = \
                             {'authors':{}, 'author_sort':{}, 'db_ids':{}}
                 # If there are multiple books in the library with the same title
                 # and author, then remember the last one. That is OK, because as
@@ -1287,12 +1287,14 @@ class DeviceMixin(object): # {{{
                 # as another.
                 if mi.authors:
                     authors = clean_string(authors_to_string(mi.authors))
-                    self.db_book_title_cache[title]['authors'][authors] = mi
+                    db_book_title_cache[title]['authors'][authors] = mi
                 if mi.author_sort:
                     aus = clean_string(mi.author_sort)
-                    self.db_book_title_cache[title]['author_sort'][aus] = mi
-                self.db_book_title_cache[title]['db_ids'][mi.application_id] = mi
-                self.db_book_uuid_cache[mi.uuid] = mi
+                    db_book_title_cache[title]['author_sort'][aus] = mi
+                db_book_title_cache[title]['db_ids'][mi.application_id] = mi
+                db_book_uuid_cache[mi.uuid] = mi
+            self.db_book_title_cache = db_book_title_cache
+            self.db_book_uuid_cache = db_book_uuid_cache

         # Now iterate through all the books on the device, setting the
         # in_library field. If the UUID matches a book in the library, then
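
The switch from self.db_book_title_cache to local variables is not just a rename: the maps are now built up in locals and bound to self only once complete, so anything reading the attributes while a rebuild is in flight sees either the old cache or the new one, never a half-built one. The pattern in isolation, with hypothetical names:

    class Library(object):
        def rebuild_cache(self, books):
            cache = {}
            for book in books:
                cache.setdefault(book['title'], []).append(book)
            # Publish once, at the end: rebinding the attribute is a single
            # operation, so readers never observe a partially filled dict.
            self.cache = cache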


@@ -181,7 +181,7 @@ class ResultCache(SearchQueryParser): # {{{
         self.search_restriction = ''
         self.field_metadata = field_metadata
         self.all_search_locations = field_metadata.get_search_terms()
-        SearchQueryParser.__init__(self, self.all_search_locations)
+        SearchQueryParser.__init__(self, self.all_search_locations, optimize=True)
         self.build_date_relop_dict()
         self.build_numeric_relop_dict()
@@ -264,7 +264,7 @@ class ResultCache(SearchQueryParser): # {{{
                 '<=':[2, relop_le]
             }

-    def get_dates_matches(self, location, query):
+    def get_dates_matches(self, location, query, candidates):
         matches = set([])
         if len(query) < 2:
             return matches
@@ -274,13 +274,15 @@ class ResultCache(SearchQueryParser): # {{{
         loc = self.field_metadata[location]['rec_index']

         if query == 'false':
-            for item in self._data:
+            for id_ in candidates:
+                item = self._data[id_]
                 if item is None: continue
                 if item[loc] is None or item[loc] <= UNDEFINED_DATE:
                     matches.add(item[0])
             return matches
         if query == 'true':
-            for item in self._data:
+            for id_ in candidates:
+                item = self._data[id_]
                 if item is None: continue
                 if item[loc] is not None and item[loc] > UNDEFINED_DATE:
                     matches.add(item[0])
@@ -319,7 +321,8 @@ class ResultCache(SearchQueryParser): # {{{
             field_count = query.count('-') + 1
         else:
             field_count = query.count('/') + 1
-        for item in self._data:
+        for id_ in candidates:
+            item = self._data[id_]
             if item is None or item[loc] is None: continue
             if relop(item[loc], qd, field_count):
                 matches.add(item[0])
@@ -335,7 +338,7 @@ class ResultCache(SearchQueryParser): # {{{
             '<=':[2, lambda r, q: r <= q]
         }

-    def get_numeric_matches(self, location, query, val_func = None):
+    def get_numeric_matches(self, location, query, candidates, val_func = None):
         matches = set([])
         if len(query) == 0:
             return matches
@@ -381,7 +384,8 @@ class ResultCache(SearchQueryParser): # {{{
         except:
             return matches

-        for item in self._data:
+        for id_ in candidates:
+            item = self._data[id_]
             if item is None:
                 continue
             v = val_func(item)
@@ -393,8 +397,13 @@ class ResultCache(SearchQueryParser): # {{{
                 matches.add(item[0])
         return matches

-    def get_matches(self, location, query, allow_recursion=True):
+    def get_matches(self, location, query, allow_recursion=True, candidates=None):
         matches = set([])
+        if candidates is None:
+            candidates = self.universal_set()
+        if len(candidates) == 0:
+            return matches
         if query and query.strip():
             # get metadata key associated with the search term. Eliminates
             # dealing with plurals and other aliases
@@ -476,7 +485,8 @@ class ResultCache(SearchQueryParser): # {{{
                 else:
                     q = query

-            for item in self._data:
+            for id_ in candidates:
+                item = self._data[id_]
                 if item is None: continue
                 if col_datatype[loc] == 'bool': # complexity caused by the two-/three-value tweak
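
Every matcher in ResultCache now takes a candidates argument and walks only those row ids instead of the whole self._data list; together with the parser changes in search_query_parser.py below, each clause of a query re-examines only rows that are still in play. The before/after shape of the loop, sketched with a hypothetical id-to-row mapping (row[0] holds the id, as in ResultCache):

    def matches_all(data, loc, pred):
        # old shape: scan every row
        return set(row[0] for row in data.itervalues()
                   if row is not None and pred(row[loc]))

    def matches_candidates(data, loc, pred, candidates):
        # new shape: scan only the surviving ids
        return set(data[id_][0] for id_ in candidates
                   if data[id_] is not None and pred(data[id_][loc]))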


@@ -195,8 +195,8 @@ class CustomColumns(object):
         data = self.custom_column_num_map[num]
         row = self.data._data[idx] if index_is_id else self.data[idx]
         ans = row[self.FIELD_MAP[data['num']]]
-        if data['is_multiple'] and data['datatype'] == 'text':
-            ans = ans.split('|') if ans else []
+        if ans and data['is_multiple'] and data['datatype'] == 'text':
+            ans = ans.split('|')
             if data['display'].get('sort_alpha', False):
                 ans.sort(cmp=lambda x,y:cmp(x.lower(), y.lower()))
         return ans


@@ -256,7 +256,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             'pubdate',
             'flags',
             'uuid',
-            'has_cover'
+            'has_cover',
+            ('au_map', 'authors', 'author', 'aum_sortconcat(link.id, authors.name, authors.sort)')
         ]
         lines = []
         for col in columns:
@@ -273,9 +274,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         self.FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'timestamp':3,
                 'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8,
-                'publisher':9, 'series_index':10,
-                'sort':11, 'author_sort':12, 'formats':13, 'isbn':14, 'path':15,
-                'lccn':16, 'pubdate':17, 'flags':18, 'uuid':19, 'cover':20}
+                'publisher':9, 'series_index':10, 'sort':11, 'author_sort':12,
+                'formats':13, 'isbn':14, 'path':15, 'lccn':16, 'pubdate':17,
+                'flags':18, 'uuid':19, 'cover':20, 'au_map':21}

         for k,v in self.FIELD_MAP.iteritems():
             self.field_metadata.set_field_record_index(k, v, prefer_custom=False)
@@ -687,9 +688,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         Convenience method to return metadata as a :class:`Metadata` object.
         Note that the list of formats is not verified.
         '''
+        row = self.data._data[idx] if index_is_id else self.data[idx]
+        fm = self.FIELD_MAP
+
         self.gm_count += 1
-        mi = self.data.get(idx, self.FIELD_MAP['all_metadata'],
-                           row_is_id = index_is_id)
+        mi = row[self.FIELD_MAP['all_metadata']]
         if mi is not None:
             if get_cover:
                 # Always get the cover, because the value can be wrong if the
@@ -699,49 +702,46 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         self.gm_missed += 1
         mi = Metadata(None)
-        self.data.set(idx, self.FIELD_MAP['all_metadata'], mi,
-                      row_is_id = index_is_id)
+        self.data.set(idx, fm['all_metadata'], mi, row_is_id = index_is_id)

-        aut_list = self.authors_with_sort_strings(idx, index_is_id=index_is_id)
+        aut_list = row[fm['au_map']]
+        aut_list = [p.split(':::') for p in aut_list.split(':#:')]
         aum = []
         aus = {}
         for (author, author_sort) in aut_list:
             aum.append(author)
-            aus[author] = author_sort
-        mi.title = self.title(idx, index_is_id=index_is_id)
+            aus[author] = author_sort.replace('|', ',')
+        mi.title = row[fm['title']]
         mi.authors = aum
-        mi.author_sort = self.author_sort(idx, index_is_id=index_is_id)
+        mi.author_sort = row[fm['author_sort']]
         mi.author_sort_map = aus
-        mi.comments = self.comments(idx, index_is_id=index_is_id)
-        mi.publisher = self.publisher(idx, index_is_id=index_is_id)
-        mi.timestamp = self.timestamp(idx, index_is_id=index_is_id)
-        mi.pubdate = self.pubdate(idx, index_is_id=index_is_id)
-        mi.uuid = self.uuid(idx, index_is_id=index_is_id)
-        mi.title_sort = self.title_sort(idx, index_is_id=index_is_id)
-        mi.formats = self.formats(idx, index_is_id=index_is_id,
-                                  verify_formats=False)
-        if hasattr(mi.formats, 'split'):
-            mi.formats = mi.formats.split(',')
-        else:
-            mi.formats = None
-        tags = self.tags(idx, index_is_id=index_is_id)
+        mi.comments = row[fm['comments']]
+        mi.publisher = row[fm['publisher']]
+        mi.timestamp = row[fm['timestamp']]
+        mi.pubdate = row[fm['pubdate']]
+        mi.uuid = row[fm['uuid']]
+        mi.title_sort = row[fm['sort']]
+        formats = row[fm['formats']]
+        if not formats:
+            formats = None
+        mi.formats = formats
+        tags = row[fm['tags']]
         if tags:
             mi.tags = [i.strip() for i in tags.split(',')]
-        mi.series = self.series(idx, index_is_id=index_is_id)
+        mi.series = row[fm['series']]
         if mi.series:
-            mi.series_index = self.series_index(idx, index_is_id=index_is_id)
-        mi.rating = self.rating(idx, index_is_id=index_is_id)
-        mi.isbn = self.isbn(idx, index_is_id=index_is_id)
+            mi.series_index = row[fm['series_index']]
+        mi.rating = row[fm['rating']]
+        mi.isbn = row[fm['isbn']]
         id = idx if index_is_id else self.id(idx)
         mi.application_id = id
         mi.id = id
-        for key,meta in self.field_metadata.iteritems():
-            if meta['is_custom']:
-                mi.set_user_metadata(key, meta)
-                mi.set(key, val=self.get_custom(idx, label=meta['label'],
-                                                index_is_id=index_is_id),
-                       extra=self.get_custom_extra(idx, label=meta['label'],
-                                                   index_is_id=index_is_id))
+        for key, meta in self.field_metadata.custom_iteritems():
+            mi.set_user_metadata(key, meta)
+            mi.set(key, val=self.get_custom(idx, label=meta['label'],
                                            index_is_id=index_is_id),
+                   extra=self.get_custom_extra(idx, label=meta['label'],
+                                               index_is_id=index_is_id))
         if get_cover:
             mi.cover = self.cover(id, index_is_id=True, as_path=True)
         return mi
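
get_metadata now reads each field straight out of the cached row by FIELD_MAP index instead of calling one accessor (often one SQL query) per field. The access pattern, with illustrative values:

    # FIELD_MAP names the column positions in the flat cached row, so a field
    # read is a list index rather than a per-field accessor call.
    FIELD_MAP = {'id': 0, 'title': 1, 'author_sort': 12}

    row = [42, 'A Book'] + [None] * 10 + ['Author, Some']
    assert row[FIELD_MAP['title']] == 'A Book'
    assert row[FIELD_MAP['author_sort']] == 'Author, Some'
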
@@ -877,18 +877,17 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

     def formats(self, index, index_is_id=False, verify_formats=True):
         ''' Return available formats as a comma separated list or None if there are no available formats '''
-        id = index if index_is_id else self.id(index)
-        try:
-            formats = self.conn.get('SELECT format FROM data WHERE book=?', (id,))
-            formats = map(lambda x:x[0], formats)
-        except:
+        id_ = index if index_is_id else self.id(index)
+        formats = self.data.get(id_, self.FIELD_MAP['formats'], row_is_id=True)
+        if not formats:
             return None
         if not verify_formats:
-            return ','.join(formats)
+            return formats
+        formats = formats.split(',')
         ans = []
-        for format in formats:
-            if self.format_abspath(id, format, index_is_id=True) is not None:
-                ans.append(format)
+        for fmt in formats:
+            if self.format_abspath(id_, fmt, index_is_id=True) is not None:
+                ans.append(fmt)
         if not ans:
             return None
         return ','.join(ans)
@@ -1607,6 +1606,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                       ','.join([a.replace(',', '|') for a in authors]),
                       row_is_id=True)
         self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True)
+        aum = self.authors_with_sort_strings(id, index_is_id=True)
+        self.data.set(id, self.FIELD_MAP['au_map'],
+                      ':#:'.join([':::'.join((au.replace(',', '|'), aus)) for (au, aus) in aum]),
+                      row_is_id=True)

     def set_authors(self, id, authors, notify=True, commit=True):
         '''
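
The au_map column packs all of a book's authors and sort strings into one text value: entries are separated by ':#:', and within an entry the author and its sort string by ':::' (commas in author names are stored as '|', the escaping used elsewhere in the schema). A round trip of just the delimiter mechanics:

    pairs = [('John Smith', 'Smith, John'), ('Jane Doe', 'Doe, Jane')]

    encoded = ':#:'.join([':::'.join(p) for p in pairs])
    assert encoded == 'John Smith:::Smith, John:#:Jane Doe:::Doe, Jane'

    decoded = [p.split(':::') for p in encoded.split(':#:')]
    assert decoded == [['John Smith', 'Smith, John'], ['Jane Doe', 'Doe, Jane']]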


@@ -180,6 +180,15 @@ class FieldMetadata(dict):
                              'search_terms':['author_sort'],
                              'is_custom':False,
                              'is_category':False}),
+            ('au_map',   {'table':None,
+                             'column':None,
+                             'datatype':'text',
+                             'is_multiple':',',
+                             'kind':'field',
+                             'name':None,
+                             'search_terms':[],
+                             'is_custom':False,
+                             'is_category':False}),
             ('comments', {'table':None,
                              'column':None,
                              'datatype':'text',
@@ -400,6 +409,12 @@ class FieldMetadata(dict):
         for key in self._tb_cats:
             yield (key, self._tb_cats[key])

+    def custom_iteritems(self):
+        for key in self._tb_cats:
+            fm = self._tb_cats[key]
+            if fm['is_custom']:
+                yield (key, self._tb_cats[key])
+
     def items(self):
         return list(self.iteritems())


@@ -87,6 +87,23 @@ class SortedConcatenate(object):

 class SafeSortedConcatenate(SortedConcatenate):
     sep = '|'

+class AumSortedConcatenate(object):
+    '''String concatenation aggregator for the author sort map'''
+    def __init__(self):
+        self.ans = {}
+
+    def step(self, ndx, author, sort):
+        if author is not None:
+            self.ans[ndx] = author + ':::' + sort
+
+    def finalize(self):
+        keys = self.ans.keys()
+        if len(keys) == 0:
+            return None
+        if len(keys) == 1:
+            return self.ans[keys[0]]
+        return ':#:'.join([self.ans[v] for v in sorted(keys)])
+
 class Connection(sqlite.Connection):

     def get(self, *args, **kw):
@@ -155,6 +172,7 @@ class DBThread(Thread):
                 c_ext_loaded = load_c_extensions(self.conn)
             self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row)
             self.conn.create_aggregate('concat', 1, Concatenate)
+            self.conn.create_aggregate('aum_sortconcat', 3, AumSortedConcatenate)
             if not c_ext_loaded:
                 self.conn.create_aggregate('sortconcat', 2, SortedConcatenate)
                 self.conn.create_aggregate('sort_concat', 2, SafeSortedConcatenate)
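
AumSortedConcatenate follows the SQLite custom-aggregate contract: a fresh instance per group, step() called once per row, finalize() returning the group's single value. Registered as aum_sortconcat with three arguments, it matches the aum_sortconcat(link.id, authors.name, authors.sort) expression added to the library view above. A stand-alone sketch of the contract using the stdlib sqlite3 module (calibre registers it on its own connection wrapper):

    import sqlite3

    class AumSortedConcatenate(object):
        '''Concatenate (id, author, sort) rows into one au_map string.'''
        def __init__(self):
            self.ans = {}
        def step(self, ndx, author, sort):
            if author is not None:
                self.ans[ndx] = author + ':::' + sort
        def finalize(self):
            if not self.ans:
                return None
            return ':#:'.join([self.ans[v] for v in sorted(self.ans)])

    conn = sqlite3.connect(':memory:')
    conn.create_aggregate('aum_sortconcat', 3, AumSortedConcatenate)
    conn.execute('CREATE TABLE a (id INTEGER, name TEXT, sort TEXT)')
    conn.executemany('INSERT INTO a VALUES (?,?,?)',
                     [(2, 'Jane Doe', 'Doe, Jane'), (1, 'John Smith', 'Smith, John')])
    row = conn.execute('SELECT aum_sortconcat(id, name, sort) FROM a').fetchone()
    assert row[0] == 'John Smith:::Smith, John:#:Jane Doe:::Doe, Jane'

The finalize() here is condensed; the patched version above special-cases single-author groups, with the same result.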


@@ -118,8 +118,9 @@ class SearchQueryParser(object):
                 failed.append(test[0])
         return failed

-    def __init__(self, locations, test=False):
+    def __init__(self, locations, test=False, optimize=False):
         self._tests_failed = False
+        self.optimize = optimize
         # Define a token
         standard_locations = map(lambda x : CaselessLiteral(x)+Suppress(':'),
                                  locations)
@@ -182,38 +183,52 @@ class SearchQueryParser(object):
         # empty the list of searches used for recursion testing
         self.recurse_level = 0
         self.searches_seen = set([])
-        return self._parse(query)
+        candidates = self.universal_set()
+        return self._parse(query, candidates)

     # this parse is used internally because it doesn't clear the
     # recursive search test list. However, we permit seeing the
     # same search a few times because the search might appear within
     # another search.
-    def _parse(self, query):
+    def _parse(self, query, candidates=None):
         self.recurse_level += 1
         res = self._parser.parseString(query)[0]
-        t = self.evaluate(res)
+        if candidates is None:
+            candidates = self.universal_set()
+        t = self.evaluate(res, candidates)
         self.recurse_level -= 1
         return t

     def method(self, group_name):
         return getattr(self, 'evaluate_'+group_name)

-    def evaluate(self, parse_result):
-        return self.method(parse_result.getName())(parse_result)
+    def evaluate(self, parse_result, candidates):
+        return self.method(parse_result.getName())(parse_result, candidates)

-    def evaluate_and(self, argument):
-        return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
+    def evaluate_and(self, argument, candidates):
+        # RHS checks only those items matched by LHS
+        # returns result of RHS check: RHmatches(LHmatches(c))
+        # return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
+        l = self.evaluate(argument[0], candidates)
+        return l.intersection(self.evaluate(argument[1], l))

-    def evaluate_or(self, argument):
-        return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
+    def evaluate_or(self, argument, candidates):
+        # RHS checks only those elements not matched by LHS
+        # returns LHS union RHS: LHmatches(c) + RHmatches(c-LHmatches(c))
+        # return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
+        l = self.evaluate(argument[0], candidates)
+        return l.union(self.evaluate(argument[1], candidates.difference(l)))

-    def evaluate_not(self, argument):
-        return self.universal_set().difference(self.evaluate(argument[0]))
+    def evaluate_not(self, argument, candidates):
+        # unary op checks only candidates. Result: list of items matching
+        # returns: c - matches(c)
+        # return self.universal_set().difference(self.evaluate(argument[0]))
+        return candidates.difference(self.evaluate(argument[0], candidates))

-    def evaluate_parenthesis(self, argument):
-        return self.evaluate(argument[0])
+    def evaluate_parenthesis(self, argument, candidates):
+        return self.evaluate(argument[0], candidates)

-    def evaluate_token(self, argument):
+    def evaluate_token(self, argument, candidates):
         location = argument[0]
         query = argument[1]
         if location.lower() == 'search':
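
The rewritten boolean evaluators are where the candidates threading pays off: AND evaluates its right side only over the left side's matches, OR only over candidates the left side missed, and NOT subtracts from the current candidates instead of from universal_set(). These shortcuts preserve the results whenever get_matches restricted to a candidate set equals the unrestricted result intersected with that set, which holds for per-row predicate matchers like those in ResultCache above. A quick check of the identities with a stand-in matcher:

    def matches(pred, candidates):
        # stand-in for get_matches: filter the candidate ids by a predicate
        return set(x for x in candidates if pred(x))

    universe = set(range(10))
    even = lambda x: x % 2 == 0
    small = lambda x: x < 5

    # AND: RHS evaluated only over the LHS matches
    l = matches(even, universe)
    assert l.intersection(matches(small, l)) == \
            matches(even, universe) & matches(small, universe)

    # OR: RHS evaluated only over candidates the LHS missed
    assert l.union(matches(small, universe - l)) == \
            matches(even, universe) | matches(small, universe)

    # NOT: subtract from the current candidates, not the universe
    assert universe - matches(even, universe) == set([1, 3, 5, 7, 9])
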
@@ -224,17 +239,27 @@ class SearchQueryParser(object):
                     raise ParseException(query, len(query), 'undefined saved search', self)
                 if self.recurse_level > 5:
                     self.searches_seen.add(query)
-                return self._parse(saved_searches().lookup(query))
+                return self._parse(saved_searches().lookup(query), candidates)
             except: # convert all exceptions (e.g., missing key) to a parse error
                 raise ParseException(query, len(query), 'undefined saved search', self)
-        return self.get_matches(location, query)
+        return self._get_matches(location, query, candidates)

+    def _get_matches(self, location, query, candidates):
+        if self.optimize:
+            return self.get_matches(location, query, candidates=candidates)
+        else:
+            return self.get_matches(location, query)
+
-    def get_matches(self, location, query):
+    def get_matches(self, location, query, candidates=None):
         '''
         Should return the set of matches for :param:`location` and :param:`query`.
+
+        The search must be performed over all entries if :param:`candidates` is
+        None, otherwise only over the items in candidates.
+
         :param:`location` is one of the items in :member:`SearchQueryParser.DEFAULT_LOCATIONS`.
         :param:`query` is a string literal.
+        :param:`candidates`: None, or a subset of the set returned by :meth:`universal_set`.
         '''
         return set([])
@@ -561,7 +586,7 @@ class Tester(SearchQueryParser):
     def universal_set(self):
         return self._universal_set

-    def get_matches(self, location, query):
+    def get_matches(self, location, query, candidates=None):
         location = location.lower()
         if location in self.fields.keys():
             getter = operator.itemgetter(self.fields[location])
@@ -573,8 +598,13 @@ class Tester(SearchQueryParser):
         if not query:
             return set([])
         query = query.lower()
-        return set(key for key, val in self.texts.items() \
-                   if query and query in getattr(getter(val), 'lower', lambda : '')())
+        if candidates:
+            return set(key for key, val in self.texts.items() \
+                       if key in candidates and query and query
+                       in getattr(getter(val), 'lower', lambda : '')())
+        else:
+            return set(key for key, val in self.texts.items() \
+                       if query and query in getattr(getter(val), 'lower', lambda : '')())
@@ -592,6 +622,7 @@ class Tester(SearchQueryParser):

 def main(args=sys.argv):
+    print 'testing unoptimized'
     tester = Tester(['authors', 'author', 'series', 'formats', 'format',
         'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
         'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',
@@ -601,6 +632,16 @@ def main(args=sys.argv):
         print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
         return 1

+    print '\n\ntesting optimized'
+    tester = Tester(['authors', 'author', 'series', 'formats', 'format',
+        'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
+        'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',
+        'all', 'search'], test=True, optimize=True)
+    failed = tester.run_tests()
+    if tester._tests_failed or failed:
+        print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
+        return 1
+
     return 0

 if __name__ == '__main__':