mirror of https://github.com/kovidgoyal/calibre.git

commit 04bbda5e5a: merge from trunk
@@ -29,7 +29,7 @@ class ANDROID(USBMS):
         # Motorola
         0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100],
                    0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
-                   0x4286 : [0x216] },
+                   0x4286 : [0x216], 0x42b3 : [0x216] },

         # Sony Ericsson
         0xfce : { 0xd12e : [0x0100]},
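The table being extended maps a USB vendor id to its product ids and the BCD revisions the driver will claim; this hunk registers one more Motorola product id (0x42b3). A standalone sketch of how such a table is consulted (the helper below is illustrative, not the driver's API):

    # Schematic lookup against a vendor -> product -> [BCD] table.
    VENDOR_PRODUCT = {
        0x22b8: {0x41d9: [0x216], 0x42b3: [0x216]},  # Motorola (subset)
        0xfce:  {0xd12e: [0x0100]},                  # Sony Ericsson
    }

    def is_recognized(vendor, product, bcd):
        return bcd in VENDOR_PRODUCT.get(vendor, {}).get(product, [])

    assert is_recognized(0x22b8, 0x42b3, 0x216)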
@@ -324,14 +324,16 @@ class Metadata(object):
         if metadata is None:
             traceback.print_stack()
             return
-        metadata = copy.deepcopy(metadata)
-        if '#value#' not in metadata:
-            if metadata['datatype'] == 'text' and metadata['is_multiple']:
-                metadata['#value#'] = []
+        m = {}
+        for k in metadata:
+            m[k] = copy.copy(metadata[k])
+        if '#value#' not in m:
+            if m['datatype'] == 'text' and m['is_multiple']:
+                m['#value#'] = []
             else:
-                metadata['#value#'] = None
+                m['#value#'] = None
         _data = object.__getattribute__(self, '_data')
-        _data['user_metadata'][field] = metadata
+        _data['user_metadata'][field] = m

     def template_to_attribute(self, other, ops):
         '''
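The change above stops deep-copying the caller's metadata dict and instead copies it one key at a time with copy.copy, so only the top level and each top-level value are duplicated. A standalone sketch of the semantics (sample data, not calibre code):

    import copy

    meta = {'is_multiple': True, 'display': {'names': ['a', 'b']}}

    # Per-key shallow copy, as in the hunk above.
    m = {}
    for k in meta:
        m[k] = copy.copy(meta[k])

    m['display']['extra'] = 1                  # the copied dict is distinct...
    assert 'extra' not in meta['display']
    m['display']['names'].append('c')          # ...but objects nested two
    assert meta['display']['names'] == ['a', 'b', 'c']   # levels down are shared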
@@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'
 Transform OEB content into plain text
 '''

-import os
 import re

 from lxml import etree
@@ -33,6 +32,15 @@ BLOCK_STYLES = [
     'block',
 ]

+HEADING_TAGS = [
+    'h1',
+    'h2',
+    'h3',
+    'h4',
+    'h5',
+    'h6',
+]
+
 SPACE_TAGS = [
     'td',
     'br',
@@ -47,6 +55,10 @@ class TXTMLizer(object):
         self.log.info('Converting XHTML to TXT...')
         self.oeb_book = oeb_book
         self.opts = opts
+        self.toc_ids = []
+        self.last_was_heading = False

+        self.create_flat_toc(self.oeb_book.toc)
+
         return self.mlize_spine()
@@ -58,8 +70,11 @@ class TXTMLizer(object):
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
             content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
             content = self.remove_newlines(content)
-            output += self.dump_text(etree.fromstring(content), stylizer)
-        output = self.cleanup_text(u''.join(output))
+            output += self.dump_text(etree.fromstring(content), stylizer, item)
+            output += '\n\n\n\n\n\n'
+        output = u''.join(output)
+        output = u'\n'.join(l.rstrip() for l in output.splitlines())
+        output = self.cleanup_text(output)

         return output
@@ -68,6 +83,8 @@ class TXTMLizer(object):
         text = text.replace('\r\n', ' ')
         text = text.replace('\n', ' ')
         text = text.replace('\r', ' ')
+        # Condense redundant spaces created by replacing newlines with spaces.
+        text = re.sub(r'[ ]{2,}', ' ', text)

         return text
@@ -80,6 +97,14 @@ class TXTMLizer(object):
             toc.append(u'* %s\n\n' % item.title)
         return ''.join(toc)

+    def create_flat_toc(self, nodes):
+        '''
+        Turns a hierarchical list of TOC href's into a flat list.
+        '''
+        for item in nodes:
+            self.toc_ids.append(item.href)
+            self.create_flat_toc(item.nodes)
+
     def cleanup_text(self, text):
         self.log.debug('\tClean up text...')
         # Replace bad characters.
@@ -92,7 +117,7 @@ class TXTMLizer(object):
         text = text.replace('\f+', ' ')

         # Single line paragraph.
-        text = re.sub('(?<=.)%s(?=.)' % os.linesep, ' ', text)
+        text = re.sub('(?<=.)\n(?=.)', ' ', text)

         # Remove multiple spaces.
         text = re.sub('[ ]{2,}', ' ', text)
@@ -101,13 +126,19 @@ class TXTMLizer(object):
         text = re.sub('\n[ ]+\n', '\n\n', text)
         if self.opts.remove_paragraph_spacing:
             text = re.sub('\n{2,}', '\n', text)
-            text = re.sub('(?imu)^(?=.)', '\t', text)
+            text = re.sub(r'(?msu)^(?P<t>[^\t\n]+?)$', lambda mo: u'%s\n\n' % mo.group('t'), text)
+            text = re.sub(r'(?msu)(?P<b>[^\n])\n+(?P<t>[^\t\n]+?)(?=\n)', lambda mo: '%s\n\n\n\n\n\n%s' % (mo.group('b'), mo.group('t')), text)
         else:
-            text = re.sub('\n{3,}', '\n\n', text)
+            text = re.sub('\n{7,}', '\n\n\n\n\n\n', text)

         # Replace spaces at the beginning and end of lines
+        # We don't replace tabs because those are only added
+        # when remove paragraph spacing is enabled.
         text = re.sub('(?imu)^[ ]+', '', text)
         text = re.sub('(?imu)[ ]+$', '', text)

+        # Remove empty space and newlines at the beginning of the document.
+        text = re.sub(r'(?u)^[ \n]+', '', text)
+
         if self.opts.max_line_length:
             max_length = self.opts.max_line_length
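The first new pattern in the remove_paragraph_spacing branch targets lines that contain no tab, i.e. lines that were not marked as paragraphs by dump_text's tab-indenting (see the later hunks), and forces a blank line after them. A quick standalone check of its behaviour on made-up input:

    import re

    text = u'\tfirst paragraph\nChapter One\n\tsecond paragraph'
    # Lines containing no tab (e.g. headings) get a blank line appended;
    # tab-indented paragraph lines are left untouched.
    out = re.sub(r'(?msu)^(?P<t>[^\t\n]+?)$',
                 lambda mo: u'%s\n\n' % mo.group('t'), text)
    assert out == u'\tfirst paragraph\nChapter One\n\n\n\tsecond paragraph'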
@@ -145,13 +176,11 @@ class TXTMLizer(object):

         return text

-    def dump_text(self, elem, stylizer, end=''):
+    def dump_text(self, elem, stylizer, page):
         '''
         @elem: The element in the etree that we are working on.
         @stylizer: The style information attached to the element.
-        @end: The last two characters of the text from the previous element.
-              This is used to determine if a blank line is needed when starting
-              a new block element.
+        @page: OEB page used to determine absolute urls.
         '''

         if not isinstance(elem.tag, basestring) \
@@ -170,13 +199,22 @@ class TXTMLizer(object):
             return ['']

         tag = barename(elem.tag)
+        tag_id = elem.attrib.get('id', None)
         in_block = False
+        in_heading = False
+
+        # Are we in a heading?
+        # This can either be a heading tag or a TOC item.
+        if tag in HEADING_TAGS or '%s#%s' % (page.href, tag_id) in self.toc_ids:
+            in_heading = True
+            if not self.last_was_heading:
+                text.append('\n\n\n\n\n\n')

         # Are we in a paragraph block?
         if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
+            if self.opts.remove_paragraph_spacing and not in_heading:
+                text.append(u'\t')
             in_block = True
-            if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text:
-                text.append(u'\n\n')

         if tag in SPACE_TAGS:
             text.append(u' ')
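In dump_text, an element now counts as a heading either because its tag is in HEADING_TAGS or because the flattened TOC contains an anchor of the form page.href#id pointing at it. A standalone sketch of that test (all values below are made up for illustration):

    HEADING_TAGS = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
    toc_ids = ['index.html#chapter-3']        # as built by create_flat_toc()

    tag, tag_id, page_href = 'div', 'chapter-3', 'index.html'
    in_heading = tag in HEADING_TAGS or '%s#%s' % (page_href, tag_id) in toc_ids
    assert in_heading   # a plain <div> is treated as a heading via the TOC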
@@ -185,14 +223,17 @@ class TXTMLizer(object):
         if hasattr(elem, 'text') and elem.text:
             text.append(elem.text)

+        # Recurse down into tags within the tag we are in.
         for item in elem:
-            en = u''
-            if len(text) >= 2:
-                en = text[-1][-2:]
-            text += self.dump_text(item, stylizer, en)
+            text += self.dump_text(item, stylizer, page)

         if in_block:
             text.append(u'\n\n')
+        if in_heading:
+            text.append(u'\n')
+            self.last_was_heading = True
+        else:
+            self.last_was_heading = False

         if hasattr(elem, 'tail') and elem.tail:
             text.append(elem.tail)
@@ -637,7 +637,7 @@ class DeviceMixin(object): # {{{
         self.device_manager.mount_device(kls=FOLDER_DEVICE, kind='folder', path=dir)

     def connect_to_bambook(self):
         self.device_manager.mount_device(kls=BAMBOOKWifi, kind='bambook',
                                          path=BAMBOOK.settings().extra_customization)

     def connect_to_itunes(self):
@@ -1266,8 +1266,8 @@ class DeviceMixin(object): # {{{
         # Force a reset if the caches are not initialized
         if reset or not hasattr(self, 'db_book_title_cache'):
             # Build a cache (map) of the library, so the search isn't On**2
-            self.db_book_title_cache = {}
-            self.db_book_uuid_cache = {}
+            db_book_title_cache = {}
+            db_book_uuid_cache = {}
             # It might be possible to get here without having initialized the
             # library view. In this case, simply give up
             try:
@@ -1278,8 +1278,8 @@ class DeviceMixin(object): # {{{
             for id in db.data.iterallids():
                 mi = db.get_metadata(id, index_is_id=True)
                 title = clean_string(mi.title)
-                if title not in self.db_book_title_cache:
-                    self.db_book_title_cache[title] = \
+                if title not in db_book_title_cache:
+                    db_book_title_cache[title] = \
                         {'authors':{}, 'author_sort':{}, 'db_ids':{}}
                 # If there are multiple books in the library with the same title
                 # and author, then remember the last one. That is OK, because as
@@ -1287,12 +1287,14 @@ class DeviceMixin(object): # {{{
                 # as another.
                 if mi.authors:
                     authors = clean_string(authors_to_string(mi.authors))
-                    self.db_book_title_cache[title]['authors'][authors] = mi
+                    db_book_title_cache[title]['authors'][authors] = mi
                 if mi.author_sort:
                     aus = clean_string(mi.author_sort)
-                    self.db_book_title_cache[title]['author_sort'][aus] = mi
-                self.db_book_title_cache[title]['db_ids'][mi.application_id] = mi
-                self.db_book_uuid_cache[mi.uuid] = mi
+                    db_book_title_cache[title]['author_sort'][aus] = mi
+                db_book_title_cache[title]['db_ids'][mi.application_id] = mi
+                db_book_uuid_cache[mi.uuid] = mi
+            self.db_book_title_cache = db_book_title_cache
+            self.db_book_uuid_cache = db_book_uuid_cache

             # Now iterate through all the books on the device, setting the
             # in_library field. If the UUID matches a book in the library, then
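The three hunks above serve one change: the title and uuid caches are now built in plain locals and bound to self only after the loop completes, so code that reads self.db_book_title_cache while a rebuild is in progress can only ever see a complete cache, old or new. A schematic of the pattern (the record shape below is made up):

    class Library(object):
        def build_caches(self, books):
            db_book_title_cache = {}
            db_book_uuid_cache = {}
            for b in books:
                db_book_title_cache.setdefault(b['title'], []).append(b)
                db_book_uuid_cache[b['uuid']] = b
            # Publish each cache with a single attribute assignment.
            self.db_book_title_cache = db_book_title_cache
            self.db_book_uuid_cache = db_book_uuid_cache

    lib = Library()
    lib.build_caches([{'title': 'A Book', 'uuid': 'u-1'}])
    assert 'A Book' in lib.db_book_title_cache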
|
@ -181,7 +181,7 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
self.search_restriction = ''
|
self.search_restriction = ''
|
||||||
self.field_metadata = field_metadata
|
self.field_metadata = field_metadata
|
||||||
self.all_search_locations = field_metadata.get_search_terms()
|
self.all_search_locations = field_metadata.get_search_terms()
|
||||||
SearchQueryParser.__init__(self, self.all_search_locations)
|
SearchQueryParser.__init__(self, self.all_search_locations, optimize=True)
|
||||||
self.build_date_relop_dict()
|
self.build_date_relop_dict()
|
||||||
self.build_numeric_relop_dict()
|
self.build_numeric_relop_dict()
|
||||||
|
|
||||||
@@ -264,7 +264,7 @@ class ResultCache(SearchQueryParser): # {{{
                     '<=':[2, relop_le]
                 }

-    def get_dates_matches(self, location, query):
+    def get_dates_matches(self, location, query, candidates):
         matches = set([])
         if len(query) < 2:
             return matches
@@ -274,13 +274,15 @@ class ResultCache(SearchQueryParser): # {{{
         loc = self.field_metadata[location]['rec_index']

         if query == 'false':
-            for item in self._data:
+            for id_ in candidates:
+                item = self._data[id_]
                 if item is None: continue
                 if item[loc] is None or item[loc] <= UNDEFINED_DATE:
                     matches.add(item[0])
             return matches
         if query == 'true':
-            for item in self._data:
+            for id_ in candidates:
+                item = self._data[id_]
                 if item is None: continue
                 if item[loc] is not None and item[loc] > UNDEFINED_DATE:
                     matches.add(item[0])
@@ -319,7 +321,8 @@ class ResultCache(SearchQueryParser): # {{{
             field_count = query.count('-') + 1
         else:
             field_count = query.count('/') + 1
-        for item in self._data:
+        for id_ in candidates:
+            item = self._data[id_]
             if item is None or item[loc] is None: continue
             if relop(item[loc], qd, field_count):
                 matches.add(item[0])
@@ -335,7 +338,7 @@ class ResultCache(SearchQueryParser): # {{{
                  '<=':[2, lambda r, q: r <= q]
                 }

-    def get_numeric_matches(self, location, query, val_func = None):
+    def get_numeric_matches(self, location, query, candidates, val_func = None):
         matches = set([])
         if len(query) == 0:
             return matches
@@ -381,7 +384,8 @@ class ResultCache(SearchQueryParser): # {{{
         except:
             return matches

-        for item in self._data:
+        for id_ in candidates:
+            item = self._data[id_]
             if item is None:
                 continue
             v = val_func(item)
@@ -393,8 +397,13 @@ class ResultCache(SearchQueryParser): # {{{
                 matches.add(item[0])
         return matches

-    def get_matches(self, location, query, allow_recursion=True):
+    def get_matches(self, location, query, allow_recursion=True, candidates=None):
         matches = set([])
+        if candidates is None:
+            candidates = self.universal_set()
+        if len(candidates) == 0:
+            return matches
+
         if query and query.strip():
             # get metadata key associated with the search term. Eliminates
             # dealing with plurals and other aliases
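Each get_*_matches method now walks an explicit candidates set of row ids instead of the whole of self._data, so a boolean query can restrict every sub-search to the rows matched so far. A schematic version of the pattern (toy data, not the ResultCache types):

    data = {1: 'dogs', 2: 'cats', 3: 'dogfish', 4: None}

    def get_matches(query, candidates):
        # Only ids named in `candidates` are examined at all.
        return set(i for i in candidates
                   if data[i] is not None and query in data[i])

    universe = set(data)
    lhs = get_matches('dog', universe)    # {1, 3}: full scan
    rhs = get_matches('fish', lhs)        # AND clause: scans just two rows
    assert rhs == {3}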
@@ -476,7 +485,8 @@ class ResultCache(SearchQueryParser): # {{{
         else:
             q = query

-        for item in self._data:
+        for id_ in candidates:
+            item = self._data[id_]
             if item is None: continue

             if col_datatype[loc] == 'bool': # complexity caused by the two-/three-value tweak
@@ -195,8 +195,8 @@ class CustomColumns(object):
         data = self.custom_column_num_map[num]
         row = self.data._data[idx] if index_is_id else self.data[idx]
         ans = row[self.FIELD_MAP[data['num']]]
-        if data['is_multiple'] and data['datatype'] == 'text':
-            ans = ans.split('|') if ans else []
+        if ans and data['is_multiple'] and data['datatype'] == 'text':
+            ans = ans.split('|')
             if data['display'].get('sort_alpha', False):
                 ans.sort(cmp=lambda x,y:cmp(x.lower(), y.lower()))
         return ans
@@ -256,7 +256,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             'pubdate',
             'flags',
             'uuid',
-            'has_cover'
+            'has_cover',
+            ('au_map', 'authors', 'author', 'aum_sortconcat(link.id, authors.name, authors.sort)')
         ]
         lines = []
         for col in columns:
@@ -273,9 +274,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

         self.FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'timestamp':3,
             'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8,
-            'publisher':9, 'series_index':10,
-            'sort':11, 'author_sort':12, 'formats':13, 'isbn':14, 'path':15,
-            'lccn':16, 'pubdate':17, 'flags':18, 'uuid':19, 'cover':20}
+            'publisher':9, 'series_index':10, 'sort':11, 'author_sort':12,
+            'formats':13, 'isbn':14, 'path':15, 'lccn':16, 'pubdate':17,
+            'flags':18, 'uuid':19, 'cover':20, 'au_map':21}

         for k,v in self.FIELD_MAP.iteritems():
             self.field_metadata.set_field_record_index(k, v, prefer_custom=False)
@@ -687,9 +688,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         Convenience method to return metadata as a :class:`Metadata` object.
         Note that the list of formats is not verified.
         '''
+        row = self.data._data[idx] if index_is_id else self.data[idx]
+        fm = self.FIELD_MAP
+
         self.gm_count += 1
-        mi = self.data.get(idx, self.FIELD_MAP['all_metadata'],
-                row_is_id = index_is_id)
+        mi = row[self.FIELD_MAP['all_metadata']]
         if mi is not None:
             if get_cover:
                 # Always get the cover, because the value can be wrong if the
@@ -699,49 +702,46 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

         self.gm_missed += 1
         mi = Metadata(None)
-        self.data.set(idx, self.FIELD_MAP['all_metadata'], mi,
-                row_is_id = index_is_id)
+        self.data.set(idx, fm['all_metadata'], mi, row_is_id = index_is_id)

-        aut_list = self.authors_with_sort_strings(idx, index_is_id=index_is_id)
+        aut_list = row[fm['au_map']]
+        aut_list = [p.split(':::') for p in aut_list.split(':#:')]
         aum = []
         aus = {}
         for (author, author_sort) in aut_list:
             aum.append(author)
-            aus[author] = author_sort
-        mi.title = self.title(idx, index_is_id=index_is_id)
+            aus[author] = author_sort.replace('|', ',')
+        mi.title = row[fm['title']]
         mi.authors = aum
-        mi.author_sort = self.author_sort(idx, index_is_id=index_is_id)
+        mi.author_sort = row[fm['author_sort']]
         mi.author_sort_map = aus
-        mi.comments = self.comments(idx, index_is_id=index_is_id)
-        mi.publisher = self.publisher(idx, index_is_id=index_is_id)
-        mi.timestamp = self.timestamp(idx, index_is_id=index_is_id)
-        mi.pubdate = self.pubdate(idx, index_is_id=index_is_id)
-        mi.uuid = self.uuid(idx, index_is_id=index_is_id)
-        mi.title_sort = self.title_sort(idx, index_is_id=index_is_id)
-        mi.formats = self.formats(idx, index_is_id=index_is_id,
-                verify_formats=False)
-        if hasattr(mi.formats, 'split'):
-            mi.formats = mi.formats.split(',')
-        else:
-            mi.formats = None
-        tags = self.tags(idx, index_is_id=index_is_id)
+        mi.comments = row[fm['comments']]
+        mi.publisher = row[fm['publisher']]
+        mi.timestamp = row[fm['timestamp']]
+        mi.pubdate = row[fm['pubdate']]
+        mi.uuid = row[fm['uuid']]
+        mi.title_sort = row[fm['sort']]
+        formats = row[fm['formats']]
+        if not formats:
+            formats = None
+        mi.formats = formats
+        tags = row[fm['tags']]
         if tags:
             mi.tags = [i.strip() for i in tags.split(',')]
-        mi.series = self.series(idx, index_is_id=index_is_id)
+        mi.series = row[fm['series']]
         if mi.series:
-            mi.series_index = self.series_index(idx, index_is_id=index_is_id)
-        mi.rating = self.rating(idx, index_is_id=index_is_id)
-        mi.isbn = self.isbn(idx, index_is_id=index_is_id)
+            mi.series_index = row[fm['series_index']]
+        mi.rating = row[fm['rating']]
+        mi.isbn = row[fm['isbn']]
         id = idx if index_is_id else self.id(idx)
         mi.application_id = id
         mi.id = id
-        for key,meta in self.field_metadata.iteritems():
-            if meta['is_custom']:
-                mi.set_user_metadata(key, meta)
-                mi.set(key, val=self.get_custom(idx, label=meta['label'],
-                                                index_is_id=index_is_id),
-                        extra=self.get_custom_extra(idx, label=meta['label'],
-                                                    index_is_id=index_is_id))
+        for key, meta in self.field_metadata.custom_iteritems():
+            mi.set_user_metadata(key, meta)
+            mi.set(key, val=self.get_custom(idx, label=meta['label'],
+                                            index_is_id=index_is_id),
+                   extra=self.get_custom_extra(idx, label=meta['label'],
+                                               index_is_id=index_is_id))
         if get_cover:
             mi.cover = self.cover(id, index_is_id=True, as_path=True)
         return mi
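get_metadata now decodes the author map straight from the cached au_map column: author:::author_sort pairs joined with :#:, with commas in the stored strings escaped as | (the encoding side appears in the set_author_sort hunk further down). A standalone round-trip on sample data:

    aum = [('John Doe', 'Doe| John'), ('Jane Roe', 'Roe| Jane')]   # as stored
    encoded = ':#:'.join([':::'.join((au, aus)) for (au, aus) in aum])

    decoded = [p.split(':::') for p in encoded.split(':#:')]
    aus_map = {}
    for (author, author_sort) in decoded:
        aus_map[author] = author_sort.replace('|', ',')
    assert aus_map == {'John Doe': 'Doe, John', 'Jane Roe': 'Roe, Jane'}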
@@ -877,18 +877,17 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

     def formats(self, index, index_is_id=False, verify_formats=True):
         ''' Return available formats as a comma separated list or None if there are no available formats '''
-        id = index if index_is_id else self.id(index)
-        try:
-            formats = self.conn.get('SELECT format FROM data WHERE book=?', (id,))
-            formats = map(lambda x:x[0], formats)
-        except:
+        id_ = index if index_is_id else self.id(index)
+        formats = self.data.get(id_, self.FIELD_MAP['formats'], row_is_id=True)
+        if not formats:
             return None
         if not verify_formats:
-            return ','.join(formats)
+            return formats
+        formats = formats.split(',')
         ans = []
-        for format in formats:
-            if self.format_abspath(id, format, index_is_id=True) is not None:
-                ans.append(format)
+        for fmt in formats:
+            if self.format_abspath(id_, fmt, index_is_id=True) is not None:
+                ans.append(fmt)
         if not ans:
             return None
         return ','.join(ans)
@@ -1607,6 +1606,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                       ','.join([a.replace(',', '|') for a in authors]),
                       row_is_id=True)
         self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True)
+        aum = self.authors_with_sort_strings(id, index_is_id=True)
+        self.data.set(id, self.FIELD_MAP['au_map'],
+            ':#:'.join([':::'.join((au.replace(',', '|'), aus)) for (au, aus) in aum]),
+            row_is_id=True)

     def set_authors(self, id, authors, notify=True, commit=True):
         '''
|
@ -180,6 +180,15 @@ class FieldMetadata(dict):
|
|||||||
'search_terms':['author_sort'],
|
'search_terms':['author_sort'],
|
||||||
'is_custom':False,
|
'is_custom':False,
|
||||||
'is_category':False}),
|
'is_category':False}),
|
||||||
|
('au_map', {'table':None,
|
||||||
|
'column':None,
|
||||||
|
'datatype':'text',
|
||||||
|
'is_multiple':',',
|
||||||
|
'kind':'field',
|
||||||
|
'name':None,
|
||||||
|
'search_terms':[],
|
||||||
|
'is_custom':False,
|
||||||
|
'is_category':False}),
|
||||||
('comments', {'table':None,
|
('comments', {'table':None,
|
||||||
'column':None,
|
'column':None,
|
||||||
'datatype':'text',
|
'datatype':'text',
|
||||||
@@ -400,6 +409,12 @@ class FieldMetadata(dict):
         for key in self._tb_cats:
             yield (key, self._tb_cats[key])

+    def custom_iteritems(self):
+        for key in self._tb_cats:
+            fm = self._tb_cats[key]
+            if fm['is_custom']:
+                yield (key, self._tb_cats[key])
+
     def items(self):
         return list(self.iteritems())
|
@ -87,6 +87,23 @@ class SortedConcatenate(object):
|
|||||||
class SafeSortedConcatenate(SortedConcatenate):
|
class SafeSortedConcatenate(SortedConcatenate):
|
||||||
sep = '|'
|
sep = '|'
|
||||||
|
|
||||||
|
class AumSortedConcatenate(object):
|
||||||
|
'''String concatenation aggregator for the author sort map'''
|
||||||
|
def __init__(self):
|
||||||
|
self.ans = {}
|
||||||
|
|
||||||
|
def step(self, ndx, author, sort):
|
||||||
|
if author is not None:
|
||||||
|
self.ans[ndx] = author + ':::' + sort
|
||||||
|
|
||||||
|
def finalize(self):
|
||||||
|
keys = self.ans.keys()
|
||||||
|
if len(keys) == 0:
|
||||||
|
return None
|
||||||
|
if len(keys) == 1:
|
||||||
|
return self.ans[keys[0]]
|
||||||
|
return ':#:'.join([self.ans[v] for v in sorted(keys)])
|
||||||
|
|
||||||
class Connection(sqlite.Connection):
|
class Connection(sqlite.Connection):
|
||||||
|
|
||||||
def get(self, *args, **kw):
|
def get(self, *args, **kw):
|
||||||
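The aggregate can be exercised with the stdlib sqlite3 module in the same way the next hunk registers it on calibre's connection. The table and rows below are invented for the demo, and the class is a condensed copy of the one above:

    import sqlite3

    class AumSortedConcatenate(object):
        '''Condensed copy of the aggregate above, for a standalone demo.'''
        def __init__(self):
            self.ans = {}
        def step(self, ndx, author, sort):
            if author is not None:
                self.ans[ndx] = author + ':::' + sort
        def finalize(self):
            keys = sorted(self.ans)
            if not keys:
                return None
            return ':#:'.join(self.ans[k] for k in keys)

    conn = sqlite3.connect(':memory:')
    conn.create_aggregate('aum_sortconcat', 3, AumSortedConcatenate)
    conn.execute('CREATE TABLE a(id INTEGER, name TEXT, sort TEXT)')
    conn.executemany('INSERT INTO a VALUES (?, ?, ?)',
                     [(2, 'Jane Roe', 'Roe, Jane'), (1, 'John Doe', 'Doe, John')])
    print(conn.execute('SELECT aum_sortconcat(id, name, sort) FROM a').fetchone()[0])
    # -> John Doe:::Doe, John:#:Jane Roe:::Roe, Jane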
@@ -155,6 +172,7 @@ class DBThread(Thread):
         c_ext_loaded = load_c_extensions(self.conn)
         self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row)
         self.conn.create_aggregate('concat', 1, Concatenate)
+        self.conn.create_aggregate('aum_sortconcat', 3, AumSortedConcatenate)
         if not c_ext_loaded:
             self.conn.create_aggregate('sortconcat', 2, SortedConcatenate)
             self.conn.create_aggregate('sort_concat', 2, SafeSortedConcatenate)
|
@ -118,8 +118,9 @@ class SearchQueryParser(object):
|
|||||||
failed.append(test[0])
|
failed.append(test[0])
|
||||||
return failed
|
return failed
|
||||||
|
|
||||||
def __init__(self, locations, test=False):
|
def __init__(self, locations, test=False, optimize=False):
|
||||||
self._tests_failed = False
|
self._tests_failed = False
|
||||||
|
self.optimize = optimize
|
||||||
# Define a token
|
# Define a token
|
||||||
standard_locations = map(lambda x : CaselessLiteral(x)+Suppress(':'),
|
standard_locations = map(lambda x : CaselessLiteral(x)+Suppress(':'),
|
||||||
locations)
|
locations)
|
||||||
@ -182,38 +183,52 @@ class SearchQueryParser(object):
|
|||||||
# empty the list of searches used for recursion testing
|
# empty the list of searches used for recursion testing
|
||||||
self.recurse_level = 0
|
self.recurse_level = 0
|
||||||
self.searches_seen = set([])
|
self.searches_seen = set([])
|
||||||
return self._parse(query)
|
candidates = self.universal_set()
|
||||||
|
return self._parse(query, candidates)
|
||||||
|
|
||||||
# this parse is used internally because it doesn't clear the
|
# this parse is used internally because it doesn't clear the
|
||||||
# recursive search test list. However, we permit seeing the
|
# recursive search test list. However, we permit seeing the
|
||||||
# same search a few times because the search might appear within
|
# same search a few times because the search might appear within
|
||||||
# another search.
|
# another search.
|
||||||
def _parse(self, query):
|
def _parse(self, query, candidates=None):
|
||||||
self.recurse_level += 1
|
self.recurse_level += 1
|
||||||
res = self._parser.parseString(query)[0]
|
res = self._parser.parseString(query)[0]
|
||||||
t = self.evaluate(res)
|
if candidates is None:
|
||||||
|
candidates = self.universal_set()
|
||||||
|
t = self.evaluate(res, candidates)
|
||||||
self.recurse_level -= 1
|
self.recurse_level -= 1
|
||||||
return t
|
return t
|
||||||
|
|
||||||
def method(self, group_name):
|
def method(self, group_name):
|
||||||
return getattr(self, 'evaluate_'+group_name)
|
return getattr(self, 'evaluate_'+group_name)
|
||||||
|
|
||||||
def evaluate(self, parse_result):
|
def evaluate(self, parse_result, candidates):
|
||||||
return self.method(parse_result.getName())(parse_result)
|
return self.method(parse_result.getName())(parse_result, candidates)
|
||||||
|
|
||||||
def evaluate_and(self, argument):
|
def evaluate_and(self, argument, candidates):
|
||||||
return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
|
# RHS checks only those items matched by LHS
|
||||||
|
# returns result of RHS check: RHmatches(LHmatches(c))
|
||||||
|
# return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
|
||||||
|
l = self.evaluate(argument[0], candidates)
|
||||||
|
return l.intersection(self.evaluate(argument[1], l))
|
||||||
|
|
||||||
def evaluate_or(self, argument):
|
def evaluate_or(self, argument, candidates):
|
||||||
return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
|
# RHS checks only those elements not matched by LHS
|
||||||
|
# returns LHS union RHS: LHmatches(c) + RHmatches(c-LHmatches(c))
|
||||||
|
# return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
|
||||||
|
l = self.evaluate(argument[0], candidates)
|
||||||
|
return l.union(self.evaluate(argument[1], candidates.difference(l)))
|
||||||
|
|
||||||
def evaluate_not(self, argument):
|
def evaluate_not(self, argument, candidates):
|
||||||
return self.universal_set().difference(self.evaluate(argument[0]))
|
# unary op checks only candidates. Result: list of items matching
|
||||||
|
# returns: c - matches(c)
|
||||||
|
# return self.universal_set().difference(self.evaluate(argument[0]))
|
||||||
|
return candidates.difference(self.evaluate(argument[0], candidates))
|
||||||
|
|
||||||
def evaluate_parenthesis(self, argument):
|
def evaluate_parenthesis(self, argument, candidates):
|
||||||
return self.evaluate(argument[0])
|
return self.evaluate(argument[0], candidates)
|
||||||
|
|
||||||
def evaluate_token(self, argument):
|
def evaluate_token(self, argument, candidates):
|
||||||
location = argument[0]
|
location = argument[0]
|
||||||
query = argument[1]
|
query = argument[1]
|
||||||
if location.lower() == 'search':
|
if location.lower() == 'search':
|
||||||
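The rewritten boolean operators thread the candidate set through both operands; the saving is in scanning fewer rows, while the result is unchanged under the assumption that restricting a sub-search to a candidate set just intersects its matches with that set. A toy check of that equivalence on made-up sets:

    universe = {1, 2, 3, 4, 5}
    A = {1, 2, 3}     # LHS matches over the full universe
    B = {3, 4}        # RHS matches over the full universe

    # Restricting a sub-search to candidates c is modelled here as "& c".
    # AND: RHS evaluated only over the LHS matches.
    assert A & (B & A) == A & B
    # OR: RHS evaluated only over candidates the LHS did not match.
    assert A | (B & (universe - A)) == A | B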
@@ -224,17 +239,27 @@ class SearchQueryParser(object):
                     raise ParseException(query, len(query), 'undefined saved search', self)
                 if self.recurse_level > 5:
                     self.searches_seen.add(query)
-                return self._parse(saved_searches().lookup(query))
+                return self._parse(saved_searches().lookup(query), candidates)
             except: # convert all exceptions (e.g., missing key) to a parse error
                 raise ParseException(query, len(query), 'undefined saved search', self)
-        return self.get_matches(location, query)
-
-    def get_matches(self, location, query):
+        return self._get_matches(location, query, candidates)
+
+    def _get_matches(self, location, query, candidates):
+        if self.optimize:
+            return self.get_matches(location, query, candidates=candidates)
+        else:
+            return self.get_matches(location, query)
+
+    def get_matches(self, location, query, candidates=None):
         '''
         Should return the set of matches for :param:`location` and :param:`query`.
+
+        The search must be performed over all entries if :param:`candidates` is
+        None, otherwise only over the items in candidates.
+
         :param:`location` is one of the items in :member:`SearchQueryParser.DEFAULT_LOCATIONS`.
         :param:`query` is a string literal.
+        :param:`candidates`: None or a subset of the set returned by :meth:`universal_set`.
         '''
         return set([])
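The _get_matches shim is what keeps the change backward compatible: subclasses written against the old two-argument get_matches keep working as long as optimize stays False, the default. A standalone sketch of the dispatch (not calibre code):

    class Base(object):
        optimize = False
        def _get_matches(self, location, query, candidates):
            if self.optimize:
                return self.get_matches(location, query, candidates=candidates)
            return self.get_matches(location, query)
        def get_matches(self, location, query, candidates=None):
            return set()

    class Legacy(Base):
        def get_matches(self, location, query):   # pre-change signature
            return {1} if query == 'dogs' else set()

    assert Legacy()._get_matches('all', 'dogs', {1, 2}) == {1}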
@@ -561,7 +586,7 @@ class Tester(SearchQueryParser):
     def universal_set(self):
         return self._universal_set

-    def get_matches(self, location, query):
+    def get_matches(self, location, query, candidates=None):
         location = location.lower()
         if location in self.fields.keys():
             getter = operator.itemgetter(self.fields[location])
@@ -573,8 +598,13 @@ class Tester(SearchQueryParser):
         if not query:
             return set([])
         query = query.lower()
-        return set(key for key, val in self.texts.items() \
-                if query and query in getattr(getter(val), 'lower', lambda : '')())
+        if candidates:
+            return set(key for key, val in self.texts.items() \
+                if key in candidates and query and query
+                in getattr(getter(val), 'lower', lambda : '')())
+        else:
+            return set(key for key, val in self.texts.items() \
+                if query and query in getattr(getter(val), 'lower', lambda : '')())
@@ -592,6 +622,7 @@ class Tester(SearchQueryParser):


 def main(args=sys.argv):
+    print 'testing unoptimized'
     tester = Tester(['authors', 'author', 'series', 'formats', 'format',
         'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
         'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',
@@ -601,6 +632,16 @@ def main(args=sys.argv):
         print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
         return 1

+    print '\n\ntesting optimized'
+    tester = Tester(['authors', 'author', 'series', 'formats', 'format',
+        'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
+        'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',
+        'all', 'search'], test=True, optimize=True)
+    failed = tester.run_tests()
+    if tester._tests_failed or failed:
+        print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
+        return 1
+
     return 0

 if __name__ == '__main__':