diff --git a/Changelog.yaml b/Changelog.yaml index 0b5965fa2d..65f307facf 100644 --- a/Changelog.yaml +++ b/Changelog.yaml @@ -20,6 +20,92 @@ # new recipes: # - title: +- version: 1.29.0 + date: 2014-03-21 + + new features: + - title: "Edit Book: Add support for saved searches. Click Search->Saved Searches to bring up a dialog where you can create and manage saved searches" + + - title: "Edit Book: New tool to specify semantics in EPUB books (semantics are items in the guide such as preface, title-page, dedication, etc.). To use it, go to Tools->Set Semantics" + tickets: [1287025] + + - title: "Edit Book: Preview panel: Add a copy selected text action to the context menu" + + - title: "Edit Book: When inserting hyperlinks, allow specifying the text for the hyperlink in the insert hyperlink dialog" + + bug fixes: + - title: "Fix a regression in the previous release that broke downloading metadata for authors with a double initial such as R. A. Salvatore." + tickets: [1294529] + + - title: "Edit book: When generating inline Table of Contents, mark it as such in the guide section of the OPF." + tickets: [1287018] + + - title: "E-book viewer: Fix right margin for last page in a chapter sometimes disappearing when changing font size." + tickets: [1292822] + + - title: "Edit Book: Fix saving of empty files not working" + + - title: "Edit book: Fix a regression in the previous release that broke saving a copy of the current book on linux and OS X" + + - title: "Edit book: Fix syntax highlighting in HTML files breaks if the closing of a comment or processing instruction is at the start of a new line." + + - title: "Edit book: Fix check book failing in the presence of empty ' + _( + ''' + Where to search/replace: +
+
Current file
+
Search only inside the currently opened file
+
All text files
+
Search in all text (HTML) files
+
All style files
+
Search in all style (CSS) files
+
Selected files
+
Search in the files currently selected in the Files Browser
+
Marked text
+
Search only within the marked text in the currently opened file. You can mark text using the Search menu.
+
''')) + + @dynamic_property + def where(self): + wm = {0:'current', 1:'text', 2:'styles', 3:'selected', 4:'selected-text'} + def fget(self): + return wm[self.currentIndex()] + def fset(self, val): + self.setCurrentIndex({v:k for k, v in wm.iteritems()}[val]) + return property(fget=fget, fset=fset) + +class DirectionBox(QComboBox): + + def __init__(self, parent): + QComboBox.__init__(self, parent) + self.addItems([_('Down'), _('Up')]) + self.setToolTip('' + _( + ''' + Direction to search: +
+
Down
+
Search for the next match from your current position
+
Up
+
Search for the previous match from your current position
+
''')) + + @dynamic_property + def direction(self): + def fget(self): + return 'down' if self.currentIndex() == 0 else 'up' + def fset(self, val): + self.setCurrentIndex(1 if val == 'up' else 0) + return property(fget=fget, fset=fset) + +class ModeBox(QComboBox): + + def __init__(self, parent): + QComboBox.__init__(self, parent) + self.addItems([_('Normal'), _('Regex')]) + self.setToolTip('' + _( + '''Select how the search expression is interpreted +
+
Normal
+
The search expression is treated as normal text, calibre will look for the exact text.
+
Regex
+
The search expression is interpreted as a regular expression. See the User Manual for more help on using regular expressions.
+
''')) + + @dynamic_property + def mode(self): + def fget(self): + return 'normal' if self.currentIndex() == 0 else 'regex' + def fset(self, val): + self.setCurrentIndex({'regex':1}.get(val, 0)) + return property(fget=fget, fset=fset) + + class SearchWidget(QWidget): DEFAULT_STATE = { @@ -37,6 +151,8 @@ class SearchWidget(QWidget): } search_triggered = pyqtSignal(object) + save_search = pyqtSignal() + show_saved_searches = pyqtSignal() def __init__(self, parent=None): QWidget.__init__(self, parent) @@ -46,7 +162,9 @@ class SearchWidget(QWidget): self.fl = fl = QLabel(_('&Find:')) fl.setAlignment(Qt.AlignRight | Qt.AlignCenter) - self.find_text = ft = HistoryLineEdit2(self) + self.find_text = ft = HistoryLineEdit(self, _('Clear search history')) + ft.save_search.connect(self.save_search) + ft.show_saved_searches.connect(self.show_saved_searches) ft.initialize('tweak_book_find_edit') ft.returnPressed.connect(lambda : self.search_triggered.emit('find')) fl.setBuddy(ft) @@ -55,7 +173,9 @@ class SearchWidget(QWidget): self.rl = rl = QLabel(_('&Replace:')) rl.setAlignment(Qt.AlignRight | Qt.AlignCenter) - self.replace_text = rt = HistoryLineEdit2(self) + self.replace_text = rt = HistoryLineEdit(self, _('Clear replace history')) + rt.save_search.connect(self.save_search) + rt.show_saved_searches.connect(self.show_saved_searches) rt.initialize('tweak_book_replace_edit') rl.setBuddy(rt) l.addWidget(rl, 1, 0) @@ -76,52 +196,17 @@ class SearchWidget(QWidget): ml.setAlignment(Qt.AlignRight | Qt.AlignCenter) l.addWidget(ml, 2, 0) l.addLayout(ol, 2, 1, 1, 3) - self.mode_box = mb = QComboBox(self) + self.mode_box = mb = ModeBox(self) mb.setSizePolicy(QSizePolicy.Maximum, QSizePolicy.Maximum) - mb.addItems([_('Normal'), _('Regex')]) - mb.setToolTip('' + _( - '''Select how the search expression is interpreted -
-
Normal
-
The search expression is treated as normal text, calibre will look for the exact text.
-
Regex
-
The search expression is interpreted as a regular expression. See the User Manual for more help on using regular expressions.
-
''')) ml.setBuddy(mb) ol.addWidget(mb) - self.where_box = wb = QComboBox(self) - wb.setSizePolicy(QSizePolicy.Maximum, QSizePolicy.Maximum) - wb.addItems([_('Current file'), _('All text files'), _('All style files'), _('Selected files'), _('Marked text')]) - wb.setToolTip('' + _( - ''' - Where to search/replace: -
-
Current file
-
Search only inside the currently opened file
-
All text files
-
Search in all text (HTML) files
-
All style files
-
Search in all style (CSS) files
-
Selected files
-
Search in the files currently selected in the Files Browser
-
Marked text
-
Search only within the marked text in the currently opened file. You can mark text using the Search menu.
-
''')) + self.where_box = wb = WhereBox(self) + wb.setSizePolicy(QSizePolicy.Maximum, QSizePolicy.Fixed) ol.addWidget(wb) - self.direction_box = db = QComboBox(self) + self.direction_box = db = DirectionBox(self) db.setSizePolicy(QSizePolicy.Maximum, QSizePolicy.Maximum) - db.addItems([_('Down'), _('Up')]) - db.setToolTip('' + _( - ''' - Direction to search: -
-
Down
-
Search for the next match from your current position
-
Up
-
Search for the previous match from your current position
-
''')) ol.addWidget(db) self.cs = cs = QCheckBox(_('&Case sensitive')) @@ -145,9 +230,9 @@ class SearchWidget(QWidget): @dynamic_property def mode(self): def fget(self): - return 'normal' if self.mode_box.currentIndex() == 0 else 'regex' + return self.mode_box.mode def fset(self, val): - self.mode_box.setCurrentIndex({'regex':1}.get(val, 0)) + self.mode_box.mode = val self.da.setVisible(self.mode == 'regex') return property(fget=fget, fset=fset) @@ -169,11 +254,10 @@ class SearchWidget(QWidget): @dynamic_property def where(self): - wm = {0:'current', 1:'text', 2:'styles', 3:'selected', 4:'selected-text'} def fget(self): - return wm[self.where_box.currentIndex()] + return self.where_box.where def fset(self, val): - self.where_box.setCurrentIndex({v:k for k, v in wm.iteritems()}[val]) + self.where_box.where = val return property(fget=fget, fset=fset) @dynamic_property @@ -187,9 +271,9 @@ class SearchWidget(QWidget): @dynamic_property def direction(self): def fget(self): - return 'down' if self.direction_box.currentIndex() == 0 else 'up' + return self.direction_box.direction def fset(self, val): - self.direction_box.setCurrentIndex(1 if val == 'up' else 0) + self.direction_box.direction = val return property(fget=fget, fset=fset) @dynamic_property @@ -236,9 +320,11 @@ class SearchWidget(QWidget): regex_cache = {} -class SearchPanel(QWidget): +class SearchPanel(QWidget): # {{{ search_triggered = pyqtSignal(object) + save_search = pyqtSignal() + show_saved_searches = pyqtSignal() def __init__(self, parent=None): QWidget.__init__(self, parent) @@ -257,6 +343,8 @@ class SearchPanel(QWidget): l.addWidget(self.widget) self.restore_state, self.save_state = self.widget.restore_state, self.widget.save_state self.widget.search_triggered.connect(self.search_triggered) + self.widget.save_search.connect(self.save_search) + self.widget.show_saved_searches.connect(self.show_saved_searches) self.pre_fill = self.widget.pre_fill def hide_panel(self): @@ -276,26 +364,639 @@ class 
SearchPanel(QWidget): def set_where(self, val): self.widget.where = val - def get_regex(self, state): - raw = state['find'] - if state['mode'] != 'regex': - raw = regex.escape(raw, special_only=True) - flags = REGEX_FLAGS - if not state['case_sensitive']: - flags |= regex.IGNORECASE - if state['mode'] == 'regex' and state['dot_all']: - flags |= regex.DOTALL - if state['direction'] == 'up': - flags |= regex.REVERSE - ans = regex_cache.get((flags, raw), None) - if ans is None: - ans = regex_cache[(flags, raw)] = regex.compile(raw, flags=flags) - return ans - def keyPressEvent(self, ev): if ev.key() == Qt.Key_Escape: self.hide_panel() ev.accept() else: return QWidget.keyPressEvent(self, ev) +# }}} +class SearchesModel(QAbstractListModel): + + def __init__(self, parent): + QAbstractListModel.__init__(self, parent) + self.searches = tprefs['saved_searches'] + self.filtered_searches = list(xrange(len(self.searches))) + + def rowCount(self, parent=QModelIndex()): + return len(self.filtered_searches) + + def data(self, index, role): + if role == Qt.DisplayRole: + search = self.searches[self.filtered_searches[index.row()]] + return QVariant(search['name']) + if role == Qt.ToolTipRole: + search = self.searches[self.filtered_searches[index.row()]] + tt = '\n'.join((search['find'], search['replace'])) + return QVariant(tt) + if role == Qt.UserRole: + search = self.searches[self.filtered_searches[index.row()]] + return QVariant((self.filtered_searches[index.row()], search)) + return NONE + + def do_filter(self, text): + text = unicode(text) + self.filtered_searches = [] + for i, search in enumerate(self.searches): + if primary_contains(text, search['name']): + self.filtered_searches.append(i) + self.reset() + + def move_entry(self, row, delta): + a, b = row, row + delta + if 0 <= b < len(self.filtered_searches): + ai, bi = self.filtered_searches[a], self.filtered_searches[b] + self.searches[ai], self.searches[bi] = self.searches[bi], self.searches[ai] + 
self.dataChanged.emit(self.index(a), self.index(a)) + self.dataChanged.emit(self.index(b), self.index(b)) + tprefs['saved_searches'] = self.searches + + def add_searches(self, count=1): + self.searches = tprefs['saved_searches'] + self.filtered_searches.extend(xrange(len(self.searches) - 1, len(self.searches) - 1 - count, -1)) + self.reset() + + def remove_searches(self, rows): + rows = sorted(set(rows), reverse=True) + indices = [self.filtered_searches[row] for row in rows] + for row in rows: + self.beginRemoveRows(QModelIndex(), row, row) + del self.filtered_searches[row] + self.endRemoveRows() + for idx in sorted(indices, reverse=True): + del self.searches[idx] + tprefs['saved_searches'] = self.searches + +class EditSearch(Dialog): # {{{ + + def __init__(self, search=None, search_index=-1, parent=None, state=None): + self.search = search or {} + self.original_name = self.search.get('name', None) + self.search_index = search_index + Dialog.__init__(self, _('Edit search'), 'edit-saved-search', parent=parent) + if state is not None: + self.find.setText(state['find']) + self.replace.setText(state['replace']) + self.case_sensitive.setChecked(state['case_sensitive']) + self.dot_all.setChecked(state['dot_all']) + self.mode_box.mode = state.get('mode') + + def sizeHint(self): + ans = Dialog.sizeHint(self) + ans.setWidth(600) + return ans + + def setup_ui(self): + self.l = l = QFormLayout(self) + self.setLayout(l) + + self.search_name = n = QLineEdit(self.search.get('name', ''), self) + n.setPlaceholderText(_('The name with which to save this search')) + l.addRow(_('&Name:'), n) + + self.find = f = QLineEdit(self.search.get('find', ''), self) + f.setPlaceholderText(_('The expression to search for')) + l.addRow(_('&Find:'), f) + + self.replace = r = QLineEdit(self.search.get('replace', ''), self) + r.setPlaceholderText(_('The replace expression')) + l.addRow(_('&Replace:'), r) + + self.case_sensitive = c = QCheckBox(_('Case sensitive')) + 
c.setChecked(self.search.get('case_sensitive', SearchWidget.DEFAULT_STATE['case_sensitive'])) + l.addRow(c) + + self.dot_all = d = QCheckBox(_('Dot matches all')) + d.setChecked(self.search.get('dot_all', SearchWidget.DEFAULT_STATE['dot_all'])) + l.addRow(d) + + self.mode_box = m = ModeBox(self) + self.mode_box.mode = self.search.get('mode', 'regex') + l.addRow(_('&Mode:'), m) + + l.addRow(self.bb) + + def accept(self): + searches = tprefs['saved_searches'] + all_names = {x['name'] for x in searches} - {self.original_name} + n = unicode(self.search_name.text()).strip() + search = self.search + if not n: + return error_dialog(self, _('Must specify name'), _( + 'You must specify a search name'), show=True) + if n in all_names: + return error_dialog(self, _('Name exists'), _( + 'Another search with the name %s already exists') % n, show=True) + search['name'] = n + + f = unicode(self.find.text()) + if not f: + return error_dialog(self, _('Must specify find'), _( + 'You must specify a find expression'), show=True) + search['find'] = f + + r = unicode(self.replace.text()) + search['replace'] = r + + search['dot_all'] = bool(self.dot_all.isChecked()) + search['case_sensitive'] = bool(self.case_sensitive.isChecked()) + search['mode'] = self.mode_box.mode + + if self.search_index == -1: + searches.append(search) + else: + searches[self.search_index] = search + tprefs.set('saved_searches', searches) + + Dialog.accept(self) +# }}} + +class SearchDelegate(QStyledItemDelegate): + + def sizeHint(self, *args): + ans = QStyledItemDelegate.sizeHint(self, *args) + ans.setHeight(ans.height() + 4) + return ans + +class SavedSearches(Dialog): + + run_saved_searches = pyqtSignal(object, object) + + def __init__(self, parent=None): + Dialog.__init__(self, _('Saved Searches'), 'saved-searches', parent=parent) + + def sizeHint(self): + return QSize(800, 675) + + def setup_ui(self): + self.l = l = QVBoxLayout(self) + self.setLayout(l) + + self.h = h = QHBoxLayout() + self.filter_text = ft 
= QLineEdit(self) + ft.textChanged.connect(self.do_filter) + ft.setPlaceholderText(_('Filter displayed searches')) + h.addWidget(ft) + self.cft = cft = QToolButton(self) + cft.setToolTip(_('Clear filter')), cft.setIcon(QIcon(I('clear_left.png'))) + cft.clicked.connect(ft.clear) + h.addWidget(cft) + l.addLayout(h) + + self.h2 = h = QHBoxLayout() + self.searches = searches = QListView(self) + searches.doubleClicked.connect(self.edit_search) + self.model = SearchesModel(self.searches) + self.model.dataChanged.connect(self.show_details) + searches.setModel(self.model) + searches.selectionModel().currentChanged.connect(self.show_details) + searches.setSelectionMode(searches.ExtendedSelection) + self.delegate = SearchDelegate(searches) + searches.setItemDelegate(self.delegate) + searches.setAlternatingRowColors(True) + h.addWidget(searches, stretch=10) + self.v = v = QVBoxLayout() + h.addLayout(v) + l.addLayout(h) + + def pb(text, tooltip=None): + b = QPushButton(text, self) + b.setToolTip(tooltip or '') + b.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Fixed) + return b + + mulmsg = '\n\n' + _('The entries are tried in order until the first one matches.') + + for text, action, tooltip in [ + (_('&Find'), 'find', _('Run the search using the selected entries.') + mulmsg), + (_('&Replace'), 'replace', _('Run replace using the selected entries.') + mulmsg), + (_('Replace a&nd Find'), 'replace-find', _('Run replace and then find using the selected entries.') + mulmsg), + (_('Replace &all'), 'replace-all', _('Run Replace All for all selected entries in the order selected')), + (_('&Count all'), 'count', _('Run Count All for all selected entries')), + ]: + b = pb(text, tooltip) + v.addWidget(b) + b.clicked.connect(partial(self.run_search, action)) + + self.d1 = d = QFrame(self) + d.setFrameStyle(QFrame.HLine) + v.addWidget(d) + + self.h3 = h = QHBoxLayout() + self.upb = b = QToolButton(self) + b.setIcon(QIcon(I('arrow-up.png'))), b.setToolTip(_('Move selected entries up')) 
+ b.clicked.connect(partial(self.move_entry, -1)) + self.dnb = b = QToolButton(self) + b.setIcon(QIcon(I('arrow-down.png'))), b.setToolTip(_('Move selected entries down')) + b.clicked.connect(partial(self.move_entry, 1)) + h.addWidget(self.upb), h.addWidget(self.dnb) + v.addLayout(h) + + self.eb = b = pb(_('&Edit search'), _('Edit the currently selected search')) + b.clicked.connect(self.edit_search) + v.addWidget(b) + + self.eb = b = pb(_('Re&move search'), _('Remove the currently selected searches')) + b.clicked.connect(self.remove_search) + v.addWidget(b) + + self.eb = b = pb(_('&Add search'), _('Add a new saved search')) + b.clicked.connect(self.add_search) + v.addWidget(b) + + self.d2 = d = QFrame(self) + d.setFrameStyle(QFrame.HLine) + v.addWidget(d) + + self.where_box = wb = WhereBox(self) + self.where = SearchWidget.DEFAULT_STATE['where'] + v.addWidget(wb) + self.direction_box = db = DirectionBox(self) + self.direction = SearchWidget.DEFAULT_STATE['direction'] + v.addWidget(db) + + self.wr = wr = QCheckBox(_('&Wrap')) + wr.setToolTip('

'+_('When searching reaches the end, wrap around to the beginning and continue the search')) + self.wr.setChecked(SearchWidget.DEFAULT_STATE['wrap']) + v.addWidget(wr) + + self.description = d = QLabel(' \n \n ') + d.setTextFormat(Qt.PlainText) + l.addWidget(d) + + l.addWidget(self.bb) + self.bb.clear() + self.bb.addButton(self.bb.Close) + self.ib = b = self.bb.addButton(_('&Import'), self.bb.ActionRole) + b.clicked.connect(self.import_searches) + self.eb = b = self.bb.addButton(_('E&xport'), self.bb.ActionRole) + self.em = m = QMenu(_('Export')) + m.addAction(_('Export All'), lambda : QTimer.singleShot(0, partial(self.export_searches, all=True))) + m.addAction(_('Export Selected'), lambda : QTimer.singleShot(0, partial(self.export_searches, all=False))) + b.setMenu(m) + + self.searches.setFocus(Qt.OtherFocusReason) + + @dynamic_property + def where(self): + def fget(self): + return self.where_box.where + def fset(self, val): + self.where_box.where = val + return property(fget=fget, fset=fset) + + @dynamic_property + def direction(self): + def fget(self): + return self.direction_box.direction + def fset(self, val): + self.direction_box.direction = val + return property(fget=fget, fset=fset) + + @dynamic_property + def wrap(self): + def fget(self): + return self.wr.isChecked() + def fset(self, val): + self.wr.setChecked(bool(val)) + return property(fget=fget, fset=fset) + + def do_filter(self, text): + self.model.do_filter(text) + self.searches.scrollTo(self.model.index(0)) + + def run_search(self, action): + searches, seen = [], set() + for index in self.searches.selectionModel().selectedIndexes(): + if index.row() in seen: + continue + seen.add(index.row()) + search = SearchWidget.DEFAULT_STATE.copy() + del search['mode'] + search_index, s = index.data(Qt.UserRole).toPyObject() + search.update(s) + search['wrap'] = self.wrap + search['direction'] = self.direction + search['where'] = self.where + search['mode'] = search.get('mode', 'regex') + 
searches.append(search) + if not searches: + return + self.run_saved_searches.emit(searches, action) + + def move_entry(self, delta): + rows = {index.row() for index in self.searches.selectionModel().selectedIndexes()} - {-1} + if rows: + with tprefs: + for row in sorted(rows, reverse=delta > 0): + self.model.move_entry(row, delta) + nrow = row + delta + index = self.model.index(nrow) + if index.isValid(): + sm = self.searches.selectionModel() + sm.setCurrentIndex(index, sm.ClearAndSelect) + + def edit_search(self): + index = self.searches.currentIndex() + if index.isValid(): + search_index, search = index.data(Qt.UserRole).toPyObject() + d = EditSearch(search=search, search_index=search_index, parent=self) + if d.exec_() == d.Accepted: + self.model.dataChanged.emit(index, index) + + def remove_search(self): + rows = {index.row() for index in self.searches.selectionModel().selectedIndexes()} - {-1} + self.model.remove_searches(rows) + self.show_details() + + def add_search(self): + d = EditSearch(parent=self) + self._add_search(d) + + def _add_search(self, d): + if d.exec_() == d.Accepted: + self.model.add_searches() + index = self.model.index(self.model.rowCount() - 1) + self.searches.scrollTo(index) + sm = self.searches.selectionModel() + sm.setCurrentIndex(index, sm.ClearAndSelect) + self.show_details() + + def add_predefined_search(self, state): + d = EditSearch(parent=self, state=state) + self._add_search(d) + + def show_details(self): + self.description.setText(' \n \n ') + i = self.searches.currentIndex() + if i.isValid(): + search_index, search = i.data(Qt.UserRole).toPyObject() + cs = '✓' if search.get('case_sensitive', SearchWidget.DEFAULT_STATE['case_sensitive']) else '✗' + da = '✓' if search.get('dot_all', SearchWidget.DEFAULT_STATE['dot_all']) else '✗' + if search.get('mode', SearchWidget.DEFAULT_STATE['mode']) == 'regex': + ts = _('(Case sensitive: {0} Dot All: {1})').format(cs, da) + else: + ts = _('(Case sensitive: {0} [Normal search])').format(cs) 
+ self.description.setText(_('{2} {3}\nFind: {0}\nReplace: {1}').format( + search.get('find', ''), search.get('replace', ''), search.get('name', ''), ts)) + + def import_searches(self): + path = choose_files(self, 'import_saved_searches', _('Choose file'), filters=[ + (_('Saved Searches'), ['json'])], all_files=False, select_only_single_file=True) + if path: + with open(path[0], 'rb') as f: + obj = json.loads(f.read()) + needed_keys = {'name', 'find', 'replace', 'case_sensitive', 'dot_all', 'mode'} + def err(): + error_dialog(self, _('Invalid data'), _( + 'The file %s does not contain valid saved searches') % path, show=True) + if not isinstance(obj, dict) or not 'version' in obj or not 'searches' in obj or obj['version'] not in (1,): + return err() + searches = [] + for item in obj['searches']: + if not isinstance(item, dict) or not set(item.iterkeys()).issuperset(needed_keys): + return err + searches.append({k:item[k] for k in needed_keys}) + + if searches: + tprefs['saved_searches'] = tprefs['saved_searches'] + searches + count = len(searches) + self.model.add_searches(count=count) + sm = self.searches.selectionModel() + top, bottom = self.model.index(self.model.rowCount() - count), self.model.index(self.model.rowCount() - 1) + sm.select(QItemSelection(top, bottom), sm.ClearAndSelect) + self.searches.scrollTo(bottom) + + def export_searches(self, all=True): + if all: + searches = copy.deepcopy(tprefs['saved_searches']) + if not searches: + return error_dialog(self, _('No searches'), _( + 'No searches available to be saved'), show=True) + else: + searches = [] + for index in self.searches.selectionModel().selectedIndexes(): + search = index.data(Qt.UserRole).toPyObject()[-1] + searches.append(search.copy()) + if not searches: + return error_dialog(self, _('No searches'), _( + 'No searches selected'), show=True) + [s.__setitem__('mode', s.get('mode', 'regex')) for s in searches] + path = choose_save_file(self, 'export-saved-searches', _('Choose file'), filters=[ + 
(_('Saved Searches'), ['json'])], all_files=False) + if path: + if not path.lower().endswith('.json'): + path += '.json' + raw = json.dumps({'version':1, 'searches':searches}, ensure_ascii=False, indent=2, sort_keys=True) + with open(path, 'wb') as f: + f.write(raw.encode('utf-8')) + +def validate_search_request(name, searchable_names, has_marked_text, state, gui_parent): + err = None + where = state['where'] + if name is None and where in {'current', 'selected-text'}: + err = _('No file is being edited.') + elif where == 'selected' and not searchable_names['selected']: + err = _('No files are selected in the Files Browser') + elif where == 'selected-text' and not has_marked_text: + err = _('No text is marked. First select some text, and then use' + ' The "Mark selected text" action in the Search menu to mark it.') + if not err and not state['find']: + err = _('No search query specified') + if err: + error_dialog(gui_parent, _('Cannot search'), err, show=True) + return False + return True + +def get_search_regex(state): + raw = state['find'] + if state['mode'] != 'regex': + raw = regex.escape(raw, special_only=True) + flags = REGEX_FLAGS + if not state['case_sensitive']: + flags |= regex.IGNORECASE + if state['mode'] == 'regex' and state['dot_all']: + flags |= regex.DOTALL + if state['direction'] == 'up': + flags |= regex.REVERSE + ans = regex_cache.get((flags, raw), None) + if ans is None: + ans = regex_cache[(flags, raw)] = regex.compile(raw, flags=flags) + return ans + +def initialize_search_request(state, action, current_editor, current_editor_name, searchable_names): + editor = None + where = state['where'] + files = OrderedDict() + do_all = state['wrap'] or action in {'replace-all', 'count'} + marked = False + if where == 'current': + editor = current_editor + elif where in {'styles', 'text', 'selected'}: + files = searchable_names[where] + if current_editor_name in files: + # Start searching in the current editor + editor = current_editor + # Re-order the 
list of other files so that we search in the same + # order every time. Depending on direction, search the files + # that come after the current file, or before the current file, + # first. + lfiles = list(files) + idx = lfiles.index(current_editor_name) + before, after = lfiles[:idx], lfiles[idx+1:] + if state['direction'] == 'up': + lfiles = list(reversed(before)) + if do_all: + lfiles += list(reversed(after)) + [current_editor_name] + else: + lfiles = after + if do_all: + lfiles += before + [current_editor_name] + files = OrderedDict((m, files[m]) for m in lfiles) + else: + editor = current_editor + marked = True + + return editor, where, files, do_all, marked + +def run_search( + searches, action, current_editor, current_editor_name, searchable_names, + gui_parent, show_editor, edit_file, show_current_diff, add_savepoint, rewind_savepoint, set_modified): + + if isinstance(searches, dict): + searches = [searches] + + editor, where, files, do_all, marked = initialize_search_request(searches[0], action, current_editor, current_editor_name, searchable_names) + wrap = searches[0]['wrap'] + + errfind = searches[0]['find'] + if len(searches) > 1: + errfind = _('the selected searches') + + searches = [(get_search_regex(search), search['replace']) for search in searches] + + def no_match(): + QApplication.restoreOverrideCursor() + msg = '

' + _('No matches were found for %s') % ('

' + prepare_string_for_xml(errfind) + '
') + if not wrap: + msg += '

' + _('You have turned off search wrapping, so all text might not have been searched.' + ' Try the search again, with wrapping enabled. Wrapping is enabled via the' + ' "Wrap" checkbox at the bottom of the search panel.') + return error_dialog( + gui_parent, _('Not found'), msg, show=True) + + def do_find(): + for p, __ in searches: + if editor is not None: + if editor.find(p, marked=marked, save_match='gui'): + return + if wrap and not files and editor.find(p, wrap=True, marked=marked, save_match='gui'): + return + for fname, syntax in files.iteritems(): + ed = editors.get(fname, None) + if ed is not None: + if not wrap and ed is editor: + continue + if ed.find(p, complete=True, save_match='gui'): + return show_editor(fname) + else: + raw = current_container().raw_data(fname) + if p.search(raw) is not None: + edit_file(fname, syntax) + if editors[fname].find(p, complete=True, save_match='gui'): + return + return no_match() + + def no_replace(prefix=''): + QApplication.restoreOverrideCursor() + if prefix: + prefix += ' ' + error_dialog( + gui_parent, _('Cannot replace'), prefix + _( + 'You must first click Find, before trying to replace'), show=True) + return False + + def do_replace(): + if editor is None: + return no_replace() + for p, repl in searches: + if editor.replace(p, repl, saved_match='gui'): + return True + return no_replace(_( + 'Currently selected text does not match the search query.')) + + def count_message(action, count, show_diff=False): + msg = _('%(action)s %(num)s occurrences of %(query)s' % dict(num=count, query=errfind, action=action)) + if show_diff and count > 0: + d = MessageBox(MessageBox.INFO, _('Searching done'), prepare_string_for_xml(msg), parent=gui_parent, show_copy_button=False) + d.diffb = b = d.bb.addButton(_('See what &changed'), d.bb.ActionRole) + b.setIcon(QIcon(I('diff.png'))), d.set_details(None), b.clicked.connect(d.accept) + b.clicked.connect(partial(show_current_diff, allow_revert=True)) + d.exec_() + else: + 
info_dialog(gui_parent, _('Searching done'), prepare_string_for_xml(msg), show=True) + + def do_all(replace=True): + count = 0 + if not files and editor is None: + return 0 + lfiles = files or {current_editor_name:editor.syntax} + updates = set() + raw_data = {} + for n, syntax in lfiles.iteritems(): + if n in editors: + raw = editors[n].get_raw_data() + else: + raw = current_container().raw_data(n) + raw_data[n] = raw + + for p, repl in searches: + for n, syntax in lfiles.iteritems(): + raw = raw_data[n] + if replace: + raw, num = p.subn(repl, raw) + if num > 0: + updates.add(n) + raw_data[n] = raw + else: + num = len(p.findall(raw)) + count += num + + for n in updates: + raw = raw_data[n] + if n in editors: + editors[n].replace_data(raw) + else: + with current_container().open(n, 'wb') as f: + f.write(raw.encode('utf-8')) + QApplication.restoreOverrideCursor() + count_message(_('Replaced') if replace else _('Found'), count, show_diff=replace) + return count + + with BusyCursor(): + if action == 'find': + return do_find() + if action == 'replace': + return do_replace() + if action == 'replace-find' and do_replace(): + return do_find() + if action == 'replace-all': + if marked: + return count_message(_('Replaced'), sum(editor.all_in_marked(p, repl) for p, repl in searches)) + add_savepoint(_('Before: Replace all')) + count = do_all() + if count == 0: + rewind_savepoint() + else: + set_modified() + return + if action == 'count': + if marked: + return count_message(_('Found'), sum(editor.all_in_marked(p) for p, __ in searches)) + return do_all(replace=False) + +if __name__ == '__main__': + app = QApplication([]) + d = SavedSearches() + d.exec_() diff --git a/src/calibre/gui2/tweak_book/ui.py b/src/calibre/gui2/tweak_book/ui.py index 6915063535..1aa1d60adf 100644 --- a/src/calibre/gui2/tweak_book/ui.py +++ b/src/calibre/gui2/tweak_book/ui.py @@ -29,6 +29,7 @@ from calibre.gui2.tweak_book.undo import CheckpointView from calibre.gui2.tweak_book.preview import Preview 
from calibre.gui2.tweak_book.search import SearchPanel from calibre.gui2.tweak_book.check import Check +from calibre.gui2.tweak_book.search import SavedSearches from calibre.gui2.tweak_book.toc import TOCViewer from calibre.gui2.tweak_book.char_select import CharSelect from calibre.gui2.tweak_book.editor.widget import register_text_editor_actions @@ -221,6 +222,7 @@ class Main(MainWindow): self.setCentralWidget(self.central) self.check_book = Check(self) self.toc_view = TOCViewer(self) + self.saved_searches = SavedSearches(self) self.image_browser = InsertImage(self, for_browsing=True) self.insert_char = CharSelect(self) @@ -302,6 +304,8 @@ class Main(MainWindow): self.action_new_book = reg('book.png', _('Create &new, empty book'), self.boss.new_book, 'new-book', (), _('Create a new, empty book')) self.action_import_book = reg('book.png', _('&Import an HTML or DOCX file as a new book'), self.boss.import_book, 'import-book', (), _('Import an HTML or DOCX file as a new book')) + self.action_quick_edit = reg('modified.png', _('&Quick open a file to edit'), self.boss.quick_open, 'quick-open', ('Ctrl+T'), _( + 'Quickly open a file from the book to edit it')) # Editor actions group = _('Editor actions') @@ -341,6 +345,8 @@ class Main(MainWindow): _('Insert special character')) self.action_rationalize_folders = reg('mimetypes/dir.png', _('&Arrange into folders'), self.boss.rationalize_folders, 'rationalize-folders', (), _('Arrange into folders')) + self.action_set_semantics = reg('tags.png', _('Set &Semantics'), self.boss.set_semantics, 'set-semantics', (), + _('Set Semantics')) # Polish actions group = _('Polish Book') @@ -389,6 +395,7 @@ class Main(MainWindow): 'count', keys=('Ctrl+N'), description=_('Count number of matches')) self.action_mark = reg(None, _('&Mark selected text'), self.boss.mark_selected_text, 'mark-selected-text', ('Ctrl+Shift+M',), _('Mark selected text')) self.action_go_to_line = reg(None, _('Go to &line'), self.boss.go_to_line_number, 
'go-to-line-number', ('Ctrl+.',), _('Go to line number')) + self.action_saved_searches = reg(None, _('Sa&ved searches'), self.boss.saved_searches, 'saved-searches', (), _('Show the saved searches dialog')) # Check Book actions group = _('Check Book') @@ -430,6 +437,7 @@ class Main(MainWindow): f = b.addMenu(_('&File')) f.addAction(self.action_new_file) f.addAction(self.action_import_files) + f.addSeparator() f.addAction(self.action_open_book) f.addAction(self.action_new_book) f.addAction(self.action_import_book) @@ -455,6 +463,7 @@ class Main(MainWindow): e.addAction(self.action_editor_paste) e.addAction(self.action_insert_char) e.addSeparator() + e.addAction(self.action_quick_edit) e.addAction(self.action_preferences) e = b.addMenu(_('&Tools')) @@ -468,6 +477,7 @@ class Main(MainWindow): e.addAction(self.action_fix_html_all) e.addAction(self.action_pretty_all) e.addAction(self.action_rationalize_folders) + e.addAction(self.action_set_semantics) e.addAction(self.action_check_book) e = b.addMenu(_('&View')) @@ -500,6 +510,8 @@ class Main(MainWindow): a(self.action_mark) e.addSeparator() a(self.action_go_to_line) + e.addSeparator() + a(self.action_saved_searches) e = b.addMenu(_('&Help')) a = e.addAction diff --git a/src/calibre/gui2/tweak_book/widgets.py b/src/calibre/gui2/tweak_book/widgets.py index c0344b178f..e3ddc0d13b 100644 --- a/src/calibre/gui2/tweak_book/widgets.py +++ b/src/calibre/gui2/tweak_book/widgets.py @@ -6,12 +6,32 @@ from __future__ import (unicode_literals, division, absolute_import, __license__ = 'GPL v3' __copyright__ = '2014, Kovid Goyal ' +import os +from itertools import izip +from collections import OrderedDict + from PyQt4.Qt import ( QDialog, QDialogButtonBox, QGridLayout, QLabel, QLineEdit, QVBoxLayout, - QFormLayout, QHBoxLayout, QToolButton, QIcon, QApplication, Qt) + QFormLayout, QHBoxLayout, QToolButton, QIcon, QApplication, Qt, QWidget, + QPoint, QSizePolicy, QPainter, QStaticText, pyqtSignal, QTextOption, + QAbstractListModel, 
QModelIndex, QVariant, QStyledItemDelegate, QStyle, + QListView, QTextDocument, QSize, QComboBox, QFrame, QCursor) -from calibre.gui2 import error_dialog, choose_files, choose_save_file +from calibre import prepare_string_for_xml +from calibre.gui2 import error_dialog, choose_files, choose_save_file, NONE, info_dialog from calibre.gui2.tweak_book import tprefs +from calibre.utils.icu import primary_sort_key, sort_key +from calibre.utils.matcher import get_char, Matcher + +ROOT = QModelIndex() + +class BusyCursor(object): + + def __enter__(self): + QApplication.setOverrideCursor(QCursor(Qt.WaitCursor)) + + def __exit__(self, *args): + QApplication.restoreOverrideCursor() class Dialog(QDialog): @@ -222,8 +242,614 @@ class ImportForeign(Dialog): # {{{ return src, dest # }}} +# Quick Open {{{ + +def make_highlighted_text(emph, text, positions): + positions = sorted(set(positions) - {-1}, reverse=True) + text = prepare_string_for_xml(text) + for p in positions: + ch = get_char(text, p) + text = '%s%s%s' % (text[:p], emph, ch, text[p+len(ch):]) + return text + + +class Results(QWidget): + + EMPH = "color:magenta; font-weight:bold" + MARGIN = 4 + + item_selected = pyqtSignal() + + def __init__(self, parent=None): + QWidget.__init__(self, parent=parent) + + self.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding) + self.results = () + self.current_result = -1 + self.max_result = -1 + self.mouse_hover_result = -1 + self.setMouseTracking(True) + self.setFocusPolicy(Qt.NoFocus) + self.text_option = to = QTextOption() + to.setWrapMode(to.NoWrap) + self.divider = QStaticText('\xa0→ \xa0') + self.divider.setTextFormat(Qt.PlainText) + + def item_from_y(self, y): + if not self.results: + return + delta = self.results[0][0].size().height() + self.MARGIN + maxy = self.height() + pos = 0 + for i, r in enumerate(self.results): + bottom = pos + delta + if pos <= y < bottom: + return i + break + pos = bottom + if pos > min(y, maxy): + break + return -1 + + def 
mouseMoveEvent(self, ev): + y = ev.pos().y() + prev = self.mouse_hover_result + self.mouse_hover_result = self.item_from_y(y) + if prev != self.mouse_hover_result: + self.update() + + def mousePressEvent(self, ev): + if ev.button() == 1: + i = self.item_from_y(ev.pos().y()) + if i != -1: + ev.accept() + self.current_result = i + self.update() + self.item_selected.emit() + return + return QWidget.mousePressEvent(self, ev) + + def change_current(self, delta=1): + if not self.results: + return + nc = self.current_result + delta + if 0 <= nc <= self.max_result: + self.current_result = nc + self.update() + + def __call__(self, results): + if results: + self.current_result = 0 + prefixes = [QStaticText('%s' % os.path.basename(x)) for x in results] + [(p.setTextFormat(Qt.RichText), p.setTextOption(self.text_option)) for p in prefixes] + self.maxwidth = max([x.size().width() for x in prefixes]) + self.results = tuple((prefix, self.make_text(text, positions), text) + for prefix, (text, positions) in izip(prefixes, results.iteritems())) + else: + self.results = () + self.current_result = -1 + self.max_result = min(10, len(self.results) - 1) + self.mouse_hover_result = -1 + self.update() + + def make_text(self, text, positions): + text = QStaticText(make_highlighted_text(self.EMPH, text, positions)) + text.setTextOption(self.text_option) + text.setTextFormat(Qt.RichText) + return text + + def paintEvent(self, ev): + offset = QPoint(0, 0) + p = QPainter(self) + p.setClipRect(ev.rect()) + bottom = self.rect().bottom() + + if self.results: + for i, (prefix, full, text) in enumerate(self.results): + size = prefix.size() + if offset.y() + size.height() > bottom: + break + self.max_result = i + offset.setX(0) + if i in (self.current_result, self.mouse_hover_result): + p.save() + if i != self.current_result: + p.setPen(Qt.DotLine) + p.drawLine(offset, QPoint(self.width(), offset.y())) + p.restore() + offset.setY(offset.y() + self.MARGIN // 2) + p.drawStaticText(offset, prefix) + 
offset.setX(self.maxwidth + 5) + p.drawStaticText(offset, self.divider) + offset.setX(offset.x() + self.divider.size().width()) + p.drawStaticText(offset, full) + offset.setY(offset.y() + size.height() + self.MARGIN // 2) + if i in (self.current_result, self.mouse_hover_result): + offset.setX(0) + p.save() + if i != self.current_result: + p.setPen(Qt.DotLine) + p.drawLine(offset, QPoint(self.width(), offset.y())) + p.restore() + else: + p.drawText(self.rect(), Qt.AlignCenter, _('No results found')) + + p.end() + + @property + def selected_result(self): + try: + return self.results[self.current_result][-1] + except IndexError: + pass + +class QuickOpen(Dialog): + + def __init__(self, items, parent=None): + self.matcher = Matcher(items) + self.matches = () + self.selected_result = None + Dialog.__init__(self, _('Choose file to edit'), 'quick-open', parent=parent) + + def sizeHint(self): + ans = Dialog.sizeHint(self) + ans.setWidth(800) + ans.setHeight(max(600, ans.height())) + return ans + + def setup_ui(self): + self.l = l = QVBoxLayout(self) + self.setLayout(l) + + self.text = t = QLineEdit(self) + t.textEdited.connect(self.update_matches) + l.addWidget(t, alignment=Qt.AlignTop) + + example = '

{0}i{1}mages/{0}c{1}hapter1/{0}s{1}cene{0}3{1}.jpg
'.format( + '' % Results.EMPH, '') + chars = '
ics3
' % Results.EMPH + + self.help_label = hl = QLabel(_( + '''

Quickly choose a file by typing in just a few characters from the file name into the field above. + For example, if want to choose the file: + {example} + Simply type in the characters: + {chars} + and press Enter.''').format(example=example, chars=chars)) + hl.setMargin(50), hl.setAlignment(Qt.AlignTop | Qt.AlignHCenter) + l.addWidget(hl) + self.results = Results(self) + self.results.setVisible(False) + self.results.item_selected.connect(self.accept) + l.addWidget(self.results) + + l.addWidget(self.bb, alignment=Qt.AlignBottom) + + def update_matches(self, text): + text = unicode(text).strip() + self.help_label.setVisible(False) + self.results.setVisible(True) + matches = self.matcher(text, limit=100) + self.results(matches) + self.matches = tuple(matches) + + def keyPressEvent(self, ev): + if ev.key() in (Qt.Key_Up, Qt.Key_Down): + ev.accept() + self.results.change_current(delta=-1 if ev.key() == Qt.Key_Up else 1) + return + return Dialog.keyPressEvent(self, ev) + + def accept(self): + self.selected_result = self.results.selected_result + return Dialog.accept(self) + + @classmethod + def test(cls): + import os + from calibre.utils.matcher import get_items_from_dir + items = get_items_from_dir(os.getcwdu(), lambda x:not x.endswith('.pyc')) + d = cls(items) + d.exec_() + print (d.selected_result) + +# }}} + +# Filterable names list {{{ + +class NamesDelegate(QStyledItemDelegate): + + def sizeHint(self, option, index): + ans = QStyledItemDelegate.sizeHint(self, option, index) + ans.setHeight(ans.height() + 10) + return ans + + def paint(self, painter, option, index): + QStyledItemDelegate.paint(self, painter, option, index) + text, positions = index.data(Qt.UserRole).toPyObject() + self.initStyleOption(option, index) + painter.save() + painter.setFont(option.font) + p = option.palette + c = p.HighlightedText if option.state & QStyle.State_Selected else p.Text + group = (p.Active if option.state & QStyle.State_Active else p.Inactive) + c = p.color(group, c) + 
painter.setClipRect(option.rect) + if positions is None or -1 in positions: + painter.setPen(c) + painter.drawText(option.rect, Qt.AlignLeft | Qt.AlignVCenter | Qt.TextSingleLine, text) + else: + to = QTextOption() + to.setWrapMode(to.NoWrap) + to.setAlignment(Qt.AlignLeft | Qt.AlignVCenter) + positions = sorted(set(positions) - {-1}, reverse=True) + text = '%s' % make_highlighted_text(Results.EMPH, text, positions) + doc = QTextDocument() + c = 'rgb(%d, %d, %d)'%c.getRgb()[:3] + doc.setDefaultStyleSheet(' body { color: %s }'%c) + doc.setHtml(text) + doc.setDefaultFont(option.font) + doc.setDocumentMargin(0.0) + doc.setDefaultTextOption(to) + height = doc.size().height() + painter.translate(option.rect.left(), option.rect.top() + (max(0, option.rect.height() - height) // 2)) + doc.drawContents(painter) + painter.restore() + +class NamesModel(QAbstractListModel): + + filtered = pyqtSignal(object) + + def __init__(self, names, parent=None): + self.items = [] + QAbstractListModel.__init__(self, parent) + self.set_names(names) + + def set_names(self, names): + self.names = names + self.matcher = Matcher(names) + self.filter('') + + def rowCount(self, parent=ROOT): + return len(self.items) + + def data(self, index, role): + if role == Qt.UserRole: + return QVariant(self.items[index.row()]) + if role == Qt.DisplayRole: + return QVariant('\xa0' * 20) + return NONE + + def filter(self, query): + query = unicode(query or '') + if not query: + self.items = tuple((text, None) for text in self.names) + else: + self.items = tuple(self.matcher(query).iteritems()) + self.reset() + self.filtered.emit(not bool(query)) + + def find_name(self, name): + for i, (text, positions) in enumerate(self.items): + if text == name: + return i + +def create_filterable_names_list(names, filter_text=None, parent=None): + nl = QListView(parent) + nl.m = m = NamesModel(names, parent=nl) + m.filtered.connect(lambda all_items: nl.scrollTo(m.index(0))) + nl.setModel(m) + nl.d = NamesDelegate(nl) + 
nl.setItemDelegate(nl.d) + f = QLineEdit(parent) + f.setPlaceholderText(filter_text or '') + f.textEdited.connect(m.filter) + return nl, f + +# }}} + +# Insert Link {{{ +class InsertLink(Dialog): + + def __init__(self, container, source_name, initial_text=None, parent=None): + self.container = container + self.source_name = source_name + self.initial_text = initial_text + Dialog.__init__(self, _('Insert Hyperlink'), 'insert-hyperlink', parent=parent) + self.anchor_cache = {} + + def sizeHint(self): + return QSize(800, 600) + + def setup_ui(self): + self.l = l = QVBoxLayout(self) + self.setLayout(l) + + self.h = h = QHBoxLayout() + l.addLayout(h) + + names = [n for n, linear in self.container.spine_names] + fn, f = create_filterable_names_list(names, filter_text=_('Filter files'), parent=self) + self.file_names, self.file_names_filter = fn, f + fn.selectionModel().selectionChanged.connect(self.selected_file_changed) + self.fnl = fnl = QVBoxLayout() + self.la1 = la = QLabel(_('Choose a &file to link to:')) + la.setBuddy(fn) + fnl.addWidget(la), fnl.addWidget(f), fnl.addWidget(fn) + h.addLayout(fnl), h.setStretch(0, 2) + + fn, f = create_filterable_names_list([], filter_text=_('Filter locations'), parent=self) + self.anchor_names, self.anchor_names_filter = fn, f + fn.selectionModel().selectionChanged.connect(self.update_target) + fn.doubleClicked.connect(self.accept, type=Qt.QueuedConnection) + self.anl = fnl = QVBoxLayout() + self.la2 = la = QLabel(_('Choose a &location (anchor) in the file:')) + la.setBuddy(fn) + fnl.addWidget(la), fnl.addWidget(f), fnl.addWidget(fn) + h.addLayout(fnl), h.setStretch(1, 1) + + self.tl = tl = QFormLayout() + self.target = t = QLineEdit(self) + t.setPlaceholderText(_('The destination (href) for the link')) + tl.addRow(_('&Target:'), t) + l.addLayout(tl) + + self.text_edit = t = QLineEdit(self) + la.setBuddy(t) + tl.addRow(_('Te&xt:'), t) + t.setText(self.initial_text or '') + t.setPlaceholderText(_('The (optional) text for the link')) 
+ + l.addWidget(self.bb) + + def selected_file_changed(self, *args): + rows = list(self.file_names.selectionModel().selectedRows()) + if not rows: + self.anchor_names.model().set_names([]) + else: + name, positions = self.file_names.model().data(rows[0], Qt.UserRole).toPyObject() + self.populate_anchors(name) + + def populate_anchors(self, name): + if name not in self.anchor_cache: + from calibre.ebooks.oeb.base import XHTML_NS + root = self.container.parsed(name) + self.anchor_cache[name] = sorted( + (set(root.xpath('//*/@id')) | set(root.xpath('//h:a/@name', namespaces={'h':XHTML_NS}))) - {''}, key=primary_sort_key) + self.anchor_names.model().set_names(self.anchor_cache[name]) + self.update_target() + + def update_target(self): + rows = list(self.file_names.selectionModel().selectedRows()) + if not rows: + return + name = self.file_names.model().data(rows[0], Qt.UserRole).toPyObject()[0] + if name == self.source_name: + href = '' + else: + href = self.container.name_to_href(name, self.source_name) + frag = '' + rows = list(self.anchor_names.selectionModel().selectedRows()) + if rows: + anchor = self.anchor_names.model().data(rows[0], Qt.UserRole).toPyObject()[0] + if anchor: + frag = '#' + anchor + href += frag + self.target.setText(href or '#') + + @property + def href(self): + return unicode(self.target.text()).strip() + + @property + def text(self): + return unicode(self.text_edit.text()).strip() + + @classmethod + def test(cls): + import sys + from calibre.ebooks.oeb.polish.container import get_container + c = get_container(sys.argv[-1], tweak_mode=True) + d = cls(c, next(c.spine_names)[0]) + if d.exec_() == d.Accepted: + print (d.href, d.text) + +# }}} + +# Insert Semantics {{{ + +class InsertSemantics(Dialog): + + def __init__(self, container, parent=None): + self.container = container + self.anchor_cache = {} + self.original_type_map = {item.get('type', ''):(container.href_to_name(item.get('href'), container.opf_name), item.get('href', 
'').partition('#')[-1]) + for item in container.opf_xpath('//opf:guide/opf:reference[@href and @type]')} + self.final_type_map = self.original_type_map.copy() + self.create_known_type_map() + Dialog.__init__(self, _('Set Semantics'), 'insert-semantics', parent=parent) + + def sizeHint(self): + return QSize(800, 600) + + def create_known_type_map(self): + _ = lambda x: x + self.known_type_map = { + 'title-page': _('Title Page'), + 'toc': _('Table of Contents'), + 'index': _('Index'), + 'glossary': _('Glossary'), + 'acknowledgements': _('Acknowledgements'), + 'bibliography': _('Bibliography'), + 'colophon': _('Colophon'), + 'copyright-page': _('Copyright page'), + 'dedication': _('Dedication'), + 'epigraph': _('Epigraph'), + 'foreword': _('Foreword'), + 'loi': _('List of Illustrations'), + 'lot': _('List of Tables'), + 'notes:': _('Notes'), + 'preface': _('Preface'), + 'text': _('Text'), + } + _ = __builtins__['_'] + type_map_help = { + 'title-page': _('Page with title, author, publisher, etc.'), + 'index': _('Back-of-book style index'), + 'text': _('First "real" page of content'), + } + t = _ + all_types = [(k, (('%s (%s)' % (t(v), type_map_help[k])) if k in type_map_help else t(v))) for k, v in self.known_type_map.iteritems()] + all_types.sort(key=lambda x: sort_key(x[1])) + self.all_types = OrderedDict(all_types) + + def setup_ui(self): + self.l = l = QVBoxLayout(self) + self.setLayout(l) + + self.tl = tl = QFormLayout() + self.semantic_type = QComboBox(self) + for key, val in self.all_types.iteritems(): + self.semantic_type.addItem(val, key) + tl.addRow(_('Type of &semantics:'), self.semantic_type) + self.target = t = QLineEdit(self) + t.setPlaceholderText(_('The destination (href) for the link')) + tl.addRow(_('&Target:'), t) + l.addLayout(tl) + + self.hline = hl = QFrame(self) + hl.setFrameStyle(hl.HLine) + l.addWidget(hl) + + self.h = h = QHBoxLayout() + l.addLayout(h) + + names = [n for n, linear in self.container.spine_names] + fn, f = 
create_filterable_names_list(names, filter_text=_('Filter files'), parent=self) + self.file_names, self.file_names_filter = fn, f + fn.selectionModel().selectionChanged.connect(self.selected_file_changed) + self.fnl = fnl = QVBoxLayout() + self.la1 = la = QLabel(_('Choose a &file:')) + la.setBuddy(fn) + fnl.addWidget(la), fnl.addWidget(f), fnl.addWidget(fn) + h.addLayout(fnl), h.setStretch(0, 2) + + fn, f = create_filterable_names_list([], filter_text=_('Filter locations'), parent=self) + self.anchor_names, self.anchor_names_filter = fn, f + fn.selectionModel().selectionChanged.connect(self.update_target) + fn.doubleClicked.connect(self.accept, type=Qt.QueuedConnection) + self.anl = fnl = QVBoxLayout() + self.la2 = la = QLabel(_('Choose a &location (anchor) in the file:')) + la.setBuddy(fn) + fnl.addWidget(la), fnl.addWidget(f), fnl.addWidget(fn) + h.addLayout(fnl), h.setStretch(1, 1) + + self.bb.addButton(self.bb.Help) + self.bb.helpRequested.connect(self.help_requested) + l.addWidget(self.bb) + self.semantic_type_changed() + self.semantic_type.currentIndexChanged.connect(self.semantic_type_changed) + self.target.textChanged.connect(self.target_text_changed) + + def help_requested(self): + d = info_dialog(self, _('About semantics'), _( + 'Semantics refer to additional information about specific locations in the book.' 
+ ' For example, you can specify that a particular location is the dedication or the preface' + ' or the table of contents and so on.\n\nFirst choose the type of semantic information, then' + ' choose a file and optionally a location within the file to point to.\n\nThe' + ' semantic information will be written in the section of the opf file.')) + d.resize(d.sizeHint()) + d.exec_() + + def semantic_type_changed(self): + item_type = unicode(self.semantic_type.itemData(self.semantic_type.currentIndex()).toString()) + name, frag = self.final_type_map.get(item_type, (None, None)) + self.show_type(name, frag) + + def show_type(self, name, frag): + self.file_names_filter.clear(), self.anchor_names_filter.clear() + self.file_names.clearSelection(), self.anchor_names.clearSelection() + if name is not None: + row = self.file_names.model().find_name(name) + if row is not None: + sm = self.file_names.selectionModel() + sm.select(self.file_names.model().index(row), sm.ClearAndSelect) + if frag: + row = self.anchor_names.model().find_name(frag) + if row is not None: + sm = self.anchor_names.selectionModel() + sm.select(self.anchor_names.model().index(row), sm.ClearAndSelect) + self.target.blockSignals(True) + if name is not None: + self.target.setText(name + (('#' + frag) if frag else '')) + else: + self.target.setText('') + self.target.blockSignals(False) + + def target_text_changed(self): + name, frag = unicode(self.target.text()).partition('#')[::2] + item_type = unicode(self.semantic_type.itemData(self.semantic_type.currentIndex()).toString()) + self.final_type_map[item_type] = (name, frag or None) + + def selected_file_changed(self, *args): + rows = list(self.file_names.selectionModel().selectedRows()) + if not rows: + self.anchor_names.model().set_names([]) + else: + name, positions = self.file_names.model().data(rows[0], Qt.UserRole).toPyObject() + self.populate_anchors(name) + + def populate_anchors(self, name): + if name not in self.anchor_cache: + from 
calibre.ebooks.oeb.base import XHTML_NS + root = self.container.parsed(name) + self.anchor_cache[name] = sorted( + (set(root.xpath('//*/@id')) | set(root.xpath('//h:a/@name', namespaces={'h':XHTML_NS}))) - {''}, key=primary_sort_key) + self.anchor_names.model().set_names(self.anchor_cache[name]) + self.update_target() + + def update_target(self): + rows = list(self.file_names.selectionModel().selectedRows()) + if not rows: + return + name = self.file_names.model().data(rows[0], Qt.UserRole).toPyObject()[0] + href = name + frag = '' + rows = list(self.anchor_names.selectionModel().selectedRows()) + if rows: + anchor = self.anchor_names.model().data(rows[0], Qt.UserRole).toPyObject()[0] + if anchor: + frag = '#' + anchor + href += frag + self.target.setText(href or '#') + + @property + def changed_type_map(self): + return {k:v for k, v in self.final_type_map.iteritems() if v != self.original_type_map.get(k, None)} + + def apply_changes(self, container): + from calibre.ebooks.oeb.polish.opf import set_guide_item, get_book_language + from calibre.translations.dynamic import translate + lang = get_book_language(container) + for item_type, (name, frag) in self.changed_type_map.iteritems(): + title = self.known_type_map[item_type] + if lang: + title = translate(lang, title) + set_guide_item(container, item_type, title, name, frag=frag) + + @classmethod + def test(cls): + import sys + from calibre.ebooks.oeb.polish.container import get_container + c = get_container(sys.argv[-1], tweak_mode=True) + d = cls(c) + if d.exec_() == d.Accepted: + import pprint + pprint.pprint(d.changed_type_map) + d.apply_changes(d.container) + +# }}} + if __name__ == '__main__': app = QApplication([]) - d = ImportForeign() - d.exec_() - print (d.data) + InsertSemantics.test() diff --git a/src/calibre/gui2/viewer/config.py b/src/calibre/gui2/viewer/config.py index abf46b113e..f8544cb0da 100644 --- a/src/calibre/gui2/viewer/config.py +++ b/src/calibre/gui2/viewer/config.py @@ -35,6 +35,10 @@ def 
config(defaults=None): help=_("Set the maximum width that the book's text and pictures will take" " when in fullscreen mode. This allows you to read the book text" " without it becoming too wide.")) + c.add_opt('max_fs_height', default=-1, + help=_("Set the maximum height that the book's text and pictures will take" + " when in fullscreen mode. This allows you to read the book text" + " without it becoming too tall. Note that this setting only takes effect in paged mode (which is the default mode).")) c.add_opt('fit_images', default=True, help=_('Resize images larger than the viewer window to fit inside it')) c.add_opt('hyphenate', default=False, help=_('Hyphenate text')) @@ -211,6 +215,7 @@ class ConfigDialog(QDialog, Ui_Dialog): {'serif':0, 'sans':1, 'mono':2}[opts.standard_font]) self.css.setPlainText(opts.user_css) self.max_fs_width.setValue(opts.max_fs_width) + self.max_fs_height.setValue(opts.max_fs_height) pats, names = self.hyphenate_pats, self.hyphenate_names try: idx = pats.index(opts.hyphenate_default_lang) @@ -287,6 +292,10 @@ class ConfigDialog(QDialog, Ui_Dialog): c.set('remember_window_size', self.opt_remember_window_size.isChecked()) c.set('fit_images', self.opt_fit_images.isChecked()) c.set('max_fs_width', int(self.max_fs_width.value())) + max_fs_height = self.max_fs_height.value() + if max_fs_height <= self.max_fs_height.minimum(): + max_fs_height = -1 + c.set('max_fs_height', max_fs_height) c.set('hyphenate', self.hyphenate.isChecked()) c.set('remember_current_page', self.opt_remember_current_page.isChecked()) c.set('wheel_flips_pages', self.opt_wheel_flips_pages.isChecked()) diff --git a/src/calibre/gui2/viewer/config.ui b/src/calibre/gui2/viewer/config.ui index dd7019a157..9900ba45d3 100644 --- a/src/calibre/gui2/viewer/config.ui +++ b/src/calibre/gui2/viewer/config.ui @@ -60,7 +60,7 @@ QToolBox::tab:hover { } - 0 + 2 @@ -404,41 +404,67 @@ QToolBox::tab:hover { - - - - Show &clock in full screen mode - - - - + Show reading &position in full 
screen mode - + Show &scrollbar in full screen mode - + &Start viewer in full screen mode - + Show &help message when starting full screen mode + + + + Maximum text height in fullscreen (paged mode): + + + + + + + Show &clock in full screen mode + + + + + + + Disabled + + + px + + + 100 + + + 10000 + + + 25 + + + diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py index 0691a9deb8..130af04424 100644 --- a/src/calibre/gui2/viewer/documentview.py +++ b/src/calibre/gui2/viewer/documentview.py @@ -160,6 +160,7 @@ class Document(QWebPage): # {{{ screen_width = QApplication.desktop().screenGeometry().width() # Leave some space for the scrollbar and some border self.max_fs_width = min(opts.max_fs_width, screen_width-50) + self.max_fs_height = opts.max_fs_height self.fullscreen_clock = opts.fullscreen_clock self.fullscreen_scrollbar = opts.fullscreen_scrollbar self.fullscreen_pos = opts.fullscreen_pos @@ -280,11 +281,16 @@ class Document(QWebPage): # {{{ )) force_fullscreen_layout = bool(getattr(last_loaded_path, 'is_single_page', False)) - f = 'true' if force_fullscreen_layout else 'false' - side_margin = self.javascript('window.paged_display.layout(%s)'%f, typ=int) + self.update_contents_size_for_paged_mode(force_fullscreen_layout) + + def update_contents_size_for_paged_mode(self, force_fullscreen_layout=None): # Setup the contents size to ensure that there is a right most margin. 
# Without this WebKit renders the final column with no margin, as the # columns extend beyond the boundaries (and margin) of body + if force_fullscreen_layout is None: + force_fullscreen_layout = self.javascript('window.paged_display.is_full_screen_layout', typ=bool) + f = 'true' if force_fullscreen_layout else 'false' + side_margin = self.javascript('window.paged_display.layout(%s)'%f, typ=int) mf = self.mainFrame() sz = mf.contentsSize() scroll_width = self.javascript('document.body.scrollWidth', int) @@ -310,7 +316,7 @@ class Document(QWebPage): # {{{ def switch_to_fullscreen_mode(self): self.in_fullscreen_mode = True - self.javascript('full_screen.on(%d, %s)'%(self.max_fs_width, + self.javascript('full_screen.on(%d, %d, %s)'%(self.max_fs_width, self.max_fs_height, 'true' if self.in_paged_mode else 'false')) def switch_to_window_mode(self): @@ -353,6 +359,8 @@ class Document(QWebPage): # {{{ return ans[0] if ans[1] else 0.0 if typ == 'string': return unicode(ans.toString()) + if typ in {bool, 'bool'}: + return ans.toBool() return ans def javaScriptConsoleMessage(self, msg, lineno, msgid): @@ -1103,8 +1111,12 @@ class DocumentView(QWebView): # {{{ def fget(self): return self.zoomFactor() def fset(self, val): + oval = self.zoomFactor() self.setZoomFactor(val) - self.magnification_changed.emit(val) + if val != oval: + if self.document.in_paged_mode: + self.document.update_contents_size_for_paged_mode() + self.magnification_changed.emit(val) return property(fget=fget, fset=fset) def magnify_fonts(self, amount=None): diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py index 8e1b6163b2..494683d88b 100644 --- a/src/calibre/gui2/viewer/main.py +++ b/src/calibre/gui2/viewer/main.py @@ -1119,7 +1119,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer): event.accept() return if self.isFullScreen(): - self.toggle_fullscreen() + self.action_full_screen.trigger() event.accept() return try: diff --git a/src/calibre/gui2/widgets2.py 
b/src/calibre/gui2/widgets2.py index c53ae2e93f..8d801a91bd 100644 --- a/src/calibre/gui2/widgets2.py +++ b/src/calibre/gui2/widgets2.py @@ -11,6 +11,11 @@ from calibre.gui2.widgets import history class HistoryLineEdit2(LineEdit): + max_history_items = None + + def __init__(self, parent=None, completer_widget=None, sort_func=lambda x:None): + LineEdit.__init__(self, parent=parent, completer_widget=completer_widget, sort_func=sort_func) + @property def store_name(self): return 'lineedit_history_'+self._name @@ -31,6 +36,13 @@ class HistoryLineEdit2(LineEdit): except ValueError: pass self.history.insert(0, ct) + if self.max_history_items is not None: + del self.history[self.max_history_items:] history.set(self.store_name, self.history) self.update_items_cache(self.history) + def clear_history(self): + self.history = [] + history.set(self.store_name, self.history) + self.update_items_cache(self.history) + diff --git a/src/calibre/library/catalogs/epub_mobi_builder.py b/src/calibre/library/catalogs/epub_mobi_builder.py index 503a9081ab..736b7db6da 100644 --- a/src/calibre/library/catalogs/epub_mobi_builder.py +++ b/src/calibre/library/catalogs/epub_mobi_builder.py @@ -584,10 +584,11 @@ class CatalogBuilder(object): if field_contents == '': field_contents = None - if (self.db.metadata_for_field(rule['field'])['datatype'] == 'bool' and + # Handle condition where bools_are_tristate is False, + # field is a bool and contents is None, which is displayed as No + if (not self.db.prefs.get('bools_are_tristate') and + self.db.metadata_for_field(rule['field'])['datatype'] == 'bool' and field_contents is None): - # Handle condition where field is a bool and contents is None, - # which is displayed as No field_contents = _('False') if field_contents is not None: @@ -1021,8 +1022,11 @@ class CatalogBuilder(object): data = self.plugin.search_sort_db(self.db, self.opts) data = self.process_exclusions(data) - if self.prefix_rules and self.DEBUG: - self.opts.log.info(" Added 
prefixes:") + if self.DEBUG: + if self.prefix_rules: + self.opts.log.info(" Added prefixes (bools_are_tristate: {0}):".format(self.db.prefs.get('bools_are_tristate'))) + else: + self.opts.log.info(" No added prefixes") # Populate this_title{} from data[{},{}] titles = [] diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py index acb0323df4..c3509f5a23 100644 --- a/src/calibre/library/cli.py +++ b/src/calibre/library/cli.py @@ -19,6 +19,7 @@ from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.book.base import field_from_string from calibre.ebooks.metadata.opf2 import OPFCreator, OPF from calibre.utils.date import isoformat +from calibre.utils.localization import canonicalize_lang FIELDS = set(['title', 'authors', 'author_sort', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', @@ -229,7 +230,7 @@ class DevNull(object): NULL = DevNull() def do_add(db, paths, one_book_per_directory, recurse, add_duplicates, otitle, - oauthors, oisbn, otags, oseries, oseries_index, ocover): + oauthors, oisbn, otags, oseries, oseries_index, ocover, olanguages): orig = sys.stdout #sys.stdout = NULL try: @@ -256,7 +257,7 @@ def do_add(db, paths, one_book_per_directory, recurse, add_duplicates, otitle, mi.title = os.path.splitext(os.path.basename(book))[0] if not mi.authors: mi.authors = [_('Unknown')] - for x in ('title', 'authors', 'isbn', 'tags', 'series'): + for x in ('title', 'authors', 'isbn', 'tags', 'series', 'languages'): val = locals()['o'+x] if val: setattr(mi, x, val) @@ -354,10 +355,12 @@ the directory related options below. 
help=_('Set the series number of the added book(s)')) parser.add_option('-c', '--cover', default=None, help=_('Path to the cover to use for the added book')) + parser.add_option('-l', '--languages', default=None, + help=_('A comma separated list of languages (best to use ISO639 language codes, though some language names may also be recognized)')) return parser -def do_add_empty(db, title, authors, isbn, tags, series, series_index, cover): +def do_add_empty(db, title, authors, isbn, tags, series, series_index, cover, languages): from calibre.ebooks.metadata import MetaInformation mi = MetaInformation(None) if title is not None: @@ -372,6 +375,8 @@ def do_add_empty(db, title, authors, isbn, tags, series, series_index, cover): mi.series, mi.series_index = series, series_index if cover: mi.cover = cover + if languages: + mi.languages = languages book_id = db.import_book(mi, []) write_dirtied(db) prints(_('Added book ids: %s')%book_id) @@ -383,9 +388,11 @@ def command_add(args, dbpath): opts, args = parser.parse_args(sys.argv[:1] + args) aut = string_to_authors(opts.authors) if opts.authors else [] tags = [x.strip() for x in opts.tags.split(',')] if opts.tags else [] + lcodes = [canonicalize_lang(x) for x in (opts.languages or '').split(',')] + lcodes = [x for x in lcodes if x] if opts.empty: do_add_empty(get_db(dbpath, opts), opts.title, aut, opts.isbn, tags, - opts.series, opts.series_index, opts.cover) + opts.series, opts.series_index, opts.cover, lcodes) return 0 if len(args) < 2: parser.print_help() @@ -394,7 +401,7 @@ def command_add(args, dbpath): return 1 do_add(get_db(dbpath, opts), args[1:], opts.one_book_per_directory, opts.recurse, opts.duplicates, opts.title, aut, opts.isbn, - tags, opts.series, opts.series_index, opts.cover) + tags, opts.series, opts.series_index, opts.cover, lcodes) return 0 def do_remove(db, ids): diff --git a/src/calibre/library/server/base.py b/src/calibre/library/server/base.py index e64319e88b..69d187dfa3 100644 --- 
a/src/calibre/library/server/base.py +++ b/src/calibre/library/server/base.py @@ -47,6 +47,10 @@ class DispatchController(object): # {{{ aw = kwargs.pop('android_workaround', False) if route != '/': route = self.prefix + route + if isinstance(route, unicode): + # Apparently the routes package chokes on unicode routes, see + # http://www.mobileread.com/forums/showthread.php?t=235366 + route = route.encode('utf-8') elif self.prefix: self.dispatcher.connect(name+'prefix_extra', self.prefix, self, **kwargs) diff --git a/src/calibre/test_build.py b/src/calibre/test_build.py index 618626883e..fd9a36df61 100644 --- a/src/calibre/test_build.py +++ b/src/calibre/test_build.py @@ -113,10 +113,9 @@ def test_ssl(): print ('SSL OK!') def test_icu(): - from calibre.utils.icu import _icu_not_ok, test_roundtrip - if _icu_not_ok: - raise RuntimeError('ICU module not loaded/valid') - test_roundtrip() + print ('Testing ICU') + from calibre.utils.icu_test import test_build + test_build() print ('ICU OK!') def test_wpd(): diff --git a/src/calibre/utils/config.py b/src/calibre/utils/config.py index 2da7863192..c671dbe826 100644 --- a/src/calibre/utils/config.py +++ b/src/calibre/utils/config.py @@ -204,7 +204,7 @@ class DynamicConfig(dict): def decouple(self, prefix): self.file_path = os.path.join(os.path.dirname(self.file_path), prefix + os.path.basename(self.file_path)) - self.refresh(clear_current=False) + self.refresh() def refresh(self, clear_current=True): d = {} @@ -287,7 +287,7 @@ class XMLConfig(dict): def decouple(self, prefix): self.file_path = os.path.join(os.path.dirname(self.file_path), prefix + os.path.basename(self.file_path)) - self.refresh(clear_current=False) + self.refresh() def refresh(self, clear_current=True): d = {} diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c index db6bdea876..22c9bbb811 100644 --- a/src/calibre/utils/icu.c +++ b/src/calibre/utils/icu.c @@ -1,5 +1,9 @@ #include "icu_calibre_utils.h" +#define UPPER_CASE 0 +#define LOWER_CASE 1 
+#define TITLE_CASE 2 + static PyObject* uchar_to_unicode(const UChar *src, int32_t len) { wchar_t *buf = NULL; PyObject *ans = NULL; @@ -66,20 +70,16 @@ icu_Collator_display_name(icu_Collator *self, void *closure) { const char *loc = NULL; UErrorCode status = U_ZERO_ERROR; UChar dname[400]; - char buf[100]; + int32_t sz = 0; loc = ucol_getLocaleByType(self->collator, ULOC_ACTUAL_LOCALE, &status); - if (loc == NULL || U_FAILURE(status)) { + if (loc == NULL) { PyErr_SetString(PyExc_Exception, "Failed to get actual locale"); return NULL; } - ucol_getDisplayName(loc, "en", dname, 100, &status); - if (U_FAILURE(status)) return PyErr_NoMemory(); + sz = ucol_getDisplayName(loc, "en", dname, sizeof(dname), &status); + if (U_FAILURE(status)) {PyErr_SetString(PyExc_ValueError, u_errorName(status)); return NULL; } - u_strToUTF8(buf, 100, NULL, dname, -1, &status); - if (U_FAILURE(status)) { - PyErr_SetString(PyExc_Exception, "Failed to convert dname to UTF-8"); return NULL; - } - return Py_BuildValue("s", buf); + return icu_to_python(dname, sz); } // }}} @@ -131,50 +131,38 @@ icu_Collator_actual_locale(icu_Collator *self, void *closure) { // }}} +// Collator.capsule {{{ +static PyObject * +icu_Collator_capsule(icu_Collator *self, void *closure) { + return PyCapsule_New(self->collator, NULL, NULL); +} // }}} + // Collator.sort_key {{{ static PyObject * icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) { - char *input; - int32_t sz; - UChar *buf; - uint8_t *buf2; - PyObject *ans; - int32_t key_size; - UErrorCode status = U_ZERO_ERROR; + int32_t sz = 0, key_size = 0, bsz = 0; + UChar *buf = NULL; + uint8_t *buf2 = NULL; + PyObject *ans = NULL, *input = NULL; - if (!PyArg_ParseTuple(args, "es", "UTF-8", &input)) return NULL; + if (!PyArg_ParseTuple(args, "O", &input)) return NULL; + buf = python_to_icu(input, &sz, 1); + if (buf == NULL) return NULL; - sz = (int32_t)strlen(input); + bsz = 7 * sz + 1; + buf2 = (uint8_t*)calloc(bsz, sizeof(uint8_t)); + if 
(buf2 == NULL) { PyErr_NoMemory(); goto end; } + key_size = ucol_getSortKey(self->collator, buf, sz, buf2, bsz); + if (key_size > bsz) { + buf2 = realloc(buf2, (key_size + 1) * sizeof(uint8_t)); + if (buf2 == NULL) { PyErr_NoMemory(); goto end; } + key_size = ucol_getSortKey(self->collator, buf, sz, buf2, key_size + 1); + } + ans = PyBytes_FromStringAndSize((char*)buf2, key_size); - buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar)); - - if (buf == NULL) return PyErr_NoMemory(); - - u_strFromUTF8(buf, sz*4 + 1, &key_size, input, sz, &status); - PyMem_Free(input); - - if (U_SUCCESS(status)) { - buf2 = (uint8_t*)calloc(7*sz+1, sizeof(uint8_t)); - if (buf2 == NULL) return PyErr_NoMemory(); - - key_size = ucol_getSortKey(self->collator, buf, -1, buf2, 7*sz+1); - - if (key_size == 0) { - ans = PyBytes_FromString(""); - } else { - if (key_size >= 7*sz+1) { - free(buf2); - buf2 = (uint8_t*)calloc(key_size+1, sizeof(uint8_t)); - if (buf2 == NULL) return PyErr_NoMemory(); - ucol_getSortKey(self->collator, buf, -1, buf2, key_size+1); - } - ans = PyBytes_FromString((char *)buf2); - } - free(buf2); - } else ans = PyBytes_FromString(""); - - free(buf); - if (ans == NULL) return PyErr_NoMemory(); +end: + if (buf != NULL) free(buf); + if (buf2 != NULL) free(buf2); return ans; } // }}} @@ -182,86 +170,106 @@ icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) { // Collator.strcmp {{{ static PyObject * icu_Collator_strcmp(icu_Collator *self, PyObject *args, PyObject *kwargs) { - char *a_, *b_; - int32_t asz, bsz; - UChar *a, *b; - UErrorCode status = U_ZERO_ERROR; + PyObject *a_ = NULL, *b_ = NULL; + int32_t asz = 0, bsz = 0; + UChar *a = NULL, *b = NULL; UCollationResult res = UCOL_EQUAL; - if (!PyArg_ParseTuple(args, "eses", "UTF-8", &a_, "UTF-8", &b_)) return NULL; - - asz = (int32_t)strlen(a_); bsz = (int32_t)strlen(b_); + if (!PyArg_ParseTuple(args, "OO", &a_, &b_)) return NULL; - a = (UChar*)calloc(asz*4 + 1, sizeof(UChar)); - b = (UChar*)calloc(bsz*4 + 1, 
sizeof(UChar)); + a = python_to_icu(a_, &asz, 1); + if (a == NULL) goto end; + b = python_to_icu(b_, &bsz, 1); + if (b == NULL) goto end; + res = ucol_strcoll(self->collator, a, asz, b, bsz); +end: + if (a != NULL) free(a); if (b != NULL) free(b); - - if (a == NULL || b == NULL) return PyErr_NoMemory(); - - u_strFromUTF8(a, asz*4 + 1, NULL, a_, asz, &status); - u_strFromUTF8(b, bsz*4 + 1, NULL, b_, bsz, &status); - PyMem_Free(a_); PyMem_Free(b_); - - if (U_SUCCESS(status)) - res = ucol_strcoll(self->collator, a, -1, b, -1); - - free(a); free(b); - - return Py_BuildValue("i", res); + return (PyErr_Occurred()) ? NULL : Py_BuildValue("i", res); } // }}} // Collator.find {{{ static PyObject * icu_Collator_find(icu_Collator *self, PyObject *args, PyObject *kwargs) { - PyObject *a_, *b_; - int32_t asz, bsz; - UChar *a, *b; - wchar_t *aw, *bw; +#if PY_VERSION_HEX >= 0x03030000 +#error Not implemented for python >= 3.3 +#endif + PyObject *a_ = NULL, *b_ = NULL; + UChar *a = NULL, *b = NULL; + int32_t asz = 0, bsz = 0, pos = -1, length = -1; UErrorCode status = U_ZERO_ERROR; UStringSearch *search = NULL; - int32_t pos = -1, length = -1; - if (!PyArg_ParseTuple(args, "UU", &a_, &b_)) return NULL; - asz = (int32_t)PyUnicode_GetSize(a_); bsz = (int32_t)PyUnicode_GetSize(b_); - - a = (UChar*)calloc(asz*4 + 2, sizeof(UChar)); - b = (UChar*)calloc(bsz*4 + 2, sizeof(UChar)); - aw = (wchar_t*)calloc(asz*4 + 2, sizeof(wchar_t)); - bw = (wchar_t*)calloc(bsz*4 + 2, sizeof(wchar_t)); + if (!PyArg_ParseTuple(args, "OO", &a_, &b_)) return NULL; - if (a == NULL || b == NULL || aw == NULL || bw == NULL) return PyErr_NoMemory(); - - PyUnicode_AsWideChar((PyUnicodeObject*)a_, aw, asz*4+1); - PyUnicode_AsWideChar((PyUnicodeObject*)b_, bw, bsz*4+1); - u_strFromWCS(a, asz*4 + 1, NULL, aw, -1, &status); - u_strFromWCS(b, bsz*4 + 1, NULL, bw, -1, &status); + a = python_to_icu(a_, &asz, 1); + if (a == NULL) goto end; + b = python_to_icu(b_, &bsz, 1); + if (b == NULL) goto end; + search = 
usearch_openFromCollator(a, asz, b, bsz, self->collator, NULL, &status); if (U_SUCCESS(status)) { - search = usearch_openFromCollator(a, -1, b, -1, self->collator, NULL, &status); - if (U_SUCCESS(status)) { - pos = usearch_first(search, &status); - if (pos != USEARCH_DONE) - length = usearch_getMatchedLength(search); - else - pos = -1; - } - if (search != NULL) usearch_close(search); + pos = usearch_first(search, &status); + if (pos != USEARCH_DONE) { + length = usearch_getMatchedLength(search); +#ifdef Py_UNICODE_WIDE + // We have to return number of unicode characters since the string + // could contain surrogate pairs which are represented as a single + // character in python wide builds + length = u_countChar32(b + pos, length); + pos = u_countChar32(b, pos); +#endif + } else pos = -1; } +end: + if (search != NULL) usearch_close(search); + if (a != NULL) free(a); + if (b != NULL) free(b); - free(a); free(b); free(aw); free(bw); + return (PyErr_Occurred()) ? NULL : Py_BuildValue("ii", pos, length); +} // }}} - return Py_BuildValue("ii", pos, length); +// Collator.contains {{{ +static PyObject * +icu_Collator_contains(icu_Collator *self, PyObject *args, PyObject *kwargs) { + PyObject *a_ = NULL, *b_ = NULL; + UChar *a = NULL, *b = NULL; + int32_t asz = 0, bsz = 0, pos = -1; + uint8_t found = 0; + UErrorCode status = U_ZERO_ERROR; + UStringSearch *search = NULL; + + if (!PyArg_ParseTuple(args, "OO", &a_, &b_)) return NULL; + + a = python_to_icu(a_, &asz, 1); + if (a == NULL) goto end; + if (asz == 0) { found = TRUE; goto end; } + b = python_to_icu(b_, &bsz, 1); + if (b == NULL) goto end; + + search = usearch_openFromCollator(a, asz, b, bsz, self->collator, NULL, &status); + if (U_SUCCESS(status)) { + pos = usearch_first(search, &status); + if (pos != USEARCH_DONE) found = TRUE; + } +end: + if (search != NULL) usearch_close(search); + if (a != NULL) free(a); + if (b != NULL) free(b); + + if (PyErr_Occurred()) return NULL; + if (found) Py_RETURN_TRUE; + 
Py_RETURN_FALSE; } // }}} // Collator.contractions {{{ static PyObject * icu_Collator_contractions(icu_Collator *self, PyObject *args, PyObject *kwargs) { UErrorCode status = U_ZERO_ERROR; - UChar *str; + UChar *str = NULL; UChar32 start=0, end=0; - int32_t count = 0, len = 0, dlen = 0, i; + int32_t count = 0, len = 0, i; PyObject *ans = Py_None, *pbuf; - wchar_t *buf; if (self->contractions == NULL) { self->contractions = uset_open(1, 0); @@ -269,107 +277,112 @@ icu_Collator_contractions(icu_Collator *self, PyObject *args, PyObject *kwargs) self->contractions = ucol_getTailoredSet(self->collator, &status); } status = U_ZERO_ERROR; + count = uset_getItemCount(self->contractions); str = (UChar*)calloc(100, sizeof(UChar)); - buf = (wchar_t*)calloc(4*100+2, sizeof(wchar_t)); - if (str == NULL || buf == NULL) return PyErr_NoMemory(); - - count = uset_getItemCount(self->contractions); + if (str == NULL) { PyErr_NoMemory(); goto end; } ans = PyTuple_New(count); - if (ans != NULL) { - for (i = 0; i < count; i++) { - len = uset_getItem(self->contractions, i, &start, &end, str, 1000, &status); - if (len >= 2) { - // We have a string - status = U_ZERO_ERROR; - u_strToWCS(buf, 4*100 + 1, &dlen, str, len, &status); - pbuf = PyUnicode_FromWideChar(buf, dlen); - if (pbuf == NULL) return PyErr_NoMemory(); - PyTuple_SetItem(ans, i, pbuf); - } else { - // Ranges dont make sense for contractions, ignore them - PyTuple_SetItem(ans, i, Py_None); - } + if (ans == NULL) { goto end; } + + for (i = 0; i < count; i++) { + len = uset_getItem(self->contractions, i, &start, &end, str, 1000, &status); + if (len >= 2) { + // We have a string + status = U_ZERO_ERROR; + pbuf = icu_to_python(str, len); + if (pbuf == NULL) { Py_DECREF(ans); ans = NULL; goto end; } + PyTuple_SetItem(ans, i, pbuf); + } else { + // Ranges dont make sense for contractions, ignore them + PyTuple_SetItem(ans, i, Py_None); Py_INCREF(Py_None); } } - free(str); free(buf); +end: + if (str != NULL) free(str); - return 
Py_BuildValue("O", ans); + return ans; } // }}} // Collator.startswith {{{ static PyObject * icu_Collator_startswith(icu_Collator *self, PyObject *args, PyObject *kwargs) { - PyObject *a_, *b_; - int32_t asz, bsz; - int32_t actual_a, actual_b; - UChar *a, *b; - wchar_t *aw, *bw; - UErrorCode status = U_ZERO_ERROR; - int ans = 0; + PyObject *a_ = NULL, *b_ = NULL; + int32_t asz = 0, bsz = 0; + UChar *a = NULL, *b = NULL; + uint8_t ans = 0; - if (!PyArg_ParseTuple(args, "UU", &a_, &b_)) return NULL; - asz = (int32_t)PyUnicode_GetSize(a_); bsz = (int32_t)PyUnicode_GetSize(b_); - if (asz < bsz) Py_RETURN_FALSE; - if (bsz == 0) Py_RETURN_TRUE; + if (!PyArg_ParseTuple(args, "OO", &a_, &b_)) return NULL; + + a = python_to_icu(a_, &asz, 1); + if (a == NULL) goto end; + b = python_to_icu(b_, &bsz, 1); + if (b == NULL) goto end; + + if (asz < bsz) goto end; + if (bsz == 0) { ans = 1; goto end; } - a = (UChar*)calloc(asz*4 + 2, sizeof(UChar)); - b = (UChar*)calloc(bsz*4 + 2, sizeof(UChar)); - aw = (wchar_t*)calloc(asz*4 + 2, sizeof(wchar_t)); - bw = (wchar_t*)calloc(bsz*4 + 2, sizeof(wchar_t)); + ans = ucol_equal(self->collator, a, bsz, b, bsz); - if (a == NULL || b == NULL || aw == NULL || bw == NULL) return PyErr_NoMemory(); +end: + if (a != NULL) free(a); + if (b != NULL) free(b); - actual_a = (int32_t)PyUnicode_AsWideChar((PyUnicodeObject*)a_, aw, asz*4+1); - actual_b = (int32_t)PyUnicode_AsWideChar((PyUnicodeObject*)b_, bw, bsz*4+1); - if (actual_a > -1 && actual_b > -1) { - u_strFromWCS(a, asz*4 + 1, &actual_a, aw, -1, &status); - u_strFromWCS(b, bsz*4 + 1, &actual_b, bw, -1, &status); - - if (U_SUCCESS(status) && ucol_equal(self->collator, a, actual_b, b, actual_b)) - ans = 1; - } - - free(a); free(b); free(aw); free(bw); - if (ans) Py_RETURN_TRUE; + if (PyErr_Occurred()) return NULL; + if (ans) { Py_RETURN_TRUE; } Py_RETURN_FALSE; } // }}} -// Collator.startswith {{{ +// Collator.collation_order {{{ static PyObject * icu_Collator_collation_order(icu_Collator *self, 
PyObject *args, PyObject *kwargs) { - PyObject *a_; - int32_t asz; - int32_t actual_a; - UChar *a; - wchar_t *aw; + PyObject *a_ = NULL; + int32_t asz = 0; + UChar *a = NULL; UErrorCode status = U_ZERO_ERROR; UCollationElements *iter = NULL; int order = 0, len = -1; - if (!PyArg_ParseTuple(args, "U", &a_)) return NULL; - asz = (int32_t)PyUnicode_GetSize(a_); - - a = (UChar*)calloc(asz*4 + 2, sizeof(UChar)); - aw = (wchar_t*)calloc(asz*4 + 2, sizeof(wchar_t)); + if (!PyArg_ParseTuple(args, "O", &a_)) return NULL; - if (a == NULL || aw == NULL ) return PyErr_NoMemory(); + a = python_to_icu(a_, &asz, 1); + if (a == NULL) goto end; - actual_a = (int32_t)PyUnicode_AsWideChar((PyUnicodeObject*)a_, aw, asz*4+1); - if (actual_a > -1) { - u_strFromWCS(a, asz*4 + 1, &actual_a, aw, -1, &status); - iter = ucol_openElements(self->collator, a, actual_a, &status); - if (iter != NULL && U_SUCCESS(status)) { - order = ucol_next(iter, &status); - len = ucol_getOffset(iter); - ucol_closeElements(iter); iter = NULL; - } - } - - free(a); free(aw); + iter = ucol_openElements(self->collator, a, asz, &status); + if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); goto end; } + order = ucol_next(iter, &status); + len = ucol_getOffset(iter); +end: + if (iter != NULL) ucol_closeElements(iter); iter = NULL; + if (a != NULL) free(a); + if (PyErr_Occurred()) return NULL; return Py_BuildValue("ii", order, len); } // }}} +// Collator.upper_first {{{ +static PyObject * +icu_Collator_get_upper_first(icu_Collator *self, void *closure) { + UErrorCode status = U_ZERO_ERROR; + UColAttributeValue val; + + val = ucol_getAttribute(self->collator, UCOL_CASE_FIRST, &status); + if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); return NULL; } + + if (val == UCOL_OFF) { Py_RETURN_NONE; } + if (val) { + Py_RETURN_TRUE; + } + Py_RETURN_FALSE; +} + +static int +icu_Collator_set_upper_first(icu_Collator *self, PyObject *val, void *closure) { + UErrorCode 
status = U_ZERO_ERROR; + ucol_setAttribute(self->collator, UCOL_CASE_FIRST, (val == Py_None) ? UCOL_OFF : ((PyObject_IsTrue(val)) ? UCOL_UPPER_FIRST : UCOL_LOWER_FIRST), &status); + if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); return -1; } + return 0; +} +// }}} + static PyObject* icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs); @@ -386,6 +399,10 @@ static PyMethodDef icu_Collator_methods[] = { "find(pattern, source) -> returns the position and length of the first occurrence of pattern in source. Returns (-1, -1) if not found." }, + {"contains", (PyCFunction)icu_Collator_contains, METH_VARARGS, + "contains(pattern, source) -> return True iff the pattern was found in the source." + }, + {"contractions", (PyCFunction)icu_Collator_contractions, METH_VARARGS, "contractions() -> returns the contractions defined for this collator." }, @@ -411,6 +428,11 @@ static PyGetSetDef icu_Collator_getsetters[] = { (char *)"Actual locale used by this collator.", NULL}, + {(char *)"capsule", + (getter)icu_Collator_capsule, NULL, + (char *)"A capsule enclosing the pointer to the ICU collator struct", + NULL}, + {(char *)"display_name", (getter)icu_Collator_display_name, NULL, (char *)"Display name of this collator in English. The name reflects the actual data source used.", @@ -421,6 +443,11 @@ static PyGetSetDef icu_Collator_getsetters[] = { (char *)"The strength of this collator.", NULL}, + {(char *)"upper_first", + (getter)icu_Collator_get_upper_first, (setter)icu_Collator_set_upper_first, + (char *)"Whether this collator should always put upper case letters before lower case. Values are: None - means use the tertiary strength of the letters. True - Always sort upper case before lower case. 
False - Always sort lower case before upper case.", + NULL}, + {(char *)"numeric", (getter)icu_Collator_get_numeric, (setter)icu_Collator_set_numeric, (char *)"If True the collator sorts contiguous digits as numbers rather than strings, so 2 will sort before 10.", @@ -502,139 +529,45 @@ icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs) // }}} -// upper {{{ -static PyObject * -icu_upper(PyObject *self, PyObject *args) { - char *input, *ans, *buf3 = NULL; - const char *loc; - int32_t sz; - UChar *buf, *buf2; - PyObject *ret; +// change_case {{{ + +static PyObject* icu_change_case(PyObject *self, PyObject *args) { + char *locale = NULL; + PyObject *input = NULL, *result = NULL; + int which = UPPER_CASE; UErrorCode status = U_ZERO_ERROR; - + UChar *input_buf = NULL, *output_buf = NULL; + int32_t sz = 0; - if (!PyArg_ParseTuple(args, "ses", &loc, "UTF-8", &input)) return NULL; - - sz = (int32_t)strlen(input); - - buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar)); - buf2 = (UChar*)calloc(sz*8 + 1, sizeof(UChar)); - - - if (buf == NULL || buf2 == NULL) return PyErr_NoMemory(); - - u_strFromUTF8(buf, sz*4, NULL, input, sz, &status); - u_strToUpper(buf2, sz*8, buf, -1, loc, &status); - - ans = input; - sz = u_strlen(buf2); - free(buf); - - if (U_SUCCESS(status) && sz > 0) { - buf3 = (char*)calloc(sz*5+1, sizeof(char)); - if (buf3 == NULL) return PyErr_NoMemory(); - u_strToUTF8(buf3, sz*5, NULL, buf2, -1, &status); - if (U_SUCCESS(status)) ans = buf3; + if (!PyArg_ParseTuple(args, "Oiz", &input, &which, &locale)) return NULL; + if (locale == NULL) { + PyErr_SetString(PyExc_NotImplementedError, "You must specify a locale"); // We deliberately use NotImplementedError so that this error can be unambiguously identified + return NULL; } - ret = PyUnicode_DecodeUTF8(ans, strlen(ans), "replace"); - if (ret == NULL) return PyErr_NoMemory(); + input_buf = python_to_icu(input, &sz, 1); + if (input_buf == NULL) goto end; + output_buf = (UChar*) calloc(3 * sz, 
sizeof(UChar)); + if (output_buf == NULL) { PyErr_NoMemory(); goto end; } - free(buf2); - if (buf3 != NULL) free(buf3); - PyMem_Free(input); - - return ret; -} // }}} - -// lower {{{ -static PyObject * -icu_lower(PyObject *self, PyObject *args) { - char *input, *ans, *buf3 = NULL; - const char *loc; - int32_t sz; - UChar *buf, *buf2; - PyObject *ret; - UErrorCode status = U_ZERO_ERROR; - - - if (!PyArg_ParseTuple(args, "ses", &loc, "UTF-8", &input)) return NULL; - - sz = (int32_t)strlen(input); - - buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar)); - buf2 = (UChar*)calloc(sz*8 + 1, sizeof(UChar)); - - - if (buf == NULL || buf2 == NULL) return PyErr_NoMemory(); - - u_strFromUTF8(buf, sz*4, NULL, input, sz, &status); - u_strToLower(buf2, sz*8, buf, -1, loc, &status); - - ans = input; - sz = u_strlen(buf2); - free(buf); - - if (U_SUCCESS(status) && sz > 0) { - buf3 = (char*)calloc(sz*5+1, sizeof(char)); - if (buf3 == NULL) return PyErr_NoMemory(); - u_strToUTF8(buf3, sz*5, NULL, buf2, -1, &status); - if (U_SUCCESS(status)) ans = buf3; + switch (which) { + case TITLE_CASE: + sz = u_strToTitle(output_buf, 3 * sz, input_buf, sz, NULL, locale, &status); + break; + case UPPER_CASE: + sz = u_strToUpper(output_buf, 3 * sz, input_buf, sz, locale, &status); + break; + default: + sz = u_strToLower(output_buf, 3 * sz, input_buf, sz, locale, &status); } + if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); goto end; } + result = icu_to_python(output_buf, sz); - ret = PyUnicode_DecodeUTF8(ans, strlen(ans), "replace"); - if (ret == NULL) return PyErr_NoMemory(); +end: + if (input_buf != NULL) free(input_buf); + if (output_buf != NULL) free(output_buf); + return result; - free(buf2); - if (buf3 != NULL) free(buf3); - PyMem_Free(input); - - return ret; -} // }}} - -// title {{{ -static PyObject * -icu_title(PyObject *self, PyObject *args) { - char *input, *ans, *buf3 = NULL; - const char *loc; - int32_t sz; - UChar *buf, *buf2; - PyObject *ret; - UErrorCode 
status = U_ZERO_ERROR; - - - if (!PyArg_ParseTuple(args, "ses", &loc, "UTF-8", &input)) return NULL; - - sz = (int32_t)strlen(input); - - buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar)); - buf2 = (UChar*)calloc(sz*8 + 1, sizeof(UChar)); - - - if (buf == NULL || buf2 == NULL) return PyErr_NoMemory(); - - u_strFromUTF8(buf, sz*4, NULL, input, sz, &status); - u_strToTitle(buf2, sz*8, buf, -1, NULL, loc, &status); - - ans = input; - sz = u_strlen(buf2); - free(buf); - - if (U_SUCCESS(status) && sz > 0) { - buf3 = (char*)calloc(sz*5+1, sizeof(char)); - if (buf3 == NULL) return PyErr_NoMemory(); - u_strToUTF8(buf3, sz*5, NULL, buf2, -1, &status); - if (U_SUCCESS(status)) ans = buf3; - } - - ret = PyUnicode_DecodeUTF8(ans, strlen(ans), "replace"); - if (ret == NULL) return PyErr_NoMemory(); - - free(buf2); - if (buf3 != NULL) free(buf3); - PyMem_Free(input); - - return ret; } // }}} // set_default_encoding {{{ @@ -651,7 +584,7 @@ icu_set_default_encoding(PyObject *self, PyObject *args) { } // }}} -// set_default_encoding {{{ +// set_filesystem_encoding {{{ static PyObject * icu_set_filesystem_encoding(PyObject *self, PyObject *args) { char *encoding; @@ -663,7 +596,7 @@ icu_set_filesystem_encoding(PyObject *self, PyObject *args) { } // }}} -// set_default_encoding {{{ +// get_available_transliterators {{{ static PyObject * icu_get_available_transliterators(PyObject *self, PyObject *args) { PyObject *ans, *l; @@ -824,16 +757,8 @@ icu_roundtrip(PyObject *self, PyObject *args) { // Module initialization {{{ static PyMethodDef icu_methods[] = { - {"upper", icu_upper, METH_VARARGS, - "upper(locale, unicode object) -> upper cased unicode object using locale rules." - }, - - {"lower", icu_lower, METH_VARARGS, - "lower(locale, unicode object) -> lower cased unicode object using locale rules." - }, - - {"title", icu_title, METH_VARARGS, - "title(locale, unicode object) -> Title cased unicode object using locale rules." 
+ {"change_case", icu_change_case, METH_VARARGS, + "change_case(unicode object, which, locale) -> change case to one of UPPER_CASE, LOWER_CASE, TITLE_CASE" }, {"set_default_encoding", icu_set_default_encoding, METH_VARARGS, @@ -935,5 +860,9 @@ initicu(void) ADDUCONST(UNORM_NFKC); ADDUCONST(UNORM_FCD); + ADDUCONST(UPPER_CASE); + ADDUCONST(LOWER_CASE); + ADDUCONST(TITLE_CASE); + } // }}} diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index 39256f6fd6..0fa9262de9 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -1,5 +1,7 @@ #!/usr/bin/env python -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' @@ -7,535 +9,251 @@ __docformat__ = 'restructuredtext en' # Setup code {{{ import sys -from functools import partial from calibre.constants import plugins from calibre.utils.config_base import tweaks -_icu = _collator = _primary_collator = _sort_collator = _numeric_collator = None -_locale = None +_locale = _collator = _primary_collator = _sort_collator = _numeric_collator = _case_sensitive_collator = None _none = u'' _none2 = b'' +_cmap = {} -def get_locale(): - global _locale - if _locale is None: - from calibre.utils.localization import get_lang - if tweaks['locale_for_sorting']: - _locale = tweaks['locale_for_sorting'] - else: - _locale = get_lang() - return _locale +_icu, err = plugins['icu'] +if _icu is None: + raise RuntimeError('Failed to load icu with error: %s' % err) +del err +icu_unicode_version = getattr(_icu, 'unicode_version', None) +_nmodes = {m:getattr(_icu, 'UNORM_'+m, None) for m in ('NFC', 'NFD', 'NFKC', 'NFKD', 'NONE', 'DEFAULT', 'FCD')} -def load_icu(): - global _icu - if _icu is None: - _icu = plugins['icu'][0] - if _icu is None: - print 'Loading ICU failed with: ', plugins['icu'][1] - else: - if not getattr(_icu, 'ok', False): - print 'icu 
not ok' - _icu = None - return _icu +try: + senc = sys.getdefaultencoding() + if not senc or senc.lower() == b'ascii': + _icu.set_default_encoding(b'utf-8') + del senc +except: + import traceback + traceback.print_exc() -def load_collator(): - 'The default collator for most locales takes both case and accented letters into account' - global _collator +try: + fenc = sys.getfilesystemencoding() + if not fenc or fenc.lower() == b'ascii': + _icu.set_filesystem_encoding(b'utf-8') + del fenc +except: + import traceback + traceback.print_exc() + +def collator(): + global _collator, _locale if _collator is None: - icu = load_icu() - if icu is not None: - _collator = icu.Collator(get_locale()) + if _locale is None: + from calibre.utils.localization import get_lang + if tweaks['locale_for_sorting']: + _locale = tweaks['locale_for_sorting'] + else: + _locale = get_lang() + try: + _collator = _icu.Collator(_locale) + except Exception as e: + print ('Failed to load collator for locale: %r with error %r, using English' % (_locale, e)) + _collator = _icu.Collator('en') return _collator +def change_locale(locale=None): + global _locale, _collator, _primary_collator, _sort_collator, _numeric_collator, _case_sensitive_collator + _collator = _primary_collator = _sort_collator = _numeric_collator = _case_sensitive_collator = None + _locale = locale + def primary_collator(): 'Ignores case differences and accented characters' global _primary_collator if _primary_collator is None: - _primary_collator = _collator.clone() + _primary_collator = collator().clone() _primary_collator.strength = _icu.UCOL_PRIMARY return _primary_collator def sort_collator(): - 'Ignores case differences and recognizes numbers in strings' + 'Ignores case differences and recognizes numbers in strings (if the tweak is set)' global _sort_collator if _sort_collator is None: - _sort_collator = _collator.clone() + _sort_collator = collator().clone() _sort_collator.strength = _icu.UCOL_SECONDARY - if 
tweaks['numeric_collation']: - try: - _sort_collator.numeric = True - except AttributeError: - pass + _sort_collator.numeric = tweaks['numeric_collation'] return _sort_collator -def py_sort_key(obj): - if not obj: - return _none - return obj.lower() - -def icu_sort_key(collator, obj): - if not obj: - return _none2 - try: - try: - return _sort_collator.sort_key(obj) - except AttributeError: - return sort_collator().sort_key(obj) - except TypeError: - if isinstance(obj, unicode): - obj = obj.replace(u'\0', u'') - else: - obj = obj.replace(b'\0', b'') - return _sort_collator.sort_key(obj) - def numeric_collator(): + 'Uses natural sorting for numbers inside strings so something2 will sort before something10' global _numeric_collator - _numeric_collator = _collator.clone() - _numeric_collator.strength = _icu.UCOL_SECONDARY - _numeric_collator.numeric = True + if _numeric_collator is None: + _numeric_collator = collator().clone() + _numeric_collator.strength = _icu.UCOL_SECONDARY + _numeric_collator.numeric = True return _numeric_collator -def numeric_sort_key(obj): - 'Uses natural sorting for numbers inside strings so something2 will sort before something10' - if not obj: - return _none2 +def case_sensitive_collator(): + 'Always sorts upper case letter before lower case' + global _case_sensitive_collator + if _case_sensitive_collator is None: + _case_sensitive_collator = collator().clone() + _case_sensitive_collator.numeric = sort_collator().numeric + _case_sensitive_collator.upper_first = True + return _case_sensitive_collator + +# Templates that will be used to generate various concrete +# function implementations based on different collators, to allow lazy loading +# of collators, with maximum runtime performance + +_sort_key_template = ''' +def {name}(obj): try: try: - return _numeric_collator.sort_key(obj) + return {collator}.{func}(obj) except AttributeError: - return numeric_collator().sort_key(obj) + return {collator_func}().{func}(obj) except TypeError: - if 
isinstance(obj, unicode): - obj = obj.replace(u'\0', u'') - else: - obj = obj.replace(b'\0', b'') - return _numeric_collator.sort_key(obj) + if isinstance(obj, bytes): + try: + obj = obj.decode(sys.getdefaultencoding()) + except ValueError: + return obj + return {collator}.{func}(obj) + return b'' +''' -def icu_change_case(upper, locale, obj): - func = _icu.upper if upper else _icu.lower +_strcmp_template = ''' +def {name}(a, b): try: - return func(locale, obj) + try: + return {collator}.{func}(a, b) + except AttributeError: + return {collator_func}().{func}(a, b) except TypeError: - if isinstance(obj, unicode): - obj = obj.replace(u'\0', u'') - else: - obj = obj.replace(b'\0', b'') - return func(locale, obj) + if isinstance(a, bytes): + try: + a = a.decode(sys.getdefaultencoding()) + except ValueError: + return cmp(a, b) + elif a is None: + a = u'' + if isinstance(b, bytes): + try: + b = b.decode(sys.getdefaultencoding()) + except ValueError: + return cmp(a, b) + elif b is None: + b = u'' + return {collator}.{func}(a, b) +''' -def py_find(pattern, source): - pos = source.find(pattern) - if pos > -1: - return pos, len(pattern) - return -1, -1 +_change_case_template = ''' +def {name}(x): + try: + try: + return _icu.change_case(x, _icu.{which}, _locale) + except NotImplementedError: + collator() # sets _locale + return _icu.change_case(x, _icu.{which}, _locale) + except TypeError: + if isinstance(x, bytes): + try: + x = x.decode(sys.getdefaultencoding()) + except ValueError: + return x + return _icu.change_case(x, _icu.{which}, _locale) + raise +''' + +def _make_func(template, name, **kwargs): + l = globals() + kwargs['name'] = name + kwargs['func'] = kwargs.get('func', 'sort_key') + exec template.format(**kwargs) in l + return l[name] + + +# }}} + +################# The string functions ######################################## +sort_key = _make_func(_sort_key_template, 'sort_key', collator='_sort_collator', collator_func='sort_collator') + +numeric_sort_key = 
_make_func(_sort_key_template, 'numeric_sort_key', collator='_numeric_collator', collator_func='numeric_collator') + +primary_sort_key = _make_func(_sort_key_template, 'primary_sort_key', collator='_primary_collator', collator_func='primary_collator') + +case_sensitive_sort_key = _make_func(_sort_key_template, 'case_sensitive_sort_key', + collator='_case_sensitive_collator', collator_func='case_sensitive_collator') + +collation_order = _make_func(_sort_key_template, 'collation_order', collator='_sort_collator', collator_func='sort_collator', func='collation_order') + +strcmp = _make_func(_strcmp_template, 'strcmp', collator='_sort_collator', collator_func='sort_collator', func='strcmp') + +case_sensitive_strcmp = _make_func( + _strcmp_template, 'case_sensitive_strcmp', collator='_case_sensitive_collator', collator_func='case_sensitive_collator', func='strcmp') + +primary_strcmp = _make_func(_strcmp_template, 'primary_strcmp', collator='_primary_collator', collator_func='primary_collator', func='strcmp') + +upper = _make_func(_change_case_template, 'upper', which='UPPER_CASE') + +lower = _make_func(_change_case_template, 'lower', which='LOWER_CASE') + +title_case = _make_func(_change_case_template, 'title_case', which='TITLE_CASE') + +def capitalize(x): + try: + return upper(x[0]) + lower(x[1:]) + except (IndexError, TypeError, AttributeError): + return x + +find = _make_func(_strcmp_template, 'find', collator='_collator', collator_func='collator', func='find') + +primary_find = _make_func(_strcmp_template, 'primary_find', collator='_primary_collator', collator_func='primary_collator', func='find') + +contains = _make_func(_strcmp_template, 'contains', collator='_collator', collator_func='collator', func='contains') + +primary_contains = _make_func(_strcmp_template, 'primary_contains', collator='_primary_collator', collator_func='primary_collator', func='contains') + +startswith = _make_func(_strcmp_template, 'startswith', collator='_collator', 
collator_func='collator', func='startswith') + +primary_startswith = _make_func(_strcmp_template, 'primary_startswith', collator='_primary_collator', collator_func='primary_collator', func='startswith') + +safe_chr = _icu.chr def character_name(string): try: - try: - return _icu.character_name(unicode(string)) or None - except AttributeError: - import unicodedata - return unicodedata.name(unicode(string)[0], None) + return _icu.character_name(unicode(string)) or None except (TypeError, ValueError, KeyError): pass def character_name_from_code(code): try: - try: - return _icu.character_name_from_code(code) or '' - except AttributeError: - import unicodedata - return unicodedata.name(py_safe_chr(code), '') + return _icu.character_name_from_code(code) or '' except (TypeError, ValueError, KeyError): return '' -if sys.maxunicode >= 0x10ffff: - try: - py_safe_chr = unichr - except NameError: - py_safe_chr = chr -else: - def py_safe_chr(i): - # Narrow builds of python cannot represent code point > 0xffff as a - # single character, so we need our own implementation of unichr - # that returns them as a surrogate pair - return (b"\U%s" % (hex(i)[2:].zfill(8))).decode('unicode-escape') - -def safe_chr(code): - try: - return _icu.chr(code) - except AttributeError: - return py_safe_chr(code) - def normalize(text, mode='NFC'): # This is very slightly slower than using unicodedata.normalize, so stick with # that unless you have very good reasons not too. Also, it's speed # decreases on wide python builds, where conversion to/from ICU's string # representation is slower. 
- try: - return _icu.normalize(_nmodes[mode], unicode(text)) - except (AttributeError, KeyError): - import unicodedata - return unicodedata.normalize(mode, unicode(text)) + return _icu.normalize(_nmodes[mode], unicode(text)) -def icu_find(collator, pattern, source): - try: - return collator.find(pattern, source) - except TypeError: - return collator.find(unicode(pattern), unicode(source)) - -def icu_startswith(collator, a, b): - try: - return collator.startswith(a, b) - except TypeError: - return collator.startswith(unicode(a), unicode(b)) - -def py_case_sensitive_sort_key(obj): - if not obj: - return _none - return obj - -def icu_case_sensitive_sort_key(collator, obj): - if not obj: - return _none2 - return collator.sort_key(obj) - -def icu_strcmp(collator, a, b): - return collator.strcmp(lower(a), lower(b)) - -def py_strcmp(a, b): - return cmp(a.lower(), b.lower()) - -def icu_case_sensitive_strcmp(collator, a, b): - return collator.strcmp(a, b) - -def icu_capitalize(s): - s = lower(s) - return s.replace(s[0], upper(s[0]), 1) if s else s - -_cmap = {} -def icu_contractions(collator): +def contractions(col=None): global _cmap + col = col or _collator + if col is None: + col = collator() ans = _cmap.get(collator, None) if ans is None: - ans = collator.contractions() - ans = frozenset(filter(None, ans)) if ans else {} - _cmap[collator] = ans + ans = col.contractions() + ans = frozenset(filter(None, ans)) + _cmap[col] = ans return ans -def icu_collation_order(collator, a): - try: - return collator.collation_order(a) - except TypeError: - return collator.collation_order(unicode(a)) - -load_icu() -load_collator() -_icu_not_ok = _icu is None or _collator is None -icu_unicode_version = getattr(_icu, 'unicode_version', None) -_nmodes = {m:getattr(_icu, 'UNORM_'+m, None) for m in ('NFC', 'NFD', 'NFKC', 'NFKD', 'NONE', 'DEFAULT', 'FCD')} - -try: - senc = sys.getdefaultencoding() - if not senc or senc.lower() == 'ascii': - _icu.set_default_encoding('utf-8') - del senc 
-except: - pass - -try: - fenc = sys.getfilesystemencoding() - if not fenc or fenc.lower() == 'ascii': - _icu.set_filesystem_encoding('utf-8') - del fenc -except: - pass - - -# }}} - -################# The string functions ######################################## - -sort_key = py_sort_key if _icu_not_ok else partial(icu_sort_key, _collator) - -strcmp = py_strcmp if _icu_not_ok else partial(icu_strcmp, _collator) - -case_sensitive_sort_key = py_case_sensitive_sort_key if _icu_not_ok else \ - partial(icu_case_sensitive_sort_key, _collator) - -case_sensitive_strcmp = cmp if _icu_not_ok else icu_case_sensitive_strcmp - -upper = (lambda s: s.upper()) if _icu_not_ok else \ - partial(icu_change_case, True, get_locale()) - -lower = (lambda s: s.lower()) if _icu_not_ok else \ - partial(icu_change_case, False, get_locale()) - -title_case = (lambda s: s.title()) if _icu_not_ok else \ - partial(_icu.title, get_locale()) - -capitalize = (lambda s: s.capitalize()) if _icu_not_ok else \ - (lambda s: icu_capitalize(s)) - -find = (py_find if _icu_not_ok else partial(icu_find, _collator)) - -contractions = ((lambda : {}) if _icu_not_ok else (partial(icu_contractions, - _collator))) - -def primary_strcmp(a, b): - 'strcmp that ignores case and accents on letters' - if _icu_not_ok: - from calibre.utils.filenames import ascii_text - return py_strcmp(ascii_text(a), ascii_text(b)) - try: - return _primary_collator.strcmp(a, b) - except AttributeError: - return primary_collator().strcmp(a, b) - -def primary_find(pat, src): - 'find that ignores case and accents on letters' - if _icu_not_ok: - from calibre.utils.filenames import ascii_text - return py_find(ascii_text(pat), ascii_text(src)) - return primary_icu_find(pat, src) - -def primary_icu_find(pat, src): - try: - return icu_find(_primary_collator, pat, src) - except AttributeError: - return icu_find(primary_collator(), pat, src) - -def primary_sort_key(val): - 'A sort key that ignores case and diacritics' - if _icu_not_ok: - from 
calibre.utils.filenames import ascii_text - return ascii_text(val).lower() - try: - return _primary_collator.sort_key(val) - except AttributeError: - return primary_collator().sort_key(val) - -def primary_startswith(a, b): - if _icu_not_ok: - from calibre.utils.filenames import ascii_text - return ascii_text(a).lower().startswith(ascii_text(b).lower()) - try: - return icu_startswith(_primary_collator, a, b) - except AttributeError: - return icu_startswith(primary_collator(), a, b) - -def collation_order(a): - if _icu_not_ok: - return (ord(a[0]), 1) if a else (0, 0) - try: - return icu_collation_order(_sort_collator, a) - except AttributeError: - return icu_collation_order(sort_collator(), a) ################################################################################ -def test(): # {{{ - from calibre import prints - # Data {{{ - german = ''' - Sonntag -Montag -Dienstag -Januar -Februar -März -Fuße -Fluße -Flusse -flusse -fluße -flüße -flüsse -''' - german_good = ''' - Dienstag -Februar -flusse -Flusse -fluße -Fluße -flüsse -flüße -Fuße -Januar -März -Montag -Sonntag''' - french = ''' -dimanche -lundi -mardi -janvier -février -mars -déjà -Meme -deja -même -dejà -bpef -bœg -Boef -Mémé -bœf -boef -bnef -pêche -pèché -pêché -pêche -pêché''' - french_good = ''' - bnef - boef - Boef - bœf - bœg - bpef - deja - dejà - déjà - dimanche - février - janvier - lundi - mardi - mars - Meme - Mémé - même - pèché - pêche - pêche - pêché - pêché''' - # }}} - - def create(l): - l = l.decode('utf-8').splitlines() - return [x.strip() for x in l if x.strip()] - - def test_strcmp(entries): - for x in entries: - for y in entries: - if strcmp(x, y) != cmp(sort_key(x), sort_key(y)): - print 'strcmp failed for %r, %r'%(x, y) - - german = create(german) - c = _icu.Collator('de') - c.numeric = True - gs = list(sorted(german, key=c.sort_key)) - if gs != create(german_good): - print 'German sorting failed' - return - print - french = create(french) - c = _icu.Collator('fr') - c.numeric = 
True - fs = list(sorted(french, key=c.sort_key)) - if fs != create(french_good): - print 'French sorting failed (note that French fails with icu < 4.6)' - return - test_strcmp(german + french) - - print '\nTesting case transforms in current locale' - from calibre.utils.titlecase import titlecase - for x in ('a', 'Alice\'s code', 'macdonald\'s machine', '02 the wars'): - print 'Upper: ', x, '->', 'py:', x.upper().encode('utf-8'), 'icu:', upper(x).encode('utf-8') - print 'Lower: ', x, '->', 'py:', x.lower().encode('utf-8'), 'icu:', lower(x).encode('utf-8') - print 'Title: ', x, '->', 'py:', x.title().encode('utf-8'), 'icu:', title_case(x).encode('utf-8'), 'titlecase:', titlecase(x).encode('utf-8') - print 'Capitalize:', x, '->', 'py:', x.capitalize().encode('utf-8'), 'icu:', capitalize(x).encode('utf-8') - print - - print '\nTesting primary collation' - for k, v in {u'pèché': u'peche', u'flüße':u'Flusse', - u'Štepánek':u'ŠtepaneK'}.iteritems(): - if primary_strcmp(k, v) != 0: - prints('primary_strcmp() failed with %s != %s'%(k, v)) - return - if primary_find(v, u' '+k)[0] != 1: - prints('primary_find() failed with %s not in %s'%(v, k)) - return - - n = character_name(safe_chr(0x1f431)) - if n != u'CAT FACE': - raise ValueError('Failed to get correct character name for 0x1f431: %r != %r' % n, u'CAT FACE') - - global _primary_collator - orig = _primary_collator - _primary_collator = _icu.Collator('es') - if primary_strcmp(u'peña', u'pena') == 0: - print 'Primary collation in Spanish locale failed' - return - _primary_collator = orig - - print '\nTesting contractions' - c = _icu.Collator('cs') - if icu_contractions(c) != frozenset([u'Z\u030c', u'z\u030c', u'Ch', - u'C\u030c', u'ch', u'cH', u'c\u030c', u's\u030c', u'r\u030c', u'CH', - u'S\u030c', u'R\u030c']): - print 'Contractions for the Czech language failed' - return - - print '\nTesting startswith' - p = primary_startswith - if (not p('asd', 'asd') or not p('asd', 'A') or - not p('x', '')): - print 'startswith() 
failed' - return - - print '\nTesting collation_order()' - for group in [ - ('Šaa', 'Smith', 'Solženicyn', 'Štepánek'), - ('calibre', 'Charon', 'Collins'), - ('01', '1'), - ('1', '11', '13'), - ]: - last = None - for x in group: - val = icu_collation_order(sort_collator(), x) - if val[1] != 1: - prints('collation_order() returned incorrect length for', x) - if last is None: - last = val - else: - if val != last: - prints('collation_order() returned incorrect value for', x) - last = val - -# }}} - -def test_roundtrip(): - for r in (u'xxx\0\u2219\U0001f431xxx', u'\0', u'', u'simple'): - rp = _icu.roundtrip(r) - if rp != r: - raise ValueError(u'Roundtripping failed: %r != %r' % (r, rp)) - -def test_normalize_performance(): - import os - if not os.path.exists('t.txt'): - return - raw = open('t.txt', 'rb').read().decode('utf-8') - print (len(raw)) - import time, unicodedata - st = time.time() - count = 100 - for i in xrange(count): - normalize(raw) - print ('ICU time:', time.time() - st) - st = time.time() - for i in xrange(count): - unicodedata.normalize('NFC', unicode(raw)) - print ('py time:', time.time() - st) - if __name__ == '__main__': - test_roundtrip() - test_normalize_performance() - test() + from calibre.utils.icu_test import run + run(verbosity=4) diff --git a/src/calibre/utils/icu_calibre_utils.h b/src/calibre/utils/icu_calibre_utils.h index 5cab803258..a965d0c072 100644 --- a/src/calibre/utils/icu_calibre_utils.h +++ b/src/calibre/utils/icu_calibre_utils.h @@ -21,7 +21,10 @@ #include #include -#if PY_VERSION_HEX < 0x03030000 +#if PY_VERSION_HEX >= 0x03030000 +#error Not implemented for python >= 3.3 +#endif + // Roundtripping will need to be implemented differently for python 3.3+ where strings are stored with variable widths #ifndef NO_PYTHON_TO_ICU @@ -67,5 +70,4 @@ static PyObject* icu_to_python(UChar *src, int32_t sz) { } #endif -#endif diff --git a/src/calibre/utils/icu_test.py b/src/calibre/utils/icu_test.py new file mode 100644 index 
0000000000..2c24348169 --- /dev/null +++ b/src/calibre/utils/icu_test.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2014, Kovid Goyal ' + +import unittest, sys +from contextlib import contextmanager + +import calibre.utils.icu as icu + + +@contextmanager +def make_collation_func(name, locale, numeric=True, template='_sort_key_template', func='strcmp'): + c = icu._icu.Collator(locale) + cname = '%s_test_collator%s' % (name, template) + setattr(icu, cname, c) + c.numeric = numeric + yield icu._make_func(getattr(icu, template), name, collator=cname, collator_func='not_used_xxx', func=func) + delattr(icu, cname) + +class TestICU(unittest.TestCase): + + ae = unittest.TestCase.assertEqual + + def setUp(self): + icu.change_locale('en') + + def test_sorting(self): + ' Test the various sorting APIs ' + german = '''Sonntag Montag Dienstag Januar Februar März Fuße Fluße Flusse flusse fluße flüße flüsse'''.split() + german_good = '''Dienstag Februar flusse Flusse fluße Fluße flüsse flüße Fuße Januar März Montag Sonntag'''.split() + french = '''dimanche lundi mardi janvier février mars déjà Meme deja même dejà bpef bœg Boef Mémé bœf boef bnef pêche pèché pêché pêche pêché'''.split() + french_good = '''bnef boef Boef bœf bœg bpef deja dejà déjà dimanche février janvier lundi mardi mars Meme Mémé même pèché pêche pêche pêché pêché'''.split() # noqa + + # Test corner cases + sort_key = icu.sort_key + s = '\U0001f431' + self.ae(sort_key(s), sort_key(s.encode(sys.getdefaultencoding())), 'UTF-8 encoded object not correctly decoded to generate sort key') + self.ae(s.encode('utf-16'), s.encode('utf-16'), 'Undecodable bytestring not returned as itself') + self.ae(b'', sort_key(None)) + self.ae(0, icu.strcmp(None, b'')) + self.ae(0, icu.strcmp(s, s.encode(sys.getdefaultencoding()))) + + # Test locales + with make_collation_func('dsk', 
'de', func='sort_key') as dsk: + self.ae(german_good, sorted(german, key=dsk)) + with make_collation_func('dcmp', 'de', template='_strcmp_template') as dcmp: + for x in german: + for y in german: + self.ae(cmp(dsk(x), dsk(y)), dcmp(x, y)) + + with make_collation_func('fsk', 'fr', func='sort_key') as fsk: + self.ae(french_good, sorted(french, key=fsk)) + with make_collation_func('fcmp', 'fr', template='_strcmp_template') as fcmp: + for x in french: + for y in french: + self.ae(cmp(fsk(x), fsk(y)), fcmp(x, y)) + + with make_collation_func('ssk', 'es', func='sort_key') as ssk: + self.assertNotEqual(ssk('peña'), ssk('pena')) + with make_collation_func('scmp', 'es', template='_strcmp_template') as scmp: + self.assertNotEqual(0, scmp('pena', 'peña')) + + for k, v in {u'pèché': u'peche', u'flüße':u'Flusse', u'Štepánek':u'ŠtepaneK'}.iteritems(): + self.ae(0, icu.primary_strcmp(k, v)) + + # Test different types of collation + self.ae(icu.primary_sort_key('Aä'), icu.primary_sort_key('aa')) + self.assertLess(icu.numeric_sort_key('something 2'), icu.numeric_sort_key('something 11')) + self.assertLess(icu.case_sensitive_sort_key('A'), icu.case_sensitive_sort_key('a')) + self.ae(0, icu.strcmp('a', 'A')) + self.ae(cmp('a', 'A'), icu.case_sensitive_strcmp('a', 'A')) + self.ae(0, icu.primary_strcmp('ä', 'A')) + + def test_change_case(self): + ' Test the various ways of changing the case ' + from calibre.utils.titlecase import titlecase + # Test corner cases + self.ae('A', icu.upper(b'a')) + for x in ('', None, False, 1): + self.ae(x, icu.capitalize(x)) + + for x in ('a', 'Alice\'s code', 'macdonald\'s machIne', '02 the wars'): + self.ae(icu.upper(x), x.upper()) + self.ae(icu.lower(x), x.lower()) + # ICU's title case algorithm is different from ours, when there are + # capitals inside words + self.ae(icu.title_case(x), titlecase(x).replace('machIne', 'Machine')) + self.ae(icu.capitalize(x), x[0].upper() + x[1:].lower()) + + def test_find(self): + ' Test searching for substrings ' + 
self.ae((1, 1), icu.find(b'a', b'1ab')) + self.ae((1, 1 if sys.maxunicode >= 0x10ffff else 2), icu.find('\U0001f431', 'x\U0001f431x')) + self.ae((1 if sys.maxunicode >= 0x10ffff else 2, 1), icu.find('y', '\U0001f431y')) + self.ae((0, 4), icu.primary_find('pena', 'peña')) + for k, v in {u'pèché': u'peche', u'flüße':u'Flusse', u'Štepánek':u'ŠtepaneK'}.iteritems(): + self.ae((1, len(k)), icu.primary_find(v, ' ' + k), 'Failed to find %s in %s' % (v, k)) + self.assertTrue(icu.startswith(b'abc', b'ab')) + self.assertTrue(icu.startswith('abc', 'abc')) + self.assertFalse(icu.startswith('xyz', 'a')) + self.assertTrue(icu.startswith('xxx', '')) + self.assertTrue(icu.primary_startswith('pena', 'peña')) + self.assertTrue(icu.contains('\U0001f431', '\U0001f431')) + self.assertTrue(icu.contains('something', 'some other something else')) + self.assertTrue(icu.contains('', 'a')) + self.assertTrue(icu.contains('', '')) + self.assertFalse(icu.contains('xxx', 'xx')) + self.assertTrue(icu.primary_contains('pena', 'peña')) + + def test_collation_order(self): + 'Testing collation ordering' + for group in [ + ('Šaa', 'Smith', 'Solženicyn', 'Štepánek'), + ('01', '1'), + ('1', '11', '13'), + ]: + last = None + for x in group: + order, length = icu.numeric_collator().collation_order(x) + if last is not None: + self.ae(last, order) + last = order + + def test_roundtrip(self): + for r in (u'xxx\0\u2219\U0001f431xxx', u'\0', u'', u'simple'): + self.ae(r, icu._icu.roundtrip(r)) + + def test_character_name(self): + self.ae(icu.character_name('\U0001f431'), 'CAT FACE') + + def test_contractions(self): + c = icu._icu.Collator('cs') + self.ae(icu.contractions(c), frozenset({u'Z\u030c', u'z\u030c', u'Ch', + u'C\u030c', u'ch', u'cH', u'c\u030c', u's\u030c', u'r\u030c', u'CH', + u'S\u030c', u'R\u030c'})) + +class TestRunner(unittest.main): + + def createTests(self): + tl = unittest.TestLoader() + self.test = tl.loadTestsFromTestCase(TestICU) + +def run(verbosity=4): + TestRunner(verbosity=verbosity, 
exit=False) + +def test_build(): + result = TestRunner(verbosity=0, buffer=True, catchbreak=True, failfast=True, argv=sys.argv[:1], exit=False).result + if not result.wasSuccessful(): + raise SystemExit(1) + +if __name__ == '__main__': + run(verbosity=4) + diff --git a/src/calibre/utils/ipc/__init__.py b/src/calibre/utils/ipc/__init__.py index 54c10b5058..9735478a40 100644 --- a/src/calibre/utils/ipc/__init__.py +++ b/src/calibre/utils/ipc/__init__.py @@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en' import os, errno from threading import Thread -from calibre.constants import iswindows, get_windows_username +from calibre.constants import iswindows, get_windows_username, islinux ADDRESS = None @@ -37,12 +37,15 @@ def gui_socket_address(): if user: ADDRESS += '-' + user[:100] + 'x' else: - from tempfile import gettempdir - tmp = gettempdir() user = os.environ.get('USER', '') if not user: user = os.path.basename(os.path.expanduser('~')) - ADDRESS = os.path.join(tmp, user+'-calibre-gui.socket') + if islinux: + ADDRESS = (u'\0%s-calibre-gui.socket' % user).encode('ascii') + else: + from tempfile import gettempdir + tmp = gettempdir() + ADDRESS = os.path.join(tmp, user+'-calibre-gui.socket') return ADDRESS class RC(Thread): diff --git a/src/calibre/utils/ipc/server.py b/src/calibre/utils/ipc/server.py index fbbe411f84..9350163be6 100644 --- a/src/calibre/utils/ipc/server.py +++ b/src/calibre/utils/ipc/server.py @@ -6,7 +6,7 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import sys, os, cPickle, time, tempfile +import sys, os, cPickle, time, tempfile, errno from math import ceil from threading import Thread, RLock from Queue import Queue, Empty @@ -18,7 +18,7 @@ from calibre.utils.ipc import eintr_retry_call from calibre.utils.ipc.launch import Worker from calibre.utils.ipc.worker import PARALLEL_FUNCS from calibre import detect_ncpus as cpu_count -from calibre.constants import iswindows, DEBUG +from 
calibre.constants import iswindows, DEBUG, islinux from calibre.ptempfile import base_dir _counter = 0 @@ -84,6 +84,35 @@ class ConnectedWorker(Thread): class CriticalError(Exception): pass +_name_counter = 0 + +if islinux: + def create_listener(authkey, backlog=4): + # Use abstract named sockets on linux to avoid creating unnecessary temp files + global _name_counter + prefix = u'\0calibre-ipc-listener-%d-%%d' % os.getpid() + while True: + _name_counter += 1 + address = (prefix % _name_counter).encode('ascii') + try: + l = Listener(address=address, authkey=authkey, backlog=backlog) + if hasattr(l._listener._unlink, 'cancel'): + # multiprocessing tries to call unlink even on abstract + # named sockets, prevent it from doing so. + l._listener._unlink.cancel() + return address, l + except EnvironmentError as err: + if err.errno == errno.EADDRINUSE: + continue + raise +else: + def create_listener(authkey, backlog=4): + address = arbitrary_address('AF_PIPE' if iswindows else 'AF_UNIX') + if iswindows and address[1] == ':': + address = address[2:] + listener = Listener(address=address, authkey=authkey, backlog=backlog) + return address, listener + class Server(Thread): def __init__(self, notify_on_job_done=lambda x: x, pool_size=None, @@ -99,11 +128,7 @@ class Server(Thread): self.pool_size = limit if pool_size is None else pool_size self.notify_on_job_done = notify_on_job_done self.auth_key = os.urandom(32) - self.address = arbitrary_address('AF_PIPE' if iswindows else 'AF_UNIX') - if iswindows and self.address[1] == ':': - self.address = self.address[2:] - self.listener = Listener(address=self.address, - authkey=self.auth_key, backlog=4) + self.address, self.listener = create_listener(self.auth_key, backlog=4) self.add_jobs_queue, self.changed_jobs_queue = Queue(), Queue() self.kill_queue = Queue() self.waiting_jobs = [] @@ -162,7 +187,6 @@ class Server(Thread): w = self.launch_worker(gui=gui, redirect_output=redirect_output) w.start_job(job) - def run(self): while 
True: try: @@ -280,8 +304,6 @@ class Server(Thread): pos += delta return ans - - def close(self): try: self.add_jobs_queue.put(None) diff --git a/src/calibre/utils/ipc/simple_worker.py b/src/calibre/utils/ipc/simple_worker.py index 2d24fec22b..d06550cdce 100644 --- a/src/calibre/utils/ipc/simple_worker.py +++ b/src/calibre/utils/ipc/simple_worker.py @@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en' import os, cPickle, traceback, time, importlib from binascii import hexlify, unhexlify -from multiprocessing.connection import Listener, arbitrary_address, Client +from multiprocessing.connection import Client from threading import Thread from contextlib import closing @@ -117,11 +117,9 @@ def communicate(ans, worker, listener, args, timeout=300, heartbeat=None, ans['result'] = cw.res['result'] def create_worker(env, priority='normal', cwd=None, func='main'): - address = arbitrary_address('AF_PIPE' if iswindows else 'AF_UNIX') - if iswindows and address[1] == ':': - address = address[2:] + from calibre.utils.ipc.server import create_listener auth_key = os.urandom(32) - listener = Listener(address=address, authkey=auth_key) + address, listener = create_listener(auth_key) env = dict(env) env.update({ diff --git a/src/calibre/utils/lock.py b/src/calibre/utils/lock.py index b2156d48c8..5090c11cf8 100644 --- a/src/calibre/utils/lock.py +++ b/src/calibre/utils/lock.py @@ -8,7 +8,7 @@ Secure access to locked files from multiple processes. from calibre.constants import iswindows, __appname__, \ win32api, win32event, winerror, fcntl -import time, atexit, os +import time, atexit, os, stat class LockError(Exception): pass @@ -105,6 +105,12 @@ class WindowsExclFile(object): def closed(self): return self._handle is None +def unix_open(path): + # We cannot use open(a+b) directly because Fedora apparently ships with a + # broken libc that causes seek(0) followed by truncate() to not work for + # files with O_APPEND set. 
+ fd = os.open(path, os.O_RDWR | os.O_CREAT, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH) + return os.fdopen(fd, 'r+b') class ExclusiveFile(object): @@ -113,7 +119,7 @@ class ExclusiveFile(object): self.timeout = timeout def __enter__(self): - self.file = WindowsExclFile(self.path, self.timeout) if iswindows else open(self.path, 'a+b') + self.file = WindowsExclFile(self.path, self.timeout) if iswindows else unix_open(self.path) self.file.seek(0) timeout = self.timeout if not iswindows: diff --git a/src/calibre/gui2/tweak_book/matcher.c b/src/calibre/utils/matcher.c similarity index 81% rename from src/calibre/gui2/tweak_book/matcher.c rename to src/calibre/utils/matcher.c index e9c773a0c3..c2c2210dad 100644 --- a/src/calibre/gui2/tweak_book/matcher.c +++ b/src/calibre/utils/matcher.c @@ -155,28 +155,34 @@ static double calc_score_for_char(MatchInfo *m, UChar32 last, UChar32 current, i } static void convert_positions(int32_t *positions, int32_t *final_positions, UChar *string, int32_t char_len, int32_t byte_len, double score) { +#if PY_VERSION_HEX >= 0x03030000 +#error Not implemented for python >= 3.3 +#endif + // The positions array stores character positions as byte offsets in string, convert them into character offsets int32_t i, *end; - if (score == 0.0) { - for (i = 0; i < char_len; i++) final_positions[i] = -1; - return; - } + if (score == 0.0) { for (i = 0; i < char_len; i++) final_positions[i] = -1; return; } end = final_positions + char_len; for (i = 0; i < byte_len && final_positions < end; i++) { if (positions[i] == -1) continue; +#ifdef Py_UNICODE_WIDE *final_positions = u_countChar32(string, positions[i]); +#else + *final_positions = positions[i]; +#endif final_positions += 1; } } -static double process_item(MatchInfo *m, Stack *stack, int32_t *final_positions) { - UChar32 nc, hc, lc; - UChar *p; +static double process_item(MatchInfo *m, Stack *stack, int32_t *final_positions, UStringSearch **searches) { + UChar32 hc, lc; double 
final_score = 0.0, score = 0.0, score_for_char = 0.0; int32_t pos, i, j, hidx, nidx, last_idx, distance, *positions = final_positions + m->needle_len; MemoryItem mem = {0}; + UStringSearch *search = NULL; + UErrorCode status = U_ZERO_ERROR; stack_push(stack, 0, 0, 0, 0.0, final_positions); @@ -187,11 +193,14 @@ static double process_item(MatchInfo *m, Stack *stack, int32_t *final_positions) // No memoized result, calculate the score for (i = nidx; i < m->needle_len;) { nidx = i; - U16_NEXT(m->needle, i, m->needle_len, nc); // i now points to next char in needle - if (m->haystack_len - hidx < m->needle_len - nidx) { score = 0.0; break; } - p = u_strchr32(m->haystack + hidx, nc); // TODO: Use primary collation for the find - if (p == NULL) { score = 0.0; break; } - pos = (int32_t)(p - m->haystack); + U16_FWD_1(m->needle, i, m->needle_len);// i now points to next char in needle + search = searches[nidx]; + if (search == NULL || m->haystack_len - hidx < m->needle_len - nidx) { score = 0.0; break; } + status = U_ZERO_ERROR; // We ignore any errors as we already know that hidx is correct + usearch_setOffset(search, hidx, &status); + status = U_ZERO_ERROR; + pos = usearch_next(search, &status); + if (pos == USEARCH_DONE) { score = 0.0; break; } // No matches found distance = u_countChar32(m->haystack + last_idx, pos - last_idx); if (distance <= 1) score_for_char = m->max_score_per_char; else { @@ -222,8 +231,30 @@ static double process_item(MatchInfo *m, Stack *stack, int32_t *final_positions) return final_score; } +static bool create_searches(UStringSearch **searches, UChar *haystack, int32_t haystack_len, UChar *needle, int32_t needle_len, UCollator *collator) { + int32_t i = 0, pos = 0; + UErrorCode status = U_ZERO_ERROR; -static bool match(UChar **items, int32_t *item_lengths, uint32_t item_count, UChar *needle, Match *match_results, int32_t *final_positions, int32_t needle_char_len, UChar *level1, UChar *level2, UChar *level3) { + while (i < needle_len) { + pos = i; 
+ U16_FWD_1(needle, i, needle_len); + if (pos == i) break; + searches[pos] = usearch_openFromCollator(needle + pos, i - pos, haystack, haystack_len, collator, NULL, &status); + if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); searches[pos] = NULL; return FALSE; } + } + + return TRUE; +} + +static void free_searches(UStringSearch **searches, int32_t count) { + int32_t i = 0; + for (i = 0; i < count; i++) { + if (searches[i] != NULL) usearch_close(searches[i]); + searches[i] = NULL; + } +} + +static bool match(UChar **items, int32_t *item_lengths, uint32_t item_count, UChar *needle, Match *match_results, int32_t *final_positions, int32_t needle_char_len, UCollator *collator, UChar *level1, UChar *level2, UChar *level3) { Stack stack = {0}; int32_t i = 0, maxhl = 0; int32_t r = 0, *positions = NULL; @@ -231,6 +262,7 @@ static bool match(UChar **items, int32_t *item_lengths, uint32_t item_count, UCh bool ok = FALSE; MemoryItem ***memo = NULL; int32_t needle_len = u_strlen(needle); + UStringSearch **searches = NULL; if (needle_len <= 0 || item_count <= 0) { for (i = 0; i < (int32_t)item_count; i++) match_results[i].score = 0.0; @@ -240,7 +272,8 @@ static bool match(UChar **items, int32_t *item_lengths, uint32_t item_count, UCh matches = (MatchInfo*)calloc(item_count, sizeof(MatchInfo)); positions = (int32_t*)calloc(2*needle_len, sizeof(int32_t)); // One set of positions is the final answer and one set is working space - if (matches == NULL || positions == NULL) {PyErr_NoMemory(); goto end;} + searches = (UStringSearch**) calloc(needle_len, sizeof(UStringSearch*)); + if (matches == NULL || positions == NULL || searches == NULL) {PyErr_NoMemory(); goto end;} for (i = 0; i < (int32_t)item_count; i++) { matches[i].haystack = items[i]; @@ -265,14 +298,14 @@ static bool match(UChar **items, int32_t *item_lengths, uint32_t item_count, UCh if (stack.items == NULL || memo == NULL) {PyErr_NoMemory(); goto end;} for (i = 0; i < (int32_t)item_count; 
i++) { - for (r = 0; r < needle_len; r++) { - positions[r] = -1; - } + for (r = 0; r < needle_len; r++) positions[r] = -1; stack_clear(&stack); clear_memory(memo, needle_len, matches[i].haystack_len); + free_searches(searches, needle_len); + if (!create_searches(searches, matches[i].haystack, matches[i].haystack_len, needle, needle_len, collator)) goto end; matches[i].memo = memo; - match_results[i].score = process_item(&matches[i], &stack, positions); - convert_positions(positions, final_positions + i, matches[i].haystack, needle_char_len, needle_len, match_results[i].score); + match_results[i].score = process_item(&matches[i], &stack, positions, searches); + convert_positions(positions, final_positions + i * needle_char_len, matches[i].haystack, needle_char_len, needle_len, match_results[i].score); } ok = TRUE; @@ -281,6 +314,7 @@ end: nullfree(stack.items); nullfree(matches); nullfree(memo); + if (searches != NULL) { free_searches(searches, needle_len); nullfree(searches); } return ok; } @@ -296,6 +330,7 @@ typedef struct { UChar *level1; UChar *level2; UChar *level3; + UCollator *collator; } Matcher; @@ -308,6 +343,7 @@ static void free_matcher(Matcher *self) { } nullfree(self->items); nullfree(self->item_lengths); nullfree(self->level1); nullfree(self->level2); nullfree(self->level3); + if (self->collator != NULL) ucol_close(self->collator); self->collator = NULL; } static void Matcher_dealloc(Matcher* self) @@ -320,10 +356,21 @@ Matcher_dealloc(Matcher* self) static int Matcher_init(Matcher *self, PyObject *args, PyObject *kwds) { - PyObject *items = NULL, *p = NULL, *py_items = NULL, *level1 = NULL, *level2 = NULL, *level3 = NULL; + PyObject *items = NULL, *p = NULL, *py_items = NULL, *level1 = NULL, *level2 = NULL, *level3 = NULL, *collator = NULL; int32_t i = 0; + UErrorCode status = U_ZERO_ERROR; + UCollator *col = NULL; + + if (!PyArg_ParseTuple(args, "OOOOO", &items, &collator, &level1, &level2, &level3)) return -1; + + // Clone the passed in collator 
(cloning is needed as collators are not thread safe) + if (!PyCapsule_CheckExact(collator)) { PyErr_SetString(PyExc_TypeError, "Collator must be a capsule"); return -1; } + col = (UCollator*)PyCapsule_GetPointer(collator, NULL); + if (col == NULL) return -1; + self->collator = ucol_safeClone(col, NULL, NULL, &status); + col = NULL; + if (U_FAILURE(status)) { self->collator = NULL; PyErr_SetString(PyExc_ValueError, u_errorName(status)); return -1; } - if (!PyArg_ParseTuple(args, "OOOO", &items, &level1, &level2, &level3)) return -1; py_items = PySequence_Fast(items, "Must pass in two sequence objects"); if (py_items == NULL) goto end; self->item_count = (uint32_t)PySequence_Size(items); @@ -378,7 +425,7 @@ Matcher_calculate_scores(Matcher *self, PyObject *args) { } Py_BEGIN_ALLOW_THREADS; - ok = match(self->items, self->item_lengths, self->item_count, needle, matches, final_positions, needle_char_len, self->level1, self->level2, self->level3); + ok = match(self->items, self->item_lengths, self->item_count, needle, matches, final_positions, needle_char_len, self->collator, self->level1, self->level2, self->level3); Py_END_ALLOW_THREADS; if (ok) { @@ -386,7 +433,7 @@ Matcher_calculate_scores(Matcher *self, PyObject *args) { score = PyFloat_FromDouble(matches[i].score); if (score == NULL) { PyErr_NoMemory(); goto end; } PyTuple_SET_ITEM(items, (Py_ssize_t)i, score); - p = final_positions + i; + p = final_positions + (i * needle_char_len); for (j = 0; j < needle_char_len; j++) { score = PyInt_FromLong((long)p[j]); if (score == NULL) { PyErr_NoMemory(); goto end; } diff --git a/src/calibre/utils/matcher.py b/src/calibre/utils/matcher.py new file mode 100644 index 0000000000..895d29082a --- /dev/null +++ b/src/calibre/utils/matcher.py @@ -0,0 +1,305 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2014, Kovid Goyal ' + +import atexit, os, 
def split(tasks, pool_size):
    '''
    Split a sequence into a list of sub lists, with the number of sub lists
    being no more than pool_size. Each sublist contains 2-tuples of the form
    (i, x) where x is an element from the original sequence and i is the index
    of the element x in the original sequence.

    :param tasks: A sliceable sequence (list or tuple) that supports len()
    :param pool_size: The maximum number of sub lists to create, must be > 0
    :return: A list of lists of (index, element) tuples. Empty if tasks is empty.
    :raises ValueError: If pool_size is not a positive number
    '''
    if pool_size <= 0:
        # Zero used to surface as a bare ZeroDivisionError and a negative value
        # produced a negative chunk size, which made the chunking loop spin forever
        raise ValueError('pool_size must be positive, not %r' % (pool_size,))
    total = len(tasks)
    if not total:
        return []
    # Relies on true division (the module enables it via __future__)
    delta = int(ceil(total / pool_size))
    # Slice each chunk exactly once instead of re-copying the remaining tail of
    # tasks on every iteration
    return [list(enumerate(tasks[start:start + delta], start)) for start in range(0, total, delta)]
items)) + +def get_items_from_dir(basedir, acceptq=lambda x: True): + if isinstance(basedir, bytes): + basedir = basedir.decode(filesystem_encoding) + relsep = os.sep != '/' + for dirpath, dirnames, filenames in os.walk(basedir): + for f in filenames: + x = os.path.join(dirpath, f) + if acceptq(x): + x = os.path.relpath(x, basedir) + if relsep: + x = x.replace(os.sep, '/') + yield x + +class FilesystemMatcher(Matcher): + + def __init__(self, basedir, *args, **kwargs): + Matcher.__init__(self, get_items_from_dir(basedir), *args, **kwargs) + +# Python implementation of the scoring algorithm {{{ +def calc_score_for_char(ctx, prev, current, distance): + factor = 1.0 + ans = ctx.max_score_per_char + + if prev in ctx.level1: + factor = 0.9 + elif prev in ctx.level2 or (icu_lower(prev) == prev and icu_upper(current) == current): + factor = 0.8 + elif prev in ctx.level3: + factor = 0.7 + else: + factor = (1.0 / distance) * 0.75 + + return ans * factor + +def process_item(ctx, haystack, needle): + # non-recursive implementation using a stack + stack = [(0, 0, 0, 0, [-1]*len(needle))] + final_score, final_positions = stack[0][-2:] + push, pop = stack.append, stack.pop + while stack: + hidx, nidx, last_idx, score, positions = pop() + key = (hidx, nidx, last_idx) + mem = ctx.memory.get(key, None) + if mem is None: + for i in xrange(nidx, len(needle)): + n = needle[i] + if (len(haystack) - hidx < len(needle) - i): + score = 0 + break + pos = primary_find(n, haystack[hidx:])[0] + if pos == -1: + score = 0 + break + pos += hidx + + distance = pos - last_idx + score_for_char = ctx.max_score_per_char if distance <= 1 else calc_score_for_char(ctx, haystack[pos-1], haystack[pos], distance) + hidx = pos + 1 + push((hidx, i, last_idx, score, list(positions))) + last_idx = positions[i] = pos + score += score_for_char + ctx.memory[key] = (score, positions) + else: + score, positions = mem + if score > final_score: + final_score = score + final_positions = positions + return final_score, 
class PyScorer(object):
    '''
    Pure python scorer. Calling an instance with a needle lazily yields one
    (score, positions) pair per item, in the order of the items, as computed
    by process_item(). The instance itself is the context passed to
    process_item(): it carries the level1/level2/level3 character sets, the
    per-item max_score_per_char and the per-item memory dict.
    '''
    __slots__ = ('level1', 'level2', 'level3', 'max_score_per_char', 'items', 'memory')

    def __init__(self, items, level1=DEFAULT_LEVEL1, level2=DEFAULT_LEVEL2, level3=DEFAULT_LEVEL3):
        '''
        :param items: The sequence of strings to score against
        :param level1: Characters after which a match gets the highest bonus
        :param level2: Characters after which a match gets the second highest bonus
        :param level3: Characters after which a match gets the third highest bonus
        '''
        self.level1, self.level2, self.level3 = level1, level2, level3
        self.max_score_per_char = 0
        self.items = items

    def __call__(self, needle):
        '''
        Generate a (score, positions) tuple for every item.

        An empty needle or an empty item cannot match anything. Such pairs
        used to raise ZeroDivisionError while computing max_score_per_char;
        they now yield a score of 0 with every position set to -1, which is
        what process_item() returns for a failed match.
        '''
        needle_len = len(needle)
        for item in self.items:
            if not needle_len or not item:
                yield 0, [-1] * needle_len
                continue
            # Average of the per character weights of the item and the needle
            self.max_score_per_char = (1.0 / len(item) + 1.0 / needle_len) / 2.0
            # The memoization cache is only valid for a single item
            self.memory = {}
            yield process_item(self, item, needle)
def main(basedir=None, query=None):
    '''
    Interactive command line driver for FilesystemMatcher.

    Scans basedir, then repeatedly matches a query against the files found
    and prints at most ten matching paths, with every matched character
    written through a bold red ColoredStream.

    :param basedir: Directory to scan. If None, it is read from stdin, with
                    the current working directory used when the answer is empty.
    :param query: The first query to run. If None, it is read from stdin.
                  All subsequent queries are always read from stdin. An empty
                  query, EOF or Ctrl-C ends the loop.
    '''
    from calibre import prints
    from calibre.utils.terminal import ColoredStream
    # sys.stdin.encoding is None when stdin is not a terminal (for example a
    # pipe or a file) and decode(None) raises TypeError, so fall back to UTF-8
    input_encoding = getattr(sys.stdin, 'encoding', None) or 'utf-8'
    if basedir is None:
        try:
            basedir = raw_input('Enter directory to scan [%s]: ' % os.getcwdu()).decode(input_encoding).strip() or os.getcwdu()
        except (EOFError, KeyboardInterrupt):
            return
    m = FilesystemMatcher(basedir)
    emph = ColoredStream(sys.stdout, fg='red', bold=True)
    while True:
        if query is None:
            try:
                query = raw_input('Enter query: ').decode(input_encoding)
            except (EOFError, KeyboardInterrupt):
                break
        if not query:
            break
        for path, positions in islice(m(query).iteritems(), 0, 10):
            p = 0  # Index of the first character of path not yet printed
            for pos in positions:
                if pos == -1:
                    # No match was recorded for this needle character
                    continue
                prints(path[p:pos], end='')
                ch = get_char(path, pos)
                with emph:
                    prints(ch, end='')
                # ch can be two code units long on narrow python builds
                p = pos + len(ch)
            prints(path[p:])
        # Force the next iteration to prompt for a new query
        query = None