From c2dc54c4da997c29114f8e7e5540456d3a91e618 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 17 Apr 2014 18:03:12 +0530 Subject: [PATCH] Implement change word --- src/calibre/ebooks/oeb/polish/spell.py | 90 +++++++++++++++++++------- src/calibre/gui2/tweak_book/boss.py | 5 ++ src/calibre/gui2/tweak_book/spell.py | 39 ++++++++++- 3 files changed, 108 insertions(+), 26 deletions(-) diff --git a/src/calibre/ebooks/oeb/polish/spell.py b/src/calibre/ebooks/oeb/polish/spell.py index c460bf8bc9..1bf819495b 100644 --- a/src/calibre/ebooks/oeb/polish/spell.py +++ b/src/calibre/ebooks/oeb/polish/spell.py @@ -9,7 +9,7 @@ __copyright__ = '2014, Kovid Goyal ' import sys from collections import defaultdict -from calibre.spell.break_iterator import split_into_words +from calibre.spell.break_iterator import split_into_words, index_of from calibre.spell.dictionary import parse_lang_code from calibre.ebooks.oeb.base import barename from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, get_container @@ -39,10 +39,11 @@ def patterns(): class Location(object): - __slots__ = ('file_name', 'sourceline', 'original_word') + __slots__ = ('file_name', 'sourceline', 'original_word', 'location_node', 'node_item') - def __init__(self, file_name=None, sourceline=None, original_word=None): + def __init__(self, file_name=None, sourceline=None, original_word=None, location_node=None, node_item=(None, None)): self.file_name, self.sourceline, self.original_word = file_name, sourceline, original_word + self.location_node, self.node_item = location_node, node_item def __repr__(self): return '%s:%s' % (self.file_name, self.sourceline) @@ -63,43 +64,48 @@ def get_words(text, lang): return () return filter(filter_words, ans) -def add_words(text, sourceline, words, file_name, locale): +def add_words(text, node, words, file_name, locale, node_item): candidates = get_words(text, locale.langcode) if candidates: p = patterns() for word in candidates: sword = p.sanitize_invisible_pat.sub('', word) - loc = Location(file_name, sourceline, word) + loc = Location(file_name, node.sourceline, word, node, node_item) words[(sword, locale)].append(loc) +def add_words_from_attr(node, attr, words, file_name, locale): + text = node.get(attr, None) + if text: + add_words(text, node, words, file_name, locale, (True, attr)) + +def add_words_from_text(node, attr, words, file_name, locale): + add_words(getattr(node, attr), node, words, file_name, locale, (False, attr)) + +_opf_file_as = '{%s}file-as' % OPF_NAMESPACES['opf'] + def read_words_from_opf(root, words, file_name, book_locale): for tag in root.xpath('//*[namespace-uri()="%s"]' % OPF_NAMESPACES['dc']): tagname = barename(tag.tag) if not tag.text or tagname in {'identifier', 'language'}: continue - add_words(tag.text, tag.sourceline, words, file_name, book_locale) - file_as = '{%s}file-as' % OPF_NAMESPACES['opf'] - file_as = tag.get(file_as, None) - if file_as: - add_words(file_as, tag.sourceline, words, file_name, book_locale) + add_words_from_text(tag, 'text', words, file_name, book_locale) + add_words_from_attr(tag, _opf_file_as, words, file_name, book_locale) def read_words_from_ncx(root, words, file_name, book_locale): for tag in root.xpath('//*[local-name()="text"]'): if not tag.text: continue - add_words(tag.text, tag.sourceline, words, file_name, book_locale) + add_words_from_text(tag, 'text', words, file_name, book_locale) def read_words_from_html_tag(tag, words, file_name, parent_locale, locale): tagname = barename(tag.tag) if tagname not in {'script', 'style', 'link', 'head'}: if tag.text is not None: - add_words(tag.text, tag.sourceline, words, file_name, locale) + add_words_from_text(tag, 'text', words, file_name, locale) for attr in {'alt', 'title'}: - text = tag.get(attr, None) - if text: - add_words(text, tag.sourceline, words, file_name, locale) + add_words_from_attr(tag, attr, words, file_name, locale) if tag.tail is not None: - add_words(tag.tail, tag.sourceline, words, file_name, parent_locale) + add_words_from_text(tag, 'tail', words, file_name, parent_locale) def locale_from_tag(tag): if 'lang' in tag.attrib: @@ -119,6 +125,13 @@ def read_words_from_html(root, words, file_name, book_locale): read_words_from_html_tag(parent, words, file_name, parent_locale, locale) stack.extend((tag, parent_locale) for tag in parent.iterchildren('*')) +def group_sort(locations): + order = {} + for loc in locations: + if loc.file_name not in order: + order[loc.file_name] = len(order) + return sorted(locations, key=lambda l:(order[l.file_name], l.sourceline)) + def get_all_words(container, book_locale): words = defaultdict(list) file_names = [name for name, linear in container.spine_names] + [container.opf_name] @@ -136,15 +149,46 @@ def get_all_words(container, book_locale): else: read_words_from_html(root, words, file_name, book_locale) - def group_sort(locations): - order = {} - for loc in locations: - if loc.file_name not in order: - order[loc.file_name] = len(order) - return sorted(locations, key=lambda l:(order[l.file_name], l.sourceline)) - return {k:group_sort(v) for k, v in words.iteritems()} +def merge_locations(locs1, locs2): + return group_sort(locs1 + locs2) + +def replace(text, original_word, new_word, lang): + indices = [] + original_word, new_word, text = unicode(original_word), unicode(new_word), unicode(text) + q = text + offset = 0 + while True: + idx = index_of(original_word, q, lang=lang) + if idx == -1: + break + indices.append(offset + idx) + offset += idx + len(original_word) + q = text[offset:] + for idx in reversed(indices): + text = text[:idx] + new_word + text[idx+len(original_word):] + return text, bool(indices) + +def replace_word(container, new_word, locations, locale): + changed = set() + for loc in locations: + node = loc.location_node + is_attr, attr = loc.node_item + if is_attr: + text = node.get(attr) + else: + text = getattr(node, attr) + text, replaced = replace(text, loc.original_word, new_word, locale.langcode) + if replaced: + if is_attr: + node.set(attr, text) + else: + setattr(node, attr, text) + container.replace(loc.file_name, node.getroottree().getroot()) + changed.add(loc.file_name) + return changed + if __name__ == '__main__': import pprint from calibre.gui2.tweak_book import set_book_locale, dictionaries diff --git a/src/calibre/gui2/tweak_book/boss.py b/src/calibre/gui2/tweak_book/boss.py index 01900b2ebc..9384dd9b94 100644 --- a/src/calibre/gui2/tweak_book/boss.py +++ b/src/calibre/gui2/tweak_book/boss.py @@ -113,6 +113,7 @@ class Boss(QObject): self.gui.central.search_panel.show_saved_searches.connect(self.show_saved_searches) self.gui.spell_check.find_word.connect(self.find_word) self.gui.spell_check.refresh_requested.connect(self.commit_all_editors_to_container) + self.gui.spell_check.word_replaced.connect(self.word_replaced) def preferences(self): p = Preferences(self.gui) @@ -709,6 +710,10 @@ class Boss(QObject): break find_next_word(word, locations, ed, name, self.gui, self.show_editor, self.edit_file) + def word_replaced(self, changed_names): + self.set_modified() + self.update_editors_from_container(names=set(changed_names)) + def saved_searches(self): self.gui.saved_searches.show(), self.gui.saved_searches.raise_() diff --git a/src/calibre/gui2/tweak_book/spell.py b/src/calibre/gui2/tweak_book/spell.py index 019052ae44..4e4b66bd19 100644 --- a/src/calibre/gui2/tweak_book/spell.py +++ b/src/calibre/gui2/tweak_book/spell.py @@ -18,6 +18,7 @@ from PyQt4.Qt import ( QComboBox, QListWidget, QListWidgetItem, QInputDialog) from calibre.constants import __appname__, plugins +from calibre.ebooks.oeb.polish.spell import replace_word, get_all_words, merge_locations from calibre.gui2 import choose_files, error_dialog from calibre.gui2.complete2 import LineEdit from calibre.gui2.languages import LanguagesEdit @@ -640,6 +641,25 @@ class WordsModel(QAbstractTableModel): self.spell_map[w] = dictionaries.recognized(*w) self.update_word(w) + def replace_word(self, w, new_word): + if w[0] == new_word: + return w + new_key = (new_word, w[1]) + if new_key in self.words: + self.words[new_key] = merge_locations(self.words[new_key], self.words[w]) + row = self.row_for_word(w) + self.dataChanged.emit(self.index(row, 1), self.index(row, 1)) + else: + self.words[new_key] = self.words[w] + self.spell_map[new_key] = dictionaries.recognized(*new_key) + self.update_word(new_key) + row = self.row_for_word(w) + if row > -1: + self.beginRemoveRows(QModelIndex(), row, row) + del self.items[row] + self.endRemoveRows() + return new_key + def update_word(self, w): should_be_filtered = not self.filter_item(w) row = self.row_for_word(w) @@ -672,6 +692,7 @@ class SpellCheck(Dialog): work_finished = pyqtSignal(object, object) find_word = pyqtSignal(object, object) refresh_requested = pyqtSignal() + word_replaced = pyqtSignal(object) def __init__(self, parent=None): self.__current_word = None @@ -855,7 +876,21 @@ class SpellCheck(Dialog): pass # item is None def change_word(self): - pass + current = self.words_view.currentIndex() + if not current.isValid(): + return + row = current.row() + w = self.words_model.word_for_row(row) + if w is None: + return + new_word = unicode(self.suggested_word.text()) + changed_files = replace_word(current_container(), new_word, self.words_model.words[w], w[1]) + if changed_files: + self.word_replaced.emit(changed_files) + w = self.words_model.replace_word(w, new_word) + row = self.words_model.row_for_word(w) + if row > -1: + self.highlight_row(row) def toggle_ignore(self): current = self.words_view.currentIndex() @@ -914,8 +949,6 @@ class SpellCheck(Dialog): self.thread.start() def get_words(self): - from calibre.ebooks.oeb.polish.spell import get_all_words - try: words = get_all_words(current_container(), dictionaries.default_locale) spell_map = {w:dictionaries.recognized(*w) for w in words}