From c2dc54c4da997c29114f8e7e5540456d3a91e618 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 17 Apr 2014 18:03:12 +0530
Subject: [PATCH] Implement change word

---
 src/calibre/ebooks/oeb/polish/spell.py | 90 +++++++++++++++++++-------
 src/calibre/gui2/tweak_book/boss.py    |  5 ++
 src/calibre/gui2/tweak_book/spell.py   | 39 ++++++++++-
 3 files changed, 108 insertions(+), 26 deletions(-)
diff --git a/src/calibre/ebooks/oeb/polish/spell.py b/src/calibre/ebooks/oeb/polish/spell.py
index c460bf8bc9..1bf819495b 100644
--- a/src/calibre/ebooks/oeb/polish/spell.py
+++ b/src/calibre/ebooks/oeb/polish/spell.py
@@ -9,7 +9,7 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
 import sys
 from collections import defaultdict
 
-from calibre.spell.break_iterator import split_into_words
+from calibre.spell.break_iterator import split_into_words, index_of
 from calibre.spell.dictionary import parse_lang_code
 from calibre.ebooks.oeb.base import barename
 from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, get_container
@@ -39,10 +39,11 @@ def patterns():
 
 class Location(object):
 
-    __slots__ = ('file_name', 'sourceline', 'original_word')
+    __slots__ = ('file_name', 'sourceline', 'original_word', 'location_node', 'node_item')
 
-    def __init__(self, file_name=None, sourceline=None, original_word=None):
+    def __init__(self, file_name=None, sourceline=None, original_word=None, location_node=None, node_item=(None, None)):
         self.file_name, self.sourceline, self.original_word = file_name, sourceline, original_word
+        self.location_node, self.node_item = location_node, node_item
 
     def __repr__(self):
         return '%s:%s' % (self.file_name, self.sourceline)
@@ -63,43 +64,48 @@ def get_words(text, lang):
         return ()
     return filter(filter_words, ans)
 
-def add_words(text, sourceline, words, file_name, locale):
+def add_words(text, node, words, file_name, locale, node_item):
     candidates = get_words(text, locale.langcode)
     if candidates:
         p = patterns()
         for word in candidates:
             sword = p.sanitize_invisible_pat.sub('', word)
-            loc = Location(file_name, sourceline, word)
+            loc = Location(file_name, node.sourceline, word, node, node_item)
             words[(sword, locale)].append(loc)
 
+def add_words_from_attr(node, attr, words, file_name, locale):  # Collect words from the XML attribute `attr` of node
+    text = node.get(attr, None)
+    if text:  # skip attributes that are missing or empty
+        add_words(text, node, words, file_name, locale, (True, attr))  # (True, attr): the words came from an attribute
+
+def add_words_from_text(node, attr, words, file_name, locale):  # Collect words from a property of node read with getattr (callers in this file pass 'text' or 'tail')
+    add_words(getattr(node, attr), node, words, file_name, locale, (False, attr))  # (False, attr): the words came from a node property, not an XML attribute
+
+_opf_file_as = '{%s}file-as' % OPF_NAMESPACES['opf']  # '{namespace-uri}file-as' attribute name, built once at import time
+
 def read_words_from_opf(root, words, file_name, book_locale):
     for tag in root.xpath('//*[namespace-uri()="%s"]' % OPF_NAMESPACES['dc']):
         tagname = barename(tag.tag)
         if not tag.text or tagname in {'identifier', 'language'}:
             continue
-        add_words(tag.text, tag.sourceline, words, file_name, book_locale)
-        file_as = '{%s}file-as' % OPF_NAMESPACES['opf']
-        file_as = tag.get(file_as, None)
-        if file_as:
-            add_words(file_as, tag.sourceline, words, file_name, book_locale)
+        add_words_from_text(tag, 'text', words, file_name, book_locale)
+        add_words_from_attr(tag, _opf_file_as, words, file_name, book_locale)
 
 def read_words_from_ncx(root, words, file_name, book_locale):
     for tag in root.xpath('//*[local-name()="text"]'):
         if not tag.text:
             continue
-        add_words(tag.text, tag.sourceline, words, file_name, book_locale)
+        add_words_from_text(tag, 'text', words, file_name, book_locale)
 
 def read_words_from_html_tag(tag, words, file_name, parent_locale, locale):
     tagname = barename(tag.tag)
     if tagname not in {'script', 'style', 'link', 'head'}:
         if tag.text is not None:
-            add_words(tag.text, tag.sourceline, words, file_name, locale)
+            add_words_from_text(tag, 'text', words, file_name, locale)
         for attr in {'alt', 'title'}:
-            text = tag.get(attr, None)
-            if text:
-                add_words(text, tag.sourceline, words, file_name, locale)
+            add_words_from_attr(tag, attr, words, file_name, locale)
     if tag.tail is not None:
-        add_words(tag.tail, tag.sourceline, words, file_name, parent_locale)
+        add_words_from_text(tag, 'tail', words, file_name, parent_locale)
 
 def locale_from_tag(tag):
     if 'lang' in tag.attrib:
@@ -119,6 +125,13 @@ def read_words_from_html(root, words, file_name, book_locale):
         read_words_from_html_tag(parent, words, file_name, parent_locale, locale)
         stack.extend((tag, parent_locale) for tag in parent.iterchildren('*'))
 
+def group_sort(locations):  # Sort locations by file (files ranked by first appearance), then by source line
+    order = {}  # file_name -> rank, assigned in order of first appearance in locations
+    for loc in locations:
+        if loc.file_name not in order:
+            order[loc.file_name] = len(order)
+    return sorted(locations, key=lambda l:(order[l.file_name], l.sourceline))
+
 def get_all_words(container, book_locale):
     words = defaultdict(list)
     file_names = [name for name, linear in container.spine_names] + [container.opf_name]
@@ -136,15 +149,46 @@ def get_all_words(container, book_locale):
         else:
             read_words_from_html(root, words, file_name, book_locale)
 
-    def group_sort(locations):
-        order = {}
-        for loc in locations:
-            if loc.file_name not in order:
-                order[loc.file_name] = len(order)
-        return sorted(locations, key=lambda l:(order[l.file_name], l.sourceline))
-
     return {k:group_sort(v) for k, v in words.iteritems()}
 
+def merge_locations(locs1, locs2):  # Concatenate two location lists and re-sort the result with group_sort()
+    return group_sort(locs1 + locs2)
+
+def replace(text, original_word, new_word, lang):  # Replace every index_of() match of original_word in text; returns (new_text, found_any)
+    indices = []  # offsets into text (not into q) of each match
+    original_word, new_word, text = unicode(original_word), unicode(new_word), unicode(text)
+    q = text  # the not-yet-searched suffix of text
+    offset = 0  # position in text at which q starts
+    while True:
+        idx = index_of(original_word, q, lang=lang)
+        if idx == -1:
+            break
+        indices.append(offset + idx)
+        offset += idx + len(original_word)  # continue searching just past this match
+        q = text[offset:]
+    for idx in reversed(indices):  # substitute back to front so the earlier offsets stay valid
+        text = text[:idx] + new_word + text[idx+len(original_word):]
+    return text, bool(indices)
+
+def replace_word(container, new_word, locations, locale):  # Substitute new_word at every location; returns the set of file names that were changed
+    changed = set()
+    for loc in locations:
+        node = loc.location_node
+        is_attr, attr = loc.node_item  # is_attr True: XML attribute named attr; False: node property named attr
+        if is_attr:
+            text = node.get(attr)
+        else:
+            text = getattr(node, attr)
+        text, replaced = replace(text, loc.original_word, new_word, locale.langcode)
+        if replaced:  # write back only when at least one occurrence was found
+            if is_attr:
+                node.set(attr, text)
+            else:
+                setattr(node, attr, text)
+            container.replace(loc.file_name, node.getroottree().getroot())  # pass the root of the modified tree to the container
+            changed.add(loc.file_name)
+    return changed
+
 if __name__ == '__main__':
     import pprint
     from calibre.gui2.tweak_book import set_book_locale, dictionaries
diff --git a/src/calibre/gui2/tweak_book/boss.py b/src/calibre/gui2/tweak_book/boss.py
index 01900b2ebc..9384dd9b94 100644
--- a/src/calibre/gui2/tweak_book/boss.py
+++ b/src/calibre/gui2/tweak_book/boss.py
@@ -113,6 +113,7 @@ class Boss(QObject):
         self.gui.central.search_panel.show_saved_searches.connect(self.show_saved_searches)
         self.gui.spell_check.find_word.connect(self.find_word)
         self.gui.spell_check.refresh_requested.connect(self.commit_all_editors_to_container)
+        self.gui.spell_check.word_replaced.connect(self.word_replaced)
 
     def preferences(self):
         p = Preferences(self.gui)
@@ -709,6 +710,10 @@ class Boss(QObject):
                 break
         find_next_word(word, locations, ed, name, self.gui, self.show_editor, self.edit_file)
 
+    def word_replaced(self, changed_names):  # Slot connected to gui.spell_check.word_replaced
+        self.set_modified()
+        self.update_editors_from_container(names=set(changed_names))  # pass the names on as a fresh set
+
     def saved_searches(self):
         self.gui.saved_searches.show(), self.gui.saved_searches.raise_()
 
diff --git a/src/calibre/gui2/tweak_book/spell.py b/src/calibre/gui2/tweak_book/spell.py
index 019052ae44..4e4b66bd19 100644
--- a/src/calibre/gui2/tweak_book/spell.py
+++ b/src/calibre/gui2/tweak_book/spell.py
@@ -18,6 +18,7 @@ from PyQt4.Qt import (
     QComboBox, QListWidget, QListWidgetItem, QInputDialog)
 
 from calibre.constants import __appname__, plugins
+from calibre.ebooks.oeb.polish.spell import replace_word, get_all_words, merge_locations
 from calibre.gui2 import choose_files, error_dialog
 from calibre.gui2.complete2 import LineEdit
 from calibre.gui2.languages import LanguagesEdit
@@ -640,6 +641,25 @@ class WordsModel(QAbstractTableModel):
                 self.spell_map[w] = dictionaries.recognized(*w)
                 self.update_word(w)
 
+    def replace_word(self, w, new_word):  # Re-key the (word, locale) entry w under new_word; returns the key now in use
+        if w[0] == new_word:
+            return w  # same word, nothing to re-key
+        new_key = (new_word, w[1])  # keep the locale of the original entry
+        if new_key in self.words:
+            self.words[new_key] = merge_locations(self.words[new_key], self.words[w])
+            row = self.row_for_word(new_key)  # notify for the merged entry; the row of w is removed below
+            self.dataChanged.emit(self.index(row, 1), self.index(row, 1))
+        else:
+            self.words[new_key] = self.words[w]
+            self.spell_map[new_key] = dictionaries.recognized(*new_key)
+            self.update_word(new_key)
+        row = self.row_for_word(w)
+        if row > -1:  # the old word is currently listed, so drop its row
+            self.beginRemoveRows(QModelIndex(), row, row)
+            del self.items[row]
+            self.endRemoveRows()
+        return new_key
+
     def update_word(self, w):
         should_be_filtered = not self.filter_item(w)
         row = self.row_for_word(w)
@@ -672,6 +692,7 @@ class SpellCheck(Dialog):
     work_finished = pyqtSignal(object, object)
     find_word = pyqtSignal(object, object)
     refresh_requested = pyqtSignal()
+    word_replaced = pyqtSignal(object)
 
     def __init__(self, parent=None):
         self.__current_word = None
@@ -855,7 +876,21 @@ class SpellCheck(Dialog):
             pass  # item is None
 
     def change_word(self):
-        pass
+        current = self.words_view.currentIndex()
+        if not current.isValid():
+            return  # the view has no current item
+        row = current.row()
+        w = self.words_model.word_for_row(row)
+        if w is None:
+            return
+        new_word = unicode(self.suggested_word.text())
+        changed_files = replace_word(current_container(), new_word, self.words_model.words[w], w[1])  # w[1] is passed as the locale
+        if changed_files:  # an empty set means nothing was replaced
+            self.word_replaced.emit(changed_files)
+            w = self.words_model.replace_word(w, new_word)  # returns the key the entry is now stored under
+            row = self.words_model.row_for_word(w)
+            if row > -1:
+                self.highlight_row(row)
 
     def toggle_ignore(self):
         current = self.words_view.currentIndex()
@@ -914,8 +949,6 @@ class SpellCheck(Dialog):
         self.thread.start()
 
     def get_words(self):
-        from calibre.ebooks.oeb.polish.spell import get_all_words
-
         try:
             words = get_all_words(current_container(), dictionaries.default_locale)
             spell_map = {w:dictionaries.recognized(*w) for w in words}