Implement change word

This commit is contained in:
Kovid Goyal 2014-04-17 18:03:12 +05:30
parent ae443dc309
commit c2dc54c4da
3 changed files with 108 additions and 26 deletions

View File

@ -9,7 +9,7 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
import sys
from collections import defaultdict
from calibre.spell.break_iterator import split_into_words
from calibre.spell.break_iterator import split_into_words, index_of
from calibre.spell.dictionary import parse_lang_code
from calibre.ebooks.oeb.base import barename
from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, get_container
@ -39,10 +39,11 @@ def patterns():
class Location(object):
__slots__ = ('file_name', 'sourceline', 'original_word')
__slots__ = ('file_name', 'sourceline', 'original_word', 'location_node', 'node_item')
def __init__(self, file_name=None, sourceline=None, original_word=None):
def __init__(self, file_name=None, sourceline=None, original_word=None, location_node=None, node_item=(None, None)):
self.file_name, self.sourceline, self.original_word = file_name, sourceline, original_word
self.location_node, self.node_item = location_node, node_item
def __repr__(self):
return '%s:%s' % (self.file_name, self.sourceline)
@ -63,43 +64,48 @@ def get_words(text, lang):
return ()
return filter(filter_words, ans)
def add_words(text, sourceline, words, file_name, locale):
def add_words(text, node, words, file_name, locale, node_item):
candidates = get_words(text, locale.langcode)
if candidates:
p = patterns()
for word in candidates:
sword = p.sanitize_invisible_pat.sub('', word)
loc = Location(file_name, sourceline, word)
loc = Location(file_name, node.sourceline, word, node, node_item)
words[(sword, locale)].append(loc)
def add_words_from_attr(node, attr, words, file_name, locale):
    """Record the words found in attribute ``attr`` of ``node``.

    Nothing is recorded when the attribute is missing or empty. The
    location item passed on is ``(True, attr)``; the ``True`` flags the
    text as coming from an attribute rather than from ``text``/``tail``.
    """
    value = node.get(attr, None)
    if not value:
        return
    add_words(value, node, words, file_name, locale, (True, attr))
def add_words_from_text(node, attr, words, file_name, locale):
    """Record the words found in the ``attr`` property of ``node``.

    ``attr`` is read with ``getattr`` (callers in this file pass 'text'
    or 'tail'). The location item passed on is ``(False, attr)``; the
    ``False`` flags the text as not coming from an XML attribute.
    """
    content = getattr(node, attr)
    node_item = (False, attr)
    add_words(content, node, words, file_name, locale, node_item)
_opf_file_as = '{%s}file-as' % OPF_NAMESPACES['opf']
def read_words_from_opf(root, words, file_name, book_locale):
for tag in root.xpath('//*[namespace-uri()="%s"]' % OPF_NAMESPACES['dc']):
tagname = barename(tag.tag)
if not tag.text or tagname in {'identifier', 'language'}:
continue
add_words(tag.text, tag.sourceline, words, file_name, book_locale)
file_as = '{%s}file-as' % OPF_NAMESPACES['opf']
file_as = tag.get(file_as, None)
if file_as:
add_words(file_as, tag.sourceline, words, file_name, book_locale)
add_words_from_text(tag, 'text', words, file_name, book_locale)
add_words_from_attr(tag, _opf_file_as, words, file_name, book_locale)
def read_words_from_ncx(root, words, file_name, book_locale):
for tag in root.xpath('//*[local-name()="text"]'):
if not tag.text:
continue
add_words(tag.text, tag.sourceline, words, file_name, book_locale)
add_words_from_text(tag, 'text', words, file_name, book_locale)
def read_words_from_html_tag(tag, words, file_name, parent_locale, locale):
tagname = barename(tag.tag)
if tagname not in {'script', 'style', 'link', 'head'}:
if tag.text is not None:
add_words(tag.text, tag.sourceline, words, file_name, locale)
add_words_from_text(tag, 'text', words, file_name, locale)
for attr in {'alt', 'title'}:
text = tag.get(attr, None)
if text:
add_words(text, tag.sourceline, words, file_name, locale)
add_words_from_attr(tag, attr, words, file_name, locale)
if tag.tail is not None:
add_words(tag.tail, tag.sourceline, words, file_name, parent_locale)
add_words_from_text(tag, 'tail', words, file_name, parent_locale)
def locale_from_tag(tag):
if 'lang' in tag.attrib:
@ -119,6 +125,13 @@ def read_words_from_html(root, words, file_name, book_locale):
read_words_from_html_tag(parent, words, file_name, parent_locale, locale)
stack.extend((tag, parent_locale) for tag in parent.iterchildren('*'))
def group_sort(locations):
    """Return ``locations`` sorted by file, then by source line.

    Files are ordered by their first appearance in ``locations``, not
    alphabetically; within one file, entries are ordered by ``sourceline``.
    """
    first_seen = {}
    for location in locations:
        # Rank each file by the position at which it was first encountered.
        first_seen.setdefault(location.file_name, len(first_seen))

    def sort_key(location):
        return first_seen[location.file_name], location.sourceline

    return sorted(locations, key=sort_key)
def get_all_words(container, book_locale):
words = defaultdict(list)
file_names = [name for name, linear in container.spine_names] + [container.opf_name]
@ -136,15 +149,46 @@ def get_all_words(container, book_locale):
else:
read_words_from_html(root, words, file_name, book_locale)
def group_sort(locations):
order = {}
for loc in locations:
if loc.file_name not in order:
order[loc.file_name] = len(order)
return sorted(locations, key=lambda l:(order[l.file_name], l.sourceline))
return {k:group_sort(v) for k, v in words.iteritems()}
def merge_locations(locs1, locs2):
    """Concatenate two location lists and return them in group_sort() order."""
    combined = locs1 + locs2
    return group_sort(combined)
def replace(text, original_word, new_word, lang):
    """Replace every occurrence of ``original_word`` in ``text``.

    Occurrences are the ones reported by ``index_of`` for the language
    ``lang``; the search resumes immediately after each match.

    Returns a ``(new_text, replaced)`` tuple, where ``replaced`` is True
    when at least one occurrence was found.
    """
    original_word, new_word, text = unicode(original_word), unicode(new_word), unicode(text)
    word_len = len(original_word)

    # Pass 1: collect the absolute start position of every match.
    starts = []
    pos = 0
    while True:
        found = index_of(original_word, text[pos:], lang=lang)
        if found == -1:
            break
        starts.append(pos + found)
        pos += found + word_len

    # Pass 2: stitch the untouched stretches together with the new word.
    pieces = []
    last = 0
    for start in starts:
        pieces.append(text[last:start])
        pieces.append(new_word)
        last = start + word_len
    pieces.append(text[last:])
    return ''.join(pieces), bool(starts)
def replace_word(container, new_word, locations, locale):
    """Apply ``new_word`` at every location and return the changed file names.

    For each location the text is read from the stored node, either from
    an attribute or from a node property depending on ``loc.node_item``,
    and passed through replace(). When something was replaced, the new
    text is written back to the node, the file's root is handed to
    ``container.replace`` and the file name is added to the returned set.
    """
    changed = set()
    for loc in locations:
        node = loc.location_node
        is_attr, attr = loc.node_item
        current = node.get(attr) if is_attr else getattr(node, attr)
        updated, replaced = replace(current, loc.original_word, new_word, locale.langcode)
        if not replaced:
            continue
        if is_attr:
            node.set(attr, updated)
        else:
            setattr(node, attr, updated)
        container.replace(loc.file_name, node.getroottree().getroot())
        changed.add(loc.file_name)
    return changed
if __name__ == '__main__':
import pprint
from calibre.gui2.tweak_book import set_book_locale, dictionaries

View File

@ -113,6 +113,7 @@ class Boss(QObject):
self.gui.central.search_panel.show_saved_searches.connect(self.show_saved_searches)
self.gui.spell_check.find_word.connect(self.find_word)
self.gui.spell_check.refresh_requested.connect(self.commit_all_editors_to_container)
self.gui.spell_check.word_replaced.connect(self.word_replaced)
def preferences(self):
p = Preferences(self.gui)
@ -709,6 +710,10 @@ class Boss(QObject):
break
find_next_word(word, locations, ed, name, self.gui, self.show_editor, self.edit_file)
def word_replaced(self, changed_names):
    """Handle a word replacement that touched the files in ``changed_names``.

    Marks the book as modified, then asks the editors for those files to
    be updated from the container.
    """
    self.set_modified()
    names = set(changed_names)
    self.update_editors_from_container(names=names)
def saved_searches(self):
self.gui.saved_searches.show(), self.gui.saved_searches.raise_()

View File

@ -18,6 +18,7 @@ from PyQt4.Qt import (
QComboBox, QListWidget, QListWidgetItem, QInputDialog)
from calibre.constants import __appname__, plugins
from calibre.ebooks.oeb.polish.spell import replace_word, get_all_words, merge_locations
from calibre.gui2 import choose_files, error_dialog
from calibre.gui2.complete2 import LineEdit
from calibre.gui2.languages import LanguagesEdit
@ -640,6 +641,25 @@ class WordsModel(QAbstractTableModel):
self.spell_map[w] = dictionaries.recognized(*w)
self.update_word(w)
def replace_word(self, w, new_word):
    """Re-key the model entry for ``w`` after its word became ``new_word``.

    ``w`` is a ``(word, locale)`` key into ``self.words``. If the word is
    unchanged, ``w`` is returned untouched. Otherwise the locations of
    ``w`` are either merged into an already existing ``(new_word, locale)``
    entry or moved to a newly created one, the row showing ``w`` (if any)
    is removed, and the new key is returned.
    """
    if w[0] == new_word:
        return w
    new_key = (new_word, w[1])
    if new_key in self.words:
        self.words[new_key] = merge_locations(self.words[new_key], self.words[w])
        # The merged entry is the one whose data changed, so notify views
        # about its row. The row for ``w`` is removed below, and a row of
        # -1 (entry not currently shown) must not be turned into an index.
        row = self.row_for_word(new_key)
        if row > -1:
            self.dataChanged.emit(self.index(row, 1), self.index(row, 1))
    else:
        self.words[new_key] = self.words[w]
        self.spell_map[new_key] = dictionaries.recognized(*new_key)
        self.update_word(new_key)
    # Drop the row that displayed the old word, if it is currently shown.
    row = self.row_for_word(w)
    if row > -1:
        self.beginRemoveRows(QModelIndex(), row, row)
        del self.items[row]
        self.endRemoveRows()
    return new_key
def update_word(self, w):
should_be_filtered = not self.filter_item(w)
row = self.row_for_word(w)
@ -672,6 +692,7 @@ class SpellCheck(Dialog):
work_finished = pyqtSignal(object, object)
find_word = pyqtSignal(object, object)
refresh_requested = pyqtSignal()
word_replaced = pyqtSignal(object)
def __init__(self, parent=None):
self.__current_word = None
@ -855,7 +876,21 @@ class SpellCheck(Dialog):
pass # item is None
def change_word(self):
pass
current = self.words_view.currentIndex()
if not current.isValid():
return
row = current.row()
w = self.words_model.word_for_row(row)
if w is None:
return
new_word = unicode(self.suggested_word.text())
changed_files = replace_word(current_container(), new_word, self.words_model.words[w], w[1])
if changed_files:
self.word_replaced.emit(changed_files)
w = self.words_model.replace_word(w, new_word)
row = self.words_model.row_for_word(w)
if row > -1:
self.highlight_row(row)
def toggle_ignore(self):
current = self.words_view.currentIndex()
@ -914,8 +949,6 @@ class SpellCheck(Dialog):
self.thread.start()
def get_words(self):
from calibre.ebooks.oeb.polish.spell import get_all_words
try:
words = get_all_words(current_container(), dictionaries.default_locale)
spell_map = {w:dictionaries.recognized(*w) for w in words}