mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Implement change word
This commit is contained in:
parent
ae443dc309
commit
c2dc54c4da
@ -9,7 +9,7 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
|
||||
from calibre.spell.break_iterator import split_into_words
|
||||
from calibre.spell.break_iterator import split_into_words, index_of
|
||||
from calibre.spell.dictionary import parse_lang_code
|
||||
from calibre.ebooks.oeb.base import barename
|
||||
from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, get_container
|
||||
@ -39,10 +39,11 @@ def patterns():
|
||||
|
||||
class Location(object):
    '''
    One place where a word was found.

    Attributes:
        file_name: name of the file the word was read from
        sourceline: the ``sourceline`` of the node the word was read from
        original_word: the word exactly as it was extracted, before any
            sanitizing
        location_node: the node whose text, tail or attribute held the word
        node_item: an ``(is_attribute, name)`` pair. When ``is_attribute``
            is true, ``name`` is an attribute of ``location_node``,
            otherwise it is the name of a Python attribute of the node
            (such as ``text`` or ``tail``).
    '''

    __slots__ = ('file_name', 'sourceline', 'original_word', 'location_node', 'node_item')

    def __init__(self, file_name=None, sourceline=None, original_word=None, location_node=None, node_item=(None, None)):
        self.file_name, self.sourceline, self.original_word = file_name, sourceline, original_word
        self.location_node, self.node_item = location_node, node_item

    def __repr__(self):
        return '%s:%s' % (self.file_name, self.sourceline)
|
||||
@ -63,43 +64,48 @@ def get_words(text, lang):
|
||||
return ()
|
||||
return filter(filter_words, ans)
|
||||
|
||||
def add_words(text, node, words, file_name, locale, node_item):
    '''
    Record a Location for every word that get_words() extracts from text.

    :param text: the string to split into words
    :param node: the node the text was read from; supplies ``sourceline``
        and is stored on each Location
    :param words: mapping from ``(sanitized_word, locale)`` to a list of
        Location objects. Missing keys must yield a list (for example a
        ``defaultdict(list)``).
    :param file_name: name of the file the node belongs to
    :param locale: locale of the text; its ``langcode`` selects the word
        splitting rules
    :param node_item: ``(is_attribute, name)`` pair saying where on the
        node the text lives
    '''
    candidates = get_words(text, locale.langcode)
    if candidates:
        p = patterns()
        for word in candidates:
            # The key uses the word with sanitize_invisible_pat matches
            # removed, the Location keeps the word as it was extracted.
            sword = p.sanitize_invisible_pat.sub('', word)
            loc = Location(file_name, node.sourceline, word, node, node_item)
            words[(sword, locale)].append(loc)
|
||||
|
||||
def add_words_from_attr(node, attr, words, file_name, locale):
    '''
    Collect the words found in the value of the attribute ``attr`` of
    ``node``. Does nothing when the attribute is missing or empty.
    '''
    value = node.get(attr, None)
    if not value:
        return
    add_words(value, node, words, file_name, locale, (True, attr))
|
||||
|
||||
def add_words_from_text(node, attr, words, file_name, locale):
    '''
    Collect the words found in the Python attribute ``attr`` of ``node``
    (callers in this module pass ``'text'`` or ``'tail'``).
    '''
    content = getattr(node, attr)
    add_words(content, node, words, file_name, locale, (False, attr))
|
||||
|
||||
_opf_file_as = '{%s}file-as' % OPF_NAMESPACES['opf']
|
||||
|
||||
def read_words_from_opf(root, words, file_name, book_locale):
    '''
    Collect words from the elements in the ``dc`` namespace of an OPF
    document.

    Elements without text, and the ``identifier`` and ``language``
    elements, are skipped. For every other element both its text and its
    ``file-as`` attribute (in the ``opf`` namespace) are scanned, using
    ``book_locale``.

    :param root: root of the parsed OPF document
    :param words: mapping that add_words() fills in
    :param file_name: name of the OPF file
    :param book_locale: locale used for all words found here
    '''
    for tag in root.xpath('//*[namespace-uri()="%s"]' % OPF_NAMESPACES['dc']):
        tagname = barename(tag.tag)
        if not tag.text or tagname in {'identifier', 'language'}:
            continue
        add_words_from_text(tag, 'text', words, file_name, book_locale)
        add_words_from_attr(tag, _opf_file_as, words, file_name, book_locale)
|
||||
|
||||
def read_words_from_ncx(root, words, file_name, book_locale):
    '''
    Collect words from the text of every element whose local name is
    ``text`` in an NCX document. Elements with no text are skipped.

    :param root: root of the parsed NCX document
    :param words: mapping that add_words() fills in
    :param file_name: name of the NCX file
    :param book_locale: locale used for all words found here
    '''
    for tag in root.xpath('//*[local-name()="text"]'):
        if not tag.text:
            continue
        add_words_from_text(tag, 'text', words, file_name, book_locale)
|
||||
|
||||
def read_words_from_html_tag(tag, words, file_name, parent_locale, locale):
    '''
    Collect words from a single HTML element.

    The element's text and its ``alt`` and ``title`` attributes are
    scanned with ``locale``, unless the element is a ``script``,
    ``style``, ``link`` or ``head`` tag. The element's tail is scanned
    with ``parent_locale``.

    :param tag: the element to read
    :param words: mapping that add_words() fills in
    :param file_name: name of the HTML file
    :param parent_locale: locale used for the tail text
    :param locale: locale used for the element's own text and attributes
    '''
    tagname = barename(tag.tag)
    if tagname not in {'script', 'style', 'link', 'head'}:
        if tag.text is not None:
            add_words_from_text(tag, 'text', words, file_name, locale)
        for attr in {'alt', 'title'}:
            add_words_from_attr(tag, attr, words, file_name, locale)
    # NOTE(review): the nesting here is reconstructed. The tail check is
    # assumed to sit outside the tag name filter, since it is scanned with
    # the parent's locale -- confirm against the real file.
    if tag.tail is not None:
        add_words_from_text(tag, 'tail', words, file_name, parent_locale)
|
||||
|
||||
def locale_from_tag(tag):
|
||||
if 'lang' in tag.attrib:
|
||||
@ -119,6 +125,13 @@ def read_words_from_html(root, words, file_name, book_locale):
|
||||
read_words_from_html_tag(parent, words, file_name, parent_locale, locale)
|
||||
stack.extend((tag, parent_locale) for tag in parent.iterchildren('*'))
|
||||
|
||||
def group_sort(locations):
    '''
    Return a new list with the locations grouped by file and, inside each
    file, ordered by source line. Files keep the order in which they are
    first encountered in ``locations``.
    '''
    first_seen = {}
    for location in locations:
        # len() is evaluated before the insert, so each new file gets the
        # next free rank and known files keep theirs
        first_seen.setdefault(location.file_name, len(first_seen))

    def sort_key(location):
        return first_seen[location.file_name], location.sourceline

    return sorted(locations, key=sort_key)
|
||||
|
||||
def get_all_words(container, book_locale):
|
||||
words = defaultdict(list)
|
||||
file_names = [name for name, linear in container.spine_names] + [container.opf_name]
|
||||
@ -136,15 +149,46 @@ def get_all_words(container, book_locale):
|
||||
else:
|
||||
read_words_from_html(root, words, file_name, book_locale)
|
||||
|
||||
def group_sort(locations):
|
||||
order = {}
|
||||
for loc in locations:
|
||||
if loc.file_name not in order:
|
||||
order[loc.file_name] = len(order)
|
||||
return sorted(locations, key=lambda l:(order[l.file_name], l.sourceline))
|
||||
|
||||
return {k:group_sort(v) for k, v in words.iteritems()}
|
||||
|
||||
def merge_locations(locs1, locs2):
    ''' Concatenate two lists of locations and return them ordered by group_sort(). '''
    combined = locs1 + locs2
    return group_sort(combined)
|
||||
|
||||
def replace(text, original_word, new_word, lang):
    '''
    Replace every occurrence of ``original_word`` that index_of() finds in
    ``text`` with ``new_word``.

    :param lang: language passed on to index_of()
    :return: ``(new_text, replaced)`` where ``replaced`` is True if at
        least one occurrence was found
    '''
    original_word, new_word, text = unicode(original_word), unicode(new_word), unicode(text)
    word_length = len(original_word)

    # First pass: find the absolute position of every occurrence, always
    # resuming the search just past the previous match
    positions = []
    start = 0
    while True:
        found = index_of(original_word, text[start:], lang=lang)
        if found == -1:
            break
        positions.append(start + found)
        start += found + word_length

    # Second pass: stitch the untouched stretches and the replacements
    # together
    pieces = []
    last = 0
    for pos in positions:
        pieces.append(text[last:pos])
        pieces.append(new_word)
        last = pos + word_length
    pieces.append(text[last:])
    return ''.join(pieces), bool(positions)
|
||||
|
||||
def replace_word(container, new_word, locations, locale):
    '''
    Replace the word recorded at each location with ``new_word``.

    For every location the text is read back from the stored node (either
    an attribute value or a Python attribute such as text/tail), run
    through replace() and, if anything was replaced, written back to the
    node. The file's root is then handed to ``container.replace()``.

    :param locale: its ``langcode`` is passed to replace()
    :return: the set of file names in which a replacement was made
    '''
    changed = set()
    for loc in locations:
        node = loc.location_node
        is_attr, attr = loc.node_item
        current = node.get(attr) if is_attr else getattr(node, attr)
        updated, replaced = replace(current, loc.original_word, new_word, locale.langcode)
        if not replaced:
            continue
        if is_attr:
            node.set(attr, updated)
        else:
            setattr(node, attr, updated)
        container.replace(loc.file_name, node.getroottree().getroot())
        changed.add(loc.file_name)
    return changed
|
||||
|
||||
if __name__ == '__main__':
|
||||
import pprint
|
||||
from calibre.gui2.tweak_book import set_book_locale, dictionaries
|
||||
|
@ -113,6 +113,7 @@ class Boss(QObject):
|
||||
self.gui.central.search_panel.show_saved_searches.connect(self.show_saved_searches)
|
||||
self.gui.spell_check.find_word.connect(self.find_word)
|
||||
self.gui.spell_check.refresh_requested.connect(self.commit_all_editors_to_container)
|
||||
self.gui.spell_check.word_replaced.connect(self.word_replaced)
|
||||
|
||||
def preferences(self):
|
||||
p = Preferences(self.gui)
|
||||
@ -709,6 +710,10 @@ class Boss(QObject):
|
||||
break
|
||||
find_next_word(word, locations, ed, name, self.gui, self.show_editor, self.edit_file)
|
||||
|
||||
def word_replaced(self, changed_names):
    '''
    Handler connected to the spell check dialog's ``word_replaced``
    signal: calls set_modified() and then updates the editors from the
    container for the given file names.
    '''
    self.set_modified()
    names = set(changed_names)
    self.update_editors_from_container(names=names)
|
||||
|
||||
def saved_searches(self):
|
||||
self.gui.saved_searches.show(), self.gui.saved_searches.raise_()
|
||||
|
||||
|
@ -18,6 +18,7 @@ from PyQt4.Qt import (
|
||||
QComboBox, QListWidget, QListWidgetItem, QInputDialog)
|
||||
|
||||
from calibre.constants import __appname__, plugins
|
||||
from calibre.ebooks.oeb.polish.spell import replace_word, get_all_words, merge_locations
|
||||
from calibre.gui2 import choose_files, error_dialog
|
||||
from calibre.gui2.complete2 import LineEdit
|
||||
from calibre.gui2.languages import LanguagesEdit
|
||||
@ -640,6 +641,25 @@ class WordsModel(QAbstractTableModel):
|
||||
self.spell_map[w] = dictionaries.recognized(*w)
|
||||
self.update_word(w)
|
||||
|
||||
def replace_word(self, w, new_word):
    '''
    Update the model after the word ``w`` has been changed to ``new_word``.

    :param w: the ``(word, locale)`` key of the word that was replaced
    :param new_word: the replacement text
    :return: the key under which the locations are now stored. This is
        ``w`` itself when the word did not actually change.
    '''
    if w[0] == new_word:
        return w
    new_key = (new_word, w[1])
    if new_key in self.words:
        # The replacement already exists, fold the locations of the old
        # word into it
        self.words[new_key] = merge_locations(self.words[new_key], self.words[w])
        # It is the row of the merged entry that changed, not the row of
        # the old word, which is removed below. row_for_word() appears to
        # return -1 for a word with no row, as the check further down
        # relies on.
        row = self.row_for_word(new_key)
        if row > -1:
            self.dataChanged.emit(self.index(row, 1), self.index(row, 1))
    else:
        self.words[new_key] = self.words[w]
        self.spell_map[new_key] = dictionaries.recognized(*new_key)
        self.update_word(new_key)
    # NOTE(review): self.words[w] and self.spell_map[w] are left in place,
    # only the visible row is removed -- confirm that is intended.
    row = self.row_for_word(w)
    if row > -1:
        self.beginRemoveRows(QModelIndex(), row, row)
        del self.items[row]
        self.endRemoveRows()
    return new_key
|
||||
|
||||
def update_word(self, w):
|
||||
should_be_filtered = not self.filter_item(w)
|
||||
row = self.row_for_word(w)
|
||||
@ -672,6 +692,7 @@ class SpellCheck(Dialog):
|
||||
work_finished = pyqtSignal(object, object)
|
||||
find_word = pyqtSignal(object, object)
|
||||
refresh_requested = pyqtSignal()
|
||||
word_replaced = pyqtSignal(object)
|
||||
|
||||
def __init__(self, parent=None):
|
||||
self.__current_word = None
|
||||
@ -855,7 +876,21 @@ class SpellCheck(Dialog):
|
||||
pass # item is None
|
||||
|
||||
def change_word(self):
    '''
    Replace the word selected in the words view with the contents of the
    suggested word field.

    Nothing happens when there is no valid current index or no word for
    the current row. If replace_word() changed any files, the
    ``word_replaced`` signal is emitted with them, the model is updated
    and the row of the resulting word is highlighted.
    '''
    index = self.words_view.currentIndex()
    if not index.isValid():
        return
    word = self.words_model.word_for_row(index.row())
    if word is None:
        return
    replacement = unicode(self.suggested_word.text())
    changed_files = replace_word(current_container(), replacement, self.words_model.words[word], word[1])
    if not changed_files:
        return
    self.word_replaced.emit(changed_files)
    word = self.words_model.replace_word(word, replacement)
    new_row = self.words_model.row_for_word(word)
    if new_row > -1:
        self.highlight_row(new_row)
|
||||
|
||||
def toggle_ignore(self):
|
||||
current = self.words_view.currentIndex()
|
||||
@ -914,8 +949,6 @@ class SpellCheck(Dialog):
|
||||
self.thread.start()
|
||||
|
||||
def get_words(self):
|
||||
from calibre.ebooks.oeb.polish.spell import get_all_words
|
||||
|
||||
try:
|
||||
words = get_all_words(current_container(), dictionaries.default_locale)
|
||||
spell_map = {w:dictionaries.recognized(*w) for w in words}
|
||||
|
Loading…
x
Reference in New Issue
Block a user