mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Implement change word
This commit is contained in:
parent
ae443dc309
commit
c2dc54c4da
@ -9,7 +9,7 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
import sys
|
import sys
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
from calibre.spell.break_iterator import split_into_words
|
from calibre.spell.break_iterator import split_into_words, index_of
|
||||||
from calibre.spell.dictionary import parse_lang_code
|
from calibre.spell.dictionary import parse_lang_code
|
||||||
from calibre.ebooks.oeb.base import barename
|
from calibre.ebooks.oeb.base import barename
|
||||||
from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, get_container
|
from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, get_container
|
||||||
@ -39,10 +39,11 @@ def patterns():
|
|||||||
|
|
||||||
class Location(object):
|
class Location(object):
|
||||||
|
|
||||||
__slots__ = ('file_name', 'sourceline', 'original_word')
|
__slots__ = ('file_name', 'sourceline', 'original_word', 'location_node', 'node_item')
|
||||||
|
|
||||||
def __init__(self, file_name=None, sourceline=None, original_word=None):
|
def __init__(self, file_name=None, sourceline=None, original_word=None, location_node=None, node_item=(None, None)):
|
||||||
self.file_name, self.sourceline, self.original_word = file_name, sourceline, original_word
|
self.file_name, self.sourceline, self.original_word = file_name, sourceline, original_word
|
||||||
|
self.location_node, self.node_item = location_node, node_item
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return '%s:%s' % (self.file_name, self.sourceline)
|
return '%s:%s' % (self.file_name, self.sourceline)
|
||||||
@ -63,43 +64,48 @@ def get_words(text, lang):
|
|||||||
return ()
|
return ()
|
||||||
return filter(filter_words, ans)
|
return filter(filter_words, ans)
|
||||||
|
|
||||||
def add_words(text, node, words, file_name, locale, node_item):
    ''' Record a :class:`Location` for every word found in ``text``.

    :param text: The text to split into words
    :param node: The node the text came from, its ``sourceline`` is stored in
        the location
    :param words: Mapping of ``(sanitized word, locale)`` to a list of
        locations, it is appended to in place
    :param file_name: Name of the file containing ``node``
    :param locale: Locale of the text, ``locale.langcode`` is used for word
        splitting
    :param node_item: Stored unchanged on every location created here
    '''
    candidates = get_words(text, locale.langcode)
    if not candidates:
        return
    strip_invisible = patterns().sanitize_invisible_pat.sub
    for original in candidates:
        # Words are keyed by their sanitized form, but the location remembers
        # the word exactly as it appears in the text
        key = (strip_invisible('', original), locale)
        words[key].append(Location(file_name, node.sourceline, original, node, node_item))
|
||||||
|
|
||||||
|
def add_words_from_attr(node, attr, words, file_name, locale):
    ''' Collect the words in the value of the attribute ``attr`` of ``node``.
    Nothing is done when the attribute is missing or empty. '''
    value = node.get(attr, None)
    if not value:
        return
    # The (True, attr) pair records that the text lives in an attribute
    add_words(value, node, words, file_name, locale, (True, attr))
|
||||||
|
|
||||||
|
def add_words_from_text(node, attr, words, file_name, locale):
    ''' Collect the words in the Python attribute ``attr`` of ``node`` (the
    callers in this file pass ``'text'`` or ``'tail'``). '''
    content = getattr(node, attr)
    # The (False, attr) pair records that the text is not an XML attribute
    node_item = (False, attr)
    add_words(content, node, words, file_name, locale, node_item)
|
||||||
|
|
||||||
|
# Name of the file-as attribute qualified with the OPF namespace, in the
# {namespace}name form. Built once at import time.
_opf_file_as = '{%s}file-as' % OPF_NAMESPACES['opf']
|
||||||
|
|
||||||
def read_words_from_opf(root, words, file_name, book_locale):
    ''' Collect words from the elements of ``root`` that are in the Dublin
    Core namespace: their text and their OPF file-as attribute. Elements
    without text, and the identifier and language elements, are skipped. '''
    dc_tags = root.xpath('//*[namespace-uri()="%s"]' % OPF_NAMESPACES['dc'])
    for tag in dc_tags:
        name = barename(tag.tag)
        if tag.text and name not in {'identifier', 'language'}:
            add_words_from_text(tag, 'text', words, file_name, book_locale)
            add_words_from_attr(tag, _opf_file_as, words, file_name, book_locale)
|
|
||||||
|
|
||||||
def read_words_from_ncx(root, words, file_name, book_locale):
    ''' Collect words from every element of ``root`` whose local name is
    ``text`` and that has non-empty text. '''
    for tag in root.xpath('//*[local-name()="text"]'):
        if tag.text:
            add_words_from_text(tag, 'text', words, file_name, book_locale)
|
||||||
|
|
||||||
def read_words_from_html_tag(tag, words, file_name, parent_locale, locale):
    ''' Collect words from a single HTML element.

    The text and the ``alt`` and ``title`` attributes of the element are read
    using ``locale``, unless the element is a script, style, link or head
    tag. The tail of the element is read using ``parent_locale``.
    '''
    has_readable_content = barename(tag.tag) not in {'script', 'style', 'link', 'head'}
    if has_readable_content:
        if tag.text is not None:
            add_words_from_text(tag, 'text', words, file_name, locale)
        for attr in {'alt', 'title'}:
            add_words_from_attr(tag, attr, words, file_name, locale)
    # The tail is text that follows the element, so it is read for every tag
    if tag.tail is not None:
        add_words_from_text(tag, 'tail', words, file_name, parent_locale)
|
||||||
|
|
||||||
def locale_from_tag(tag):
|
def locale_from_tag(tag):
|
||||||
if 'lang' in tag.attrib:
|
if 'lang' in tag.attrib:
|
||||||
@ -119,6 +125,13 @@ def read_words_from_html(root, words, file_name, book_locale):
|
|||||||
read_words_from_html_tag(parent, words, file_name, parent_locale, locale)
|
read_words_from_html_tag(parent, words, file_name, parent_locale, locale)
|
||||||
stack.extend((tag, parent_locale) for tag in parent.iterchildren('*'))
|
stack.extend((tag, parent_locale) for tag in parent.iterchildren('*'))
|
||||||
|
|
||||||
|
def group_sort(locations):
    ''' Return a new list with the locations grouped by file and sorted by
    source line inside each file. Files keep the order in which they first
    appear in ``locations``. '''
    first_seen = {}
    for location in locations:
        # Only the first occurrence of a file name assigns its rank
        first_seen.setdefault(location.file_name, len(first_seen))

    def sort_key(location):
        return first_seen[location.file_name], location.sourceline

    return sorted(locations, key=sort_key)
|
||||||
|
|
||||||
def get_all_words(container, book_locale):
|
def get_all_words(container, book_locale):
|
||||||
words = defaultdict(list)
|
words = defaultdict(list)
|
||||||
file_names = [name for name, linear in container.spine_names] + [container.opf_name]
|
file_names = [name for name, linear in container.spine_names] + [container.opf_name]
|
||||||
@ -136,15 +149,46 @@ def get_all_words(container, book_locale):
|
|||||||
else:
|
else:
|
||||||
read_words_from_html(root, words, file_name, book_locale)
|
read_words_from_html(root, words, file_name, book_locale)
|
||||||
|
|
||||||
def group_sort(locations):
|
|
||||||
order = {}
|
|
||||||
for loc in locations:
|
|
||||||
if loc.file_name not in order:
|
|
||||||
order[loc.file_name] = len(order)
|
|
||||||
return sorted(locations, key=lambda l:(order[l.file_name], l.sourceline))
|
|
||||||
|
|
||||||
return {k:group_sort(v) for k, v in words.iteritems()}
|
return {k:group_sort(v) for k, v in words.iteritems()}
|
||||||
|
|
||||||
|
def merge_locations(locs1, locs2):
    ''' Return the locations of both lists as a single list ordered by
    :func:`group_sort`. '''
    combined = locs1 + locs2
    return group_sort(combined)
|
||||||
|
|
||||||
|
def replace(text, original_word, new_word, lang):
    ''' Replace every occurrence of ``original_word`` in ``text`` with
    ``new_word``. Occurrences are located with ``index_of()`` for the
    language ``lang``.

    :return: A tuple of the resulting text and a boolean that is True if at
        least one occurrence was replaced
    '''
    original_word, new_word, text = unicode(original_word), unicode(new_word), unicode(text)
    word_length = len(original_word)
    pieces = []
    position = 0
    found = False
    while True:
        # Search only the part of the text after the previous match
        idx = index_of(original_word, text[position:], lang=lang)
        if idx == -1:
            break
        found = True
        pieces.append(text[position:position + idx])
        pieces.append(new_word)
        position += idx + word_length
    pieces.append(text[position:])
    return u''.join(pieces), found
|
||||||
|
|
||||||
|
def replace_word(container, new_word, locations, locale):
    ''' Replace the word recorded at every location with ``new_word``,
    editing the nodes referenced by the locations in place.

    :param container: The book container, ``container.replace()`` is called
        with the root of the tree of every node that was modified
    :param new_word: The replacement word
    :param locations: The :class:`Location` objects to process. The
        ``original_word`` of every location whose text was rewritten is
        updated to ``new_word`` so that the location can be used for a
        subsequent replacement.
    :param locale: Locale of the word, ``locale.langcode`` is passed to
        :func:`replace`
    :return: The set of names of the files that were changed
    '''
    changed = set()
    # replace() rewrites all occurrences in a piece of text at once, while a
    # text containing the word several times has one location per occurrence.
    # Remember what was already rewritten so that the replacement is not
    # applied again to the new text (which matters when new_word itself
    # contains the original word).
    done = set()
    for loc in locations:
        node = loc.location_node
        is_attr, attr = loc.node_item
        if node is None or attr is None:
            # Location was created without a node to edit
            continue
        slot = (id(node), is_attr, attr, loc.original_word)
        if slot in done:
            loc.original_word = new_word
            continue
        text = node.get(attr) if is_attr else getattr(node, attr)
        if text is None:
            # Nothing to search, unicode(None) would be the string "None"
            continue
        text, replaced = replace(text, loc.original_word, new_word, locale.langcode)
        if replaced:
            if is_attr:
                node.set(attr, text)
            else:
                setattr(node, attr, text)
            container.replace(loc.file_name, node.getroottree().getroot())
            changed.add(loc.file_name)
            done.add(slot)
            loc.original_word = new_word
    return changed
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import pprint
|
import pprint
|
||||||
from calibre.gui2.tweak_book import set_book_locale, dictionaries
|
from calibre.gui2.tweak_book import set_book_locale, dictionaries
|
||||||
|
@ -113,6 +113,7 @@ class Boss(QObject):
|
|||||||
self.gui.central.search_panel.show_saved_searches.connect(self.show_saved_searches)
|
self.gui.central.search_panel.show_saved_searches.connect(self.show_saved_searches)
|
||||||
self.gui.spell_check.find_word.connect(self.find_word)
|
self.gui.spell_check.find_word.connect(self.find_word)
|
||||||
self.gui.spell_check.refresh_requested.connect(self.commit_all_editors_to_container)
|
self.gui.spell_check.refresh_requested.connect(self.commit_all_editors_to_container)
|
||||||
|
self.gui.spell_check.word_replaced.connect(self.word_replaced)
|
||||||
|
|
||||||
def preferences(self):
|
def preferences(self):
|
||||||
p = Preferences(self.gui)
|
p = Preferences(self.gui)
|
||||||
@ -709,6 +710,10 @@ class Boss(QObject):
|
|||||||
break
|
break
|
||||||
find_next_word(word, locations, ed, name, self.gui, self.show_editor, self.edit_file)
|
find_next_word(word, locations, ed, name, self.gui, self.show_editor, self.edit_file)
|
||||||
|
|
||||||
|
def word_replaced(self, changed_names):
    ''' Mark the book as modified and update the editors of the files in
    ``changed_names`` from the container. '''
    names = set(changed_names)
    self.set_modified()
    self.update_editors_from_container(names=names)
|
||||||
|
|
||||||
def saved_searches(self):
|
def saved_searches(self):
|
||||||
self.gui.saved_searches.show(), self.gui.saved_searches.raise_()
|
self.gui.saved_searches.show(), self.gui.saved_searches.raise_()
|
||||||
|
|
||||||
|
@ -18,6 +18,7 @@ from PyQt4.Qt import (
|
|||||||
QComboBox, QListWidget, QListWidgetItem, QInputDialog)
|
QComboBox, QListWidget, QListWidgetItem, QInputDialog)
|
||||||
|
|
||||||
from calibre.constants import __appname__, plugins
|
from calibre.constants import __appname__, plugins
|
||||||
|
from calibre.ebooks.oeb.polish.spell import replace_word, get_all_words, merge_locations
|
||||||
from calibre.gui2 import choose_files, error_dialog
|
from calibre.gui2 import choose_files, error_dialog
|
||||||
from calibre.gui2.complete2 import LineEdit
|
from calibre.gui2.complete2 import LineEdit
|
||||||
from calibre.gui2.languages import LanguagesEdit
|
from calibre.gui2.languages import LanguagesEdit
|
||||||
@ -640,6 +641,25 @@ class WordsModel(QAbstractTableModel):
|
|||||||
self.spell_map[w] = dictionaries.recognized(*w)
|
self.spell_map[w] = dictionaries.recognized(*w)
|
||||||
self.update_word(w)
|
self.update_word(w)
|
||||||
|
|
||||||
|
def replace_word(self, w, new_word):
    ''' Update the model after the word ``w`` (a ``(word, locale)`` key of
    ``self.words``) was replaced by ``new_word`` in the book.

    The locations of ``w`` are moved to the key ``(new_word, w[1])``, being
    merged with the locations already stored there, if any, and the row of
    ``w`` is removed from the model.

    :return: The key the locations are now stored under, this is ``w``
        itself when ``new_word`` is the same as the current word
    '''
    if w[0] == new_word:
        return w
    new_key = (new_word, w[1])
    if new_key in self.words:
        self.words[new_key] = merge_locations(self.words[new_key], self.words[w])
        # The entry that changed is the one for new_key, the row of w is
        # removed below. The row can be -1 when new_key is not currently
        # displayed.
        row = self.row_for_word(new_key)
        if row > -1:
            self.dataChanged.emit(self.index(row, 1), self.index(row, 1))
    else:
        self.words[new_key] = self.words[w]
        self.spell_map[new_key] = dictionaries.recognized(*new_key)
        self.update_word(new_key)
    row = self.row_for_word(w)
    if row > -1:
        self.beginRemoveRows(QModelIndex(), row, row)
        del self.items[row]
        self.endRemoveRows()
    return new_key
|
||||||
|
|
||||||
def update_word(self, w):
|
def update_word(self, w):
|
||||||
should_be_filtered = not self.filter_item(w)
|
should_be_filtered = not self.filter_item(w)
|
||||||
row = self.row_for_word(w)
|
row = self.row_for_word(w)
|
||||||
@ -672,6 +692,7 @@ class SpellCheck(Dialog):
|
|||||||
work_finished = pyqtSignal(object, object)
|
work_finished = pyqtSignal(object, object)
|
||||||
find_word = pyqtSignal(object, object)
|
find_word = pyqtSignal(object, object)
|
||||||
refresh_requested = pyqtSignal()
|
refresh_requested = pyqtSignal()
|
||||||
|
word_replaced = pyqtSignal(object)
|
||||||
|
|
||||||
def __init__(self, parent=None):
|
def __init__(self, parent=None):
|
||||||
self.__current_word = None
|
self.__current_word = None
|
||||||
@ -855,7 +876,21 @@ class SpellCheck(Dialog):
|
|||||||
pass # item is None
|
pass # item is None
|
||||||
|
|
||||||
def change_word(self):
    ''' Replace the word of the current row of the words view with the text
    of the suggested word field. If any file was changed, the
    ``word_replaced`` signal is emitted, the words model is updated and the
    row of the resulting word, if it has one, is highlighted. '''
    index = self.words_view.currentIndex()
    if not index.isValid():
        return
    model = self.words_model
    word = model.word_for_row(index.row())
    if word is None:
        return
    replacement = unicode(self.suggested_word.text())
    changed_files = replace_word(current_container(), replacement, model.words[word], word[1])
    if not changed_files:
        return
    self.word_replaced.emit(changed_files)
    new_row = model.row_for_word(model.replace_word(word, replacement))
    if new_row > -1:
        self.highlight_row(new_row)
|
||||||
|
|
||||||
def toggle_ignore(self):
|
def toggle_ignore(self):
|
||||||
current = self.words_view.currentIndex()
|
current = self.words_view.currentIndex()
|
||||||
@ -914,8 +949,6 @@ class SpellCheck(Dialog):
|
|||||||
self.thread.start()
|
self.thread.start()
|
||||||
|
|
||||||
def get_words(self):
|
def get_words(self):
|
||||||
from calibre.ebooks.oeb.polish.spell import get_all_words
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
words = get_all_words(current_container(), dictionaries.default_locale)
|
words = get_all_words(current_container(), dictionaries.default_locale)
|
||||||
spell_map = {w:dictionaries.recognized(*w) for w in words}
|
spell_map = {w:dictionaries.recognized(*w) for w in words}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user