From 589c719dc60f7ffc16ea510e12f1182fa2b5c592 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 21 Jan 2015 19:07:47 +0530 Subject: [PATCH] Add a list of characters report to the reports tool --- src/calibre/ebooks/oeb/polish/report.py | 17 +++++- src/calibre/gui2/tweak_book/reports.py | 80 ++++++++++++++++++++++++- 2 files changed, 94 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/oeb/polish/report.py b/src/calibre/ebooks/oeb/polish/report.py index 7cc435e14f..27d9a7bbb5 100644 --- a/src/calibre/ebooks/oeb/polish/report.py +++ b/src/calibre/ebooks/oeb/polish/report.py @@ -11,7 +11,7 @@ from collections import namedtuple, defaultdict from calibre.ebooks.oeb.polish.container import OEB_DOCS, OEB_STYLES, OEB_FONTS from calibre.ebooks.oeb.polish.spell import get_all_words -from calibre.utils.icu import numeric_sort_key +from calibre.utils.icu import numeric_sort_key, ord_string, safe_chr from calibre.utils.magick.draw import identify File = namedtuple('File', 'name dir basename size category') @@ -92,10 +92,25 @@ def word_data(container, book_locale): count, words = get_all_words(container, book_locale, get_word_count=True) return (count, tuple(Word(i, word, locale, v) for i, ((word, locale), v) in enumerate(words.iteritems()))) +Char = namedtuple('Char', 'id char codepoint usage') + +def char_data(container): + chars = defaultdict(list) + for name, is_linear in container.spine_names: + if container.mime_map.get(name) not in OEB_DOCS: + continue + raw = container.raw_data(name) + for i, codepoint in enumerate(ord_string(raw)): + chars[codepoint].append(Location(name, character_offset=i)) + + for i, (codepoint, usage) in enumerate(chars.iteritems()): + yield Char(i, safe_chr(codepoint), codepoint, usage) + def gather_data(container, book_locale): data = {'files':tuple(file_data(container))} img_data = link_data(container) data['images'] = img_data data['words'] = word_data(container, book_locale) + data['chars'] = tuple(char_data(container)) return data diff --git a/src/calibre/gui2/tweak_book/reports.py b/src/calibre/gui2/tweak_book/reports.py index 519dadd2ac..df5c7a6878 100644 --- a/src/calibre/gui2/tweak_book/reports.py +++ b/src/calibre/gui2/tweak_book/reports.py @@ -26,7 +26,7 @@ from calibre.gui2 import error_dialog, question_dialog from calibre.gui2.tweak_book import current_container, tprefs, dictionaries from calibre.gui2.tweak_book.widgets import Dialog from calibre.gui2.progress_indicator import ProgressIndicator -from calibre.utils.icu import primary_contains, numeric_sort_key +from calibre.utils.icu import primary_contains, numeric_sort_key, character_name_from_code from calibre.utils.localization import calibre_langcode_to_name, canonicalize_lang # Utils {{{ @@ -292,7 +292,7 @@ class Jump(object): # {{{ editor.find(regex.compile(regex.escape(loc.text_on_line))) elif loc.character_offset is not None: c = editor.textCursor() - c.setPosition(loc.character_offset) + c.setPosition(loc.character_offset + 1) # put cursor after the character editor.setTextCursor(c) jump = Jump() # }}} @@ -529,6 +529,78 @@ class WordsWidget(QWidget): save_state('words-table', bytearray(self.words.horizontalHeader().saveState())) # }}} +# Characters {{{ + +class CharsModel(FileCollection): + + COLUMN_HEADERS = (_('Character'), _('Name'), _('Codepoint'), _('Times used')) + total_words = 0 + + def __call__(self, data): + self.beginResetModel() + self.files = data['chars'] + psk = numeric_sort_key + self.sort_keys = tuple((psk(entry.char), None, entry.codepoint, len(entry.usage)) for entry in self.files) + self.endResetModel() + + def data(self, index, role=Qt.DisplayRole): + if role == Qt.DisplayRole: + col = index.column() + try: + entry = self.files[index.row()] + except IndexError: + return None + if col == 0: + return entry.char + if col == 1: + return {0xa:'LINE FEED', 0xd:'CARRIAGE RETURN'}.get(entry.codepoint, character_name_from_code(entry.codepoint)) + if col == 2: + return ('U+%04X' if entry.codepoint < 0x10000 else 'U+%06X') % entry.codepoint + if col == 3: + return type('')(len(entry.usage)) + if role == Qt.UserRole: + try: + return self.files[index.row()] + except IndexError: + pass + + def location(self, index): + return None + +class CharsWidget(QWidget): + + def __init__(self, parent=None): + QWidget.__init__(self, parent) + self.l = l = QVBoxLayout(self) + + self.filter_edit = e = QLineEdit(self) + l.addWidget(e) + e.setPlaceholderText(_('Filter')) + self.model = m = CharsModel(self) + self.chars = f = FilesView(m, self) + f.DELETE_POSSIBLE = False + f.double_clicked.connect(self.double_clicked) + e.textChanged.connect(f.proxy.filter_text) + l.addWidget(f) + + try: + self.chars.horizontalHeader().restoreState(read_state('chars-table')) + except TypeError: + self.chars.sortByColumn(0, Qt.AscendingOrder) + + def __call__(self, data): + self.model(data) + self.filter_edit.clear() + + def double_clicked(self, index): + entry = index.data(Qt.UserRole) + if entry is not None: + jump((id(self), entry.id), entry.usage) + + def save(self): + save_state('chars-table', bytearray(self.chars.horizontalHeader().saveState())) +# }}} + # Wrapper UI {{{ class ReportsWidget(QWidget): @@ -563,6 +635,10 @@ class ReportsWidget(QWidget): s.addWidget(i) QListWidgetItem(_('Images'), r) + self.chars = c = CharsWidget(self) + s.addWidget(c) + QListWidgetItem(_('Characters'), r) + self.splitter.setStretchFactor(1, 500) try: self.splitter.restoreState(read_state('splitter-state'))