Edit book: Reports: Show the number of words per file in the Files section of the report

This commit is contained in:
Kovid Goyal 2022-11-20 10:46:23 +05:30
parent c0fce03703
commit 67bcfac6b7
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 30 additions and 8 deletions

View File

@@ -18,7 +18,7 @@ from calibre.utils.imghdr import identify
from css_selectors import Select, SelectorError
from polyglot.builtins import iteritems
File = namedtuple('File', 'name dir basename size category')
File = namedtuple('File', 'name dir basename size category word_count')
def get_category(name, mt):
@@ -60,9 +60,10 @@ def safe_img_data(container, name, mt):
def files_data(container, *args):
fwc = file_words_counts or {}
for name, path in iteritems(container.name_path_map):
yield File(name, posixpath.dirname(name), posixpath.basename(name), safe_size(container, name),
get_category(name, container.mime_map.get(name, '')))
get_category(name, container.mime_map.get(name, '')), fwc.get(name, -1))
Image = namedtuple('Image', 'name mime_type usage size basename id width height')
@@ -198,8 +199,11 @@ def links_data(container, *args):
Word = namedtuple('Word', 'id word locale usage')
file_words_counts = None
def words_data(container, book_locale, *args):
count, words = get_all_words(container, book_locale, get_word_count=True)
count, words = get_all_words(container, book_locale, get_word_count=True, file_words_counts=file_words_counts)
return (count, tuple(Word(i, word, locale, v) for i, ((word, locale), v) in enumerate(iteritems(words))))
@@ -349,12 +353,15 @@ def css_data(container, book_locale, result_data, *args):
def gather_data(container, book_locale):
global file_words_counts
timing = {}
data = {}
for x in 'files chars images links words css'.split():
file_words_counts = {}
for x in 'chars images links words css files'.split():
st = time.time()
data[x] = globals()[x + '_data'](container, book_locale, data)
if isinstance(data[x], types.GeneratorType):
data[x] = tuple(data[x])
timing[x] = time.time() - st
file_words_counts = None
return data, timing

View File

@@ -70,6 +70,9 @@ class Location:
self.original_word = self.elided_prefix + new_word
file_word_count = 0
def filter_words(word):
if not word:
return False
@@ -80,10 +83,12 @@ def filter_words(word):
def get_words(text, lang):
global file_word_count
try:
ans = split_into_words(str(text), lang)
except (TypeError, ValueError):
return ()
file_word_count += len(ans)
return list(filter(filter_words, ans))
@@ -299,7 +304,10 @@ def root_is_excluded_from_spell_check(root):
return False
def get_all_words(container, book_locale, get_word_count=False, excluded_files=()):
def get_all_words(container, book_locale, get_word_count=False, excluded_files=(), file_words_counts=None):
global file_word_count
if file_words_counts is None:
file_words_counts = {}
words = defaultdict(list)
words[None] = 0
file_names, ncx_toc = get_checkable_file_names(container)
@@ -309,12 +317,15 @@ def get_all_words(container, book_locale, get_word_count=False, excluded_files=(
root = container.parsed(file_name)
if root_is_excluded_from_spell_check(root):
continue
file_word_count = 0
if file_name == container.opf_name:
read_words_from_opf(root, words, file_name, book_locale)
elif file_name == ncx_toc:
read_words_from_ncx(root, words, file_name, book_locale)
elif hasattr(root, 'xpath'):
read_words_from_html(root, words, file_name, book_locale)
file_words_counts[file_name] = file_word_count
file_word_count = 0
count = words.pop(None)
ans = {k:group_sort(v) for k, v in iteritems(words)}
if get_word_count:

View File

@@ -236,8 +236,8 @@ class FilesView(QTableView):
class FilesModel(FileCollection):
COLUMN_HEADERS = (_('Folder'), _('Name'), _('Size (KB)'), _('Type'))
alignments = Qt.AlignmentFlag.AlignLeft, Qt.AlignmentFlag.AlignLeft, Qt.AlignmentFlag.AlignRight, Qt.AlignmentFlag.AlignLeft
COLUMN_HEADERS = (_('Folder'), _('Name'), _('Size (KB)'), _('Type'), _('Word count'))
alignments = Qt.AlignmentFlag.AlignLeft, Qt.AlignmentFlag.AlignLeft, Qt.AlignmentFlag.AlignRight, Qt.AlignmentFlag.AlignLeft, Qt.AlignmentFlag.AlignRight
CATEGORY_NAMES = {
'image':_('Image'),
'text': _('Text'),
@@ -257,7 +257,7 @@ class FilesModel(FileCollection):
self.total_size = sum(map(itemgetter(3), self.files))
self.images_size = sum(map(itemgetter(3), (f for f in self.files if f.category == 'image')))
self.fonts_size = sum(map(itemgetter(3), (f for f in self.files if f.category == 'font')))
self.sort_keys = tuple((psk(entry.dir), psk(entry.basename), entry.size, psk(self.CATEGORY_NAMES.get(entry.category, '')))
self.sort_keys = tuple((psk(entry.dir), psk(entry.basename), entry.size, psk(self.CATEGORY_NAMES.get(entry.category, '')), entry.word_count)
for entry in self.files)
self.endResetModel()
@@ -282,6 +282,10 @@ class FilesModel(FileCollection):
return '%.2f ' % sz
if col == 3:
return self.CATEGORY_NAMES.get(entry.category)
if col == 4:
ans = entry.word_count
if ans > -1:
return str(ans)
elif role == Qt.ItemDataRole.TextAlignmentRole:
return int(Qt.AlignVCenter | self.alignments[index.column()]) # https://bugreports.qt.io/browse/PYSIDE-1974