mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit book: Reports: Show the number of words per file in the Files section of the report
This commit is contained in:
parent
c0fce03703
commit
67bcfac6b7
@ -18,7 +18,7 @@ from calibre.utils.imghdr import identify
|
||||
from css_selectors import Select, SelectorError
|
||||
from polyglot.builtins import iteritems
|
||||
|
||||
File = namedtuple('File', 'name dir basename size category')
|
||||
File = namedtuple('File', 'name dir basename size category word_count')
|
||||
|
||||
|
||||
def get_category(name, mt):
|
||||
@ -60,9 +60,10 @@ def safe_img_data(container, name, mt):
|
||||
|
||||
|
||||
def files_data(container, *args):
    """Yield one ``File`` record per entry in ``container.name_path_map``.

    Each record carries the name, its posix directory and base name, the
    size reported by ``safe_size``, the category from ``get_category`` and
    a word count. The word count is looked up in the module-level
    ``file_words_counts`` mapping; names without an entry (or an unset or
    empty mapping) get -1.
    """
    word_counts = file_words_counts if file_words_counts else {}
    for name, _path in iteritems(container.name_path_map):
        folder = posixpath.dirname(name)
        base = posixpath.basename(name)
        size = safe_size(container, name)
        category = get_category(name, container.mime_map.get(name, ''))
        yield File(name, folder, base, size, category, word_counts.get(name, -1))
|
||||
|
||||
|
||||
Image = namedtuple('Image', 'name mime_type usage size basename id width height')
|
||||
@ -198,8 +199,11 @@ def links_data(container, *args):
|
||||
Word = namedtuple('Word', 'id word locale usage')
|
||||
|
||||
|
||||
# Mapping of file name -> word count for the report currently being gathered.
# gather_data() sets it to a fresh dict for the duration of a run and back to
# None afterwards; words_data() passes it to get_all_words() and files_data()
# reads it.
file_words_counts = None
|
||||
|
||||
|
||||
def words_data(container, book_locale, *args):
    """Return ``(count, words)`` for the words section of the report.

    ``get_all_words`` is called with ``get_word_count=True`` and is handed
    the module-level ``file_words_counts`` mapping. Every
    ``(word, locale) -> usage`` item it returns becomes a ``Word`` record
    whose id is its position in iteration order.
    """
    count, words = get_all_words(
        container, book_locale, get_word_count=True, file_words_counts=file_words_counts)
    records = []
    for idx, ((word, locale), usage) in enumerate(iteritems(words)):
        records.append(Word(idx, word, locale, usage))
    return (count, tuple(records))
|
||||
|
||||
|
||||
@ -349,12 +353,15 @@ def css_data(container, book_locale, result_data, *args):
|
||||
|
||||
|
||||
def gather_data(container, book_locale):
    """Run every ``<section>_data`` collector and return ``(data, timing)``.

    ``data`` maps each section name to the collector's result (generators
    are materialised into tuples); ``timing`` maps each section name to the
    wall-clock seconds the collector took.

    The module-level ``file_words_counts`` mapping is created for the
    duration of the run and always reset to None afterwards, even when a
    collector raises, so a failed run cannot leave stale counts behind.
    """
    global file_words_counts
    timing = {}
    data = {}
    file_words_counts = {}
    try:
        # 'files' runs after 'words' because files_data() reads the
        # per-file counts that are filled in while words_data() runs.
        for x in 'chars images links words css files'.split():
            st = time.time()
            data[x] = globals()[x + '_data'](container, book_locale, data)
            if isinstance(data[x], types.GeneratorType):
                # Consume generators now, while file_words_counts is still set.
                data[x] = tuple(data[x])
            timing[x] = time.time() - st
    finally:
        file_words_counts = None
    return data, timing
|
||||
|
@ -70,6 +70,9 @@ class Location:
|
||||
self.original_word = self.elided_prefix + new_word
|
||||
|
||||
|
||||
# Running total of the words returned by split_into_words() in get_words().
# get_all_words() zeroes it before reading each file and stores the value
# per file afterwards.
file_word_count = 0
|
||||
|
||||
|
||||
def filter_words(word):
|
||||
if not word:
|
||||
return False
|
||||
@ -80,10 +83,12 @@ def filter_words(word):
|
||||
|
||||
|
||||
def get_words(text, lang):
    """Split *text* into words for *lang* and return those accepted by ``filter_words``.

    The number of words produced by ``split_into_words`` (before filtering)
    is added to the module-level ``file_word_count``. If splitting raises
    ``TypeError`` or ``ValueError`` an empty tuple is returned and the
    counter is left untouched.
    """
    global file_word_count
    try:
        tokens = split_into_words(str(text), lang)
    except (TypeError, ValueError):
        return ()
    file_word_count += len(tokens)
    return [token for token in tokens if filter_words(token)]
|
||||
|
||||
|
||||
@ -299,7 +304,10 @@ def root_is_excluded_from_spell_check(root):
|
||||
return False
|
||||
|
||||
|
||||
def get_all_words(container, book_locale, get_word_count=False, excluded_files=()):
|
||||
def get_all_words(container, book_locale, get_word_count=False, excluded_files=(), file_words_counts=None):
|
||||
global file_word_count
|
||||
if file_words_counts is None:
|
||||
file_words_counts = {}
|
||||
words = defaultdict(list)
|
||||
words[None] = 0
|
||||
file_names, ncx_toc = get_checkable_file_names(container)
|
||||
@ -309,12 +317,15 @@ def get_all_words(container, book_locale, get_word_count=False, excluded_files=(
|
||||
root = container.parsed(file_name)
|
||||
if root_is_excluded_from_spell_check(root):
|
||||
continue
|
||||
file_word_count = 0
|
||||
if file_name == container.opf_name:
|
||||
read_words_from_opf(root, words, file_name, book_locale)
|
||||
elif file_name == ncx_toc:
|
||||
read_words_from_ncx(root, words, file_name, book_locale)
|
||||
elif hasattr(root, 'xpath'):
|
||||
read_words_from_html(root, words, file_name, book_locale)
|
||||
file_words_counts[file_name] = file_word_count
|
||||
file_word_count = 0
|
||||
count = words.pop(None)
|
||||
ans = {k:group_sort(v) for k, v in iteritems(words)}
|
||||
if get_word_count:
|
||||
|
@ -236,8 +236,8 @@ class FilesView(QTableView):
|
||||
|
||||
class FilesModel(FileCollection):
|
||||
|
||||
COLUMN_HEADERS = (_('Folder'), _('Name'), _('Size (KB)'), _('Type'))
|
||||
alignments = Qt.AlignmentFlag.AlignLeft, Qt.AlignmentFlag.AlignLeft, Qt.AlignmentFlag.AlignRight, Qt.AlignmentFlag.AlignLeft
|
||||
COLUMN_HEADERS = (_('Folder'), _('Name'), _('Size (KB)'), _('Type'), _('Word count'))
|
||||
alignments = Qt.AlignmentFlag.AlignLeft, Qt.AlignmentFlag.AlignLeft, Qt.AlignmentFlag.AlignRight, Qt.AlignmentFlag.AlignLeft, Qt.AlignmentFlag.AlignRight
|
||||
CATEGORY_NAMES = {
|
||||
'image':_('Image'),
|
||||
'text': _('Text'),
|
||||
@ -257,7 +257,7 @@ class FilesModel(FileCollection):
|
||||
self.total_size = sum(map(itemgetter(3), self.files))
|
||||
self.images_size = sum(map(itemgetter(3), (f for f in self.files if f.category == 'image')))
|
||||
self.fonts_size = sum(map(itemgetter(3), (f for f in self.files if f.category == 'font')))
|
||||
self.sort_keys = tuple((psk(entry.dir), psk(entry.basename), entry.size, psk(self.CATEGORY_NAMES.get(entry.category, '')))
|
||||
self.sort_keys = tuple((psk(entry.dir), psk(entry.basename), entry.size, psk(self.CATEGORY_NAMES.get(entry.category, '')), entry.word_count)
|
||||
for entry in self.files)
|
||||
self.endResetModel()
|
||||
|
||||
@ -282,6 +282,10 @@ class FilesModel(FileCollection):
|
||||
return '%.2f ' % sz
|
||||
if col == 3:
|
||||
return self.CATEGORY_NAMES.get(entry.category)
|
||||
if col == 4:
|
||||
ans = entry.word_count
|
||||
if ans > -1:
|
||||
return str(ans)
|
||||
elif role == Qt.ItemDataRole.TextAlignmentRole:
|
||||
return int(Qt.AlignVCenter | self.alignments[index.column()]) # https://bugreports.qt.io/browse/PYSIDE-1974
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user