From 46eb1543cea8f34d87ed907a42f2d04b350c2931 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 27 Jan 2015 14:59:07 +0530 Subject: [PATCH] Edit Book: Add a 'Style Classes' report to the Reports tool --- src/calibre/ebooks/oeb/polish/report.py | 58 ++++++-- src/calibre/gui2/tweak_book/reports.py | 180 ++++++++++++++++++++++-- 2 files changed, 212 insertions(+), 26 deletions(-) diff --git a/src/calibre/ebooks/oeb/polish/report.py b/src/calibre/ebooks/oeb/polish/report.py index 541c17575a..f2b8a0f2fe 100644 --- a/src/calibre/ebooks/oeb/polish/report.py +++ b/src/calibre/ebooks/oeb/polish/report.py @@ -55,7 +55,7 @@ def safe_img_data(container, name, mt): width = height = 0 return width, height -def files_data(container, book_locale): +def files_data(container, *args): for name, path in container.name_path_map.iteritems(): yield File(name, posixpath.dirname(name), posixpath.basename(name), safe_size(container, name), get_category(name, container.mime_map.get(name, ''))) @@ -76,7 +76,7 @@ def safe_href_to_name(container, href, base): except ValueError: pass # Absolute path on windows -def images_data(container, book_locale): +def images_data(container, *args): image_usage = defaultdict(set) link_sources = OEB_STYLES | OEB_DOCS for name, mt in container.mime_map.iteritems(): @@ -138,7 +138,7 @@ def Link(location, text, is_external, href, path_ok, anchor_ok, anchor): ok = path_ok and anchor_ok return L(location, text, is_external, href, path_ok, anchor_ok, anchor, ok) -def links_data(container, book_locale): +def links_data(container, *args): anchor_map = {} links = [] anchor_pat = XPath('//*[@id or @name]') @@ -181,13 +181,13 @@ def links_data(container, book_locale): Word = namedtuple('Word', 'id word locale usage') -def words_data(container, book_locale): +def words_data(container, book_locale, *args): count, words = get_all_words(container, book_locale, get_word_count=True) return (count, tuple(Word(i, word, locale, v) for i, ((word, locale), v) in enumerate(words.iteritems()))) Char = namedtuple('Char', 'id char codepoint usage count') -def chars_data(container, book_locale): +def chars_data(container, *args): chars = defaultdict(set) counter = Counter() def count(codepoint): @@ -216,7 +216,11 @@ MatchLocation = namedtuple('MatchLocation', 'tag sourceline') CSSEntry = namedtuple('CSSEntry', 'rule count matched_files sort_key') CSSFileMatch = namedtuple('CSSFileMatch', 'file_name locations sort_key') -def css_data(container, book_locale): +ClassEntry = namedtuple('ClassEntry', 'cls num_of_matches matched_files sort_key') +ClassFileMatch = namedtuple('ClassFileMatch', 'file_name class_elements sort_key') +ClassElement = namedtuple('ClassElement', 'name line_number text_on_line tag matched_rules') + +def css_data(container, book_locale, result_data, *args): import tinycss from tinycss.css21 import RuleSet, ImportRule @@ -271,14 +275,17 @@ def css_data(container, book_locale): if sheet is not None: yield sheet + tt_cache = {} def tag_text(elem): - tag = elem.tag.rpartition('}')[-1] - if elem.attrib: - attribs = ' '.join('%s="%s"' % (k, prepare_string_for_xml(elem.get(k, ''), True)) for k in elem.keys()) - return '<%s %s>' % (tag, attribs) - return '<%s>' % tag + ans = tt_cache.get(elem) + if ans is None: + tag = elem.tag.rpartition('}')[-1] + if elem.attrib: + attribs = ' '.join('%s="%s"' % (k, prepare_string_for_xml(elem.get(k, ''), True)) for k in elem.keys()) + return '<%s %s>' % (tag, attribs) + ans = tt_cache[elem] = '<%s>' % tag - def matches_for_selector(selector, root): + def matches_for_selector(selector, root, class_map, rule): selector = pseudo_pat.sub('', selector) selector = MIN_SPACE_RE.sub(r'\1', selector) try: @@ -299,13 +306,36 @@ def css_data(container, book_locale): matches = xp(root) except Exception: return () + lsel = selector.lower() + for elem in matches: + for cls in elem.get('class', '').split(): + if '.' + cls.lower() in lsel: + class_map[cls][elem].append(rule) + return (MatchLocation(tag_text(elem), elem.sourceline) for elem in matches) + class_map = defaultdict(lambda : defaultdict(list)) + for name, inline_sheets in html_sheets.iteritems(): root = container.parsed(name) + cmap = defaultdict(lambda : defaultdict(list)) + for elem in root.xpath('//*[@class]'): + for cls in elem.get('class', '').split(): + cmap[cls][elem] = [] for sheet in chain(sheets_for_html(name, root), inline_sheets): for rule in rules_in_sheet(sheet): - rule_map[rule][name].extend(matches_for_selector(rule.selector, root)) + rule_map[rule][name].extend(matches_for_selector(rule.selector, root, cmap, rule)) + for cls, elem_map in cmap.iteritems(): + class_elements = class_map[cls][name] + for elem, usage in elem_map.iteritems(): + class_elements.append( + ClassElement(name, elem.sourceline, elem.get('class'), tag_text(elem), tuple(usage))) + + result_data['classes'] = ans = [] + for cls, name_map in class_map.iteritems(): + la = tuple(ClassFileMatch(name, tuple(class_elements), numeric_sort_key(name)) for name, class_elements in name_map.iteritems() if class_elements) + num_of_matches = sum(sum(len(ce.matched_rules) for ce in cfm.class_elements) for cfm in la) + ans.append(ClassEntry(cls, num_of_matches, la, numeric_sort_key(cls))) ans = [] for rule, loc_map in rule_map.iteritems(): @@ -321,7 +351,7 @@ def gather_data(container, book_locale): data = {} for x in 'files chars images links words css'.split(): st = time.time() - data[x] = globals()[x + '_data'](container, book_locale) + data[x] = globals()[x + '_data'](container, book_locale, data) if isinstance(data[x], types.GeneratorType): data[x] = tuple(data[x]) timing[x] = time.time() - st diff --git a/src/calibre/gui2/tweak_book/reports.py b/src/calibre/gui2/tweak_book/reports.py index aa3574adec..8590b6f47d 100644 --- a/src/calibre/gui2/tweak_book/reports.py +++ b/src/calibre/gui2/tweak_book/reports.py @@ -26,7 +26,9 @@ from PyQt5.Qt import ( from calibre import human_readable, fit_image from calibre.constants import DEBUG -from calibre.ebooks.oeb.polish.report import gather_data, CSSEntry, CSSFileMatch, MatchLocation +from calibre.ebooks.oeb.polish.report import ( + gather_data, CSSEntry, CSSFileMatch, MatchLocation, ClassEntry, + ClassFileMatch, ClassElement, CSSRule, LinkLocation) from calibre.gui2 import error_dialog, question_dialog, choose_save_file, open_url from calibre.gui2.tweak_book import current_container, tprefs, dictionaries from calibre.gui2.tweak_book.widgets import Dialog @@ -902,9 +904,8 @@ class CSSRulesModel(QAbstractItemModel): if not parent.isValid(): return len(self.rules) entry = self.index_to_entry(parent) - if isinstance(entry, (CSSEntry, CSSFileMatch)): - return len(self.to_container(entry)) - return 0 + c = self.to_container(entry) + return 0 if c is entry else len(c) def columnCount(self, parent=ROOT): return 1 @@ -968,6 +969,16 @@ class CSSProxyModel(QSortFilterProxyModel): class CSSWidget(QWidget): + SETTING_PREFIX = 'css-' + MODEL = CSSRulesModel + PROXY = CSSProxyModel + + def read_state(self, name, default=None): + return read_state(self.SETTING_PREFIX+name, default) + + def save_state(self, name, val): + return save_state(self.SETTING_PREFIX + name, val) + def __init__(self, parent=None): QWidget.__init__(self, parent) self.l = l = QVBoxLayout(self) @@ -976,8 +987,8 @@ class CSSWidget(QWidget): self.filter_edit = e = QLineEdit(self) l.addWidget(e) e.setPlaceholderText(_('Filter')) - self.model = m = CSSRulesModel(self) - self.proxy = p = CSSProxyModel(self) + self.model = m = self.MODEL(self) + self.proxy = p = self.PROXY(self) p.setSourceModel(m) self.view = f = QTreeView(self) f.setAlternatingRowColors(True) @@ -990,16 +1001,16 @@ class CSSWidget(QWidget): l.addLayout(h) h.addWidget(QLabel(_('Sort by:'))) self.counts_button = b = QRadioButton(_('&Counts'), self) - b.setChecked(read_state('css-sort-on-counts', True)) + b.setChecked(self.read_state('sort-on-counts', True)) h.addWidget(b) self.name_button = b = QRadioButton(_('&Name'), self) - b.setChecked(not read_state('css-sort-on-counts', True)) + b.setChecked(not self.read_state('sort-on-counts', True)) h.addWidget(b) b.toggled.connect(self.resort) h.addStrut(20) self._sort_order = o = QComboBox(self) o.addItems([_('Ascending'), _('Descending')]) - o.setCurrentIndex(0 if read_state('css-sort-ascending', True) else 1) + o.setCurrentIndex(0 if self.read_state('sort-ascending', True) else 1) o.setEditable(False) o.currentIndexChanged[int].connect(self.resort) h.addWidget(o) @@ -1015,15 +1026,18 @@ class CSSWidget(QWidget): self._sort_order.setCurrentIndex({Qt.AscendingOrder:0}.get(val, 1)) return property(fget=fget, fset=fset) + def update_summary(self): + self.summary.setText(_('{0} rules, {1} unused').format(self.model.rowCount(), self.model.num_unused)) + def __call__(self, data): self.model(data) - self.summary.setText(_('{0} rules, {1} unused').format(self.model.rowCount(), self.model.num_unused)) + self.update_summary() self.filter_edit.clear() self.resort() def save(self): - save_state('css-sort-on-counts', self.counts_button.isChecked()) - save_state('css-sort-ascending', self.sort_order == Qt.AscendingOrder) + self.save_state('sort-on-counts', self.counts_button.isChecked()) + self.save_state('sort-ascending', self.sort_order == Qt.AscendingOrder) def resort(self, *args): self.model.sort_on_count = self.counts_button.isChecked() @@ -1048,6 +1062,9 @@ class CSSWidget(QWidget): entry = self.model.index_to_entry(index) if entry is None: return + self.handle_double_click(entry, index, boss) + + def handle_double_click(self, entry, index, boss): if isinstance(entry, CSSEntry): loc = entry.rule.location name, sourceline, col = loc @@ -1056,6 +1073,9 @@ class CSSWidget(QWidget): else: name = self.model.index_to_entry(index.parent()).file_name sourceline = entry.sourceline + self.show_line(name, sourceline, boss) + + def show_line(self, name, sourceline, boss): editor = boss.edit_file_requested(name) if editor is None: return @@ -1068,6 +1088,138 @@ class CSSWidget(QWidget): # }}} +# Classes {{{ + +class ClassesModel(CSSRulesModel): + + def __init__(self, parent): + self.classes = self.rules = () + CSSRulesModel.__init__(self, parent) + self.sort_on_count = True + self.num_size = 1 + self.num_unused = 0 + self.build_maps() + + def build_maps(self): + self.parent_map = pm = {} + for i, entry in enumerate(self.classes): + container = entry.matched_files + pm[container] = (i, self.classes) + + for i, child in enumerate(container): + gcontainer = child.class_elements + pm[gcontainer] = (i, container) + + for i, gc in enumerate(gcontainer): + ggcontainer = gc.matched_rules + pm[gc] = (i, gcontainer) + + for i, ggc in enumerate(ggcontainer): + pm[ggc] = (i, ggcontainer) + + def to_container(self, entry): + if isinstance(entry, ClassEntry): + return entry.matched_files + elif isinstance(entry, ClassFileMatch): + return entry.class_elements + elif isinstance(entry, ClassElement): + return entry.matched_rules + return entry + + def data(self, index, role=Qt.DisplayRole): + if role == SORT_ROLE: + entry = self.index_to_entry(index) + if isinstance(entry, ClassEntry): + return entry.num_of_matches if self.sort_on_count else entry.sort_key + if isinstance(entry, ClassFileMatch): + return len(entry.class_elements) if self.sort_on_count else entry.sort_key + if isinstance(entry, ClassElement): + return entry.line_number + if isinstance(entry, CSSRule): + return entry.location.file_name + elif role == Qt.DisplayRole: + entry = self.index_to_entry(index) + if isinstance(entry, ClassEntry): + return '[%{}d] %s'.format(self.num_size) % (entry.num_of_matches, entry.cls) + elif isinstance(entry, ClassFileMatch): + return _('%s [%d elements]') % (entry.file_name, len(entry.class_elements)) + elif isinstance(entry, ClassElement): + return '%s @ %s' % (entry.tag, entry.line_number) + elif isinstance(entry, CSSRule): + return '%s @ %s:%s' % (entry.selector, entry.location.file_name, entry.location.line) + elif role == Qt.UserRole: + return self.index_to_entry(index) + elif role == Qt.FontRole: + entry = self.index_to_entry(index) + if isinstance(entry, ClassEntry): + return self.main_font + elif isinstance(entry, ClassFileMatch): + return self.italic_font + + def __call__(self, data): + self.beginResetModel() + self.rules = self.classes = tuple(data['classes']) + self.num_unused = sum(1 for ce in self.classes if ce.num_of_matches == 0) + try: + self.num_size = len(str(max(r.num_of_matches for r in self.classes))) + except ValueError: + self.num_size = 1 + self.build_maps() + self.endResetModel() + +class ClassProxyModel(CSSProxyModel): + + def filterAcceptsRow(self, row, parent): + if not self._filter_text: + return True + sm = self.sourceModel() + entry = sm.index_to_entry(sm.index(row, 0, parent)) + if not isinstance(entry, ClassEntry): + return True + return primary_contains(self._filter_text, entry.cls) + +class ClassesWidget(CSSWidget): + + SETTING_PREFIX = 'classes-' + MODEL = ClassesModel + PROXY = ClassProxyModel + + def update_summary(self): + self.summary.setText(_('{0} classes, {1} unused').format(self.model.rowCount(), self.model.num_unused)) + + def to_csv(self): + buf = BytesIO() + w = csv_writer(buf) + w.writerow([_('Class'), _('Number of matches')]) + for r in xrange(self.proxy.rowCount()): + entry = self.proxy.mapToSource(self.proxy.index(r, 0)).data(Qt.UserRole) + w.writerow([entry.cls, entry.num_of_matches]) + return buf.getvalue() + + def handle_double_click(self, entry, index, boss): + if isinstance(entry, ClassEntry): + def uniq(vals): + vals = vals or () + seen = set() + seen_add = seen.add + return tuple(x for x in vals if x not in seen and not seen_add(x)) + + rules = tuple(uniq([LinkLocation(rule.location.file_name, rule.location.line, None) + for cfm in entry.matched_files for ce in cfm.class_elements for rule in ce.matched_rules])) + if rules: + jump((id(self), id(entry)), rules) + return + elif isinstance(entry, ClassFileMatch): + name, sourceline = entry.file_name, 0 + elif isinstance(entry, ClassElement): + return jump_to_location(entry) + else: + loc = entry.location + name, sourceline, col = loc + self.show_line(name, sourceline, boss) + +# }}} + # Wrapper UI {{{ class ReportsWidget(QWidget): @@ -1106,6 +1258,10 @@ class ReportsWidget(QWidget): s.addWidget(c) QListWidgetItem(_('Style Rules'), r) + self.css = c = ClassesWidget(self) + s.addWidget(c) + QListWidgetItem(_('Style Classes'), r) + self.chars = c = CharsWidget(self) s.addWidget(c) QListWidgetItem(_('Characters'), r)