Edit Book: Add a 'Style Classes' report to the Reports tool

This commit is contained in:
Kovid Goyal 2015-01-27 14:59:07 +05:30
parent 50ba062e82
commit 46eb1543ce
2 changed files with 212 additions and 26 deletions

View File

@ -55,7 +55,7 @@ def safe_img_data(container, name, mt):
width = height = 0 width = height = 0
return width, height return width, height
def files_data(container, book_locale): def files_data(container, *args):
for name, path in container.name_path_map.iteritems(): for name, path in container.name_path_map.iteritems():
yield File(name, posixpath.dirname(name), posixpath.basename(name), safe_size(container, name), yield File(name, posixpath.dirname(name), posixpath.basename(name), safe_size(container, name),
get_category(name, container.mime_map.get(name, ''))) get_category(name, container.mime_map.get(name, '')))
@ -76,7 +76,7 @@ def safe_href_to_name(container, href, base):
except ValueError: except ValueError:
pass # Absolute path on windows pass # Absolute path on windows
def images_data(container, book_locale): def images_data(container, *args):
image_usage = defaultdict(set) image_usage = defaultdict(set)
link_sources = OEB_STYLES | OEB_DOCS link_sources = OEB_STYLES | OEB_DOCS
for name, mt in container.mime_map.iteritems(): for name, mt in container.mime_map.iteritems():
@ -138,7 +138,7 @@ def Link(location, text, is_external, href, path_ok, anchor_ok, anchor):
ok = path_ok and anchor_ok ok = path_ok and anchor_ok
return L(location, text, is_external, href, path_ok, anchor_ok, anchor, ok) return L(location, text, is_external, href, path_ok, anchor_ok, anchor, ok)
def links_data(container, book_locale): def links_data(container, *args):
anchor_map = {} anchor_map = {}
links = [] links = []
anchor_pat = XPath('//*[@id or @name]') anchor_pat = XPath('//*[@id or @name]')
@ -181,13 +181,13 @@ def links_data(container, book_locale):
Word = namedtuple('Word', 'id word locale usage') Word = namedtuple('Word', 'id word locale usage')
def words_data(container, book_locale): def words_data(container, book_locale, *args):
count, words = get_all_words(container, book_locale, get_word_count=True) count, words = get_all_words(container, book_locale, get_word_count=True)
return (count, tuple(Word(i, word, locale, v) for i, ((word, locale), v) in enumerate(words.iteritems()))) return (count, tuple(Word(i, word, locale, v) for i, ((word, locale), v) in enumerate(words.iteritems())))
Char = namedtuple('Char', 'id char codepoint usage count') Char = namedtuple('Char', 'id char codepoint usage count')
def chars_data(container, book_locale): def chars_data(container, *args):
chars = defaultdict(set) chars = defaultdict(set)
counter = Counter() counter = Counter()
def count(codepoint): def count(codepoint):
@ -216,7 +216,11 @@ MatchLocation = namedtuple('MatchLocation', 'tag sourceline')
CSSEntry = namedtuple('CSSEntry', 'rule count matched_files sort_key') CSSEntry = namedtuple('CSSEntry', 'rule count matched_files sort_key')
CSSFileMatch = namedtuple('CSSFileMatch', 'file_name locations sort_key') CSSFileMatch = namedtuple('CSSFileMatch', 'file_name locations sort_key')
def css_data(container, book_locale): ClassEntry = namedtuple('ClassEntry', 'cls num_of_matches matched_files sort_key')
ClassFileMatch = namedtuple('ClassFileMatch', 'file_name class_elements sort_key')
ClassElement = namedtuple('ClassElement', 'name line_number text_on_line tag matched_rules')
def css_data(container, book_locale, result_data, *args):
import tinycss import tinycss
from tinycss.css21 import RuleSet, ImportRule from tinycss.css21 import RuleSet, ImportRule
@ -271,14 +275,17 @@ def css_data(container, book_locale):
if sheet is not None: if sheet is not None:
yield sheet yield sheet
tt_cache = {}
def tag_text(elem):
    '''
    Return a textual rendering of the opening tag of ``elem``.

    The namespace part of the tag name (everything up to the last ``}``) is
    dropped. When the element has attributes they are rendered as
    ``name="value"`` pairs, with the values passed through
    prepare_string_for_xml(). Results are memoised in ``tt_cache``, keyed by
    the element object, so that repeated requests for the same element do not
    rebuild the string.

    :param elem: an element exposing ``tag``, ``attrib``, ``get()`` and ``keys()``
    :return: ``'<tag>'`` or ``'<tag attr="value" ...>'``
    '''
    ans = tt_cache.get(elem)
    if ans is None:
        tag = elem.tag.rpartition('}')[-1]
        if elem.attrib:
            attribs = ' '.join('%s="%s"' % (k, prepare_string_for_xml(elem.get(k, ''), True)) for k in elem.keys())
            ans = '<%s %s>' % (tag, attribs)
        else:
            ans = '<%s>' % tag
        # Cache both forms and fall through to a single return, so that a
        # cache hit and an attribute-less element both yield the string
        # instead of implicitly returning None.
        tt_cache[elem] = ans
    return ans
def matches_for_selector(selector, root): def matches_for_selector(selector, root, class_map, rule):
selector = pseudo_pat.sub('', selector) selector = pseudo_pat.sub('', selector)
selector = MIN_SPACE_RE.sub(r'\1', selector) selector = MIN_SPACE_RE.sub(r'\1', selector)
try: try:
@ -299,13 +306,36 @@ def css_data(container, book_locale):
matches = xp(root) matches = xp(root)
except Exception: except Exception:
return () return ()
lsel = selector.lower()
for elem in matches:
for cls in elem.get('class', '').split():
if '.' + cls.lower() in lsel:
class_map[cls][elem].append(rule)
return (MatchLocation(tag_text(elem), elem.sourceline) for elem in matches) return (MatchLocation(tag_text(elem), elem.sourceline) for elem in matches)
class_map = defaultdict(lambda : defaultdict(list))
for name, inline_sheets in html_sheets.iteritems(): for name, inline_sheets in html_sheets.iteritems():
root = container.parsed(name) root = container.parsed(name)
cmap = defaultdict(lambda : defaultdict(list))
for elem in root.xpath('//*[@class]'):
for cls in elem.get('class', '').split():
cmap[cls][elem] = []
for sheet in chain(sheets_for_html(name, root), inline_sheets): for sheet in chain(sheets_for_html(name, root), inline_sheets):
for rule in rules_in_sheet(sheet): for rule in rules_in_sheet(sheet):
rule_map[rule][name].extend(matches_for_selector(rule.selector, root)) rule_map[rule][name].extend(matches_for_selector(rule.selector, root, cmap, rule))
for cls, elem_map in cmap.iteritems():
class_elements = class_map[cls][name]
for elem, usage in elem_map.iteritems():
class_elements.append(
ClassElement(name, elem.sourceline, elem.get('class'), tag_text(elem), tuple(usage)))
result_data['classes'] = ans = []
for cls, name_map in class_map.iteritems():
la = tuple(ClassFileMatch(name, tuple(class_elements), numeric_sort_key(name)) for name, class_elements in name_map.iteritems() if class_elements)
num_of_matches = sum(sum(len(ce.matched_rules) for ce in cfm.class_elements) for cfm in la)
ans.append(ClassEntry(cls, num_of_matches, la, numeric_sort_key(cls)))
ans = [] ans = []
for rule, loc_map in rule_map.iteritems(): for rule, loc_map in rule_map.iteritems():
@ -321,7 +351,7 @@ def gather_data(container, book_locale):
data = {} data = {}
for x in 'files chars images links words css'.split(): for x in 'files chars images links words css'.split():
st = time.time() st = time.time()
data[x] = globals()[x + '_data'](container, book_locale) data[x] = globals()[x + '_data'](container, book_locale, data)
if isinstance(data[x], types.GeneratorType): if isinstance(data[x], types.GeneratorType):
data[x] = tuple(data[x]) data[x] = tuple(data[x])
timing[x] = time.time() - st timing[x] = time.time() - st

View File

@ -26,7 +26,9 @@ from PyQt5.Qt import (
from calibre import human_readable, fit_image from calibre import human_readable, fit_image
from calibre.constants import DEBUG from calibre.constants import DEBUG
from calibre.ebooks.oeb.polish.report import gather_data, CSSEntry, CSSFileMatch, MatchLocation from calibre.ebooks.oeb.polish.report import (
gather_data, CSSEntry, CSSFileMatch, MatchLocation, ClassEntry,
ClassFileMatch, ClassElement, CSSRule, LinkLocation)
from calibre.gui2 import error_dialog, question_dialog, choose_save_file, open_url from calibre.gui2 import error_dialog, question_dialog, choose_save_file, open_url
from calibre.gui2.tweak_book import current_container, tprefs, dictionaries from calibre.gui2.tweak_book import current_container, tprefs, dictionaries
from calibre.gui2.tweak_book.widgets import Dialog from calibre.gui2.tweak_book.widgets import Dialog
@ -902,9 +904,8 @@ class CSSRulesModel(QAbstractItemModel):
if not parent.isValid(): if not parent.isValid():
return len(self.rules) return len(self.rules)
entry = self.index_to_entry(parent) entry = self.index_to_entry(parent)
if isinstance(entry, (CSSEntry, CSSFileMatch)): c = self.to_container(entry)
return len(self.to_container(entry)) return 0 if c is entry else len(c)
return 0
def columnCount(self, parent=ROOT): def columnCount(self, parent=ROOT):
return 1 return 1
@ -968,6 +969,16 @@ class CSSProxyModel(QSortFilterProxyModel):
class CSSWidget(QWidget): class CSSWidget(QWidget):
SETTING_PREFIX = 'css-'
MODEL = CSSRulesModel
PROXY = CSSProxyModel
def read_state(self, name, default=None):
    ''' Read the saved setting ``name``, namespaced with this widget's SETTING_PREFIX. '''
    key = self.SETTING_PREFIX + name
    return read_state(key, default)
def save_state(self, name, val):
    ''' Store ``val`` under the setting ``name``, namespaced with this widget's SETTING_PREFIX. '''
    key = self.SETTING_PREFIX + name
    return save_state(key, val)
def __init__(self, parent=None): def __init__(self, parent=None):
QWidget.__init__(self, parent) QWidget.__init__(self, parent)
self.l = l = QVBoxLayout(self) self.l = l = QVBoxLayout(self)
@ -976,8 +987,8 @@ class CSSWidget(QWidget):
self.filter_edit = e = QLineEdit(self) self.filter_edit = e = QLineEdit(self)
l.addWidget(e) l.addWidget(e)
e.setPlaceholderText(_('Filter')) e.setPlaceholderText(_('Filter'))
self.model = m = CSSRulesModel(self) self.model = m = self.MODEL(self)
self.proxy = p = CSSProxyModel(self) self.proxy = p = self.PROXY(self)
p.setSourceModel(m) p.setSourceModel(m)
self.view = f = QTreeView(self) self.view = f = QTreeView(self)
f.setAlternatingRowColors(True) f.setAlternatingRowColors(True)
@ -990,16 +1001,16 @@ class CSSWidget(QWidget):
l.addLayout(h) l.addLayout(h)
h.addWidget(QLabel(_('Sort by:'))) h.addWidget(QLabel(_('Sort by:')))
self.counts_button = b = QRadioButton(_('&Counts'), self) self.counts_button = b = QRadioButton(_('&Counts'), self)
b.setChecked(read_state('css-sort-on-counts', True)) b.setChecked(self.read_state('sort-on-counts', True))
h.addWidget(b) h.addWidget(b)
self.name_button = b = QRadioButton(_('&Name'), self) self.name_button = b = QRadioButton(_('&Name'), self)
b.setChecked(not read_state('css-sort-on-counts', True)) b.setChecked(not self.read_state('sort-on-counts', True))
h.addWidget(b) h.addWidget(b)
b.toggled.connect(self.resort) b.toggled.connect(self.resort)
h.addStrut(20) h.addStrut(20)
self._sort_order = o = QComboBox(self) self._sort_order = o = QComboBox(self)
o.addItems([_('Ascending'), _('Descending')]) o.addItems([_('Ascending'), _('Descending')])
o.setCurrentIndex(0 if read_state('css-sort-ascending', True) else 1) o.setCurrentIndex(0 if self.read_state('sort-ascending', True) else 1)
o.setEditable(False) o.setEditable(False)
o.currentIndexChanged[int].connect(self.resort) o.currentIndexChanged[int].connect(self.resort)
h.addWidget(o) h.addWidget(o)
@ -1015,15 +1026,18 @@ class CSSWidget(QWidget):
self._sort_order.setCurrentIndex({Qt.AscendingOrder:0}.get(val, 1)) self._sort_order.setCurrentIndex({Qt.AscendingOrder:0}.get(val, 1))
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
def update_summary(self):
    ''' Show the number of top-level rows and the number of unused rules in the summary label. '''
    model = self.model
    text = _('{0} rules, {1} unused').format(model.rowCount(), model.num_unused)
    self.summary.setText(text)
def __call__(self, data): def __call__(self, data):
self.model(data) self.model(data)
self.summary.setText(_('{0} rules, {1} unused').format(self.model.rowCount(), self.model.num_unused)) self.update_summary()
self.filter_edit.clear() self.filter_edit.clear()
self.resort() self.resort()
def save(self): def save(self):
save_state('css-sort-on-counts', self.counts_button.isChecked()) self.save_state('sort-on-counts', self.counts_button.isChecked())
save_state('css-sort-ascending', self.sort_order == Qt.AscendingOrder) self.save_state('sort-ascending', self.sort_order == Qt.AscendingOrder)
def resort(self, *args): def resort(self, *args):
self.model.sort_on_count = self.counts_button.isChecked() self.model.sort_on_count = self.counts_button.isChecked()
@ -1048,6 +1062,9 @@ class CSSWidget(QWidget):
entry = self.model.index_to_entry(index) entry = self.model.index_to_entry(index)
if entry is None: if entry is None:
return return
self.handle_double_click(entry, index, boss)
def handle_double_click(self, entry, index, boss):
if isinstance(entry, CSSEntry): if isinstance(entry, CSSEntry):
loc = entry.rule.location loc = entry.rule.location
name, sourceline, col = loc name, sourceline, col = loc
@ -1056,6 +1073,9 @@ class CSSWidget(QWidget):
else: else:
name = self.model.index_to_entry(index.parent()).file_name name = self.model.index_to_entry(index.parent()).file_name
sourceline = entry.sourceline sourceline = entry.sourceline
self.show_line(name, sourceline, boss)
def show_line(self, name, sourceline, boss):
editor = boss.edit_file_requested(name) editor = boss.edit_file_requested(name)
if editor is None: if editor is None:
return return
@ -1068,6 +1088,138 @@ class CSSWidget(QWidget):
# }}} # }}}
# Classes {{{
class ClassesModel(CSSRulesModel):

    '''
    Tree model for the Style Classes report.

    The levels of the tree are, from the top: ClassEntry, ClassFileMatch,
    ClassElement and finally the rules stored in ClassElement.matched_rules.
    '''

    def __init__(self, parent):
        self.classes = self.rules = ()
        CSSRulesModel.__init__(self, parent)
        self.sort_on_count = True
        self.num_size = 1
        self.num_unused = 0
        self.build_maps()

    def build_maps(self):
        ''' Rebuild self.parent_map, which maps keys to (row, enclosing container) pairs. '''
        parent_map = {}
        self.parent_map = parent_map
        for class_row, class_entry in enumerate(self.classes):
            files = class_entry.matched_files
            parent_map[files] = (class_row, self.classes)

            for file_row, file_match in enumerate(files):
                elements = file_match.class_elements
                parent_map[elements] = (file_row, files)

                for element_row, element in enumerate(elements):
                    parent_map[element] = (element_row, elements)
                    rules = element.matched_rules

                    for rule_row, rule in enumerate(rules):
                        parent_map[rule] = (rule_row, rules)

    def to_container(self, entry):
        ''' Return the sequence of children of ``entry``, or ``entry`` itself if it is not one of the class report types. '''
        if isinstance(entry, ClassEntry):
            return entry.matched_files
        if isinstance(entry, ClassFileMatch):
            return entry.class_elements
        if isinstance(entry, ClassElement):
            return entry.matched_rules
        return entry

    def data(self, index, role=Qt.DisplayRole):
        '''
        Return the data for ``index`` in the given ``role``.

        SORT_ROLE gives the sort value, Qt.DisplayRole the text to show,
        Qt.UserRole the underlying entry and Qt.FontRole the font for the two
        topmost levels. Anything else yields None.
        '''
        if role == SORT_ROLE:
            entry = self.index_to_entry(index)
            if isinstance(entry, ClassEntry):
                if self.sort_on_count:
                    return entry.num_of_matches
                return entry.sort_key
            elif isinstance(entry, ClassFileMatch):
                if self.sort_on_count:
                    return len(entry.class_elements)
                return entry.sort_key
            elif isinstance(entry, ClassElement):
                return entry.line_number
            elif isinstance(entry, CSSRule):
                return entry.location.file_name
            return None

        if role == Qt.DisplayRole:
            entry = self.index_to_entry(index)
            if isinstance(entry, ClassEntry):
                # Pad the count to the width of the largest count
                template = '[%{}d] %s'.format(self.num_size)
                return template % (entry.num_of_matches, entry.cls)
            if isinstance(entry, ClassFileMatch):
                return _('%s [%d elements]') % (entry.file_name, len(entry.class_elements))
            if isinstance(entry, ClassElement):
                return '%s @ %s' % (entry.tag, entry.line_number)
            if isinstance(entry, CSSRule):
                location = entry.location
                return '%s @ %s:%s' % (entry.selector, location.file_name, location.line)
            return None

        if role == Qt.UserRole:
            return self.index_to_entry(index)

        if role == Qt.FontRole:
            entry = self.index_to_entry(index)
            if isinstance(entry, ClassEntry):
                return self.main_font
            if isinstance(entry, ClassFileMatch):
                return self.italic_font

        return None

    def __call__(self, data):
        ''' Replace the contents of the model with the entries in ``data['classes']``. '''
        self.beginResetModel()
        self.rules = self.classes = tuple(data['classes'])
        self.num_unused = len([entry for entry in self.classes if entry.num_of_matches == 0])
        try:
            largest = max(entry.num_of_matches for entry in self.classes)
        except ValueError:
            # max() of an empty sequence, there are no classes at all
            self.num_size = 1
        else:
            self.num_size = len(str(largest))
        self.build_maps()
        self.endResetModel()
class ClassProxyModel(CSSProxyModel):

    ''' Proxy model that filters the top-level ClassEntry rows by class name. '''

    def filterAcceptsRow(self, row, parent):
        ''' Accept every row when no filter text is set, and every row that is not a ClassEntry. '''
        needle = self._filter_text
        if not needle:
            return True
        source = self.sourceModel()
        entry = source.index_to_entry(source.index(row, 0, parent))
        if isinstance(entry, ClassEntry):
            return primary_contains(needle, entry.cls)
        return True
class ClassesWidget(CSSWidget):

    ''' The Style Classes report, a CSSWidget driven by ClassesModel and ClassProxyModel. '''

    SETTING_PREFIX = 'classes-'
    MODEL = ClassesModel
    PROXY = ClassProxyModel

    def update_summary(self):
        ''' Show the number of top-level rows and the number of unused classes in the summary label. '''
        model = self.model
        text = _('{0} classes, {1} unused').format(model.rowCount(), model.num_unused)
        self.summary.setText(text)

    def to_csv(self):
        ''' Return CSV data with one row per top-level row of the proxy model: the class and its number of matches. '''
        out = BytesIO()
        writer = csv_writer(out)
        writer.writerow([_('Class'), _('Number of matches')])
        proxy = self.proxy
        for row in xrange(proxy.rowCount()):
            entry = proxy.mapToSource(proxy.index(row, 0)).data(Qt.UserRole)
            writer.writerow([entry.cls, entry.num_of_matches])
        return out.getvalue()

    def handle_double_click(self, entry, index, boss):
        '''
        React to a double click on ``entry``.

        A ClassElement is passed to jump_to_location(). A ClassEntry passes
        the distinct locations of all rules matched by its elements to
        jump(), doing nothing if there are none. A ClassFileMatch shows line
        0 of its file and anything else shows the line given by its
        ``location``.
        '''
        if isinstance(entry, ClassElement):
            return jump_to_location(entry)

        if isinstance(entry, ClassEntry):
            # Collect the rule locations, dropping duplicates but keeping
            # the order of first occurrence
            seen = set()
            locations = []
            for file_match in entry.matched_files:
                for element in file_match.class_elements:
                    for rule in element.matched_rules:
                        loc = LinkLocation(rule.location.file_name, rule.location.line, None)
                        if loc not in seen:
                            seen.add(loc)
                            locations.append(loc)
            if locations:
                jump((id(self), id(entry)), tuple(locations))
            return

        if isinstance(entry, ClassFileMatch):
            name, sourceline = entry.file_name, 0
        else:
            name, sourceline, col = entry.location
        self.show_line(name, sourceline, boss)
# }}}
# Wrapper UI {{{ # Wrapper UI {{{
class ReportsWidget(QWidget): class ReportsWidget(QWidget):
@ -1106,6 +1258,10 @@ class ReportsWidget(QWidget):
s.addWidget(c) s.addWidget(c)
QListWidgetItem(_('Style Rules'), r) QListWidgetItem(_('Style Rules'), r)
self.css = c = ClassesWidget(self)
s.addWidget(c)
QListWidgetItem(_('Style Classes'), r)
self.chars = c = CharsWidget(self) self.chars = c = CharsWidget(self)
s.addWidget(c) s.addWidget(c)
QListWidgetItem(_('Characters'), r) QListWidgetItem(_('Characters'), r)