# Link-report helpers added to src/calibre/ebooks/oeb/polish/report.py.
#
# Names used here but bound outside this span: XPath and xml2text (imported
# from calibre.ebooks.oeb.base), OEB_DOCS (calibre.ebooks.oeb.polish.container),
# namedtuple (collections), and LinkLocation / safe_href_to_name, which are not
# defined in the visible part of the change.
#
# NOTE: gather_data() (only partially visible here) dispatches through
# globals()[x + '_data'] over the section list
# 'files chars images links words css'.split(); 'links' has to be in that list
# for links_data() to run.


def description_for_anchor(elem):
    """Return a short description (at most 30 characters) for *elem*, or None.

    Candidates are tried in order: the ``title`` attribute, the element's own
    leading text, the leading text of its first child and, for elements with
    only a few descendants, the full text as produced by ``xml2text``.
    """
    def check(x, min_len=4):
        # Accept a candidate only if, once stripped, it has at least min_len
        # characters; the result is truncated to 30 characters. Falls through
        # (returning None) otherwise.
        if x:
            x = x.strip()
            if len(x) >= min_len:
                return x[:30]

    desc = check(elem.get('title'))
    if desc is not None:
        return desc
    desc = check(elem.text)
    if desc is not None:
        return desc
    if len(elem) > 0:
        desc = check(elem[0].text)
        if desc is not None:
            return desc
    # Get full text for tags that have only a few descendants. The loop breaks
    # on the seventh descendant, so the else clause only runs for elements
    # with six or fewer descendants.
    for i, _descendant in enumerate(elem.iterdescendants('*')):
        if i > 5:
            break
    else:
        desc = check(xml2text(elem), min_len=1)
        if desc is not None:
            return desc


def create_anchor_map(root, pat, name):
    """Map every anchor in *root* to a ``(LinkLocation, description)`` pair.

    :param root: Parsed document tree.
    :param pat: Callable that returns the candidate elements when called with
        *root* (links_data passes a compiled ``//*[@id or @name]`` XPath).
    :param name: Name of the file *root* was parsed from.
    :return: dict keyed by the ``id`` (or, failing that, ``name``) attribute.
        When an anchor occurs more than once only the first occurrence is kept.
    """
    ans = {}
    for elem in pat(root):
        anchor = elem.get('id') or elem.get('name')
        if anchor and anchor not in ans:
            ans[anchor] = (LinkLocation(name, elem.sourceline, anchor), description_for_anchor(elem))
    return ans


Anchor = namedtuple('Anchor', 'id location text')
L = namedtuple('Link', 'location text is_external href path_ok anchor_ok anchor ok')


def Link(location, text, is_external, href, path_ok, anchor_ok, anchor):
    """Build a ``Link`` tuple, deriving its ``ok`` field.

    ``ok`` is None for external links (they are not verified), otherwise it is
    ``path_ok and anchor_ok``.
    """
    if is_external:
        ok = None
    else:
        ok = path_ok and anchor_ok
    return L(location, text, is_external, href, path_ok, anchor_ok, anchor, ok)


def links_data(container, book_locale):
    """Yield one ``Link`` for every ``<a href>`` in the book's documents.

    Works in two passes: first every file whose mime type is in OEB_DOCS is
    parsed, its anchors are indexed and its links are collected; only then are
    the links resolved, since a link may point at an anchor in a file that is
    scanned later.

    :param container: The book container; ``mime_map``, ``parsed()`` and (via
        ``safe_href_to_name``) href resolution are used.
    :param book_locale: Unused; accepted so that all ``*_data`` functions share
        the signature gather_data() calls them with.
    """
    anchor_map = {}
    links = []
    anchor_pat = XPath('//*[@id or @name]')
    link_pat = XPath('//h:a[@href]')
    for name, mt in container.mime_map.iteritems():
        if mt in OEB_DOCS:
            root = container.parsed(name)
            anchor_map[name] = create_anchor_map(root, anchor_pat, name)
            for a in link_pat(root):
                href = a.get('href')
                text = description_for_anchor(a)
                # Build the location before branching: previously it was only
                # assigned in the non-empty-href branch, so an empty href
                # either raised UnboundLocalError or reused the location of
                # the preceding link.
                location = LinkLocation(name, a.sourceline, href)
                if href:
                    base, frag = href.partition('#')[0::2]
                    if frag and not base:
                        # Pure fragment link: the target is this same file
                        dest = name
                    else:
                        dest = safe_href_to_name(container, href, name)
                    links.append((base, frag, dest, location, text))
                else:
                    links.append(('', '', None, location, text))

    for base, frag, dest, location, text in links:
        if dest is None:
            # No file name could be determined for the href: report the link
            # as external, which leaves its ok field as None (unverified).
            link = Link(location, text, True, base, True, True, Anchor(frag, None, None))
        else:
            if dest in anchor_map:
                loc = LinkLocation(dest, None, None)
                if frag:
                    anchor = anchor_map[dest].get(frag)
                    if anchor is None:
                        # File exists but the fragment does not
                        link = Link(location, text, False, dest, True, False, Anchor(frag, loc, None))
                    else:
                        link = Link(location, text, False, dest, True, True, Anchor(frag, *anchor))
                else:
                    link = Link(location, text, False, dest, True, True, Anchor(None, loc, None))
            else:
                # The destination is not one of the scanned documents
                link = Link(location, text, False, dest, False, False, Anchor(frag, None, None))
        yield link
100644 --- a/src/calibre/gui2/tweak_book/completion/basic.py +++ b/src/calibre/gui2/tweak_book/completion/basic.py @@ -12,9 +12,9 @@ from collections import namedtuple, OrderedDict from PyQt5.Qt import QObject, pyqtSignal, Qt from calibre import prepare_string_for_xml -from calibre.ebooks.oeb.base import xml2text from calibre.ebooks.oeb.polish.container import OEB_STYLES, OEB_FONTS, name_to_href from calibre.ebooks.oeb.polish.parsing import parse +from calibre.ebooks.oeb.polish.report import description_for_anchor from calibre.gui2 import is_gui_thread from calibre.gui2.tweak_book import current_container, editors from calibre.gui2.tweak_book.completion.utils import control, data, DataError @@ -91,33 +91,6 @@ def complete_names(names_data, data_conn): descriptions = {href:d(name) for name, href in nmap.iteritems()} return items, descriptions, {} - -def description_for_anchor(elem): - def check(x, min_len=4): - if x: - x = x.strip() - if len(x) >= min_len: - return x[:30] - - desc = check(elem.get('title')) - if desc is not None: - return desc - desc = check(elem.text) - if desc is not None: - return desc - if len(elem) > 0: - desc = check(elem[0].text) - if desc is not None: - return desc - # Get full text for tags that have only a few descendants - for i, x in enumerate(elem.iterdescendants('*')): - if i > 5: - break - else: - desc = check(xml2text(elem), min_len=1) - if desc is not None: - return desc - def create_anchor_map(root): ans = {} for elem in root.xpath('//*[@id or @name]'): diff --git a/src/calibre/gui2/tweak_book/reports.py b/src/calibre/gui2/tweak_book/reports.py index f7286bbbfa..aa3574adec 100644 --- a/src/calibre/gui2/tweak_book/reports.py +++ b/src/calibre/gui2/tweak_book/reports.py @@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import, __license__ = 'GPL v3' __copyright__ = '2015, Kovid Goyal ' -import time +import time, textwrap, os from threading import Thread from future_builtins import map from operator import 
# Links report added to src/calibre/gui2/tweak_book/reports.py.
#
# This change also touches two classes that are only partially visible here:
#   * FilesView gains a signal, current_changed = pyqtSignal(object, object),
#     and overrides currentChanged(current, previous) to call the QTableView
#     implementation and then emit current_changed with both indexes mapped
#     through self.proxy.mapToSource. LinksWidget below depends on that signal.
#   * ReportsWidget creates the widget (self.links = li = LinksWidget(self)),
#     adds it to its stack and adds a QListWidgetItem(_('Links'), r) entry
#     after the 'Characters' entry.
# The module imports are extended with textwrap, os, QUrl, QWebView and
# open_url, all of which are used below.

# Links {{{

class LinksModel(FileCollection):
    """Table model with one row per link found in the book."""

    COLUMN_HEADERS = [_('OK'), _('Source'), _('Source text'), _('Target'), _('Anchor'), _('Target text')]

    def __init__(self, parent=None):
        FileCollection.__init__(self, parent)
        self.num_bad = 0

    def __call__(self, data):
        """Reset the model from ``data['links']`` and rebuild the sort keys."""
        self.beginResetModel()
        self.links = self.files = data['links']
        self.total_size = len(self.links)
        # Only links known to be broken count; external links have ok None
        self.num_bad = sum(1 for link in self.links if link.ok is False)
        psk = numeric_sort_key
        # One sort key per column, in COLUMN_HEADERS order
        self.sort_keys = tuple((
            link.ok, psk(link.location.name), psk(link.text or ''), psk(link.href or ''), psk(link.anchor.id or ''), psk(link.anchor.text or ''))
            for link in self.links)
        self.endResetModel()

    def data(self, index, role=Qt.DisplayRole):
        """Return the value for *index* under *role*, or None.

        Qt.UserRole returns the Link tuple itself; the widget uses it to find
        the source and destination of the selected row.
        """
        if role == SORT_ROLE:
            try:
                return self.sort_keys[index.row()][index.column()]
            except IndexError:
                pass
        elif role == Qt.DisplayRole:
            col = index.column()
            try:
                link = self.links[index.row()]
            except IndexError:
                return None
            if col == 0:
                # Unverified links (ok is None) show nothing in this column
                return {True:'✓ ', False:'✗'}.get(link.ok)
            if col == 1:
                return link.location.name
            if col == 2:
                return link.text
            if col == 3:
                return link.href
            if col == 4:
                return link.anchor.id
            if col == 5:
                return link.anchor.text
        elif role == Qt.ToolTipRole:
            col = index.column()
            try:
                link = self.links[index.row()]
            except IndexError:
                return None
            if col == 0:
                return {True:_('The link destination exists'), False:_('The link destination does not exist')}.get(
                    link.ok, _('The link destination could not be verified'))
            if col == 2:
                if link.text:
                    return textwrap.fill(link.text)
            if col == 5:
                if link.anchor.text:
                    return textwrap.fill(link.anchor.text)
        elif role == Qt.UserRole:
            try:
                return self.links[index.row()]
            except IndexError:
                pass


class WebView(QWebView):
    """Preview pane for link destinations with a fixed preferred size."""

    def sizeHint(self):
        return QSize(600, 200)


class LinksWidget(QWidget):
    """Links report: a filterable table of links above a destination preview."""

    def __init__(self, parent=None):
        QWidget.__init__(self, parent)
        self.l = l = QVBoxLayout(self)

        self.filter_edit = e = QLineEdit(self)
        l.addWidget(e)
        self.splitter = s = QSplitter(Qt.Vertical, self)
        l.addWidget(s)
        e.setPlaceholderText(_('Filter'))
        self.model = m = LinksModel(self)
        self.links = f = FilesView(m, self)
        f.DELETE_POSSIBLE = False
        self.to_csv = f.to_csv
        f.double_clicked.connect(self.double_clicked)
        e.textChanged.connect(f.proxy.filter_text)
        s.addWidget(f)
        # Column 1 is 'Source'
        self.links.restore_table('links-table', sort_column=1)
        self.view = WebView(self)
        s.addWidget(self.view)
        # True while __call__ is repopulating the table
        self.ignore_current_change = False
        # URL currently loaded in the preview, None when it shows a message
        self.current_url = None
        f.current_changed.connect(self.current_changed)
        try:
            s.restoreState(read_state('links-view-splitter'))
        except TypeError:
            # presumably read_state() returned None because nothing has been
            # saved yet -- TODO confirm against read_state
            pass
        s.setCollapsible(0, False)
        s.setCollapsible(1, True)
        s.setStretchFactor(0, 10)

    def __call__(self, data):
        """Load freshly gathered report *data* and reset the preview pane."""
        self.ignore_current_change = True
        try:
            self.model(data)
            self.filter_edit.clear()
            self.links.resize_rows()
            # NOTE(review): the markup inside this literal is garbled in the
            # reviewed text (the quotes enclose only blank lines); '<p>' is a
            # reconstruction -- confirm against the repository.
            self.view.setHtml('<p>'+_(
                'Click entries above to see their destination here'))
            # The preview no longer shows any destination. Without this,
            # selecting the link that was shown before the refresh would be
            # treated as already loaded and the hint text would stay visible.
            self.current_url = None
        finally:
            self.ignore_current_change = False

    def current_changed(self, current, previous):
        """Show the destination of the newly selected link in the preview."""
        if self.ignore_current_change:
            # Selection changes caused by repopulating the table are not user
            # actions; __call__ sets the preview contents itself.
            return
        link = current.data(Qt.UserRole)
        if link is None:
            return
        url = None
        if link.is_external:
            if link.href:
                frag = ('#' + link.anchor.id) if link.anchor.id else ''
                url = QUrl(link.href + frag)
        elif link.anchor.location:
            path = current_container().name_to_abspath(link.anchor.location.name)
            if path and os.path.exists(path):
                url = QUrl.fromLocalFile(path)
                if link.anchor.id:
                    url.setFragment(link.anchor.id)
        if url is None:
            # NOTE(review): literal reconstructed as '<p>', see __call__
            self.view.setHtml('<p>' + _('No destination found for this link'))
            self.current_url = url
        elif url != self.current_url:
            # Skip reloading when the destination is already displayed
            self.current_url = url
            self.view.setUrl(url)

    def double_clicked(self, index):
        """Jump to the link's source or destination, depending on the column."""
        link = index.data(Qt.UserRole)
        if link is None:
            return
        if index.column() < 3:
            # Jump to source (columns OK, Source, Source text)
            jump_to_location(link.location)
        else:
            # Jump to destination (columns Target, Anchor, Target text)
            if link.is_external:
                if link.href:
                    open_url(link.href)
            elif link.anchor.location:
                jump_to_location(link.anchor.location)

    def save(self):
        """Persist the table layout and the splitter position."""
        self.links.save_table('links-table')
        save_state('links-view-splitter', bytearray(self.splitter.saveState()))
# }}}