From 9759944cc8fa5cda47a29860bfc94da49bd6fcb2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 9 Sep 2015 10:45:22 +0530 Subject: [PATCH] Edit Book: Add a tool to check external links (links pointing to websites). Can be accessed via Tools->Check external links --- manual/edit.rst | 8 + src/calibre/ebooks/oeb/polish/check/links.py | 46 ++++++ src/calibre/gui2/tweak_book/boss.py | 6 + src/calibre/gui2/tweak_book/check_links.py | 163 +++++++++++++++++++ src/calibre/gui2/tweak_book/ui.py | 5 + 5 files changed, 228 insertions(+) create mode 100644 src/calibre/gui2/tweak_book/check_links.py diff --git a/manual/edit.rst b/manual/edit.rst index 295746d9f5..5d4581c66f 100644 --- a/manual/edit.rst +++ b/manual/edit.rst @@ -681,6 +681,14 @@ Note that editing the styles does not actually make changes to the book contents, it only allows for quick experimentation. The ability to live edit inside the Inspector is under development. +Checking external links +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can use this tool to check all links in your book that point to external +websites. The tool will try to visit every externally linked website, and +if the visit fails, it will report all broken links in a convenient format for +you to fix. 
+ Arrange files into folders by type ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/calibre/ebooks/oeb/polish/check/links.py b/src/calibre/ebooks/oeb/polish/check/links.py index 2709955c9e..130f504519 100644 --- a/src/calibre/ebooks/oeb/polish/check/links.py +++ b/src/calibre/ebooks/oeb/polish/check/links.py @@ -9,7 +9,11 @@ __copyright__ = '2013, Kovid Goyal ' import os from collections import defaultdict from urlparse import urlparse +from future_builtins import map +from threading import Thread +from Queue import Queue, Empty +from calibre import browser from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES from calibre.ebooks.oeb.polish.container import OEB_FONTS from calibre.ebooks.oeb.polish.utils import guess_type, actual_case_for_name, corrected_case_for_name @@ -336,3 +340,45 @@ def check_links(container): a(Bookmarks(name)) return errors + +def check_external_links(container, progress_callback=lambda num, total:None): + progress_callback(0, 0) + external_links = defaultdict(list) + for name, mt in container.mime_map.iteritems(): + if mt in OEB_DOCS or mt in OEB_STYLES: + for href, lnum, col in container.iterlinks(name): + purl = urlparse(href) + if purl.scheme in ('http', 'https'): + key = href.partition('#')[0] + external_links[key].append((name, href, lnum, col)) + if not external_links: + return [] + items = Queue() + ans = [] + tuple(map(items.put, external_links.iteritems())) + progress_callback(0, len(external_links)) + done = [] + + def check_links(): + br = browser(honor_time=False, verify_ssl_certificates=False) + while True: + try: + href, locations = items.get_nowait() + except Empty: + return + try: + br.open(href, timeout=10).close() + except Exception as e: + ans.append((locations, e, href)) + finally: + done.append(None) + progress_callback(len(done), len(external_links)) + + workers = [Thread(name="CheckLinks", target=check_links) for i in xrange(min(10, len(external_links)))] + for w in workers: + w.daemon = True + w.start() + 
+ for w in workers: + w.join() + return ans diff --git a/src/calibre/gui2/tweak_book/boss.py b/src/calibre/gui2/tweak_book/boss.py index 5bbd9f38c2..a72b1c9deb 100644 --- a/src/calibre/gui2/tweak_book/boss.py +++ b/src/calibre/gui2/tweak_book/boss.py @@ -1190,6 +1190,12 @@ class Boss(QObject): mt = current_container().mime_map.get(name, guess_type(name)) self.edit_file_requested(name, None, mt) + def check_external_links(self): + if current_container() is None: + return error_dialog(self.gui, _('No book open'), _( + 'You must first open a book in order to check links.'), show=True) + self.gui.check_external_links.show() + def sync_editor_to_preview(self, name, sourceline_address): editor = self.edit_file(name, 'html') self.ignore_preview_to_editor_sync = True diff --git a/src/calibre/gui2/tweak_book/check_links.py b/src/calibre/gui2/tweak_book/check_links.py new file mode 100644 index 0000000000..7fa639cb31 --- /dev/null +++ b/src/calibre/gui2/tweak_book/check_links.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2015, Kovid Goyal + +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +from collections import defaultdict +from threading import Thread + +from PyQt5.Qt import ( + QVBoxLayout, QTextBrowser, QProgressBar, Qt, QWidget, QStackedWidget, + QLabel, QSizePolicy, pyqtSignal, QIcon, QInputDialog +) + +from calibre.gui2 import error_dialog +from calibre.gui2.tweak_book import current_container, set_current_container, editors +from calibre.gui2.tweak_book.boss import get_boss +from calibre.gui2.tweak_book.widgets import Dialog + +def get_data(name): + 'Get the data for name. 
Returns a unicode string if name is a text document/stylesheet' + if name in editors: + return editors[name].get_raw_data() + return current_container().raw_data(name) + +def set_data(name, val): + if name in editors: + editors[name].replace_data(val, only_if_different=False) + else: + with current_container().open(name, 'wb') as f: + f.write(val) + get_boss().set_modified() + +class CheckExternalLinks(Dialog): + + progress_made = pyqtSignal(object, object) + + def __init__(self, parent=None): + Dialog.__init__(self, _('Check external links'), 'check-external-links-dialog', parent) + self.progress_made.connect(self.on_progress_made, type=Qt.QueuedConnection) + + def show(self): + if self.rb.isEnabled(): + self.refresh() + return Dialog.show(self) + + def refresh(self): + self.stack.setCurrentIndex(0) + self.rb.setEnabled(False) + t = Thread(name='CheckLinksMaster', target=self.run) + t.daemon = True + t.start() + + def setup_ui(self): + self.pb = pb = QProgressBar(self) + pb.setTextVisible(True) + pb.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed) + pb.setRange(0, 0) + self.w = w = QWidget(self) + self.w.l = l = QVBoxLayout(w) + l.addStretch(), l.addWidget(pb) + self.w.la = la = QLabel(_('Checking external links, please wait...')) + la.setStyleSheet('QLabel { font-size: 20px; font-weight: bold }') + l.addWidget(la, 0, Qt.AlignCenter), l.addStretch() + + self.l = l = QVBoxLayout(self) + self.results = QTextBrowser(self) + self.results.setOpenLinks(False) + self.results.anchorClicked.connect(self.anchor_clicked) + self.stack = s = QStackedWidget(self) + s.addWidget(w), s.addWidget(self.results) + l.addWidget(s) + l.addWidget(self.bb) + self.bb.setStandardButtons(self.bb.Close) + self.rb = b = self.bb.addButton(_('&Refresh'), self.bb.ActionRole) + b.setIcon(QIcon(I('view-refresh.png'))) + b.clicked.connect(self.refresh) + + def sizeHint(self): + ans = Dialog.sizeHint(self) + ans.setHeight(600) + ans.setWidth(max(ans.width(), 800)) + return ans + + def 
run(self): + from calibre.ebooks.oeb.polish.check.links import check_external_links + self.tb = None + self.errors = [] + try: + self.errors = check_external_links(current_container(), self.progress_made.emit) + except Exception: + import traceback + self.tb = traceback.format_exc() + self.progress_made.emit(None, None) + + def on_progress_made(self, curr, total): + if curr is None: + self.results.setText('') + self.stack.setCurrentIndex(1) + self.fixed_errors = set() + self.rb.setEnabled(True) + if self.tb is not None: + return error_dialog(self, _('Checking failed'), _( + 'There was an error while checking links, click "Show Details" for more information'), + det_msg=self.tb, show=True) + if not self.errors: + self.results.setText(_('No broken links found')) + else: + self.populate_results() + else: + self.pb.setMaximum(total), self.pb.setValue(curr) + + def populate_results(self, preserve_pos=False): + text = '<h3>%s</h3><ol>' % (_('Found %d broken links') % (len(self.errors) - len(self.fixed_errors))) + for i, (locations, err, url) in enumerate(self.errors): + if i in self.fixed_errors: + continue + text += '<li><b>%s</b> \xa0<a href="err:%d">[%s]</a><br>%s<br><ul>' % (url, i, _('Fix this link'), err) + for name, href, lnum, col in locations: + text += '<li>{name} \xa0<a href="loc:{lnum},{name}">[{line}: {lnum}]</a></li>'.format( + name=name, lnum=lnum, line=_('line number')) + text += '</ul></li><hr>' + self.results.setHtml(text) + + def anchor_clicked(self, qurl): + url = qurl.toString() + if url.startswith('err:'): + errnum = int(url[4:]) + err = self.errors[errnum] + newurl, ok = QInputDialog.getText(self, _('Fix URL'), _('Enter the corrected URL:') + '\xa0'*40, text=err[2]) + if not ok: + return + nmap = defaultdict(set) + for name, href in {(l[0], l[1]) for l in err[0]}: + nmap[name].add(href) + + for name, hrefs in nmap.iteritems(): + raw = oraw = get_data(name) + for href in hrefs: + raw = raw.replace(href, newurl) + if raw != oraw: + set_data(name, raw) + self.fixed_errors.add(errnum) + self.populate_results() + elif url.startswith('loc:'): + lnum, name = url[4:].partition(',')[::2] + lnum = int(lnum or 1) + editor = get_boss().edit_file(name) + if lnum and editor is not None and editor.has_line_numbers: + editor.current_line = lnum + + +if __name__ == '__main__': + import sys + from calibre.gui2 import Application + from calibre.gui2.tweak_book.boss import get_container + app = Application([]) + set_current_container(get_container(sys.argv[-1])) + d = CheckExternalLinks() + d.refresh() + d.exec_() + del app diff --git a/src/calibre/gui2/tweak_book/ui.py b/src/calibre/gui2/tweak_book/ui.py index b8b1ac295d..4c58240a0b 100644 --- a/src/calibre/gui2/tweak_book/ui.py +++ b/src/calibre/gui2/tweak_book/ui.py @@ -33,6 +33,7 @@ from calibre.gui2.tweak_book.preview import Preview from calibre.gui2.tweak_book.plugin import create_plugin_actions from calibre.gui2.tweak_book.search import SearchPanel from calibre.gui2.tweak_book.check import Check +from calibre.gui2.tweak_book.check_links import CheckExternalLinks from calibre.gui2.tweak_book.spell import SpellCheck from calibre.gui2.tweak_book.search import SavedSearches from calibre.gui2.tweak_book.toc import TOCViewer @@ -250,6 +251,7 @@ class Main(MainWindow): self.saved_searches = SavedSearches(self) self.image_browser = InsertImage(self, for_browsing=True) self.reports = Reports(self) + 
self.check_external_links = CheckExternalLinks(self) self.insert_char = CharSelect(self) self.manage_fonts = ManageFonts(self) self.sr_debug_output = DebugOutput(self) @@ -382,6 +384,8 @@ class Main(MainWindow): self.action_add_cover = treg('default_cover.png', _('Add &cover'), self.boss.add_cover, 'add-cover', (), _('Add a cover to the book')) self.action_reports = treg( 'reports.png', _('&Reports'), self.boss.show_reports, 'show-reports', ('Ctrl+Shift+R',), _('Show a report on various aspects of the book')) + self.action_check_external_links = treg('insert-link.png', _('Check &external links'), self.boss.check_external_links, 'check-external-links', (), _( + 'Check external links in the book')) def ereg(icon, text, target, sid, keys, description): return reg(icon, text, partial(self.boss.editor_action, target), sid, keys, description) @@ -538,6 +542,7 @@ class Main(MainWindow): e.addAction(self.action_set_semantics) e.addAction(self.action_filter_css) e.addAction(self.action_spell_check_book) + e.addAction(self.action_check_external_links) e.addAction(self.action_check_book) e.addAction(self.action_reports)