Edit Book: Add a tool to check external links (links pointing to websites). Can be accessed via Tools->Check external links

This commit is contained in:
Kovid Goyal 2015-09-09 10:45:22 +05:30
parent de237a13e9
commit 9759944cc8
5 changed files with 228 additions and 0 deletions

View File

@ -681,6 +681,14 @@ Note that editing the styles does not actually make changes to the book
contents, it only allows for quick experimentation. The ability to live edit
inside the Inspector is under development.
Checking external links
^^^^^^^^^^^^^^^^^^^^^^^^^^^
You can use this tool to check all links in your book that point to external
websites. The tool will try to visit every externally linked website and
will then list every link that could not be reached, in a convenient format
that lets you jump to the link or fix it.
Arrange files into folders by type
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -9,7 +9,11 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os
from collections import defaultdict
from urlparse import urlparse
from future_builtins import map
from threading import Thread
from Queue import Queue, Empty
from calibre import browser
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
from calibre.ebooks.oeb.polish.container import OEB_FONTS
from calibre.ebooks.oeb.polish.utils import guess_type, actual_case_for_name, corrected_case_for_name
@ -336,3 +340,45 @@ def check_links(container):
a(Bookmarks(name))
return errors
def check_external_links(container, progress_callback=lambda num, total:None):
    '''
    Try to open every external (http/https) link found in the HTML and CSS
    files of the container and report the ones that fail.

    :param container: The book container. Its ``mime_map`` and ``iterlinks()``
        are used to find the links.
    :param progress_callback: Called as ``progress_callback(num_done, total)``.
        It is called with ``(0, 0)`` at the start, with ``(0, total)`` once
        the links have been collected and then once per checked URL. The
        per-URL calls are made from the worker threads.
    :return: A list of ``(locations, error, url)`` tuples, one per URL that
        could not be opened. ``locations`` is a list of
        ``(name, href, lnum, col)`` tuples as yielded by ``iterlinks()``,
        ``error`` is the exception that was raised and ``url`` is the link
        with its fragment removed (malformed links are reported unchanged).
        The order of the list is not defined.
    '''
    progress_callback(0, 0)
    external_links = defaultdict(list)
    malformed = {}  # href -> (locations, error)
    for name, mt in container.mime_map.iteritems():
        if mt not in OEB_DOCS and mt not in OEB_STYLES:
            continue
        for href, lnum, col in container.iterlinks(name):
            try:
                scheme = urlparse(href).scheme
            except ValueError as err:
                # urlparse() rejects some malformed URLs outright (for
                # example unbalanced [] in the host). One bad link must not
                # abort the whole check, so report it as broken instead.
                if href.lower().startswith(('http:', 'https:')):
                    malformed.setdefault(href, ([], err))[0].append((name, href, lnum, col))
                continue
            if scheme in ('http', 'https'):
                # Links that differ only in their fragment point to the same
                # resource, so they are fetched only once
                key = href.partition('#')[0]
                external_links[key].append((name, href, lnum, col))
    ans = [(locations, err, href) for href, (locations, err) in malformed.items()]
    if not external_links:
        return ans
    items = Queue()
    for item in external_links.iteritems():
        items.put(item)
    progress_callback(0, len(external_links))
    done = []

    def check_links():
        # Worker: fetch URLs from the queue until it is empty
        br = browser(honor_time=False, verify_ssl_certificates=False)
        while True:
            try:
                href, locations = items.get_nowait()
            except Empty:
                return
            try:
                br.open(href, timeout=10).close()
            except Exception as e:
                # Any failure at all means the link is considered broken
                ans.append((locations, e, href))
            finally:
                done.append(None)
                progress_callback(len(done), len(external_links))

    workers = [Thread(name="CheckLinks", target=check_links) for i in xrange(min(10, len(external_links)))]
    for w in workers:
        w.daemon = True
        w.start()
    for w in workers:
        w.join()
    return ans

View File

@ -1190,6 +1190,12 @@ class Boss(QObject):
mt = current_container().mime_map.get(name, guess_type(name))
self.edit_file_requested(name, None, mt)
def check_external_links(self):
    ' Show the external link checker dialog. Needs a book to be open. '
    if current_container() is not None:
        self.gui.check_external_links.show()
        return
    # Nothing to check without a book, tell the user instead
    return error_dialog(self.gui, _('No book open'), _(
        'You must first open a book in order to check links.'), show=True)
def sync_editor_to_preview(self, name, sourceline_address):
editor = self.edit_file(name, 'html')
self.ignore_preview_to_editor_sync = True

View File

@ -0,0 +1,163 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import (unicode_literals, division, absolute_import,
print_function)
from collections import defaultdict
from threading import Thread
from PyQt5.Qt import (
QVBoxLayout, QTextBrowser, QProgressBar, Qt, QWidget, QStackedWidget,
QLabel, QSizePolicy, pyqtSignal, QIcon, QInputDialog
)
from calibre.gui2 import error_dialog
from calibre.gui2.tweak_book import current_container, set_current_container, editors
from calibre.gui2.tweak_book.boss import get_boss
from calibre.gui2.tweak_book.widgets import Dialog
def get_data(name):
    'Get the data for name. Returns a unicode string if name is a text document/stylesheet'
    if name not in editors:
        # No editor has this file, so read it from the container
        return current_container().raw_data(name)
    # Use the editor's copy of the data rather than the container's
    return editors[name].get_raw_data()
def set_data(name, val):
    '''
    Replace the data of the file ``name`` with ``val`` and mark the book as
    modified. If ``name`` has an entry in ``editors`` the data is replaced in
    that editor, otherwise it is written to the file in the current container.

    :param name: The name of the file in the book
    :param val: The new data, either a unicode string or a bytestring
    '''
    if name in editors:
        editors[name].replace_data(val, only_if_different=False)
    else:
        if not isinstance(val, bytes):
            # The file is opened in binary mode. Writing unicode text to it
            # would be implicitly encoded as ASCII on Python 2 and fail for
            # any non-ASCII character, so encode explicitly.
            # NOTE(review): assumes text files in the book are stored as UTF-8
            val = val.encode('utf-8')
        with current_container().open(name, 'wb') as f:
            f.write(val)
    get_boss().set_modified()
class CheckExternalLinks(Dialog):
    '''
    Dialog that checks the external links of the current book in a background
    thread, shows a progress bar while the check is running and then lists
    the links that could not be opened. Every listed URL has a link to
    replace it with a corrected URL and links to open the places where it is
    used.
    '''

    # Emitted with (num_done, total) while the check is running and with
    # (None, None) once it has finished
    progress_made = pyqtSignal(object, object)

    def __init__(self, parent=None):
        Dialog.__init__(self, _('Check external links'), 'check-external-links-dialog', parent)
        # The signal is emitted from background threads (see run()), so it is
        # delivered through a queued connection
        self.progress_made.connect(self.on_progress_made, type=Qt.QueuedConnection)

    def show(self):
        ' Show the dialog, starting a new check unless one is already running '
        # The refresh button is disabled for as long as a check is running
        if self.rb.isEnabled():
            self.refresh()
        return Dialog.show(self)

    def refresh(self):
        ' Switch to the progress page and start checking in a background thread '
        self.stack.setCurrentIndex(0)
        self.rb.setEnabled(False)
        t = Thread(name='CheckLinksMaster', target=self.run)
        t.daemon = True
        t.start()

    def setup_ui(self):
        ' Create the widgets. self.bb is not created here, presumably Dialog provides it. '
        # Page 0 of the stack: progress bar and a "please wait" label
        self.pb = pb = QProgressBar(self)
        pb.setTextVisible(True)
        pb.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
        pb.setRange(0, 0)
        self.w = w = QWidget(self)
        self.w.l = l = QVBoxLayout(w)
        l.addStretch(), l.addWidget(pb)
        self.w.la = la = QLabel(_('Checking external links, please wait...'))
        la.setStyleSheet('QLabel { font-size: 20px; font-weight: bold }')
        l.addWidget(la, 0, Qt.AlignCenter), l.addStretch()

        self.l = l = QVBoxLayout(self)
        # Page 1 of the stack: the results. Clicks on links are handled by
        # anchor_clicked() instead of being followed by the browser.
        self.results = QTextBrowser(self)
        self.results.setOpenLinks(False)
        self.results.anchorClicked.connect(self.anchor_clicked)
        self.stack = s = QStackedWidget(self)
        s.addWidget(w), s.addWidget(self.results)
        l.addWidget(s)
        l.addWidget(self.bb)
        self.bb.setStandardButtons(self.bb.Close)
        self.rb = b = self.bb.addButton(_('&Refresh'), self.bb.ActionRole)
        b.setIcon(QIcon(I('view-refresh.png')))
        b.clicked.connect(self.refresh)

    def sizeHint(self):
        ans = Dialog.sizeHint(self)
        ans.setHeight(600)
        ans.setWidth(max(ans.width(), 800))
        return ans

    def run(self):
        '''
        Thread target: check the links and store the outcome in self.errors,
        or the traceback in self.tb if the check itself failed. Completion is
        signalled by emitting progress_made with (None, None).
        '''
        from calibre.ebooks.oeb.polish.check.links import check_external_links
        self.tb = None
        self.errors = []
        try:
            self.errors = check_external_links(current_container(), self.progress_made.emit)
        except Exception:
            import traceback
            self.tb = traceback.format_exc()
        self.progress_made.emit(None, None)

    def on_progress_made(self, curr, total):
        ' Update the progress bar, or show the results when curr is None '
        if curr is None:
            self.results.setText('')
            self.stack.setCurrentIndex(1)
            self.fixed_errors = set()
            self.rb.setEnabled(True)
            if self.tb is not None:
                return error_dialog(self, _('Checking failed'), _(
                    'There was an error while checking links, click "Show Details" for more information'),
                    det_msg=self.tb, show=True)
            if not self.errors:
                self.results.setText(_('No broken links found'))
            else:
                self.populate_results()
        else:
            self.pb.setMaximum(total), self.pb.setValue(curr)

    def populate_results(self, preserve_pos=False):
        '''
        Render the errors that have not been fixed yet as HTML in the results
        browser. ``preserve_pos`` is accepted but currently not used.
        '''
        from xml.sax.saxutils import escape

        def esc(val):
            # URLs, file names and error messages are arbitrary text. Error
            # messages in particular can look like <urlopen error ...>,
            # which would be parsed as a tag if it were not escaped.
            return escape('%s' % (val,), {'"': '&quot;'})

        parts = ['<h3>%s</h3><ol>' % (_('Found %d broken links') % (len(self.errors) - len(self.fixed_errors)))]
        for i, (locations, err, url) in enumerate(self.errors):
            if i in self.fixed_errors:
                continue
            parts.append('<li><b>%s</b> \xa0<a href="err:%d">[%s]</a><br>%s<br><ul>' % (
                esc(url), i, _('Fix this link'), esc(err)))
            for name, href, lnum, col in locations:
                parts.append('<li>{name} \xa0<a href="loc:{lnum},{name}">[{line}: {lnum}]</a></li>'.format(
                    name=esc(name), lnum=lnum, line=_('line number')))
            parts.append('</ul></li><hr>')
        self.results.setHtml(''.join(parts))

    def anchor_clicked(self, qurl):
        '''
        Handle a click on a link in the results. ``err:<index>`` links ask
        for a corrected URL and replace the broken one in every file that
        uses it, ``loc:<line>,<name>`` links open the file in an editor.
        '''
        url = qurl.toString()
        if url.startswith('err:'):
            errnum = int(url[4:])
            err = self.errors[errnum]
            newurl, ok = QInputDialog.getText(self, _('Fix URL'), _('Enter the corrected URL:') + '\xa0'*40, text=err[2])
            if not ok:
                return
            # Map every file to the set of hrefs that have to be replaced in it
            nmap = defaultdict(set)
            for name, href in {(l[0], l[1]) for l in err[0]}:
                nmap[name].add(href)

            for name, hrefs in nmap.iteritems():
                raw = oraw = get_data(name)
                for href in hrefs:
                    raw = raw.replace(href, newurl)
                if raw != oraw:
                    set_data(name, raw)
            self.fixed_errors.add(errnum)
            self.populate_results()
        elif url.startswith('loc:'):
            lnum, name = url[4:].partition(',')[::2]
            lnum = int(lnum or 1)
            editor = get_boss().edit_file(name)
            if lnum and editor is not None and editor.has_line_numbers:
                editor.current_line = lnum
if __name__ == '__main__':
    # Stand-alone test harness: run the link checker dialog on the book
    # whose path is the last command line argument
    import sys
    from calibre.gui2 import Application
    from calibre.gui2.tweak_book.boss import get_container
    app = Application([])
    book_path = sys.argv[-1]
    set_current_container(get_container(book_path))
    dialog = CheckExternalLinks()
    dialog.refresh()
    dialog.exec_()
    del app

View File

@ -33,6 +33,7 @@ from calibre.gui2.tweak_book.preview import Preview
from calibre.gui2.tweak_book.plugin import create_plugin_actions
from calibre.gui2.tweak_book.search import SearchPanel
from calibre.gui2.tweak_book.check import Check
from calibre.gui2.tweak_book.check_links import CheckExternalLinks
from calibre.gui2.tweak_book.spell import SpellCheck
from calibre.gui2.tweak_book.search import SavedSearches
from calibre.gui2.tweak_book.toc import TOCViewer
@ -250,6 +251,7 @@ class Main(MainWindow):
self.saved_searches = SavedSearches(self)
self.image_browser = InsertImage(self, for_browsing=True)
self.reports = Reports(self)
self.check_external_links = CheckExternalLinks(self)
self.insert_char = CharSelect(self)
self.manage_fonts = ManageFonts(self)
self.sr_debug_output = DebugOutput(self)
@ -382,6 +384,8 @@ class Main(MainWindow):
self.action_add_cover = treg('default_cover.png', _('Add &cover'), self.boss.add_cover, 'add-cover', (), _('Add a cover to the book'))
self.action_reports = treg(
'reports.png', _('&Reports'), self.boss.show_reports, 'show-reports', ('Ctrl+Shift+R',), _('Show a report on various aspects of the book'))
self.action_check_external_links = treg('insert-link.png', _('Check &external links'), self.boss.check_external_links, 'check-external-links', (), _(
'Check external links in the book'))
def ereg(icon, text, target, sid, keys, description):
return reg(icon, text, partial(self.boss.editor_action, target), sid, keys, description)
@ -538,6 +542,7 @@ class Main(MainWindow):
e.addAction(self.action_set_semantics)
e.addAction(self.action_filter_css)
e.addAction(self.action_spell_check_book)
e.addAction(self.action_check_external_links)
e.addAction(self.action_check_book)
e.addAction(self.action_reports)