mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit Book: Add a tool to check external links (links pointing to websites). Can be accessed via Tools->Check external links
This commit is contained in:
parent
de237a13e9
commit
9759944cc8
@ -681,6 +681,14 @@ Note that editing the styles does not actually make changes to the book
|
||||
contents, it only allows for quick experimentation. The ability to live edit
|
||||
inside the Inspector is under development.
|
||||
|
||||
Checking external links
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
You can use this tool to check all links in your book that point to external
|
||||
websites. The tool will try to visit every externally linked website, and
|
||||
if the visit fails, it will report all broken links in a convenient format for
|
||||
you to fix.
|
||||
|
||||
Arrange files into folders by type
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
@ -9,7 +9,11 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
import os
|
||||
from collections import defaultdict
|
||||
from urlparse import urlparse
|
||||
from future_builtins import map
|
||||
from threading import Thread
|
||||
from Queue import Queue, Empty
|
||||
|
||||
from calibre import browser
|
||||
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
|
||||
from calibre.ebooks.oeb.polish.container import OEB_FONTS
|
||||
from calibre.ebooks.oeb.polish.utils import guess_type, actual_case_for_name, corrected_case_for_name
|
||||
@ -336,3 +340,45 @@ def check_links(container):
|
||||
a(Bookmarks(name))
|
||||
|
||||
return errors
|
||||
|
||||
def check_external_links(container, progress_callback=lambda num, total:None):
    '''
    Try to open every http/https link found in the HTML and CSS files of
    the book and report the ones that could not be opened.

    :param container: The book container. Its ``mime_map`` and
        ``iterlinks()`` are used to find the links.
    :param progress_callback: Called as ``progress_callback(done, total)``.
        It is called once with ``(0, 0)`` before scanning, once with
        ``(0, total)`` before checking and then after every checked URL.
        Note that the per-URL calls are made from the worker threads.
    :return: A list of ``(locations, exception, url)`` tuples, one per URL
        that failed to open. ``locations`` is a list of
        ``(name, href, line_number, column)`` tuples. The URL is the href
        with any fragment removed.
    '''
    progress_callback(0, 0)

    # Group all occurrences of a link by its URL minus the fragment, so that
    # every distinct URL is fetched only once.
    external_links = defaultdict(list)
    for name, mt in container.mime_map.iteritems():
        if mt not in OEB_DOCS and mt not in OEB_STYLES:
            continue
        for href, lnum, col in container.iterlinks(name):
            if urlparse(href).scheme not in ('http', 'https'):
                continue
            url = href.partition('#')[0]
            external_links[url].append((name, href, lnum, col))
    if not external_links:
        return []

    total = len(external_links)
    pending = Queue()
    for item in external_links.iteritems():
        pending.put(item)
    progress_callback(0, total)
    failures, finished = [], []

    def worker():
        # Every worker uses its own browser instance and drains the shared
        # queue until it is empty.
        br = browser(honor_time=False, verify_ssl_certificates=False)
        while True:
            try:
                url, locations = pending.get_nowait()
            except Empty:
                break
            try:
                br.open(url, timeout=10).close()
            except Exception as err:
                failures.append((locations, err, url))
            finally:
                finished.append(None)
                progress_callback(len(finished), total)

    # Use at most 10 threads, and never more threads than there are URLs
    threads = []
    for i in xrange(min(10, total)):
        t = Thread(name="CheckLinks", target=worker)
        t.daemon = True
        t.start()
        threads.append(t)

    for t in threads:
        t.join()
    return failures
|
||||
|
@ -1190,6 +1190,12 @@ class Boss(QObject):
|
||||
mt = current_container().mime_map.get(name, guess_type(name))
|
||||
self.edit_file_requested(name, None, mt)
|
||||
|
||||
def check_external_links(self):
    'Show the external links checker, or an error dialog if no book is open'
    if current_container() is not None:
        self.gui.check_external_links.show()
        return
    return error_dialog(self.gui, _('No book open'), _(
        'You must first open a book in order to check links.'), show=True)
|
||||
|
||||
def sync_editor_to_preview(self, name, sourceline_address):
|
||||
editor = self.edit_file(name, 'html')
|
||||
self.ignore_preview_to_editor_sync = True
|
||||
|
163
src/calibre/gui2/tweak_book/check_links.py
Normal file
163
src/calibre/gui2/tweak_book/check_links.py
Normal file
@ -0,0 +1,163 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
from collections import defaultdict
|
||||
from threading import Thread
|
||||
|
||||
from PyQt5.Qt import (
|
||||
QVBoxLayout, QTextBrowser, QProgressBar, Qt, QWidget, QStackedWidget,
|
||||
QLabel, QSizePolicy, pyqtSignal, QIcon, QInputDialog
|
||||
)
|
||||
|
||||
from calibre.gui2 import error_dialog
|
||||
from calibre.gui2.tweak_book import current_container, set_current_container, editors
|
||||
from calibre.gui2.tweak_book.boss import get_boss
|
||||
from calibre.gui2.tweak_book.widgets import Dialog
|
||||
|
||||
def get_data(name):
    '''
    Return the current data for the book file ``name``.

    When the file is open in an editor the editor's contents are used,
    otherwise the data is read from the current container. Returns a unicode
    string if name is a text document/stylesheet.
    '''
    if name not in editors:
        return current_container().raw_data(name)
    return editors[name].get_raw_data()
|
||||
|
||||
def set_data(name, val):
    '''
    Replace the contents of the book file ``name`` with ``val`` and mark the
    book as modified.

    When the file is open in an editor, the editor's contents are replaced.
    Otherwise ``val`` is written directly to the file in the current
    container.

    :param name: The name of the file in the book
    :param val: The new contents, either text or bytes. Text is encoded as
        UTF-8 before being written to the container.
    '''
    if name in editors:
        editors[name].replace_data(val, only_if_different=False)
    else:
        if not isinstance(val, bytes):
            # The file is opened in binary mode. Writing text to it relies on
            # an implicit ASCII encode, which fails for any non-ASCII
            # character, so encode explicitly.
            val = val.encode('utf-8')
        with current_container().open(name, 'wb') as f:
            f.write(val)
    get_boss().set_modified()
|
||||
|
||||
class CheckExternalLinks(Dialog):
    '''
    Dialog that checks the external links of the current book in a
    background thread and lists the broken ones, with links to jump to each
    location in the editor and to replace a broken URL throughout the book.
    '''

    # Emitted as (done, total) while the check is running and as (None, None)
    # once it has finished. It is emitted from non-GUI threads, see run().
    progress_made = pyqtSignal(object, object)

    def __init__(self, parent=None):
        Dialog.__init__(self, _('Check external links'), 'check-external-links-dialog', parent)
        # The signal is emitted from background threads, so use a queued
        # connection for delivering it to on_progress_made()
        self.progress_made.connect(self.on_progress_made, type=Qt.QueuedConnection)

    def show(self):
        'Show the dialog, starting a new check unless one is already running'
        # The refresh button is disabled for the duration of a check
        if self.rb.isEnabled():
            self.refresh()
        return Dialog.show(self)

    def refresh(self):
        'Switch to the progress page and start checking in a background thread'
        self.stack.setCurrentIndex(0)
        self.rb.setEnabled(False)
        t = Thread(name='CheckLinksMaster', target=self.run)
        t.daemon = True
        t.start()

    def setup_ui(self):
        '''
        Create the widgets: a stack whose first page shows a progress bar
        and whose second page shows the results, plus the button box with
        Close and Refresh buttons.
        '''
        # NOTE(review): self.bb is not created here, presumably the Dialog
        # base class creates it before calling setup_ui() -- confirm
        self.pb = pb = QProgressBar(self)
        pb.setTextVisible(True)
        pb.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
        pb.setRange(0, 0)
        self.w = w = QWidget(self)
        self.w.l = l = QVBoxLayout(w)
        l.addStretch()
        l.addWidget(pb)
        self.w.la = la = QLabel(_('Checking external links, please wait...'))
        la.setStyleSheet('QLabel { font-size: 20px; font-weight: bold }')
        l.addWidget(la, 0, Qt.AlignCenter)
        l.addStretch()

        self.l = l = QVBoxLayout(self)
        self.results = QTextBrowser(self)
        # Clicks on links are handled by anchor_clicked() instead of being
        # followed by the browser widget
        self.results.setOpenLinks(False)
        self.results.anchorClicked.connect(self.anchor_clicked)
        self.stack = s = QStackedWidget(self)
        s.addWidget(w)
        s.addWidget(self.results)
        l.addWidget(s)
        l.addWidget(self.bb)
        self.bb.setStandardButtons(self.bb.Close)
        self.rb = b = self.bb.addButton(_('&Refresh'), self.bb.ActionRole)
        b.setIcon(QIcon(I('view-refresh.png')))
        b.clicked.connect(self.refresh)

    def sizeHint(self):
        'The default size hint, made 600 high and at least 800 wide'
        ans = Dialog.sizeHint(self)
        ans.setHeight(600)
        ans.setWidth(max(ans.width(), 800))
        return ans

    def run(self):
        '''
        Perform the check. This is the target of the thread started by
        refresh(). The outcome is stored in self.errors, or in self.tb as a
        formatted traceback if the check itself failed, and completion is
        signalled by emitting progress_made with (None, None).
        '''
        from calibre.ebooks.oeb.polish.check.links import check_external_links
        self.tb = None
        self.errors = []
        try:
            self.errors = check_external_links(current_container(), self.progress_made.emit)
        except Exception:
            import traceback
            self.tb = traceback.format_exc()
        self.progress_made.emit(None, None)

    def on_progress_made(self, curr, total):
        '''
        Update the progress bar, or, when curr is None (the check has
        finished), switch to the results page and show the outcome.
        '''
        if curr is None:
            self.results.setText('')
            self.stack.setCurrentIndex(1)
            self.fixed_errors = set()
            self.rb.setEnabled(True)
            if self.tb is not None:
                return error_dialog(self, _('Checking failed'), _(
                    'There was an error while checking links, click "Show Details" for more information'),
                    det_msg=self.tb, show=True)
            if not self.errors:
                self.results.setText(_('No broken links found'))
            else:
                self.populate_results()
        else:
            self.pb.setMaximum(total)
            self.pb.setValue(curr)

    def populate_results(self, preserve_pos=False):
        '''
        Render the errors that have not been fixed yet as HTML in the results
        browser. Every broken URL gets an ``err:<index>`` link for fixing it
        and every location a ``loc:<line>,<name>`` link for jumping to it,
        both of which are handled in anchor_clicked().

        :param preserve_pos: Currently unused
        '''
        from xml.sax.saxutils import escape

        def esc(val):
            # URLs, file names and especially error messages (for example
            # "<urlopen error ...>") can contain characters that are special
            # in HTML. Unescaped, they would be parsed as markup and vanish
            # from, or garble, the report. Quotes are escaped as well since
            # the file name is also used inside an attribute value.
            return escape(val, {'"': '&quot;'})

        text = '<h3>%s</h3><ol>' % (_('Found %d broken links') % (len(self.errors) - len(self.fixed_errors)))
        for i, (locations, err, url) in enumerate(self.errors):
            if i in self.fixed_errors:
                continue
            text += '<li><b>%s</b> \xa0<a href="err:%d">[%s]</a><br>%s<br><ul>' % (
                esc(url), i, _('Fix this link'), esc('%s' % err))
            for name, href, lnum, col in locations:
                text += '<li>{name} \xa0<a href="loc:{lnum},{name}">[{line}: {lnum}]</a></li>'.format(
                    name=esc(name), lnum=lnum, line=_('line number'))
            text += '</ul></li><hr>'
        self.results.setHtml(text)

    def anchor_clicked(self, qurl):
        '''
        Handle a click on one of the links created by populate_results().

        ``err:<index>`` asks the user for a corrected URL and replaces every
        href recorded for that error with it, in all files that contain it.
        ``loc:<line>,<name>`` opens the file in the editor and moves to the
        line, if the editor has line numbers.
        '''
        url = qurl.toString()
        if url.startswith('err:'):
            errnum = int(url[4:])
            err = self.errors[errnum]
            newurl, ok = QInputDialog.getText(self, _('Fix URL'), _('Enter the corrected URL:') + '\xa0'*40, text=err[2])
            if not ok:
                return
            # Map every file name to the set of distinct hrefs (which can
            # differ in their fragments) that have to be replaced in it
            nmap = defaultdict(set)
            for name, href in {(l[0], l[1]) for l in err[0]}:
                nmap[name].add(href)

            for name, hrefs in nmap.iteritems():
                raw = oraw = get_data(name)
                for href in hrefs:
                    raw = raw.replace(href, newurl)
                if raw != oraw:
                    set_data(name, raw)
            self.fixed_errors.add(errnum)
            self.populate_results()
        elif url.startswith('loc:'):
            # Only split on the first comma, as the file name can itself
            # contain commas
            lnum, name = url[4:].partition(',')[::2]
            lnum = int(lnum or 1)
            editor = get_boss().edit_file(name)
            if lnum and editor is not None and editor.has_line_numbers:
                editor.current_line = lnum
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual test harness: show the dialog for the book whose path is the
    # last command line argument
    import sys
    from calibre.gui2 import Application
    from calibre.gui2.tweak_book.boss import get_container
    app = Application([])
    book_path = sys.argv[-1]
    set_current_container(get_container(book_path))
    dialog = CheckExternalLinks()
    # Start the check explicitly, since the dialog is run via exec_() rather
    # than show()
    dialog.refresh()
    dialog.exec_()
    del app
|
@ -33,6 +33,7 @@ from calibre.gui2.tweak_book.preview import Preview
|
||||
from calibre.gui2.tweak_book.plugin import create_plugin_actions
|
||||
from calibre.gui2.tweak_book.search import SearchPanel
|
||||
from calibre.gui2.tweak_book.check import Check
|
||||
from calibre.gui2.tweak_book.check_links import CheckExternalLinks
|
||||
from calibre.gui2.tweak_book.spell import SpellCheck
|
||||
from calibre.gui2.tweak_book.search import SavedSearches
|
||||
from calibre.gui2.tweak_book.toc import TOCViewer
|
||||
@ -250,6 +251,7 @@ class Main(MainWindow):
|
||||
self.saved_searches = SavedSearches(self)
|
||||
self.image_browser = InsertImage(self, for_browsing=True)
|
||||
self.reports = Reports(self)
|
||||
self.check_external_links = CheckExternalLinks(self)
|
||||
self.insert_char = CharSelect(self)
|
||||
self.manage_fonts = ManageFonts(self)
|
||||
self.sr_debug_output = DebugOutput(self)
|
||||
@ -382,6 +384,8 @@ class Main(MainWindow):
|
||||
self.action_add_cover = treg('default_cover.png', _('Add &cover'), self.boss.add_cover, 'add-cover', (), _('Add a cover to the book'))
|
||||
self.action_reports = treg(
|
||||
'reports.png', _('&Reports'), self.boss.show_reports, 'show-reports', ('Ctrl+Shift+R',), _('Show a report on various aspects of the book'))
|
||||
self.action_check_external_links = treg('insert-link.png', _('Check &external links'), self.boss.check_external_links, 'check-external-links', (), _(
|
||||
'Check external links in the book'))
|
||||
|
||||
def ereg(icon, text, target, sid, keys, description):
|
||||
return reg(icon, text, partial(self.boss.editor_action, target), sid, keys, description)
|
||||
@ -538,6 +542,7 @@ class Main(MainWindow):
|
||||
e.addAction(self.action_set_semantics)
|
||||
e.addAction(self.action_filter_css)
|
||||
e.addAction(self.action_spell_check_book)
|
||||
e.addAction(self.action_check_external_links)
|
||||
e.addAction(self.action_check_book)
|
||||
e.addAction(self.action_reports)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user