mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit Book: Add a tool to check external links (links pointing to websites). Can be accessed via Tools->Check external links
This commit is contained in:
parent
de237a13e9
commit
9759944cc8
@ -681,6 +681,14 @@ Note that editing the styles does not actually make changes to the book
|
|||||||
contents, it only allows for quick experimentation. The ability to live edit
|
contents, it only allows for quick experimentation. The ability to live edit
|
||||||
inside the Inspector is under development.
|
inside the Inspector is under development.
|
||||||
|
|
||||||
|
Checking external links
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
You can use this tool to check all links in your book that point to external
|
||||||
|
websites. The tool will try to visit every externally linked website, and
|
||||||
|
if the visit fails, it will report all broken links in a convenient format for
|
||||||
|
you to fix.
|
||||||
|
|
||||||
Arrange files into folders by type
|
Arrange files into folders by type
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
@ -9,7 +9,11 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
import os
|
import os
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
|
from future_builtins import map
|
||||||
|
from threading import Thread
|
||||||
|
from Queue import Queue, Empty
|
||||||
|
|
||||||
|
from calibre import browser
|
||||||
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
|
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
|
||||||
from calibre.ebooks.oeb.polish.container import OEB_FONTS
|
from calibre.ebooks.oeb.polish.container import OEB_FONTS
|
||||||
from calibre.ebooks.oeb.polish.utils import guess_type, actual_case_for_name, corrected_case_for_name
|
from calibre.ebooks.oeb.polish.utils import guess_type, actual_case_for_name, corrected_case_for_name
|
||||||
@ -336,3 +340,45 @@ def check_links(container):
|
|||||||
a(Bookmarks(name))
|
a(Bookmarks(name))
|
||||||
|
|
||||||
return errors
|
return errors
|
||||||
|
|
||||||
|
def check_external_links(container, progress_callback=lambda num, total:None):
    '''
    Try to open every http/https link used in the book's HTML and CSS files
    and report the ones that fail.

    :param container: The book container. Its ``mime_map`` is iterated and
        ``iterlinks(name)`` is called for every document and stylesheet.
    :param progress_callback: Called as ``progress_callback(num_done, total)``.
        It is called with ``(0, 0)`` at the start, with ``(0, total)`` once
        the links have been collected and then once after every URL that has
        been checked. The per-URL calls are made from the worker threads.
    :return: A list of ``(locations, exception, url)`` tuples, one for every
        URL that could not be opened. ``locations`` is a list of
        ``(name, href, lnum, col)`` tuples, one for every place the URL is
        used. The order of the list depends on which worker finishes first.
        An empty list is returned when there is nothing to report.
    '''
    progress_callback(0, 0)
    # Links are grouped by URL without the fragment, so that a page linked to
    # with several different anchors is only fetched once.
    external_links = defaultdict(list)
    for name, mt in container.mime_map.iteritems():
        if mt in OEB_DOCS or mt in OEB_STYLES:
            for href, lnum, col in container.iterlinks(name):
                try:
                    purl = urlparse(href)
                except ValueError:
                    # urlparse() raises ValueError for some malformed hrefs
                    # (for example an unbalanced IPv6 bracket). Such a link
                    # cannot be fetched, and must not abort the whole check.
                    continue
                if purl.scheme in ('http', 'https'):
                    key = href.partition('#')[0]
                    external_links[key].append((name, href, lnum, col))
    if not external_links:
        return []
    items = Queue()
    ans = []
    for item in external_links.iteritems():
        items.put(item)
    total = len(external_links)
    progress_callback(0, total)
    # Used as a counter shared between the workers: one entry is appended for
    # every URL that has been processed.
    done = []

    def worker():
        # Every worker uses its own browser instance and pulls URLs off the
        # shared queue until the queue is empty.
        br = browser(honor_time=False, verify_ssl_certificates=False)
        while True:
            try:
                href, locations = items.get_nowait()
            except Empty:
                return
            try:
                br.open(href, timeout=10).close()
            except Exception as e:
                # Any failure to open the URL is reported as a broken link
                ans.append((locations, e, href))
            finally:
                done.append(None)
                progress_callback(len(done), total)

    workers = [Thread(name="CheckLinks", target=worker) for i in xrange(min(10, total))]
    for w in workers:
        w.daemon = True
        w.start()

    for w in workers:
        w.join()
    return ans
|
||||||
|
@ -1190,6 +1190,12 @@ class Boss(QObject):
|
|||||||
mt = current_container().mime_map.get(name, guess_type(name))
|
mt = current_container().mime_map.get(name, guess_type(name))
|
||||||
self.edit_file_requested(name, None, mt)
|
self.edit_file_requested(name, None, mt)
|
||||||
|
|
||||||
|
def check_external_links(self):
    '''
    Show the external link checker dialog. If no book is currently open an
    error dialog is shown instead and its result is returned.
    '''
    if current_container() is not None:
        self.gui.check_external_links.show()
        return
    return error_dialog(self.gui, _('No book open'), _(
        'You must first open a book in order to check links.'), show=True)
|
||||||
|
|
||||||
def sync_editor_to_preview(self, name, sourceline_address):
|
def sync_editor_to_preview(self, name, sourceline_address):
|
||||||
editor = self.edit_file(name, 'html')
|
editor = self.edit_file(name, 'html')
|
||||||
self.ignore_preview_to_editor_sync = True
|
self.ignore_preview_to_editor_sync = True
|
||||||
|
163
src/calibre/gui2/tweak_book/check_links.py
Normal file
163
src/calibre/gui2/tweak_book/check_links.py
Normal file
@ -0,0 +1,163 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
from collections import defaultdict
|
||||||
|
from threading import Thread
|
||||||
|
|
||||||
|
from PyQt5.Qt import (
|
||||||
|
QVBoxLayout, QTextBrowser, QProgressBar, Qt, QWidget, QStackedWidget,
|
||||||
|
QLabel, QSizePolicy, pyqtSignal, QIcon, QInputDialog
|
||||||
|
)
|
||||||
|
|
||||||
|
from calibre.gui2 import error_dialog
|
||||||
|
from calibre.gui2.tweak_book import current_container, set_current_container, editors
|
||||||
|
from calibre.gui2.tweak_book.boss import get_boss
|
||||||
|
from calibre.gui2.tweak_book.widgets import Dialog
|
||||||
|
|
||||||
|
def get_data(name):
    '''
    Return the current data for the file ``name``.

    If the file is open in an editor the editor's raw data is returned,
    otherwise the data is read from the current container. The original
    author notes that a unicode string is returned when ``name`` is a text
    document or stylesheet.
    '''
    if name not in editors:
        return current_container().raw_data(name)
    return editors[name].get_raw_data()
|
||||||
|
|
||||||
|
def set_data(name, val):
    '''
    Replace the contents of the file ``name`` with ``val`` and mark the book
    as modified.

    If the file is open in an editor, the editor's contents are replaced.
    Otherwise the data is written directly into the current container.

    :param name: The name of the file inside the book.
    :param val: The new contents. Text is accepted as well as bytes; text is
        encoded as UTF-8 before being written to the container.
    '''
    if name in editors:
        editors[name].replace_data(val, only_if_different=False)
    else:
        if isinstance(val, type(u'')):
            # The file is opened in binary mode, so text has to be encoded
            # explicitly instead of relying on an implicit conversion that
            # depends on the interpreter's default encoding.
            # NOTE(review): UTF-8 is assumed to be the encoding the container
            # expects for text files -- confirm against the container code.
            val = val.encode('utf-8')
        with current_container().open(name, 'wb') as f:
            f.write(val)
    get_boss().set_modified()
|
||||||
|
|
||||||
|
class CheckExternalLinks(Dialog):
    '''
    Dialog that checks the external links in the current book in a background
    thread, shows a progress bar while the check is running and then lists
    the broken links, with the ability to jump to or fix each one.
    '''

    # Emitted as (num_done, total) while the check is running and as
    # (None, None) once the check has finished (successfully or not).
    progress_made = pyqtSignal(object, object)

    def __init__(self, parent=None):
        Dialog.__init__(self, _('Check external links'), 'check-external-links-dialog', parent)
        # The signal is emitted from the checking threads, a queued connection
        # delivers it via the event loop instead of calling the slot directly.
        self.progress_made.connect(self.on_progress_made, type=Qt.QueuedConnection)

    def show(self):
        ''' Show the dialog, starting a new check unless one is already running. '''
        # The refresh button is disabled for the duration of a check
        if self.rb.isEnabled():
            self.refresh()
        return Dialog.show(self)

    def refresh(self):
        ''' Switch to the progress page and start checking in a daemon thread. '''
        self.stack.setCurrentIndex(0)
        self.rb.setEnabled(False)
        t = Thread(name='CheckLinksMaster', target=self.run)
        t.daemon = True
        t.start()

    def setup_ui(self):
        '''
        Build the two pages of the dialog: page 0 is the progress display and
        page 1 is the results browser.
        '''
        self.pb = pb = QProgressBar(self)
        pb.setTextVisible(True)
        pb.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
        pb.setRange(0, 0)
        self.w = w = QWidget(self)
        self.w.l = l = QVBoxLayout(w)
        l.addStretch()
        l.addWidget(pb)
        self.w.la = la = QLabel(_('Checking external links, please wait...'))
        la.setStyleSheet('QLabel { font-size: 20px; font-weight: bold }')
        l.addWidget(la, 0, Qt.AlignCenter)
        l.addStretch()

        self.l = l = QVBoxLayout(self)
        self.results = QTextBrowser(self)
        # Links in the results are actions handled by anchor_clicked(), the
        # browser must not try to navigate to them itself.
        self.results.setOpenLinks(False)
        self.results.anchorClicked.connect(self.anchor_clicked)
        self.stack = s = QStackedWidget(self)
        s.addWidget(w)
        s.addWidget(self.results)
        l.addWidget(s)
        # NOTE(review): self.bb is not created in this class, presumably it
        # is the button box provided by the Dialog base class -- confirm.
        l.addWidget(self.bb)
        self.bb.setStandardButtons(self.bb.Close)
        self.rb = b = self.bb.addButton(_('&Refresh'), self.bb.ActionRole)
        b.setIcon(QIcon(I('view-refresh.png')))
        b.clicked.connect(self.refresh)

    def sizeHint(self):
        ''' The default size hint, made 600 high and at least 800 wide. '''
        ans = Dialog.sizeHint(self)
        ans.setHeight(600)
        ans.setWidth(max(ans.width(), 800))
        return ans

    def run(self):
        '''
        Thread target: run the check, storing the result in ``self.errors``
        or, on failure, the traceback in ``self.tb``. Completion is always
        signalled by emitting ``progress_made(None, None)``.
        '''
        from calibre.ebooks.oeb.polish.check.links import check_external_links
        self.tb = None
        self.errors = []
        try:
            self.errors = check_external_links(current_container(), self.progress_made.emit)
        except Exception:
            import traceback
            self.tb = traceback.format_exc()
        self.progress_made.emit(None, None)

    def on_progress_made(self, curr, total):
        '''
        Slot for ``progress_made``. ``curr is None`` means the check has
        finished and the results (or the error) are shown, anything else
        updates the progress bar.
        '''
        if curr is None:
            self.results.setText('')
            self.stack.setCurrentIndex(1)
            # Indices into self.errors of the links fixed by the user
            self.fixed_errors = set()
            self.rb.setEnabled(True)
            if self.tb is not None:
                return error_dialog(self, _('Checking failed'), _(
                    'There was an error while checking links, click "Show Details" for more information'),
                             det_msg=self.tb, show=True)
            if not self.errors:
                self.results.setText(_('No broken links found'))
            else:
                self.populate_results()
        else:
            self.pb.setMaximum(total)
            self.pb.setValue(curr)

    def populate_results(self, preserve_pos=False):
        '''
        Render the broken links that have not been fixed yet as HTML into the
        results browser. Every entry gets an ``err:<index>`` link to fix the
        URL and one ``loc:<line>,<name>`` link per location to open the file.

        :param preserve_pos: Currently unused, it is accepted so that existing
            callers keep working.
        '''
        from xml.sax.saxutils import escape
        quote = {'"': '&quot;'}
        num = len(self.errors) - len(self.fixed_errors)
        text = ['<h3>%s</h3><ol>' % (_('Found %d broken links') % num)]
        for i, (locations, err, url) in enumerate(self.errors):
            if i in self.fixed_errors:
                continue
            # URLs, error messages and file names are arbitrary text, they
            # must be escaped, otherwise characters such as & and < in them
            # are interpreted as markup by the browser widget.
            text.append('<li><b>%s</b> \xa0<a href="err:%d">[%s]</a><br>%s<br><ul>' % (
                escape(url), i, _('Fix this link'), escape('%s' % err)))
            for name, href, lnum, col in locations:
                text.append('<li>{name} \xa0<a href="loc:{lnum},{target}">[{line}: {lnum}]</a></li>'.format(
                    name=escape(name), target=escape(name, quote), lnum=lnum, line=_('line number')))
            text.append('</ul></li><hr>')
        self.results.setHtml(''.join(text))

    def anchor_clicked(self, qurl):
        '''
        Handle a click on one of the links generated by populate_results().

        ``err:<index>`` asks the user for a corrected URL and replaces every
        recorded href for that error with it, in every file that uses it.
        ``loc:<line>,<name>`` opens the file in an editor and moves to the
        line, if the editor supports line numbers.
        '''
        url = qurl.toString()
        if url.startswith('err:'):
            errnum = int(url[4:])
            err = self.errors[errnum]
            newurl, ok = QInputDialog.getText(self, _('Fix URL'), _('Enter the corrected URL:') + '\xa0'*40, text=err[2])
            if not ok:
                return
            # Map every affected file to the set of distinct hrefs in it
            nmap = defaultdict(set)
            for name, href in {(l[0], l[1]) for l in err[0]}:
                nmap[name].add(href)

            for name, hrefs in nmap.iteritems():
                raw = oraw = get_data(name)
                for href in hrefs:
                    # Plain text replacement in the raw file data
                    raw = raw.replace(href, newurl)
                if raw != oraw:
                    set_data(name, raw)
            self.fixed_errors.add(errnum)
            self.populate_results()
        elif url.startswith('loc:'):
            # The name comes last as it may itself contain commas
            lnum, name = url[4:].partition(',')[::2]
            lnum = int(lnum or 1)
            editor = get_boss().edit_file(name)
            if lnum and editor is not None and editor.has_line_numbers:
                editor.current_line = lnum
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Stand-alone test harness: the last command line argument is the path to
    # the book whose external links should be checked.
    import sys
    from calibre.gui2 import Application
    from calibre.gui2.tweak_book.boss import get_container
    app = Application([])
    book_path = sys.argv[-1]
    set_current_container(get_container(book_path))
    d = CheckExternalLinks()
    # exec_() does not go through show(), so the check is started explicitly
    d.refresh()
    d.exec_()
    del app
|
@ -33,6 +33,7 @@ from calibre.gui2.tweak_book.preview import Preview
|
|||||||
from calibre.gui2.tweak_book.plugin import create_plugin_actions
|
from calibre.gui2.tweak_book.plugin import create_plugin_actions
|
||||||
from calibre.gui2.tweak_book.search import SearchPanel
|
from calibre.gui2.tweak_book.search import SearchPanel
|
||||||
from calibre.gui2.tweak_book.check import Check
|
from calibre.gui2.tweak_book.check import Check
|
||||||
|
from calibre.gui2.tweak_book.check_links import CheckExternalLinks
|
||||||
from calibre.gui2.tweak_book.spell import SpellCheck
|
from calibre.gui2.tweak_book.spell import SpellCheck
|
||||||
from calibre.gui2.tweak_book.search import SavedSearches
|
from calibre.gui2.tweak_book.search import SavedSearches
|
||||||
from calibre.gui2.tweak_book.toc import TOCViewer
|
from calibre.gui2.tweak_book.toc import TOCViewer
|
||||||
@ -250,6 +251,7 @@ class Main(MainWindow):
|
|||||||
self.saved_searches = SavedSearches(self)
|
self.saved_searches = SavedSearches(self)
|
||||||
self.image_browser = InsertImage(self, for_browsing=True)
|
self.image_browser = InsertImage(self, for_browsing=True)
|
||||||
self.reports = Reports(self)
|
self.reports = Reports(self)
|
||||||
|
self.check_external_links = CheckExternalLinks(self)
|
||||||
self.insert_char = CharSelect(self)
|
self.insert_char = CharSelect(self)
|
||||||
self.manage_fonts = ManageFonts(self)
|
self.manage_fonts = ManageFonts(self)
|
||||||
self.sr_debug_output = DebugOutput(self)
|
self.sr_debug_output = DebugOutput(self)
|
||||||
@ -382,6 +384,8 @@ class Main(MainWindow):
|
|||||||
self.action_add_cover = treg('default_cover.png', _('Add &cover'), self.boss.add_cover, 'add-cover', (), _('Add a cover to the book'))
|
self.action_add_cover = treg('default_cover.png', _('Add &cover'), self.boss.add_cover, 'add-cover', (), _('Add a cover to the book'))
|
||||||
self.action_reports = treg(
|
self.action_reports = treg(
|
||||||
'reports.png', _('&Reports'), self.boss.show_reports, 'show-reports', ('Ctrl+Shift+R',), _('Show a report on various aspects of the book'))
|
'reports.png', _('&Reports'), self.boss.show_reports, 'show-reports', ('Ctrl+Shift+R',), _('Show a report on various aspects of the book'))
|
||||||
|
self.action_check_external_links = treg('insert-link.png', _('Check &external links'), self.boss.check_external_links, 'check-external-links', (), _(
|
||||||
|
'Check external links in the book'))
|
||||||
|
|
||||||
def ereg(icon, text, target, sid, keys, description):
|
def ereg(icon, text, target, sid, keys, description):
|
||||||
return reg(icon, text, partial(self.boss.editor_action, target), sid, keys, description)
|
return reg(icon, text, partial(self.boss.editor_action, target), sid, keys, description)
|
||||||
@ -538,6 +542,7 @@ class Main(MainWindow):
|
|||||||
e.addAction(self.action_set_semantics)
|
e.addAction(self.action_set_semantics)
|
||||||
e.addAction(self.action_filter_css)
|
e.addAction(self.action_filter_css)
|
||||||
e.addAction(self.action_spell_check_book)
|
e.addAction(self.action_spell_check_book)
|
||||||
|
e.addAction(self.action_check_external_links)
|
||||||
e.addAction(self.action_check_book)
|
e.addAction(self.action_check_book)
|
||||||
e.addAction(self.action_reports)
|
e.addAction(self.action_reports)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user