mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Check all links in the book
This commit is contained in:
parent
3b7c0e9ac6
commit
b44426f170
61
src/calibre/ebooks/oeb/polish/check/links.py
Normal file
61
src/calibre/ebooks/oeb/polish/check/links.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
from collections import defaultdict
|
||||||
|
from urlparse import urlparse
|
||||||
|
|
||||||
|
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
|
||||||
|
from calibre.ebooks.oeb.polish.container import guess_type
|
||||||
|
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN
|
||||||
|
|
||||||
|
class BadLink(BaseError):
    '''Error reported for a link whose target resource does not exist.'''

    # Broken links are reported at warning level
    level = WARN
    HELP = _('The resource pointed to by this link does not exist. You should'
             ' either fix, or remove the link.')
|
||||||
|
|
||||||
|
class FileLink(BadLink):
    '''Error reported for a link that uses the file:// URL scheme.'''

    HELP = _(
        'This link uses the file:// URL scheme. This does not work with many ebook readers.'
        ' Remove the file:// prefix and make sure the link points to a file inside the book.'
    )
|
||||||
|
|
||||||
|
class LocalLink(BadLink):
    '''Error reported for a link that points to a file outside the book.'''

    HELP = _(
        'This link points to a file outside the book. It will not work if the'
        ' book is read on any computer other than the one it was created on.'
        ' Either fix or remove the link.'
    )
|
||||||
|
|
||||||
|
def check_links(container):
    '''
    Check the links found in the book's documents, stylesheets, OPF and NCX
    files and report the ones that will not work.

    :param container: The book container. It must provide ``mime_map``,
        ``iterlinks()``, ``href_to_name()`` and ``exists()``.
    :return: A list of :class:`BadLink`, :class:`FileLink` and
        :class:`LocalLink` errors, one per offending link. Empty if no
        problems were found.
    '''
    xml_types = {guess_type('a.opf'), guess_type('a.ncx')}
    errors = []

    def fl(x):
        # Format a link for display in an error message: use its repr(),
        # without the leading u that repr() puts on unicode strings.
        x = repr(x)
        if x.startswith('u'):
            x = x[1:]
        return x

    for name, mt in container.mime_map.iteritems():
        # Only files of these types are scanned for links
        if not (mt in OEB_DOCS or mt in OEB_STYLES or mt in xml_types):
            continue

        for href, lnum, col in container.iterlinks(name):
            tname = container.href_to_name(href, name)

            if tname is not None:
                # The link maps to a name inside the book, so the only thing
                # that can be wrong with it is that the target is missing.
                if not container.exists(tname):
                    errors.append(BadLink(_('The linked resource %s does not exist') % fl(href), name, lnum, col))
                continue

            # The link does not map to a name inside the book
            purl = urlparse(href)
            if purl.scheme == 'file':
                errors.append(FileLink(_('The link %s is a file:// URL') % fl(href), name, lnum, col))
            elif purl.path and purl.path.startswith('/') and purl.scheme == '':
                # Absolute path without a scheme. file:// URLs were already
                # handled by the branch above.
                errors.append(LocalLink(_('The link %s points to a file outside the book') % fl(href), name, lnum, col))

    return errors
|
@ -14,6 +14,7 @@ from calibre.ebooks.oeb.polish.cover import is_raster_image
|
|||||||
from calibre.ebooks.oeb.polish.check.base import run_checkers
|
from calibre.ebooks.oeb.polish.check.base import run_checkers
|
||||||
from calibre.ebooks.oeb.polish.check.parsing import check_xml_parsing
|
from calibre.ebooks.oeb.polish.check.parsing import check_xml_parsing
|
||||||
from calibre.ebooks.oeb.polish.check.images import check_raster_images
|
from calibre.ebooks.oeb.polish.check.images import check_raster_images
|
||||||
|
from calibre.ebooks.oeb.polish.check.links import check_links
|
||||||
|
|
||||||
XML_TYPES = frozenset(map(guess_type, ('a.xml', 'a.svg', 'a.opf', 'a.ncx')))
|
XML_TYPES = frozenset(map(guess_type, ('a.xml', 'a.svg', 'a.opf', 'a.ncx')))
|
||||||
|
|
||||||
@ -37,6 +38,8 @@ def run_checks(container):
|
|||||||
errors.extend(run_checkers(check_xml_parsing, html_items))
|
errors.extend(run_checkers(check_xml_parsing, html_items))
|
||||||
errors.extend(run_checkers(check_raster_images, raster_images))
|
errors.extend(run_checkers(check_raster_images, raster_images))
|
||||||
|
|
||||||
|
errors += check_links(container)
|
||||||
|
|
||||||
return errors
|
return errors
|
||||||
|
|
||||||
def fix_errors(container, errors):
|
def fix_errors(container, errors):
|
||||||
|
@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
import os, logging, sys, hashlib, uuid, re, shutil
|
import os, logging, sys, hashlib, uuid, re, shutil
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
from bisect import bisect
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
from future_builtins import zip
|
from future_builtins import zip
|
||||||
@ -293,8 +294,17 @@ class Container(object): # {{{
|
|||||||
if get_line_numbers:
|
if get_line_numbers:
|
||||||
with self.open(name) as f:
|
with self.open(name) as f:
|
||||||
raw = self.decode(f.read())
|
raw = self.decode(f.read())
|
||||||
|
new_lines = tuple(m.start() + 1 for m in re.finditer(r'\n', raw))
|
||||||
|
def position(pos):
    # Convert a character offset into the decoded text into a
    # (line number, column) pair. Line numbers start at 1, the column is the
    # offset from the start of the line.
    #
    # new_lines holds the offset of the first character after every newline,
    # so bisect() gives the number of line starts at or before pos.
    lnum = bisect(new_lines, pos)
    if lnum:
        offset = pos - new_lines[lnum - 1]
    else:
        # pos is on the first line. Indexing new_lines[lnum - 1] here would
        # silently wrap around to the start of the *last* line and yield a
        # bogus column.
        offset = pos
    return (lnum + 1, offset)
|
||||||
for link, offset in itercsslinks(raw):
|
for link, offset in itercsslinks(raw):
|
||||||
yield link, 0, offset
|
lnum, col = position(offset)
|
||||||
|
yield link, lnum, col
|
||||||
else:
|
else:
|
||||||
for link in getUrls(self.parsed(name)):
|
for link in getUrls(self.parsed(name)):
|
||||||
yield link
|
yield link
|
||||||
|
Loading…
x
Reference in New Issue
Block a user