# Provenance: added to src/calibre/ebooks/oeb/polish/check/links.py by commit
# b4694392708415e9b2aeb9979ffbaa0a61aca4db (Kovid Goyal, Thu, 13 Feb 2014),
# "Edit Book: Add a check for links that do not point to HTML documents".
class BadDestinationType(BaseError):
    """Warning for a link whose destination file is not a text (HTML) document.

    The error is reported at WARN level. Besides whatever BaseError records,
    each instance carries an instance-specific ``HELP`` text and the offending
    href in ``bad_href``.
    """

    level = WARN

    def __init__(self, link_source, link_dest, link_elem):
        """Build the warning for one offending link.

        :param link_source: forwarded to ``BaseError.__init__`` as its second
            positional argument (presumably the name of the file that
            contains the link -- confirm against BaseError).
        :param link_dest: the file the link points to; interpolated into the
            help text as-is.
        :param link_elem: the link element; its ``href`` attribute and
            ``sourceline`` are read.
        """
        BaseError.__init__(
            self,
            _('Link points to a file that is not a text document'),
            link_source,
            line=link_elem.sourceline,
        )
        # Read the attribute once; it feeds both the help text and bad_href.
        href = link_elem.get('href')
        help_template = _(
            'The link "{0}" points to a file {1} that is not a text (HTML) document.'
            ' Many ebook readers will be unable to follow such a link. You should'
            ' either remove the link or change it to point to a text document.'
            ' For example, if it points to an image, you can create small wrapper'
            ' document that contains the image and change the link to point to that.')
        # HELP is set per instance because it embeds this link's href and target.
        self.HELP = help_template.format(href, link_dest)
        self.bad_href = href
# Provenance: added to src/calibre/ebooks/oeb/polish/check/links.py by the
# patch "Edit Book: Add a check for links that do not point to HTML documents".
def check_link_destinations(container):
    """Report ``<a href>`` links that resolve to files which are not documents.

    Every file in ``container.mime_map`` whose mimetype is in ``OEB_DOCS`` is
    parsed and all of its ``a`` elements carrying an ``href`` are inspected.
    A ``BadDestinationType`` is produced when the href resolves (through
    ``container.href_to_name``) to a name that is present in ``mime_map`` but
    whose mimetype is not in ``OEB_DOCS``.

    The same patch wires this into ``run_checks`` in check/main.py, which adds
    the returned list to its errors together with the result of
    ``check_links``.

    :param container: object providing ``mime_map``, ``parsed(name)`` and
        ``href_to_name(href, base_name)``.
    :return: list of ``BadDestinationType`` errors; empty if nothing is wrong.
    """
    errors = []
    mime_map = container.mime_map
    # .items() instead of the Python-2-only .iteritems(): same iteration on
    # Python 2, and it does not raise AttributeError on Python 3.
    for name, mt in mime_map.items():
        if mt not in OEB_DOCS:
            continue  # only documents are scanned for links
        # local-name() makes the match independent of the element's namespace.
        for anchor in container.parsed(name).xpath('//*[local-name()="a" and @href]'):
            tname = container.href_to_name(anchor.get('href'), name)
            # Skip hrefs that gave no name (falsy result) and names that are
            # not in mime_map; this check does not report those.
            if not tname or tname not in mime_map:
                continue
            if mime_map[tname] not in OEB_DOCS:
                errors.append(BadDestinationType(name, tname, anchor))
    return errors