From b4694392708415e9b2aeb9979ffbaa0a61aca4db Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 13 Feb 2014 13:57:57 +0530
Subject: [PATCH] Edit Book: Add a check for links that do not point to HTML
 documents

---
 src/calibre/ebooks/oeb/polish/check/links.py | 24 ++++++++++++++++++++
 src/calibre/ebooks/oeb/polish/check/main.py  |  4 ++--
 2 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/src/calibre/ebooks/oeb/polish/check/links.py b/src/calibre/ebooks/oeb/polish/check/links.py
index f887852e63..75c465a277 100644
--- a/src/calibre/ebooks/oeb/polish/check/links.py
+++ b/src/calibre/ebooks/oeb/polish/check/links.py
@@ -49,6 +49,20 @@ class CaseMismatch(BadLink):
         container.replace_links(self.name, replacer)
         return replacer.replaced
 
+class BadDestinationType(BaseError):
+    ''' Warning-level error reported for a link whose destination file is not a text (HTML) document. '''
+
+    level = WARN
+
+    def __init__(self, link_source, link_dest, link_elem):
+        BaseError.__init__(self, _('Link points to a file that is not a text document'), link_source, line=link_elem.sourceline)
+        href = self.bad_href = link_elem.get('href')  # raw href attribute of the offending element
+        self.HELP = _('The link "{0}" points to a file <i>{1}</i> that is not a text (HTML) document.'
+                      ' Many ebook readers will be unable to follow such a link. You should'
+                      ' either remove the link or change it to point to a text document.'
+                      ' For example, if it points to an image, you can create small wrapper'
+                      ' document that contains the image and change the link to point to that.').format(href, link_dest)
+
 class FileLink(BadLink):
 
     HELP = _('This link uses the file:// URL scheme. This does not work with many ebook readers.'
@@ -136,6 +150,16 @@ def check_mimetypes(container):
             a(MimetypeMismatch(container, name, mt, gt))
     return errors
 
+def check_link_destinations(container):
+    ''' Return BadDestinationType errors for <a href> links, in files whose mimetype is in OEB_DOCS, that resolve to a file whose mimetype is not. '''
+    found = []
+    for name in (n for n, mt in container.mime_map.iteritems() if mt in OEB_DOCS):
+        for anchor in container.parsed(name).xpath('//*[local-name()="a" and @href]'):
+            target = container.href_to_name(anchor.get('href'), name)
+            if target and target in container.mime_map and container.mime_map[target] not in OEB_DOCS:
+                found.append(BadDestinationType(name, target, anchor))
+    return found
+
 def check_links(container):
     links_map = defaultdict(set)
     xml_types = {guess_type('a.opf'), guess_type('a.ncx')}
diff --git a/src/calibre/ebooks/oeb/polish/check/main.py b/src/calibre/ebooks/oeb/polish/check/main.py
index 46060ca987..3f5c2fafad 100644
--- a/src/calibre/ebooks/oeb/polish/check/main.py
+++ b/src/calibre/ebooks/oeb/polish/check/main.py
@@ -15,7 +15,7 @@ from calibre.ebooks.oeb.polish.check.base import run_checkers
 from calibre.ebooks.oeb.polish.check.parsing import (
     check_filenames, check_xml_parsing, check_css_parsing, fix_style_tag, check_html_size, check_ids)
 from calibre.ebooks.oeb.polish.check.images import check_raster_images
-from calibre.ebooks.oeb.polish.check.links import check_links, check_mimetypes
+from calibre.ebooks.oeb.polish.check.links import check_links, check_mimetypes, check_link_destinations
 from calibre.ebooks.oeb.polish.check.fonts import check_fonts
 from calibre.ebooks.oeb.polish.check.opf import check_opf
 
@@ -58,7 +58,7 @@ def run_checks(container):
                 errors.extend(check_css_parsing(name, raw, line_offset=elem.sourceline - 1, is_declaration=True))
 
     errors += check_mimetypes(container)
-    errors += check_links(container)
+    errors += check_links(container) + check_link_destinations(container)
     errors += check_fonts(container)
     errors += check_filenames(container)
     errors += check_ids(container)