From 2f404dbdcdbdba7db368ac95ad6ff5c917642597 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 5 Nov 2014 22:07:55 +0530 Subject: [PATCH] Also recognize links that are linked back to as footnote links --- resources/compiled_coffeescript.zip | Bin 101230 -> 101116 bytes src/calibre/ebooks/oeb/display/extract.coffee | 20 +++++---- src/calibre/ebooks/oeb/iterator/book.py | 24 +++++++++- src/calibre/ebooks/oeb/iterator/spine.py | 14 +++++- src/calibre/gui2/viewer/documentview.py | 7 +-- src/calibre/gui2/viewer/footnote.py | 42 +++++++----------- 6 files changed, 64 insertions(+), 43 deletions(-) diff --git a/resources/compiled_coffeescript.zip b/resources/compiled_coffeescript.zip index 70ae3522edc9fb4425110f0c51c4f124ff76b3b3..d8a1aa6331d778de9d633e95673286a24319fe0a 100644 GIT binary patch delta 573 zcmaDijqT4=wheBHf*&@dx-N@b_H3Ix0|N-lPEK4ewmCO(2d9pXLQZC0c4|s|Nq&4{ zUUEi$QL&~%HJ1VqlqD7^Bxhviq!gv*=_sUTrfjy%R%4v3Tp%csT2Pu4Us73+s$gqt ztDp`tLLH=Sa==uH$rrNOHpl0PGP44;DA-O;$P}5ZugW?(E>p}VFEKY&UDFzDd}f-0 z21tdP8m1HUG7^jZ%ku(?@(WUnN-9C>G$-5TC{FM7Wfa@om4AnI^P18&rb*c{LWwZv zDkv1CmXsFdDNHt~6p={O%P2}s(@`j>j87`cOi54GD@)8NP1T&9ugEAmS)oFx`F!Q} z^OcNWxcN{+XZpof#%Lzp#>w_QqLVK)@=XtHW7J~W*bU?>OrHy4e_S}Zev$n27a;bP zbs%-qHQO1Dn12d0PIl-KnLbB^kz;yQJEIoMb5%x$>AO1^rKcYTav6Q5ziVgoVRW8u z-NC5O@vnuE;U81A()5q*jPlbDcQRT}U*E~dK0Tm|(Rcd2Hb$=LU%OypXFGtJ!R9)2 zGwM!f?qn33-rB+FINhKdXct=-&`c0(a&97*j*^vvl4V-5QIdg~p?PAOrE#jEQKChP sxw*NSfvIV-v87pxK}wpXg^`7c;dI4rMsct&OF;I6O`F@rXu((u06AsFq5uE@ delta 563 zcmZWlU5gT76rMAdmWHILL9qGm28(UmZlj~w%JzY+dDRj@u!v1><7*C1j;Xn9OVCoI zGuGXZ=OQZT2e`o$d(~cc*$=3o+pZR*E+ZQCB4Q65IOloJ$8*l_7w)~j`(2I(pY_?w z*NlHVMmTQdQykvcA!@JF!Qs#;vgsv`wP}OM6-6yISk1m(odtVW4ZEz>_YwPH{o6s| zE^A3;O>5P(7L!V}MaeReq&rHj6l+zK=E6Q6FR2i=Z?-#pU>|1jGV5+ z=_9n(EI7SIr&++jQ%K`A0v_w*dhkhcW2d+QzkL4xKZBAE1^&&}=sRKS)|MZ?>oCWk zd_D${bRfygZzsvX057&X4xQVcAUhq1yQ(3uhJ#SQ3vqsJ7R4@HJ+Qk7T;G5wzV5=D z>q8#!b{?kiM;DHI>N;?L_)QG-AcPlna3h+K!Jh`)u break return epub_type -is_footnote_link = (node, url) -> +is_footnote_link 
= (node, url, linked_to_anchors) -> if not url or url.substr(0, 'file://'.length).toLowerCase() != 'file://' return false # Ignore non-local links epub_type = get_epub_type(node, ['noteref']) if epub_type and epub_type.toLowerCase() == 'noteref' return true + if epub_type and epub_type == 'link' + return false # Check if node or any of its first few parents have vertical-align set [x, num] = [node, 3] @@ -63,6 +65,12 @@ is_footnote_link = (node, url) -> if style.verticalAlign in ['sub', 'super'] return true + eid = node.getAttribute('id') or node.getAttribute('name') + if eid and linked_to_anchors.hasOwnProperty(eid) + # A link that is linked back from some other + # file in the spine, most likely a footnote + return true + return false is_epub_footnote = (node) -> @@ -112,14 +120,8 @@ class CalibreExtract cnode = inline_styles(node) return cnode.outerHTML - get_footnote_data: () => - ans = {} - for a in document.querySelectorAll('a[href]') - url = a.href # .href returns the full URL while getAttribute() returns the value of the attribute - if not is_footnote_link(a, url) - continue - ans[url] = 1 - return JSON.stringify(ans) + is_footnote_link: (a) -> + return is_footnote_link(a, a.href, py_bridge.value) show_footnote: (target, known_targets) -> if not target diff --git a/src/calibre/ebooks/oeb/iterator/book.py b/src/calibre/ebooks/oeb/iterator/book.py index b2dc51d234..9ab0c991b4 100644 --- a/src/calibre/ebooks/oeb/iterator/book.py +++ b/src/calibre/ebooks/oeb/iterator/book.py @@ -24,6 +24,7 @@ from calibre import (guess_type, prepare_string_for_xml, from calibre.ebooks.oeb.transforms.cover import CoverManager from calibre.ebooks.oeb.iterator.spine import (SpineItem, create_indexing_data) from calibre.ebooks.oeb.iterator.bookmarks import BookmarksMixin +from calibre.ebooks.oeb.base import urlparse, urlunquote TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace( '__ar__', 'none').replace('__viewbox__', '0 0 600 800' @@ -75,7 +76,7 @@ class
EbookIterator(BookmarksMixin): return i def __enter__(self, processed=False, only_input_plugin=False, - run_char_count=True, read_anchor_map=True, view_kepub=False): + run_char_count=True, read_anchor_map=True, view_kepub=False, read_links=True): ''' Convert an ebook file into an exploded OEB book suitable for display in viewers/preprocessing etc. ''' @@ -124,7 +125,7 @@ class EbookIterator(BookmarksMixin): ordered = [i for i in self.opf.spine if i.is_linear] + \ [i for i in self.opf.spine if not i.is_linear] self.spine = [] - Spiny = partial(SpineItem, read_anchor_map=read_anchor_map, + Spiny = partial(SpineItem, read_anchor_map=read_anchor_map, read_links=read_links, run_char_count=run_char_count, from_epub=self.book_format == 'EPUB') is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'} for i in ordered: @@ -175,10 +176,29 @@ class EbookIterator(BookmarksMixin): if read_anchor_map: create_indexing_data(self.spine, self.toc) + self.verify_links() + self.read_bookmarks() return self + def verify_links(self): + spine_paths = {s:s for s in self.spine} + for item in self.spine: + base = os.path.dirname(item) + for link in item.all_links: + try: + p = urlparse(urlunquote(link)) + except Exception: + continue + if not p.scheme and not p.netloc and p.path: + try: + path = spine_paths[os.path.abspath(os.path.join(base, p.path))] + except Exception: + continue + if not p.fragment or p.fragment in path.anchor_map: + item.verified_links.add((path, p.fragment)) + def __exit__(self, *args): self._tdir.__exit__(*args) for x in self.delete_on_exit: diff --git a/src/calibre/ebooks/oeb/iterator/spine.py b/src/calibre/ebooks/oeb/iterator/spine.py index 86ab7bcf78..1d436aa820 100644 --- a/src/calibre/ebooks/oeb/iterator/spine.py +++ b/src/calibre/ebooks/oeb/iterator/spine.py @@ -13,7 +13,7 @@ from functools import partial from operator import attrgetter from collections import namedtuple -from calibre import guess_type +from calibre import guess_type, 
replace_entities from calibre.ebooks.chardet import xml_to_unicode def character_count(html): @@ -33,10 +33,18 @@ def anchor_map(html): ans[anchor] = ans.get(anchor, match.start()) return ans +def all_links(html): + ''' Return set of all links in the file ''' + ans = set() + for match in re.finditer( + r'''<\s*[Aa]\s+.*?[hH][Rr][Ee][Ff]\s*=\s*(['"])(.+?)\1''', html, re.MULTILINE|re.DOTALL): + ans.add(replace_entities(match.group(2))) + return ans + class SpineItem(unicode): def __new__(cls, path, mime_type=None, read_anchor_map=True, - run_char_count=True, from_epub=False): + run_char_count=True, from_epub=False, read_links=True): ppath = path.partition('#')[0] if not os.path.exists(path) and os.path.exists(ppath): path = ppath @@ -62,6 +70,8 @@ class SpineItem(unicode): raw, obj.encoding = xml_to_unicode(raw) obj.character_count = character_count(raw) if run_char_count else 10000 obj.anchor_map = anchor_map(raw) if read_anchor_map else {} + obj.all_links = all_links(raw) if read_links else set() + obj.verified_links = set() obj.start_page = -1 obj.pages = -1 obj.max_page = -1 diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py index 0facce5a8d..e0a86f3b2d 100644 --- a/src/calibre/gui2/viewer/documentview.py +++ b/src/calibre/gui2/viewer/documentview.py @@ -1316,9 +1316,10 @@ class DocumentView(QWebView): # {{{ return QWebView.event(self, ev) def mouseReleaseEvent(self, ev): - url = self.document.mainFrame().hitTestContent(ev.pos()).linkUrl() - if url.isValid() and self.manager is not None: - fd = self.footnotes.get_footnote_data(url) + r = self.document.mainFrame().hitTestContent(ev.pos()) + a, url = r.linkElement(), r.linkUrl() + if url.isValid() and not a.isNull() and self.manager is not None: + fd = self.footnotes.get_footnote_data(a, url) if fd: self.footnotes.show_footnote(fd) self.manager.show_footnote_view() diff --git a/src/calibre/gui2/viewer/footnote.py b/src/calibre/gui2/viewer/footnote.py index 
c42f0a92f1..ed87e6c4e3 100644 --- a/src/calibre/gui2/viewer/footnote.py +++ b/src/calibre/gui2/viewer/footnote.py @@ -98,7 +98,6 @@ class Footnotes(object): settings.setUserStyleSheetUrl(source.userStyleSheetUrl()) def clear(self): - self.footnote_data_cache = {} self.known_footnote_targets = defaultdict(set) self.showing_url = None @@ -109,33 +108,22 @@ class Footnotes(object): except (AttributeError, ValueError): pass - def load_footnote_data(self, current_url): - fd = self.footnote_data_cache[current_url] = {} - try: - raw = self.view.document.javascript('window.calibre_extract.get_footnote_data()', typ='string') - for x in json.loads(raw or '{}'): - if x not in fd: - qu = QUrl(x) - path = qu.toLocalFile() - spath = self.spine_path(path) - if spath is not None: - target = qu.fragment(QUrl.FullyDecoded) - fd[qu.toString()] = (spath, target, qu) - self.known_footnote_targets[spath].add(target) - except Exception: - prints('Failed to get footnote data, with error:') - import traceback - traceback.print_exc() - return fd - - def get_footnote_data(self, qurl): - current_url = unicode(self.view.document.mainFrame().baseUrl().toLocalFile()) - if not current_url: + def get_footnote_data(self, a, qurl): + current_path = unicode(self.view.document.mainFrame().baseUrl().toLocalFile()) + if not current_path: return # Not viewing a local file - fd = self.footnote_data_cache.get(current_url) - if fd is None: - fd = self.load_footnote_data(current_url) - return fd.get(qurl.toString()) + dest_path = self.spine_path(qurl.toLocalFile()) + if dest_path is not None: + linked_to_anchors = {anchor:0 for path, anchor in dest_path.verified_links if path == current_path} + self.view.document.bridge_value = linked_to_anchors + if a.evaluateJavaScript('calibre_extract.is_footnote_link(this)'): + if dest_path not in self.known_footnote_targets: + self.known_footnote_targets[dest_path] = s = set() + for item in self.view.manager.iterator.spine: + for path, target in item.verified_links: + 
if target and path == dest_path: + s.add(target) + return (dest_path, qurl.fragment(QUrl.FullyDecoded), qurl) def show_footnote(self, fd): path, target, self.showing_url = fd