Also recognize links that are linked back to as footnote links

This commit is contained in:
Kovid Goyal 2014-11-05 22:07:55 +05:30
parent 851617caed
commit 2f404dbdcd
6 changed files with 64 additions and 43 deletions

Binary file not shown.

View File

@ -40,12 +40,14 @@ get_epub_type = (node, possible_values) ->
break
return epub_type
is_footnote_link = (node, url) ->
is_footnote_link = (node, url, linked_to_anchors) ->
if not url or url.substr(0, 'file://'.length).toLowerCase() != 'file://'
return false # Ignore non-local links
epub_type = get_epub_type(node, ['noteref'])
if epub_type and epub_type.toLowerCase() == 'noteref'
return true
if epub_type and epub_type == 'link'
return false
# Check if node or any of its first few parents have vertical-align set
[x, num] = [node, 3]
@ -63,6 +65,12 @@ is_footnote_link = (node, url) ->
if style.verticalAlign in ['sub', 'super']
return true
eid = node.getAttribute('id') or node.getAttribute('name')
if eid and linked_to_anchors.hasOwnProperty(eid)
# An <a href="..." id="..."> link that is linked back from some other
# file in the spine, most likely a footnote
return true
return false
is_epub_footnote = (node) ->
@ -112,14 +120,8 @@ class CalibreExtract
cnode = inline_styles(node)
return cnode.outerHTML
get_footnote_data: () =>
ans = {}
for a in document.querySelectorAll('a[href]')
url = a.href # .href returns the full URL while getAttribute() returns the value of the attribute
if not is_footnote_link(a, url)
continue
ans[url] = 1
return JSON.stringify(ans)
is_footnote_link: (a) ->
return is_footnote_link(a, a.href, py_bridge.value)
show_footnote: (target, known_targets) ->
if not target

View File

@ -24,6 +24,7 @@ from calibre import (guess_type, prepare_string_for_xml,
from calibre.ebooks.oeb.transforms.cover import CoverManager
from calibre.ebooks.oeb.iterator.spine import (SpineItem, create_indexing_data)
from calibre.ebooks.oeb.iterator.bookmarks import BookmarksMixin
from calibre.ebooks.oeb.base import urlparse, urlunquote
TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(
'__ar__', 'none').replace('__viewbox__', '0 0 600 800'
@ -75,7 +76,7 @@ class EbookIterator(BookmarksMixin):
return i
def __enter__(self, processed=False, only_input_plugin=False,
run_char_count=True, read_anchor_map=True, view_kepub=False):
run_char_count=True, read_anchor_map=True, view_kepub=False, read_links=True):
''' Convert an ebook file into an exploded OEB book suitable for
display in viewers/preprocessing etc. '''
@ -124,7 +125,7 @@ class EbookIterator(BookmarksMixin):
ordered = [i for i in self.opf.spine if i.is_linear] + \
[i for i in self.opf.spine if not i.is_linear]
self.spine = []
Spiny = partial(SpineItem, read_anchor_map=read_anchor_map,
Spiny = partial(SpineItem, read_anchor_map=read_anchor_map, read_links=read_links,
run_char_count=run_char_count, from_epub=self.book_format == 'EPUB')
is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
for i in ordered:
@ -175,10 +176,29 @@ class EbookIterator(BookmarksMixin):
if read_anchor_map:
create_indexing_data(self.spine, self.toc)
self.verify_links()
self.read_bookmarks()
return self
def verify_links(self):
    ''' Record, for every spine item, which of its links resolve to a
    spine item.

    Each link in an item's all_links that has no scheme and no network
    location, and whose path (joined to the item's directory and made
    absolute) is itself a spine item, is added to the item's
    verified_links as a (destination spine item, fragment) tuple. A link
    with a fragment is only recorded when that fragment is present in the
    destination's anchor_map. '''
    # Map every spine path to its SpineItem object so the lookup below
    # yields the object carrying anchor_map, not a plain path string
    by_path = {entry: entry for entry in self.spine}
    for item in self.spine:
        folder = os.path.dirname(item)
        for href in item.all_links:
            try:
                parts = urlparse(urlunquote(href))
            except Exception:
                continue  # Unparseable link, ignore it
            if parts.scheme or parts.netloc or not parts.path:
                continue  # Not a plain relative/local path
            try:
                dest = by_path[os.path.abspath(os.path.join(folder, parts.path))]
            except Exception:
                continue  # Does not point at a spine item
            fragment = parts.fragment
            if fragment and fragment not in dest.anchor_map:
                continue  # Fragment does not exist in the destination
            item.verified_links.add((dest, fragment))
def __exit__(self, *args):
self._tdir.__exit__(*args)
for x in self.delete_on_exit:

View File

@ -13,7 +13,7 @@ from functools import partial
from operator import attrgetter
from collections import namedtuple
from calibre import guess_type
from calibre import guess_type, replace_entities
from calibre.ebooks.chardet import xml_to_unicode
def character_count(html):
@ -33,10 +33,18 @@ def anchor_map(html):
ans[anchor] = ans.get(anchor, match.start())
return ans
def all_links(html):
    ''' Return set of all links in the file

    The links are the href attribute values of <a> tags found in html, with
    entities replaced via replace_entities(). Empty href values are ignored.
    '''
    ans = set()
    # [^>]*? keeps the search for href inside the opening <a ...> tag. With
    # .*? and DOTALL, an <a> tag that has no href (for example a named anchor)
    # would be matched together with the href of whatever element follows it
    # in the file, yielding a link that the <a> tag does not actually have.
    # (.*?) rather than (.+?) stops an empty href="" from consuming its own
    # closing quote and capturing the text up to the next quote character.
    for match in re.finditer(
            r'''<\s*[Aa]\s+[^>]*?[hH][Rr][Ee][Ff]\s*=\s*(['"])(.*?)\1''', html, re.MULTILINE|re.DOTALL):
        href = match.group(2)
        if href:
            ans.add(replace_entities(href))
    return ans
class SpineItem(unicode):
def __new__(cls, path, mime_type=None, read_anchor_map=True,
run_char_count=True, from_epub=False):
run_char_count=True, from_epub=False, read_links=True):
ppath = path.partition('#')[0]
if not os.path.exists(path) and os.path.exists(ppath):
path = ppath
@ -62,6 +70,8 @@ class SpineItem(unicode):
raw, obj.encoding = xml_to_unicode(raw)
obj.character_count = character_count(raw) if run_char_count else 10000
obj.anchor_map = anchor_map(raw) if read_anchor_map else {}
obj.all_links = all_links(raw) if read_links else set()
obj.verified_links = set()
obj.start_page = -1
obj.pages = -1
obj.max_page = -1

View File

@ -1316,9 +1316,10 @@ class DocumentView(QWebView): # {{{
return QWebView.event(self, ev)
def mouseReleaseEvent(self, ev):
url = self.document.mainFrame().hitTestContent(ev.pos()).linkUrl()
if url.isValid() and self.manager is not None:
fd = self.footnotes.get_footnote_data(url)
r = self.document.mainFrame().hitTestContent(ev.pos())
a, url = r.linkElement(), r.linkUrl()
if url.isValid() and not a.isNull() and self.manager is not None:
fd = self.footnotes.get_footnote_data(a, url)
if fd:
self.footnotes.show_footnote(fd)
self.manager.show_footnote_view()

View File

@ -98,7 +98,6 @@ class Footnotes(object):
settings.setUserStyleSheetUrl(source.userStyleSheetUrl())
def clear(self):
self.footnote_data_cache = {}
self.known_footnote_targets = defaultdict(set)
self.showing_url = None
@ -109,33 +108,22 @@ class Footnotes(object):
except (AttributeError, ValueError):
pass
def load_footnote_data(self, current_url):
    ''' Build, cache and return the footnote link data for current_url.

    The page's JavaScript is asked for its footnote links. Every link
    whose local file resolves to a spine path is stored in the returned
    dict, keyed by the URL string, as (spine path, fragment, QUrl), and
    its fragment is added to known_footnote_targets for that spine path.
    Failures are printed with a traceback and whatever was collected so
    far is returned. '''
    cache = {}
    self.footnote_data_cache[current_url] = cache
    try:
        payload = self.view.document.javascript('window.calibre_extract.get_footnote_data()', typ='string')
        for href in json.loads(payload or '{}'):
            if href in cache:
                continue
            qurl = QUrl(href)
            spine_item = self.spine_path(qurl.toLocalFile())
            if spine_item is None:
                continue  # Link does not point into the spine
            fragment = qurl.fragment(QUrl.FullyDecoded)
            cache[qurl.toString()] = (spine_item, fragment, qurl)
            self.known_footnote_targets[spine_item].add(fragment)
    except Exception:
        # Best effort: report the problem but keep the viewer working
        prints('Failed to get footnote data, with error:')
        import traceback
        traceback.print_exc()
    return cache
def get_footnote_data(self, qurl):
current_url = unicode(self.view.document.mainFrame().baseUrl().toLocalFile())
if not current_url:
def get_footnote_data(self, a, qurl):
current_path = unicode(self.view.document.mainFrame().baseUrl().toLocalFile())
if not current_path:
return # Not viewing a local file
fd = self.footnote_data_cache.get(current_url)
if fd is None:
fd = self.load_footnote_data(current_url)
return fd.get(qurl.toString())
dest_path = self.spine_path(qurl.toLocalFile())
if dest_path is not None:
linked_to_anchors = {anchor:0 for path, anchor in dest_path.verified_links if path == current_path}
self.view.document.bridge_value = linked_to_anchors
if a.evaluateJavaScript('calibre_extract.is_footnote_link(this)'):
if dest_path not in self.known_footnote_targets:
self.known_footnote_targets[dest_path] = s = set()
for item in self.view.manager.iterator.spine:
for path, target in item.verified_links:
if target and path == dest_path:
s.add(target)
return (dest_path, qurl.fragment(QUrl.FullyDecoded), qurl)
def show_footnote(self, fd):
path, target, self.showing_url = fd