mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Also recognize links that are linked back to as footnote links
This commit is contained in:
parent
851617caed
commit
2f404dbdcd
Binary file not shown.
@ -40,12 +40,14 @@ get_epub_type = (node, possible_values) ->
|
|||||||
break
|
break
|
||||||
return epub_type
|
return epub_type
|
||||||
|
|
||||||
is_footnote_link = (node, url) ->
|
is_footnote_link = (node, url, linked_to_anchors) ->
|
||||||
if not url or url.substr(0, 'file://'.length).toLowerCase() != 'file://'
|
if not url or url.substr(0, 'file://'.length).toLowerCase() != 'file://'
|
||||||
return false # Ignore non-local links
|
return false # Ignore non-local links
|
||||||
epub_type = get_epub_type(node, ['noteref'])
|
epub_type = get_epub_type(node, ['noteref'])
|
||||||
if epub_type and epub_type.toLowerCase() == 'noteref'
|
if epub_type and epub_type.toLowerCase() == 'noteref'
|
||||||
return true
|
return true
|
||||||
|
if epub_type and epub_type == 'link'
|
||||||
|
return false
|
||||||
|
|
||||||
# Check if node or any of its first few parents have vertical-align set
|
# Check if node or any of its first few parents have vertical-align set
|
||||||
[x, num] = [node, 3]
|
[x, num] = [node, 3]
|
||||||
@ -63,6 +65,12 @@ is_footnote_link = (node, url) ->
|
|||||||
if style.verticalAlign in ['sub', 'super']
|
if style.verticalAlign in ['sub', 'super']
|
||||||
return true
|
return true
|
||||||
|
|
||||||
|
eid = node.getAttribute('id') or node.getAttribute('name')
|
||||||
|
if eid and linked_to_anchors.hasOwnProperty(eid)
|
||||||
|
# An <a href="..." id="..."> link that is linked back from some other
|
||||||
|
# file in the spine, most likely a footnote
|
||||||
|
return true
|
||||||
|
|
||||||
return false
|
return false
|
||||||
|
|
||||||
is_epub_footnote = (node) ->
|
is_epub_footnote = (node) ->
|
||||||
@ -112,14 +120,8 @@ class CalibreExtract
|
|||||||
cnode = inline_styles(node)
|
cnode = inline_styles(node)
|
||||||
return cnode.outerHTML
|
return cnode.outerHTML
|
||||||
|
|
||||||
get_footnote_data: () =>
|
is_footnote_link: (a) ->
|
||||||
ans = {}
|
return is_footnote_link(a, a.href, py_bridge.value)
|
||||||
for a in document.querySelectorAll('a[href]')
|
|
||||||
url = a.href # .href returns the full URL while getAttribute() returns the value of the attribute
|
|
||||||
if not is_footnote_link(a, url)
|
|
||||||
continue
|
|
||||||
ans[url] = 1
|
|
||||||
return JSON.stringify(ans)
|
|
||||||
|
|
||||||
show_footnote: (target, known_targets) ->
|
show_footnote: (target, known_targets) ->
|
||||||
if not target
|
if not target
|
||||||
|
@ -24,6 +24,7 @@ from calibre import (guess_type, prepare_string_for_xml,
|
|||||||
from calibre.ebooks.oeb.transforms.cover import CoverManager
|
from calibre.ebooks.oeb.transforms.cover import CoverManager
|
||||||
from calibre.ebooks.oeb.iterator.spine import (SpineItem, create_indexing_data)
|
from calibre.ebooks.oeb.iterator.spine import (SpineItem, create_indexing_data)
|
||||||
from calibre.ebooks.oeb.iterator.bookmarks import BookmarksMixin
|
from calibre.ebooks.oeb.iterator.bookmarks import BookmarksMixin
|
||||||
|
from calibre.ebooks.oeb.base import urlparse, urlunquote
|
||||||
|
|
||||||
TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(
|
TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(
|
||||||
'__ar__', 'none').replace('__viewbox__', '0 0 600 800'
|
'__ar__', 'none').replace('__viewbox__', '0 0 600 800'
|
||||||
@ -75,7 +76,7 @@ class EbookIterator(BookmarksMixin):
|
|||||||
return i
|
return i
|
||||||
|
|
||||||
def __enter__(self, processed=False, only_input_plugin=False,
|
def __enter__(self, processed=False, only_input_plugin=False,
|
||||||
run_char_count=True, read_anchor_map=True, view_kepub=False):
|
run_char_count=True, read_anchor_map=True, view_kepub=False, read_links=True):
|
||||||
''' Convert an ebook file into an exploded OEB book suitable for
|
''' Convert an ebook file into an exploded OEB book suitable for
|
||||||
display in viewers/preprocessing etc. '''
|
display in viewers/preprocessing etc. '''
|
||||||
|
|
||||||
@ -124,7 +125,7 @@ class EbookIterator(BookmarksMixin):
|
|||||||
ordered = [i for i in self.opf.spine if i.is_linear] + \
|
ordered = [i for i in self.opf.spine if i.is_linear] + \
|
||||||
[i for i in self.opf.spine if not i.is_linear]
|
[i for i in self.opf.spine if not i.is_linear]
|
||||||
self.spine = []
|
self.spine = []
|
||||||
Spiny = partial(SpineItem, read_anchor_map=read_anchor_map,
|
Spiny = partial(SpineItem, read_anchor_map=read_anchor_map, read_links=read_links,
|
||||||
run_char_count=run_char_count, from_epub=self.book_format == 'EPUB')
|
run_char_count=run_char_count, from_epub=self.book_format == 'EPUB')
|
||||||
is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
|
is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
|
||||||
for i in ordered:
|
for i in ordered:
|
||||||
@ -175,10 +176,29 @@ class EbookIterator(BookmarksMixin):
|
|||||||
if read_anchor_map:
|
if read_anchor_map:
|
||||||
create_indexing_data(self.spine, self.toc)
|
create_indexing_data(self.spine, self.toc)
|
||||||
|
|
||||||
|
self.verify_links()
|
||||||
|
|
||||||
self.read_bookmarks()
|
self.read_bookmarks()
|
||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
def verify_links(self):
    ''' Populate ``verified_links`` on every item in ``self.spine``.

    For each link in ``item.all_links`` that is relative (no scheme, no
    network location) and has a non-empty path, the path is resolved
    against the directory of ``item``. If the result is itself an item in
    the spine, and the link either has no fragment or its fragment is a key
    of the destination's ``anchor_map``, the tuple
    ``(destination_item, fragment)`` is added to ``item.verified_links``.
    The fragment is the empty string for links without one. Links that
    cannot be parsed or resolved are silently ignored. '''
    # Maps a plain path string to the spine item object carrying that path,
    # so that the richer object (with anchor_map) can be recovered.
    spine_paths = {s:s for s in self.spine}
    for item in self.spine:
        base = os.path.dirname(item)
        for link in item.all_links:
            try:
                # Parse *before* percent-decoding: decoding first turns an
                # encoded '#', '?' or ':' that belongs to a file name into a
                # URL delimiter and the link is split in the wrong place.
                p = urlparse(link)
                link_path = urlunquote(p.path)
                fragment = urlunquote(p.fragment)
            except Exception:
                # Best effort: a malformed link is simply not verified
                continue
            if p.scheme or p.netloc or not link_path:
                # External link, or a fragment-only link within this file
                continue
            try:
                key = os.path.abspath(os.path.join(base, link_path))
            except (TypeError, ValueError):
                continue
            dest = spine_paths.get(key)
            if dest is None:
                # Link does not point at a file in the spine
                continue
            if not fragment or fragment in dest.anchor_map:
                item.verified_links.add((dest, fragment))
|
||||||
|
|
||||||
def __exit__(self, *args):
|
def __exit__(self, *args):
|
||||||
self._tdir.__exit__(*args)
|
self._tdir.__exit__(*args)
|
||||||
for x in self.delete_on_exit:
|
for x in self.delete_on_exit:
|
||||||
|
@ -13,7 +13,7 @@ from functools import partial
|
|||||||
from operator import attrgetter
|
from operator import attrgetter
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
|
||||||
from calibre import guess_type
|
from calibre import guess_type, replace_entities
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
|
|
||||||
def character_count(html):
|
def character_count(html):
|
||||||
@ -33,10 +33,18 @@ def anchor_map(html):
|
|||||||
ans[anchor] = ans.get(anchor, match.start())
|
ans[anchor] = ans.get(anchor, match.start())
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
def all_links(html):
    ''' Return set of all links in the file

    ``html`` is the markup as text. Every quoted ``href`` attribute value
    found inside an ``<a ...>`` opening tag is collected, with entities
    in the value replaced via ``replace_entities``. Unquoted attribute
    values are not recognized. '''
    ans = set()
    # [^<>]*? keeps the search for href inside the opening <a ...> tag.
    # A plain .*? (with DOTALL) would run past the end of the tag and pick
    # up the href of some later, unrelated element such as <link href=...>.
    for match in re.finditer(
            r'''<\s*[Aa]\s+[^<>]*?[hH][Rr][Ee][Ff]\s*=\s*(['"])(.+?)\1''', html, re.DOTALL):
        ans.add(replace_entities(match.group(2)))
    return ans
|
||||||
|
|
||||||
class SpineItem(unicode):
|
class SpineItem(unicode):
|
||||||
|
|
||||||
def __new__(cls, path, mime_type=None, read_anchor_map=True,
|
def __new__(cls, path, mime_type=None, read_anchor_map=True,
|
||||||
run_char_count=True, from_epub=False):
|
run_char_count=True, from_epub=False, read_links=True):
|
||||||
ppath = path.partition('#')[0]
|
ppath = path.partition('#')[0]
|
||||||
if not os.path.exists(path) and os.path.exists(ppath):
|
if not os.path.exists(path) and os.path.exists(ppath):
|
||||||
path = ppath
|
path = ppath
|
||||||
@ -62,6 +70,8 @@ class SpineItem(unicode):
|
|||||||
raw, obj.encoding = xml_to_unicode(raw)
|
raw, obj.encoding = xml_to_unicode(raw)
|
||||||
obj.character_count = character_count(raw) if run_char_count else 10000
|
obj.character_count = character_count(raw) if run_char_count else 10000
|
||||||
obj.anchor_map = anchor_map(raw) if read_anchor_map else {}
|
obj.anchor_map = anchor_map(raw) if read_anchor_map else {}
|
||||||
|
obj.all_links = all_links(raw) if read_links else set()
|
||||||
|
obj.verified_links = set()
|
||||||
obj.start_page = -1
|
obj.start_page = -1
|
||||||
obj.pages = -1
|
obj.pages = -1
|
||||||
obj.max_page = -1
|
obj.max_page = -1
|
||||||
|
@ -1316,9 +1316,10 @@ class DocumentView(QWebView): # {{{
|
|||||||
return QWebView.event(self, ev)
|
return QWebView.event(self, ev)
|
||||||
|
|
||||||
def mouseReleaseEvent(self, ev):
|
def mouseReleaseEvent(self, ev):
|
||||||
url = self.document.mainFrame().hitTestContent(ev.pos()).linkUrl()
|
r = self.document.mainFrame().hitTestContent(ev.pos())
|
||||||
if url.isValid() and self.manager is not None:
|
a, url = r.linkElement(), r.linkUrl()
|
||||||
fd = self.footnotes.get_footnote_data(url)
|
if url.isValid() and not a.isNull() and self.manager is not None:
|
||||||
|
fd = self.footnotes.get_footnote_data(a, url)
|
||||||
if fd:
|
if fd:
|
||||||
self.footnotes.show_footnote(fd)
|
self.footnotes.show_footnote(fd)
|
||||||
self.manager.show_footnote_view()
|
self.manager.show_footnote_view()
|
||||||
|
@ -98,7 +98,6 @@ class Footnotes(object):
|
|||||||
settings.setUserStyleSheetUrl(source.userStyleSheetUrl())
|
settings.setUserStyleSheetUrl(source.userStyleSheetUrl())
|
||||||
|
|
||||||
def clear(self):
|
def clear(self):
|
||||||
self.footnote_data_cache = {}
|
|
||||||
self.known_footnote_targets = defaultdict(set)
|
self.known_footnote_targets = defaultdict(set)
|
||||||
self.showing_url = None
|
self.showing_url = None
|
||||||
|
|
||||||
@ -109,33 +108,22 @@ class Footnotes(object):
|
|||||||
except (AttributeError, ValueError):
|
except (AttributeError, ValueError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def load_footnote_data(self, current_url):
|
def get_footnote_data(self, a, qurl):
|
||||||
fd = self.footnote_data_cache[current_url] = {}
|
current_path = unicode(self.view.document.mainFrame().baseUrl().toLocalFile())
|
||||||
try:
|
if not current_path:
|
||||||
raw = self.view.document.javascript('window.calibre_extract.get_footnote_data()', typ='string')
|
|
||||||
for x in json.loads(raw or '{}'):
|
|
||||||
if x not in fd:
|
|
||||||
qu = QUrl(x)
|
|
||||||
path = qu.toLocalFile()
|
|
||||||
spath = self.spine_path(path)
|
|
||||||
if spath is not None:
|
|
||||||
target = qu.fragment(QUrl.FullyDecoded)
|
|
||||||
fd[qu.toString()] = (spath, target, qu)
|
|
||||||
self.known_footnote_targets[spath].add(target)
|
|
||||||
except Exception:
|
|
||||||
prints('Failed to get footnote data, with error:')
|
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
|
||||||
return fd
|
|
||||||
|
|
||||||
def get_footnote_data(self, qurl):
|
|
||||||
current_url = unicode(self.view.document.mainFrame().baseUrl().toLocalFile())
|
|
||||||
if not current_url:
|
|
||||||
return # Not viewing a local file
|
return # Not viewing a local file
|
||||||
fd = self.footnote_data_cache.get(current_url)
|
dest_path = self.spine_path(qurl.toLocalFile())
|
||||||
if fd is None:
|
if dest_path is not None:
|
||||||
fd = self.load_footnote_data(current_url)
|
linked_to_anchors = {anchor:0 for path, anchor in dest_path.verified_links if path == current_path}
|
||||||
return fd.get(qurl.toString())
|
self.view.document.bridge_value = linked_to_anchors
|
||||||
|
if a.evaluateJavaScript('calibre_extract.is_footnote_link(this)'):
|
||||||
|
if dest_path not in self.known_footnote_targets:
|
||||||
|
self.known_footnote_targets[dest_path] = s = set()
|
||||||
|
for item in self.view.manager.iterator.spine:
|
||||||
|
for path, target in item.verified_links:
|
||||||
|
if target and path == dest_path:
|
||||||
|
s.add(target)
|
||||||
|
return (dest_path, qurl.fragment(QUrl.FullyDecoded), qurl)
|
||||||
|
|
||||||
def show_footnote(self, fd):
|
def show_footnote(self, fd):
|
||||||
path, target, self.showing_url = fd
|
path, target, self.showing_url = fd
|
||||||
|
Loading…
x
Reference in New Issue
Block a user