mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Code to read landmarks from book
This commit is contained in:
parent
cb8c641e4f
commit
1600c3a347
@ -15,7 +15,7 @@ from calibre.ebooks.oeb.polish.create import create_book
|
|||||||
from calibre.ebooks.oeb.polish.cover import (
|
from calibre.ebooks.oeb.polish.cover import (
|
||||||
find_cover_image, mark_as_cover, find_cover_page, mark_as_titlepage, clean_opf
|
find_cover_image, mark_as_cover, find_cover_page, mark_as_titlepage, clean_opf
|
||||||
)
|
)
|
||||||
from calibre.ebooks.oeb.polish.toc import get_toc, from_xpaths as toc_from_xpaths
|
from calibre.ebooks.oeb.polish.toc import get_toc, from_xpaths as toc_from_xpaths, get_landmarks
|
||||||
from calibre.ebooks.oeb.polish.utils import guess_type
|
from calibre.ebooks.oeb.polish.utils import guess_type
|
||||||
from calibre.ebooks.oeb.base import OEB_DOCS
|
from calibre.ebooks.oeb.base import OEB_DOCS
|
||||||
from calibre.ebooks.metadata.book.base import Metadata
|
from calibre.ebooks.metadata.book.base import Metadata
|
||||||
@ -35,6 +35,8 @@ OPF_TEMPLATE = '''
|
|||||||
|
|
||||||
def create_manifest_item(name, data=b'', properties=None):
    """Bundle the description of one manifest entry into a tuple.

    Returns the three arguments unchanged as ``(name, data, properties)``.
    """
    item = name, data, properties
    return item
|
||||||
|
|
||||||
|
|
||||||
cmi = create_manifest_item
|
cmi = create_manifest_item
|
||||||
|
|
||||||
|
|
||||||
@ -50,7 +52,7 @@ def create_epub(manifest, spine=(), guide=(), meta_cover=None, ver=3):
|
|||||||
if not spine:
|
if not spine:
|
||||||
spine = [x[0] for x in manifest if guess_type(x[0]) in OEB_DOCS]
|
spine = [x[0] for x in manifest if guess_type(x[0]) in OEB_DOCS]
|
||||||
spine = ''.join('<itemref idref="%s"/>' % name for name in spine)
|
spine = ''.join('<itemref idref="%s"/>' % name for name in spine)
|
||||||
guide = ''.join('<reference href="%s" type="%s"/>' % (name, typ) for name, typ in guide)
|
guide = ''.join('<reference href="%s" type="%s" title="%s"/>' % (name, typ, title) for name, typ, title in guide)
|
||||||
opf = OPF_TEMPLATE.format(manifest=mo, ver='%d.0'%ver, metadata=metadata, spine=spine, guide=guide)
|
opf = OPF_TEMPLATE.format(manifest=mo, ver='%d.0'%ver, metadata=metadata, spine=spine, guide=guide)
|
||||||
buf = BytesIO()
|
buf = BytesIO()
|
||||||
with ZipFile(buf, 'w', ZIP_STORED) as zf:
|
with ZipFile(buf, 'w', ZIP_STORED) as zf:
|
||||||
@ -68,6 +70,7 @@ def create_epub(manifest, spine=(), guide=(), meta_cover=None, ver=3):
|
|||||||
buf.seek(0)
|
buf.seek(0)
|
||||||
return buf
|
return buf
|
||||||
|
|
||||||
|
|
||||||
counter = count()
|
counter = count()
|
||||||
|
|
||||||
|
|
||||||
@ -120,6 +123,23 @@ class Structure(BaseTest):
|
|||||||
tfx('32123', '321[2[3]]')
|
tfx('32123', '321[2[3]]')
|
||||||
tfx('123123', '1[2[3]]1[2[3]]')
|
tfx('123123', '1[2[3]]1[2[3]]')
|
||||||
|
|
||||||
|
def test_landmarks_detection(self):
    """Check get_landmarks() against an EPUB 2 guide and an EPUB 3 nav document.

    Both books describe the same two landmarks, so the same expected list is
    used for both assertions.
    """
    expected = [
        {'dest':'xxx.html', 'frag':'moo', 'type':'x', 'title':'XXX'},
        {'dest':'a.html', 'frag':'', 'type':'', 'title':'YYY'},
    ]

    # EPUB 2: landmarks are declared as <reference> entries in the OPF guide
    c = self.create_epub(
        [cmi('xxx.html'), cmi('a.html')],
        guide=[('xxx.html#moo', 'x', 'XXX'), ('a.html', '', 'YYY')], ver=2)
    self.assertEqual(2, c.opf_version_parsed.major)
    self.assertEqual(expected, get_landmarks(c))

    # EPUB 3: landmarks are declared in a <nav epub:type="landmarks"> element.
    # The nav file lives in a sub-directory so that relative hrefs are
    # exercised, and the link text carries extra whitespace that must be
    # stripped from the title.
    c = self.create_epub([cmi('xxx.html'), cmi('a.html')], ver=3)
    self.assertEqual(3, c.opf_version_parsed.major)
    # Every fragment is a bytes literal: mixing b'' and '' literals in one
    # implicit concatenation is a SyntaxError on Python 3.
    nav_markup = (
        b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">'
        b'<body><nav epub:type="landmarks"><ol><li><a epub:type="x" href="../xxx.html#moo">XXX </a></li>'
        b'<li><a href="../a.html"> YYY </a></li>'
        b'</ol></nav></body></html>'
    )
    c.add_file('xxx/nav.html', nav_markup,
               process_manifest_item=lambda item:item.set('properties', 'nav'))
    self.assertEqual(expected, get_landmarks(c))
|
||||||
|
|
||||||
def test_epub3_covers(self):
|
def test_epub3_covers(self):
|
||||||
# cover image
|
# cover image
|
||||||
ce = partial(self.create_epub, ver=3)
|
ce = partial(self.create_epub, ver=3)
|
||||||
@ -145,7 +165,7 @@ class Structure(BaseTest):
|
|||||||
|
|
||||||
# clean opf of all cover information
|
# clean opf of all cover information
|
||||||
c = ce([cmi('c.jpg', b'z', 'cover-image'), cmi('c.html', b'', 'calibre:title-page'), cmi('d.html')],
|
c = ce([cmi('c.jpg', b'z', 'cover-image'), cmi('c.html', b'', 'calibre:title-page'), cmi('d.html')],
|
||||||
meta_cover='c.jpg', guide=[('c.jpg', 'cover'), ('d.html', 'cover')])
|
meta_cover='c.jpg', guide=[('c.jpg', 'cover', ''), ('d.html', 'cover', '')])
|
||||||
self.assertEqual(set(clean_opf(c)), {'c.jpg', 'c.html', 'd.html'})
|
self.assertEqual(set(clean_opf(c)), {'c.jpg', 'c.html', 'd.html'})
|
||||||
self.assertFalse(c.opf_xpath('//*/@name'))
|
self.assertFalse(c.opf_xpath('//*/@name'))
|
||||||
self.assertFalse(c.opf_xpath('//*/@type'))
|
self.assertFalse(c.opf_xpath('//*/@type'))
|
||||||
@ -159,7 +179,7 @@ class Structure(BaseTest):
|
|||||||
self.assertIsNone(find_cover_image(c))
|
self.assertIsNone(find_cover_image(c))
|
||||||
c = ce([cmi('c.jpg')], meta_cover='c.jpg')
|
c = ce([cmi('c.jpg')], meta_cover='c.jpg')
|
||||||
self.assertEqual('c.jpg', find_cover_image(c))
|
self.assertEqual('c.jpg', find_cover_image(c))
|
||||||
c = ce([cmi('c.jpg'), cmi('d.jpg')], guide=[('c.jpg', 'cover')])
|
c = ce([cmi('c.jpg'), cmi('d.jpg')], guide=[('c.jpg', 'cover', '')])
|
||||||
self.assertEqual('c.jpg', find_cover_image(c))
|
self.assertEqual('c.jpg', find_cover_image(c))
|
||||||
mark_as_cover(c, 'd.jpg')
|
mark_as_cover(c, 'd.jpg')
|
||||||
self.assertEqual('d.jpg', find_cover_image(c))
|
self.assertEqual('d.jpg', find_cover_image(c))
|
||||||
|
@ -281,6 +281,43 @@ def get_toc(container, verify_destinations=True):
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
|
||||||
|
def get_guide_landmarks(container):
    """Yield the landmarks declared in the ``<guide>`` section of the OPF.

    Each ``<reference>`` element whose target resolves to a file present in
    the container produces a dict with the keys ``dest`` (the resolved file
    name), ``frag`` (the part of the href after ``#``, possibly empty),
    ``title`` and ``type`` (both ``''`` when the attribute is missing).
    References without an ``href`` attribute are skipped.
    """
    for ref in container.opf_xpath('./opf:guide/opf:reference'):
        href, title, rtype = ref.get('href'), ref.get('title'), ref.get('type')
        if not href:
            # A malformed <reference> with no target cannot point anywhere;
            # skipping it avoids calling .partition() on None.
            continue
        # partition() returns (before, sep, after); [::2] drops the separator
        href, frag = href.partition('#')[::2]
        name = container.href_to_name(href, container.opf_name)
        if container.has_name(name):
            yield {'dest':name, 'frag':frag, 'title':title or '', 'type':rtype or ''}
|
||||||
|
|
||||||
|
|
||||||
|
def get_nav_landmarks(container):
    """Yield the landmarks declared in the EPUB 3 navigation document.

    The nav document is located with find_existing_nav_toc(). Inside every
    ``<nav epub:type="landmarks">`` element, the first ``<a>`` descendant of
    each ``<li>`` is examined. If it has an ``href`` that resolves to a file
    present in the container, a dict is yielded with the keys ``dest``
    (resolved file name), ``frag`` (part of the href after ``#``), ``title``
    (stripped text content of the link) and ``type`` (the link's
    ``epub:type``, or ``''``). Nothing is yielded when there is no nav
    document.
    """
    nav = find_existing_nav_toc(container)
    if not nav or not container.has_name(nav):
        return
    root = container.parsed(nav)
    et = '{%s}type' % EPUB_NS  # fully qualified name of the epub:type attribute
    for elem in root.iterdescendants(XHTML('nav')):
        if elem.get(et) != 'landmarks':
            continue
        for li in elem.iterdescendants(XHTML('li')):
            for a in li.iterdescendants(XHTML('a')):
                href, rtype = a.get('href'), a.get(et)
                # An <a> without href has no destination; skip it instead of
                # crashing on None.partition().
                if href:
                    title = etree.tostring(a, method='text', encoding=unicode, with_tail=False).strip()
                    # partition() returns (before, sep, after); [::2] drops the separator
                    href, frag = href.partition('#')[::2]
                    # hrefs in the nav document are relative to the nav file itself
                    name = container.href_to_name(href, nav)
                    if container.has_name(name):
                        yield {'dest':name, 'frag':frag, 'title':title or '', 'type':rtype or ''}
                # Only the first link inside each list item is considered
                break
|
||||||
|
|
||||||
|
|
||||||
|
def get_landmarks(container):
    """Return the list of landmarks for the book in ``container``.

    For books whose OPF major version is below 3 the ``<guide>`` section is
    used. For version 3 and above the nav document landmarks are preferred,
    falling back to the ``<guide>`` when the nav document yields none.
    Each landmark is a dict as produced by get_guide_landmarks() or
    get_nav_landmarks().
    """
    if container.opf_version_parsed.major < 3:
        return list(get_guide_landmarks(container))
    # An empty list is falsy, so ``or`` evaluates the guide fallback only when
    # the nav document produced no landmarks.
    return list(get_nav_landmarks(container)) or list(get_guide_landmarks(container))
|
||||||
|
|
||||||
|
|
||||||
def ensure_id(elem):
|
def ensure_id(elem):
|
||||||
if elem.tag == XHTML('a'):
|
if elem.tag == XHTML('a'):
|
||||||
anchor = elem.get('name', None)
|
anchor = elem.get('name', None)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user