Add a test for ToC detection

This commit is contained in:
Kovid Goyal 2016-06-23 07:43:54 +05:30
parent 1823f4d642
commit 7f76d0292f
4 changed files with 45 additions and 11 deletions

View File

@ -281,7 +281,7 @@ class Container(ContainerBase): # {{{
for name, path in self.name_path_map.iteritems()}
}
def add_name_to_manifest(self, name):
def add_name_to_manifest(self, name, process_manifest_item=None):
' Add an entry to the manifest for a file with the specified name. Returns the manifest id. '
all_ids = {x.get('id') for x in self.opf_xpath('//*[@id]')}
c = 0
@ -295,6 +295,8 @@ class Container(ContainerBase): # {{{
id=item_id, href=href)
item.set('media-type', self.mime_map[name])
self.insert_into_xml(manifest, item)
if process_manifest_item is not None:
process_manifest_item(item)
self.dirty(self.opf_name)
return item_id
@ -304,7 +306,7 @@ class Container(ContainerBase): # {{{
all_hrefs = {x.get('href') for x in self.opf_xpath('//opf:manifest/opf:item[@href]')}
return href in all_hrefs
def add_file(self, name, data, media_type=None, spine_index=None, modify_name_if_needed=False):
def add_file(self, name, data, media_type=None, spine_index=None, modify_name_if_needed=False, process_manifest_item=None):
''' Add a file to this container. Entries for the file are
automatically created in the OPF manifest and spine
(if the file is a text document) '''
@ -336,7 +338,7 @@ class Container(ContainerBase): # {{{
self.mime_map[name] = mt
if self.ok_to_be_unmanifested(name):
return name
item_id = self.add_name_to_manifest(name)
item_id = self.add_name_to_manifest(name, process_manifest_item=process_manifest_item)
if mt in OEB_DOCS:
manifest = self.opf_xpath('//opf:manifest')[0]
spine = self.opf_xpath('//opf:spine')[0]

View File

@ -51,7 +51,7 @@ def get_simple_book(fmt='epub'):
if needs_recompile(ans, src):
with TemporaryDirectory('bpt') as tdir:
with CurrentDir(tdir):
raw = open(src, 'rb').read().decode('utf-8')
raw = lopen(src, 'rb').read().decode('utf-8')
raw = add_resources(raw, {
'LMONOI': P('fonts/liberation/LiberationMono-Italic.ttf'),
'LMONOR': P('fonts/liberation/LiberationMono-Regular.ttf'),
@ -60,7 +60,7 @@ def get_simple_book(fmt='epub'):
})
shutil.copy2(I('lt.png'), '.')
x = 'index.html'
with open(x, 'wb') as f:
with lopen(x, 'wb') as f:
f.write(raw.encode('utf-8'))
build_book(x, ans, args=[
'--level1-toc=//h:h2', '--language=en', '--authors=Kovid Goyal', '--cover=lt.png'])
@ -72,9 +72,9 @@ def get_split_book(fmt='epub'):
src = os.path.join(os.path.dirname(__file__), 'split.html')
if needs_recompile(ans, src):
x = src.replace('split.html', 'index.html')
raw = open(src, 'rb').read().decode('utf-8')
raw = lopen(src, 'rb').read().decode('utf-8')
try:
with open(x, 'wb') as f:
with lopen(x, 'wb') as f:
f.write(raw.encode('utf-8'))
build_book(x, ans, args=['--level1-toc=//h:h2', '--language=en', '--authors=Kovid Goyal',
'--cover=' + I('lt.png')])

View File

@ -184,19 +184,20 @@ class ParsingTests(BaseTest):
self.assertIn('extra', root.nsmap, 'Extra namespace declaration on <html> tag not preserved')
def timing():
import time, sys
import sys
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.monotonic import monotonic
from html5lib import parse as vanilla
filename = sys.argv[-1]
with open(filename, 'rb') as f:
with lopen(filename, 'rb') as f:
raw = f.read()
raw = xml_to_unicode(raw)[0]
for name, f in (('calibre', partial(parse, line_numbers=False)), ('html5lib', vanilla), ('calibre-old', html5_parse)):
timings = []
for i in xrange(10):
st = time.time()
st = monotonic()
f(raw)
timings.append(time.time() - st)
timings.append(monotonic() - st)
avg = sum(timings)/len(timings)
print ('Average time for %s: %.2g' % (name, avg))

View File

@ -0,0 +1,31 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import (unicode_literals, division, absolute_import,
print_function)
import os
from calibre.ebooks.oeb.polish.tests.base import BaseTest
from calibre.ebooks.oeb.polish.container import get_container
from calibre.ebooks.oeb.polish.create import create_book
from calibre.ebooks.oeb.polish.toc import get_toc
from calibre.ebooks.metadata.book.base import Metadata
class Structure(BaseTest):
    ''' Tests for detection of book structure (currently only the Table of Contents). '''

    def test_toc_detection(self):
        ''' Check that get_toc() returns a non-empty ToC for a freshly
        created book, keeps doing so after the OPF version is bumped to 3.0,
        and picks up an EPUB 3 nav document once one is added. '''
        # self.tdir is supplied by BaseTest -- presumably a per-test scratch
        # directory; confirm against the base class.
        ep = os.path.join(self.tdir, 'book.epub')
        create_book(Metadata('Test ToC'), ep)
        c = get_container(ep, tdir=os.path.join(self.tdir, 'container'), tweak_mode=True)

        # The freshly created book is expected to declare OPF version 2 and
        # already have a detectable ToC.
        self.assertEqual(2, c.opf_version_parsed.major)
        self.assertTrue(len(get_toc(c)))

        # Relabel the package as version 3 without adding a nav document.
        c.opf.set('version', '3.0')
        self.assertEqual(3, c.opf_version_parsed.major)
        self.assertTrue(len(get_toc(c)))  # detect NCX toc even in epub 3 files

        # Add an EPUB 3 nav document. Both fragments of the payload must be
        # bytes literals: unicode_literals is in effect for this module, so an
        # unprefixed fragment would be text, and mixing it with a b'' literal
        # silently yields text on Python 2 and is a SyntaxError on Python 3.
        c.add_file(
            'nav.html',
            b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">'
            b'<body><nav epub:type="toc"><ol><li><a href="start.xhtml">EPUB 3 nav</a></li></ol></nav></body></html>',
            # Mark the new manifest item as the nav document.
            process_manifest_item=lambda item: item.set('properties', 'nav'))
        toc = get_toc(c)
        self.assertTrue(len(toc))
        # The first ToC entry must come from the nav document just added.
        self.assertEqual(toc.as_dict['children'][0]['title'], 'EPUB 3 nav')