From 7f76d0292f6fbdbb7b2d61e03543a2c0dd09dbcc Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 23 Jun 2016 07:43:54 +0530 Subject: [PATCH] Add a test for ToC detection --- src/calibre/ebooks/oeb/polish/container.py | 8 +++-- src/calibre/ebooks/oeb/polish/tests/base.py | 8 ++--- .../ebooks/oeb/polish/tests/parsing.py | 9 +++--- .../ebooks/oeb/polish/tests/structure.py | 31 +++++++++++++++++++ 4 files changed, 45 insertions(+), 11 deletions(-) create mode 100644 src/calibre/ebooks/oeb/polish/tests/structure.py diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py index 867c9dd629..a748a3ec6a 100644 --- a/src/calibre/ebooks/oeb/polish/container.py +++ b/src/calibre/ebooks/oeb/polish/container.py @@ -281,7 +281,7 @@ class Container(ContainerBase): # {{{ for name, path in self.name_path_map.iteritems()} } - def add_name_to_manifest(self, name): + def add_name_to_manifest(self, name, process_manifest_item=None): ' Add an entry to the manifest for a file with the specified name. Returns the manifest id. ' all_ids = {x.get('id') for x in self.opf_xpath('//*[@id]')} c = 0 @@ -295,6 +295,8 @@ class Container(ContainerBase): # {{{ id=item_id, href=href) item.set('media-type', self.mime_map[name]) self.insert_into_xml(manifest, item) + if process_manifest_item is not None: + process_manifest_item(item) self.dirty(self.opf_name) return item_id @@ -304,7 +306,7 @@ class Container(ContainerBase): # {{{ all_hrefs = {x.get('href') for x in self.opf_xpath('//opf:manifest/opf:item[@href]')} return href in all_hrefs - def add_file(self, name, data, media_type=None, spine_index=None, modify_name_if_needed=False): + def add_file(self, name, data, media_type=None, spine_index=None, modify_name_if_needed=False, process_manifest_item=None): ''' Add a file to this container. 
Entries for the file are automatically created in the OPF manifest and spine (if the file is a text document) ''' @@ -336,7 +338,7 @@ class Container(ContainerBase): # {{{ self.mime_map[name] = mt if self.ok_to_be_unmanifested(name): return name - item_id = self.add_name_to_manifest(name) + item_id = self.add_name_to_manifest(name, process_manifest_item=process_manifest_item) if mt in OEB_DOCS: manifest = self.opf_xpath('//opf:manifest')[0] spine = self.opf_xpath('//opf:spine')[0] diff --git a/src/calibre/ebooks/oeb/polish/tests/base.py b/src/calibre/ebooks/oeb/polish/tests/base.py index 111a4c82b0..c8ef3ddf32 100644 --- a/src/calibre/ebooks/oeb/polish/tests/base.py +++ b/src/calibre/ebooks/oeb/polish/tests/base.py @@ -51,7 +51,7 @@ def get_simple_book(fmt='epub'): if needs_recompile(ans, src): with TemporaryDirectory('bpt') as tdir: with CurrentDir(tdir): - raw = open(src, 'rb').read().decode('utf-8') + raw = lopen(src, 'rb').read().decode('utf-8') raw = add_resources(raw, { 'LMONOI': P('fonts/liberation/LiberationMono-Italic.ttf'), 'LMONOR': P('fonts/liberation/LiberationMono-Regular.ttf'), @@ -60,7 +60,7 @@ def get_simple_book(fmt='epub'): }) shutil.copy2(I('lt.png'), '.') x = 'index.html' - with open(x, 'wb') as f: + with lopen(x, 'wb') as f: f.write(raw.encode('utf-8')) build_book(x, ans, args=[ '--level1-toc=//h:h2', '--language=en', '--authors=Kovid Goyal', '--cover=lt.png']) @@ -72,9 +72,9 @@ def get_split_book(fmt='epub'): src = os.path.join(os.path.dirname(__file__), 'split.html') if needs_recompile(ans, src): x = src.replace('split.html', 'index.html') - raw = open(src, 'rb').read().decode('utf-8') + raw = lopen(src, 'rb').read().decode('utf-8') try: - with open(x, 'wb') as f: + with lopen(x, 'wb') as f: f.write(raw.encode('utf-8')) build_book(x, ans, args=['--level1-toc=//h:h2', '--language=en', '--authors=Kovid Goyal', '--cover=' + I('lt.png')]) diff --git a/src/calibre/ebooks/oeb/polish/tests/parsing.py b/src/calibre/ebooks/oeb/polish/tests/parsing.py 
index b6f09853f2..39e3f48170 100644 --- a/src/calibre/ebooks/oeb/polish/tests/parsing.py +++ b/src/calibre/ebooks/oeb/polish/tests/parsing.py @@ -184,19 +184,20 @@ class ParsingTests(BaseTest): self.assertIn('extra', root.nsmap, 'Extra namespace declaration on tag not preserved') def timing(): - import time, sys + import sys from calibre.ebooks.chardet import xml_to_unicode + from calibre.utils.monotonic import monotonic from html5lib import parse as vanilla filename = sys.argv[-1] - with open(filename, 'rb') as f: + with lopen(filename, 'rb') as f: raw = f.read() raw = xml_to_unicode(raw)[0] for name, f in (('calibre', partial(parse, line_numbers=False)), ('html5lib', vanilla), ('calibre-old', html5_parse)): timings = [] for i in xrange(10): - st = time.time() + st = monotonic() f(raw) - timings.append(time.time() - st) + timings.append(monotonic() - st) avg = sum(timings)/len(timings) print ('Average time for %s: %.2g' % (name, avg)) diff --git a/src/calibre/ebooks/oeb/polish/tests/structure.py b/src/calibre/ebooks/oeb/polish/tests/structure.py new file mode 100644 index 0000000000..ecbb2d6d45 --- /dev/null +++ b/src/calibre/ebooks/oeb/polish/tests/structure.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2016, Kovid Goyal + +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +import os +from calibre.ebooks.oeb.polish.tests.base import BaseTest +from calibre.ebooks.oeb.polish.container import get_container +from calibre.ebooks.oeb.polish.create import create_book +from calibre.ebooks.oeb.polish.toc import get_toc +from calibre.ebooks.metadata.book.base import Metadata + +class Structure(BaseTest): + + def test_toc_detection(self): + ep = os.path.join(self.tdir, 'book.epub') + create_book(Metadata('Test ToC'), ep) + c = get_container(ep, tdir=os.path.join(self.tdir, 'container'), tweak_mode=True) + self.assertEqual(2, c.opf_version_parsed.major) + 
self.assertTrue(len(get_toc(c))) + c.opf.set('version', '3.0') + self.assertEqual(3, c.opf_version_parsed.major) + self.assertTrue(len(get_toc(c))) # detect NCX toc even in epub 3 files + c.add_file('nav.html', b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">' + '<body><nav epub:type="toc"><ol><li><a href="start.xhtml">EPUB 3 nav</a></li></ol></nav></body></html>', + process_manifest_item=lambda item:item.set('properties', 'nav')) + toc = get_toc(c) + self.assertTrue(len(toc)) + self.assertEqual(toc.as_dict['children'][0]['title'], 'EPUB 3 nav')