Test for file splitting

This commit is contained in:
Kovid Goyal 2013-11-21 18:17:49 +05:30
parent aadbcf2299
commit 58bed7e3de
4 changed files with 92 additions and 4 deletions

View File

@ -158,14 +158,19 @@ class SplitLinkReplacer(object):
self.replaced = True self.replaced = True
return url return url
def split(container, name, loc): def split(container, name, loc_or_xpath, before=True):
''' Split the file specified by name at the position specified by loc_or_xpath. '''
root = container.parsed(name) root = container.parsed(name)
split_point = node_from_loc(root, loc) if isinstance(loc_or_xpath, type('')):
split_point = root.xpath(loc_or_xpath)[0]
else:
split_point = node_from_loc(root, loc_or_xpath)
if in_table(split_point): if in_table(split_point):
raise ValueError('Cannot split inside tables') raise ValueError('Cannot split inside tables')
if split_point.tag.endswith('}body'): if split_point.tag.endswith('}body'):
raise ValueError('Cannot split on the <body> tag') raise ValueError('Cannot split on the <body> tag')
tree1, tree2 = do_split(split_point, container.log) tree1, tree2 = do_split(split_point, container.log, before=before)
root1, root2 = tree1.getroot(), tree2.getroot() root1, root2 = tree1.getroot(), tree2.getroot()
anchors_in_top = frozenset(root1.xpath('//*/@id')) | frozenset(root1.xpath('//*/@name')) | {''} anchors_in_top = frozenset(root1.xpath('//*/@id')) | frozenset(root1.xpath('//*/@name')) | {''}
anchors_in_bottom = frozenset(root2.xpath('//*/@id')) | frozenset(root2.xpath('//*/@name')) anchors_in_bottom = frozenset(root2.xpath('//*/@id')) | frozenset(root2.xpath('//*/@name'))

View File

@ -55,6 +55,22 @@ def get_simple_book(fmt='epub'):
os.remove(x) os.remove(x)
return ans return ans
def get_split_book(fmt='epub'):
    ''' Return the path of the cached book built from split.html.

    The book lives at ``split.<fmt>`` inside the directory returned by
    get_cache(). It is (re)built only when needs_recompile(ans, src) is true
    (presumably when the cached book is missing or older than split.html,
    verify against needs_recompile).

    :param fmt: File extension of the book to build, ``'epub'`` by default.
    :return: Path to the built book.
    '''
    cache = get_cache()
    ans = os.path.join(cache, 'split.'+fmt)
    src_dir = os.path.dirname(__file__)
    src = os.path.join(src_dir, 'split.html')
    if needs_recompile(ans, src):
        # The book is built from a temporary copy named index.html that sits
        # next to split.html. Build the path from the directory rather than
        # with str.replace() on the full path, which would also rewrite any
        # parent directory whose name happens to contain 'split.html'.
        x = os.path.join(src_dir, 'index.html')
        # Read through a context manager so the source file handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(src, 'rb') as f:
            raw = f.read().decode('utf-8')
        try:
            with open(x, 'wb') as f:
                f.write(raw.encode('utf-8'))
            build_book(x, ans, args=['--level1-toc=//h:h2', '--language=en', '--authors=Kovid Goyal',
                                     '--cover=' + I('lt.png')])
        finally:
            # The temporary copy is removed whether or not the build succeeded
            os.remove(x)
    return ans
devnull = DevNull() devnull = DevNull()
class BaseTest(unittest.TestCase): class BaseTest(unittest.TestCase):

View File

@ -8,10 +8,11 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os, subprocess import os, subprocess
from calibre.ebooks.oeb.polish.tests.base import BaseTest, get_simple_book from calibre.ebooks.oeb.polish.tests.base import BaseTest, get_simple_book, get_split_book
from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, OCF_NS from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, OCF_NS
from calibre.ebooks.oeb.polish.replace import rename_files from calibre.ebooks.oeb.polish.replace import rename_files
from calibre.ebooks.oeb.polish.split import split
from calibre.utils.filenames import nlinks_file from calibre.utils.filenames import nlinks_file
from calibre.ptempfile import TemporaryFile from calibre.ptempfile import TemporaryFile
@ -175,3 +176,15 @@ class ContainerTests(BaseTest):
self.assertNotIn(name, {x[0] for x in c.spine_names}) self.assertNotIn(name, {x[0] for x in c.spine_names})
self.check_links(c) self.check_links(c)
def test_split_file(self):
    ''' Test splitting of files

    Splits index.html of the split test book at the element with id
    ``page2`` and checks that the element with id ``container`` is present
    exactly once in the newly created file and absent from the original.
    '''
    container = get_container(get_split_book())
    original_name = 'index.html'
    container_xpath = '//*[@id="container"]'
    failure_msg = 'Split point was not adjusted'

    new_name = split(container, original_name, '//*[@id="page2"]')

    # Parse the new file first and the original second, then check how many
    # container elements each of them holds: one in the new file, none in
    # the original.
    new_root = container.parsed(new_name)
    original_root = container.parsed(original_name)
    for expected, tree in ((1, new_root), (0, original_root)):
        self.assertEqual(expected, len(tree.xpath(container_xpath)), failure_msg)

    self.check_links(container)

View File

@ -0,0 +1,54 @@
<!DOCTYPE html>
<html>
<head>
<!-- Test fixture for file splitting. The body holds two sections: a
     heading with id "page1" directly under body, and a heading with id
     "page2" that is the first element inside the div with id "container".
     Each section ends with links to both ids, written both as bare
     fragments and as index.html plus fragment. -->
<title>split</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
</head>
<body>
<h3 id="page1">Page One</h3>
<!--lorem-->
<p>Or pursues or desires to obtain pain of itself, because it is pain, but
because occasionally circumstances occur in which toil and pain can procure him
some great pleasure. To take a trivial example, which of us ever undertakes
laborious physical exercise, except to obtain some advantage from it? But who
has any right to find fault with a man who chooses to enjoy a pleasure that has
no annoying consequences, or one who avoids a pain that produces no resultant
pleasure?</p>
<p>On the other hand, we denounce with righteous indignation and dislike men
who are so beguiled and demoralized by the.</p>
<!--/lorem-->
<p>
<a href="#page1">Link to page one</a>
<a href="#page2">Link to page two</a>
<a href="index.html#page1">Link to page one</a>
<a href="index.html#page2">Link to page two</a>
</p>
<div id="container">
<h3 id="page2">Page Two</h3>
<!--lorem-->
<p>Same as saying through shrinking from toil and pain. These cases are
perfectly simple and easy to distinguish. In a free hour, when our power of
choice is untrammelled and when nothing prevents our being able to do what we
like best, every pleasure is to be welcomed and every pain avoided.</p>
<p>But in certain circumstances and owing to the claims of duty or the
obligations of business it will frequently occur that pleasures have to be
repudiated and annoyances accepted. The wise man therefore always holds in
these matters to this principle of selection: he rejects pleasures to secure
other greater.</p>
<!--/lorem-->
<p>
<a href="#page1">Link to page one</a>
<a href="#page2">Link to page two</a>
<a href="index.html#page1">Link to page one</a>
<a href="index.html#page2">Link to page two</a>
</p>
</div>
</body>
</html>