Test for file splitting

2025-07-09 03:04:10 -04:00 · 2013-11-21 18:17:49 +05:30 · 2013-11-21 18:17:49 +05:30 · 58bed7e3de
commit 58bed7e3de
parent aadbcf2299
4 changed files with 92 additions and 4 deletions
--- a/src/calibre/ebooks/oeb/polish/split.py
+++ b/src/calibre/ebooks/oeb/polish/split.py
@@ -158,14 +158,19 @@ class SplitLinkReplacer(object):
            self.replaced = True
        return url
-def split(container, name, loc):
+def split(container, name, loc_or_xpath, before=True):
    ''' Split the file specified by name at the position specified by loc_or_xpath. '''
    root = container.parsed(name)
-    split_point = node_from_loc(root, loc)
+    if isinstance(loc_or_xpath, type('')):
        split_point = root.xpath(loc_or_xpath)[0]
    else:
        split_point = node_from_loc(root, loc_or_xpath)
    if in_table(split_point):
        raise ValueError('Cannot split inside tables')
    if split_point.tag.endswith('}body'):
        raise ValueError('Cannot split on the <body> tag')
-    tree1, tree2 = do_split(split_point, container.log)
+    tree1, tree2 = do_split(split_point, container.log, before=before)
    root1, root2 = tree1.getroot(), tree2.getroot()
    anchors_in_top = frozenset(root1.xpath('//*/@id')) | frozenset(root1.xpath('//*/@name')) | {''}
    anchors_in_bottom = frozenset(root2.xpath('//*/@id')) | frozenset(root2.xpath('//*/@name'))
--- a/src/calibre/ebooks/oeb/polish/tests/base.py
+++ b/src/calibre/ebooks/oeb/polish/tests/base.py
@@ -55,6 +55,22 @@ def get_simple_book(fmt='epub'):
            os.remove(x)
    return ans
 def get_split_book(fmt='epub'):
    ''' Return the path to the cached test book built from split.html.

    The book lives in the directory returned by get_cache() as
    ``split.<fmt>``. It is (re)built with build_book() only when
    needs_recompile(ans, src) says so; otherwise the cached path is returned
    unchanged.

    :param fmt: File extension used for the built book (default ``'epub'``).
    :return: Path of the built book inside the cache directory.
    '''
    cache = get_cache()
    ans = os.path.join(cache, 'split.'+fmt)
    src = os.path.join(os.path.dirname(__file__), 'split.html')
    if needs_recompile(ans, src):
        # The source is copied next to itself under the name index.html before
        # building (presumably so the file inside the book is called
        # index.html, which is the name the split test looks up).
        x = src.replace('split.html', 'index.html')
        # Read through a context manager so the handle on split.html is closed
        # deterministically instead of being leaked until garbage collection.
        with open(src, 'rb') as f:
            raw = f.read().decode('utf-8')
        try:
            with open(x, 'wb') as f:
                f.write(raw.encode('utf-8'))
            build_book(x, ans, args=['--level1-toc=//h:h2', '--language=en', '--authors=Kovid Goyal',
                                        '--cover=' + I('lt.png')])
        finally:
            # Always remove the temporary index.html copy, even if the build fails
            os.remove(x)
    return ans
 devnull = DevNull()
 class BaseTest(unittest.TestCase):
--- a/src/calibre/ebooks/oeb/polish/tests/container.py
+++ b/src/calibre/ebooks/oeb/polish/tests/container.py
@@ -8,10 +8,11 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 import os, subprocess
-from calibre.ebooks.oeb.polish.tests.base import BaseTest, get_simple_book
+from calibre.ebooks.oeb.polish.tests.base import BaseTest, get_simple_book, get_split_book
 from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, OCF_NS
 from calibre.ebooks.oeb.polish.replace import rename_files
 from calibre.ebooks.oeb.polish.split import split
 from calibre.utils.filenames import nlinks_file
 from calibre.ptempfile import TemporaryFile
@@ -175,3 +176,15 @@ class ContainerTests(BaseTest):
        self.assertNotIn(name, {x[0] for x in c.spine_names})
        self.check_links(c)
    def test_split_file(self):
        ''' Test splitting of files

        Splits index.html of the split test book at the element with
        id="page2" and checks which of the two files then holds the element
        with id="container", followed by a link check on the container. '''
        container = get_container(get_split_book())
        orig_name = 'index.html'
        new_name = split(container, orig_name, '//*[@id="page2"]')
        # Parse the file named by split()'s return value first, then the original
        new_root = container.parsed(new_name)
        orig_root = container.parsed(orig_name)
        # The id="container" element is expected exactly once in the new file
        # and not at all in the original one.
        for tree, expected in ((new_root, 1), (orig_root, 0)):
            found = tree.xpath('//*[@id="container"]')
            self.assertEqual(expected, len(found), 'Split point was not adjusted')
        self.check_links(container)
--- a/src/calibre/ebooks/oeb/polish/tests/split.html
+++ b/src/calibre/ebooks/oeb/polish/tests/split.html
@@ -0,0 +1,54 @@
 <!DOCTYPE html>
 <html>
 	<head>
 		<!-- Fixture for test_split_file: the test splits this document at the
 		     element with id="page2", which is nested inside the div with
 		     id="container", and then checks which resulting file contains
 		     that div. The links below target both headings, with and without
 		     the index.html file name. -->
 		<title>split</title>
 		<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
 	</head>
 	<body>
    <h3 id="page1">Page One</h3>
 <!--lorem-->
 <p>Or pursues or desires to obtain pain of itself, because it is pain, but
 because occasionally circumstances occur in which toil and pain can procure him
 some great pleasure. To take a trivial example, which of us ever undertakes
 laborious physical exercise, except to obtain some advantage from it? But who
 has any right to find fault with a man who chooses to enjoy a pleasure that has
 no annoying consequences, or one who avoids a pain that produces no resultant
 pleasure?</p>
 <p>On the other hand, we denounce with righteous indignation and dislike men
 who are so beguiled and demoralized by the.</p>
 <!--/lorem-->
 <p>
 <a href="#page1">Link to page one</a>
 <a href="#page2">Link to page two</a>
 <a href="index.html#page1">Link to page one</a>
 <a href="index.html#page2">Link to page two</a>
 </p>
 <div id="container">
    <h3 id="page2">Page Two</h3>
 <!--lorem-->
 <p>Same as saying through shrinking from toil and pain. These cases are
 perfectly simple and easy to distinguish. In a free hour, when our power of
 choice is untrammelled and when nothing prevents our being able to do what we
 like best, every pleasure is to be welcomed and every pain avoided.</p>
 <p>But in certain circumstances and owing to the claims of duty or the
 obligations of business it will frequently occur that pleasures have to be
 repudiated and annoyances accepted. The wise man therefore always holds in
 these matters to this principle of selection: he rejects pleasures to secure
 other greater.</p>
 <!--/lorem-->
 <p>
 <a href="#page1">Link to page one</a>
 <a href="#page2">Link to page two</a>
 <a href="index.html#page1">Link to page one</a>
 <a href="index.html#page2">Link to page two</a>
 </p>
 </div>
 	</body>
 </html>