class Range(object):
    """A run of consecutive character codes and the widths assigned to them.

    ``widths`` is a list holding either exactly one width that applies to
    every code in ``first..last`` or one width per code.  The two shapes are
    flattened by :attr:`as_item` into ``(first, last, width)`` and
    ``(first, [w1, w2, ...])`` respectively.
    """

    __slots__ = ('first', 'last', 'widths', 'sort_order')

    def __init__(self, first, last, widths):
        # Take a private copy: merge() trims and extends this list in place
        # and that must never leak back into the caller's own arrays.
        self.first, self.last, self.widths = first, last, list(widths)
        # Sort by first with larger ranges coming before smaller ones.
        # Computed once here; merge() does not refresh it.
        self.sort_order = self.first, -self.last

    def __repr__(self):
        return '({}, {}, {})'.format(self.first, self.last, self.widths)

    def _absorb_tail(self, r):
        # Stretch self to r.last, taking from the end of r.widths only the
        # per-code widths self does not already have.
        self.last = r.last
        missing = self.last - self.first + 1 - len(self.widths)
        if missing > 0:  # a slice of [-0:] would wrongly copy all of r.widths
            self.widths.extend(r.widths[-missing:])

    def merge(self, r):
        """Fold ``r`` into this range as far as possible.

        Assumes ``r`` does not start before ``self`` (i.e. ranges are fed in
        ``sort_order``).  Where both ranges cover the same code, the width
        already held by ``self`` wins.

        Returns ``None`` when nothing of ``r`` remains to be stored, otherwise
        returns ``r`` itself (possibly trimmed in place so that it starts
        right after ``self.last``) for the caller to keep as a separate range.
        """
        if r.last <= self.last:
            return None  # is a subset
        if r.first > self.last:
            # Disjoint. Only directly adjacent ranges of the same shape can be
            # joined, and single-width ones only when the width is identical.
            if r.first == self.last + 1 and self.has_single_width == r.has_single_width:
                if self.has_single_width:
                    if r.widths[0] == self.widths[0]:
                        self.last = r.last
                        return None
                else:
                    self._absorb_tail(r)
                    return None
            return r
        # From here on r overlaps self and extends past its end.
        if self.has_single_width and r.has_single_width:
            if r.widths[0] != self.widths[0]:
                # Different widths: keep the part of r beyond self as its own
                # range instead of stretching self's width over it.
                r.first = self.last + 1
                return r
            self.last = r.last
            return None
        if self.has_single_width != r.has_single_width:
            # make r disjoint
            delta = self.last + 1 - r.first
            r.first = self.last + 1
            if len(r.widths) > 1:
                del r.widths[:delta]
            return r if r.widths else None
        # Both carry per-code widths: subsume r into self
        self._absorb_tail(r)
        return None

    @property
    def as_item(self):
        """Tuple of the values this range contributes to a flattened array."""
        if self.has_single_width:
            return self.first, self.last, self.widths[0]
        return self.first, self.widths

    @property
    def has_single_width(self):
        """True when ``widths`` holds exactly one entry."""
        return len(self.widths) == 1
class TestPDFWriter(unittest.TestCase):
    """Unit tests for helpers of the HTML based PDF writer."""

    def test_merge_w_arrays(self):
        # Each entry pairs the width arrays handed to merge_w_arrays with the
        # flattened array it is expected to return.
        scenarios = (
            # a range followed by an overlapping per-code array
            (([1, 3, 0.1], [3, [0.1, 0.2]]),
             [1, 3, 0.1, 4, 4, 0.2]),
            # adjacent ranges with the same width are joined
            (([1, 5, 0.1], [6, 8, 0.1]),
             [1, 8, 0.1]),
            # adjacent ranges with different widths stay separate
            (([1, 5, 0.1], [6, 8, 0.2]),
             [1, 5, 0.1, 6, 8, 0.2]),
            # disjoin overlap: range first, per-code array second
            (([1, 4, 0.1], [3, [0.1, 0.1, 0.2, 0.3]]),
             [1, 4, 0.1, 5, [0.2, 0.3]]),
            # disjoin overlap: per-code array first, range second
            (([1, [0.1, 0.2]], [2, 4, 0.2]),
             [1, [0.1, 0.2], 3, 4, 0.2]),
            # split overlapping arrays
            (([1, [0.1, 0.2, 0.3]], [3, 5, 0.3]),
             [1, [0.1, 0.2, 0.3], 4, 5, 0.3]),
            # merge overlapping ranges, using first width
            (([1, 5, 0.1], [2, 4, 0.2]),
             [1, 5, 0.1]),
            # merge overlapping arrays
            (([1, [0.1, 0.1]], [3, [0.2, 0.2]]),
             [1, [0.1, 0.1, 0.2, 0.2]]),
            # several entries per input array
            (([1, 10, 99, 20, [1, 2, 3, 4]],
              [3, 10, 99, 11, 13, 77, 19, [77, 1]]),
             [1, 10, 99, 11, 13, 77, 19, [77, 1, 2, 3, 4]]),
        )
        for w_arrays, expected in scenarios:
            self.assertEqual(merge_w_arrays(w_arrays), expected)


def find_tests():
    """Return the test suite built from TestPDFWriter."""
    loader = unittest.defaultTestLoader
    return loader.loadTestsFromTestCase(TestPDFWriter)