Added a way to create custom-labeled page breaks in an APNX file using ARIA (Accessible Rich Internet Applications) attributes in HTML.

Using <any_html_element *** pagebreak *** aria-label="some_label" ***> will produce a page break location (*** stands for arbitrary other content inside the tag). Possible usages: <span id="pg159" role="doc-pagebreak" aria-label="159"/>, <h1 id="pg_header1" role="doc-pagebreak" aria-label="Header 1">Header</h1>, <p role="doc-pagebreak" aria-label="§ 1 part 4 page 6 of 9">Text</p>.
2025-07-09 03:04:10 -04:00 · 2022-08-24 00:36:01 +02:00 · 2022-08-24 00:36:01 +02:00 · c6bcce78b2
commit c6bcce78b2
parent 8f0226d8b1
12 changed files with 526 additions and 217 deletions
--- a/src/calibre/devices/kindle/apnx.py
+++ b/src/calibre/devices/kindle/apnx.py
@ -1,4 +1,4 @@
-__license__   = 'GPL v3'
+__license__ = 'GPL v3'
 __copyright__ = '2011, John Schember <john at nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

@ -6,10 +6,8 @@ __docformat__ = 'restructuredtext en'
 Generates and writes an APNX page mapping file.
 '''

-import re
 import struct

-from calibre.ebooks.mobi.reader.mobi6 import MobiReader
 from calibre.ebooks.pdb.header import PdbHeaderReader
 from calibre.ebooks.mobi.reader.headers import MetadataHeader
 from calibre.utils.logging import default_log
@ -17,29 +15,69 @@ from calibre import prints, fsync
 from calibre.constants import DEBUG
 from polyglot.builtins import as_unicode, as_bytes

+from calibre.devices.kindle.apnx_page_generator.generators.accurate_page_generator import AccuratePageGenerator
+from calibre.devices.kindle.apnx_page_generator.generators.pagebreak_page_generator import PagebreakPageGenerator
+from calibre.devices.kindle.apnx_page_generator.generators.aria_pagebreak_page_generator import \
+    AriaPagebreakPageGenerator
+from calibre.devices.kindle.apnx_page_generator.generators.exact_page_generator import ExactPageGenerator
+from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
+from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator
+from calibre.devices.kindle.apnx_page_generator.pages import Pages
+

 class APNXBuilder:
-    '''
+    """
    Create an APNX file using a pseudo page mapping.
-    '''
+    """

-    def write_apnx(self, mobi_file_path, apnx_path, method=None, page_count=0):
-        '''
+    generators: dict[str, IPageGenerator] = {
+        FastPageGenerator.instance.name(): FastPageGenerator.instance,
+        AccuratePageGenerator.instance.name(): AccuratePageGenerator.instance,
+        PagebreakPageGenerator.instance.name(): PagebreakPageGenerator.instance,
+        AriaPagebreakPageGenerator.instance.name(): AriaPagebreakPageGenerator.instance,
+        # ExactPageGenerator.instance.name(): ExactPageGenerator.instance,
+    }
+
+    def write_apnx(self, mobi_file_path: str, apnx_path: str, method: str | None = None, page_count: int = 0):
+        """
        If you want a fixed number of pages (such as from a custom column) then
        pass in a value to page_count, otherwise a count will be estimated
        using either the fast or accurate algorithm.
-        '''
-        import uuid
-        apnx_meta = {'guid': str(uuid.uuid4()).replace('-', '')[:8], 'asin':
-                '', 'cdetype': 'EBOK', 'format': 'MOBI_7', 'acr': ''}
+        """
+        apnx_meta = self.get_apnx_meta(mobi_file_path)

+        if page_count:
+            generator: IPageGenerator = ExactPageGenerator.instance
+        else:
+            generator: IPageGenerator = self.generators.setdefault(method, FastPageGenerator.instance)
+
+        pages = generator.generate(mobi_file_path, page_count)
+        if pages.number_of_pages == 0:
+            raise Exception(_('Could not generate page mapping.'))
+        # Generate the APNX file from the page mapping.
+        apnx = self.generate_apnx(pages, apnx_meta)
+
+        # Write the APNX.
+        with lopen(apnx_path, 'wb') as apnxf:
+            apnxf.write(apnx)
+            fsync(apnxf)
+
+    @staticmethod
+    def get_apnx_meta(mobi_file_path) -> dict[str, str]:
+        import uuid
+        apnx_meta = {
+            'guid': str(uuid.uuid4()).replace('-', '')[:8],
+            'asin': '',
+            'cdetype': 'EBOK',
+            'format': 'MOBI_7',
+            'acr': ''
+        }
        with lopen(mobi_file_path, 'rb') as mf:
            ident = PdbHeaderReader(mf).identity()
            if as_bytes(ident) != b'BOOKMOBI':
                # Check that this is really a MOBI file.
                raise Exception(_('Not a valid MOBI file. Reports identity of %s') % ident)
            apnx_meta['acr'] = as_unicode(PdbHeaderReader(mf).name(), errors='replace')
-
        # We'll need the PDB name, the MOBI version, and some metadata to make FW 3.4 happy with KF8 files...
        with lopen(mobi_file_path, 'rb') as mf:
            mh = MetadataHeader(mf, default_log)
@ -55,41 +93,10 @@ class APNXBuilder:
                apnx_meta['asin'] = ''
            else:
                apnx_meta['asin'] = str(mh.exth.uuid)
+        return apnx_meta

-        # Get the pages depending on the chosen parser
-        pages = []
-        if page_count:
-            pages = self.get_pages_exact(mobi_file_path, page_count)
-        else:
-            try:
-                if method == 'accurate':
-                    pages = self.get_pages_accurate(mobi_file_path)
-                elif method == 'pagebreak':
-                    pages = self.get_pages_pagebreak_tag(mobi_file_path)
-                    if not pages:
-                        pages = self.get_pages_accurate(mobi_file_path)
-                else:
-                    raise Exception('%r is not a valid apnx generation method' % method)
-            except:
-                # Fall back to the fast parser if we can't
-                # use the accurate one. Typically this is
-                # due to the file having DRM.
-                pages = self.get_pages_fast(mobi_file_path)
-
-        if not pages:
-            pages = self.get_pages_fast(mobi_file_path)
-        if not pages:
-            raise Exception(_('Could not generate page mapping.'))
-
-        # Generate the APNX file from the page mapping.
-        apnx = self.generate_apnx(pages, apnx_meta)
-
-        # Write the APNX.
-        with lopen(apnx_path, 'wb') as apnxf:
-            apnxf.write(apnx)
-            fsync(apnxf)
-
-    def generate_apnx(self, pages, apnx_meta):
+    @staticmethod
+    def generate_apnx(pages: Pages, apnx_meta) -> bytes:
        apnx = b''

        if DEBUG:
@ -107,8 +114,8 @@ class APNXBuilder:
            # legacy mobi files, too. But, since they still handle this one too, let's
            # try not to break old devices, and keep using the simple header ;).
            content_header = '{"contentGuid":"%(guid)s","asin":"%(asin)s","cdeType":"%(cdetype)s","fileRevisionId":"1"}' % apnx_meta
-        page_header = '{"asin":"%(asin)s","pageMap":"(1,a,1)"}' % apnx_meta
-
+        page_header = '{"asin":"%(asin)s","pageMap":"' % apnx_meta
+        page_header += pages.page_maps + '"}'
        if DEBUG:
            prints('APNX Content Header:', content_header)
        content_header = as_bytes(content_header)
@ -120,177 +127,12 @@ class APNXBuilder:
        apnx += content_header
        apnx += struct.pack('>H', 1)
        apnx += struct.pack('>H', len(page_header))
-        apnx += struct.pack('>H', len(pages))
+        apnx += struct.pack('>H', pages.number_of_pages)
        apnx += struct.pack('>H', 32)
        apnx += page_header

        # Write page values to APNX.
-        for page in pages:
-            apnx += struct.pack('>I', page)
+        for location in pages.page_locations:
+            apnx += struct.pack('>I', location)

        return apnx
-
-    def get_pages_exact(self, mobi_file_path, page_count):
-        '''
-        Given a specified page count (such as from a custom column),
-        create our array of pages for the apnx file by dividing by
-        the content size of the book.
-        '''
-        pages = []
-        count = 0
-
-        with lopen(mobi_file_path, 'rb') as mf:
-            phead = PdbHeaderReader(mf)
-            r0 = phead.section_data(0)
-            text_length = struct.unpack('>I', r0[4:8])[0]
-
-        chars_per_page = int(text_length // page_count)
-        while count < text_length:
-            pages.append(count)
-            count += chars_per_page
-
-        if len(pages) > page_count:
-            # Rounding created extra page entries
-            pages = pages[:page_count]
-
-        return pages
-
-    def get_pages_fast(self, mobi_file_path):
-        '''
-        2300 characters of uncompressed text per page. This is
-        not meant to map 1 to 1 to a print book but to be a
-        close enough measure.
-
-        A test book was chosen and the characters were counted
-        on one page. This number was round to 2240 then 60
-        characters of markup were added to the total giving
-        2300.
-
-        Uncompressed text length is used because it's easily
-        accessible in MOBI files (part of the header). Also,
-        It's faster to work off of the length then to
-        decompress and parse the actual text.
-        '''
-        text_length = 0
-        pages = []
-        count = 0
-
-        with lopen(mobi_file_path, 'rb') as mf:
-            phead = PdbHeaderReader(mf)
-            r0 = phead.section_data(0)
-            text_length = struct.unpack('>I', r0[4:8])[0]
-
-        while count < text_length:
-            pages.append(count)
-            count += 2300
-
-        return pages
-
-    def get_pages_accurate(self, mobi_file_path):
-        '''
-        A more accurate but much more resource intensive and slower
-        method to calculate the page length.
-
-        Parses the uncompressed text. In an average paper back book
-        There are 32 lines per page and a maximum of 70 characters
-        per line.
-
-        Each paragraph starts a new line and every 70 characters
-        (minus markup) in a paragraph starts a new line. The
-        position after every 30 lines will be marked as a new
-        page.
-
-        This can be make more accurate by accounting for
-        <div class="mbp_pagebreak" /> as a new page marker.
-        And <br> elements as an empty line.
-        '''
-        pages = []
-
-        # Get the MOBI html.
-        mr = MobiReader(mobi_file_path, default_log)
-        if mr.book_header.encryption_type != 0:
-            # DRMed book
-            return self.get_pages_fast(mobi_file_path)
-        mr.extract_text()
-
-        # States
-        in_tag = False
-        in_p = False
-        check_p = False
-        closing = False
-        p_char_count = 0
-
-        # Get positions of every line
-        # A line is either a paragraph starting
-        # or every 70 characters in a paragraph.
-        lines = []
-        pos = -1
-        # We want this to be as fast as possible so we
-        # are going to do one pass across the text. re
-        # and string functions will parse the text each
-        # time they are called.
-        #
-        # We can can use .lower() here because we are
-        # not modifying the text. In this case the case
-        # doesn't matter just the absolute character and
-        # the position within the stream.
-        data = bytearray(as_bytes(mr.mobi_html.lower()))
-        slash, p, lt, gt = map(ord, '/p<>')
-        for c in data:
-            pos += 1
-
-            # Check if we are starting or stopping a p tag.
-            if check_p:
-                if c == slash:
-                    closing = True
-                    continue
-                elif c == p:
-                    if closing:
-                        in_p = False
-                    else:
-                        in_p = True
-                        lines.append(pos - 2)
-                check_p = False
-                closing = False
-                continue
-
-            if c == lt:
-                in_tag = True
-                check_p = True
-                continue
-            elif c == gt:
-                in_tag = False
-                check_p = False
-                continue
-
-            if in_p and not in_tag:
-                p_char_count += 1
-                if p_char_count == 70:
-                    lines.append(pos)
-                    p_char_count = 0
-
-        # Every 30 lines is a new page
-        for i in range(0, len(lines), 32):
-            pages.append(lines[i])
-
-        return pages
-
-    def get_pages_pagebreak_tag(self, mobi_file_path):
-        '''
-        Determine pages based on the presence of
-        <mbp:pagebreak>.
-        '''
-        pages = []
-
-        # Get the MOBI html.
-        mr = MobiReader(mobi_file_path, default_log)
-        if mr.book_header.encryption_type != 0:
-            # DRMed book
-            return self.get_pages_fast(mobi_file_path)
-        mr.extract_text()
-
-        html = as_bytes(mr.mobi_html.lower())
-        for m in re.finditer(b'<[^>]*pagebreak[^>]*>', html):
-            pages.append(m.end())
-
-        return pages
--- a/src/calibre/devices/kindle/apnx_page_generator/init.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/init.py
--- a/src/calibre/devices/kindle/apnx_page_generator/generators/accurate_page_generator.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/generators/accurate_page_generator.py
@ -0,0 +1,103 @@
+__license__ = 'GPL v3'
+__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
+from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator
+from calibre.devices.kindle.apnx_page_generator.pages import Pages
+
+
+class AccuratePageGenerator(IPageGenerator):
+
+    def name(self) -> str:
+        return "accurate"
+
+    def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages:
+        return FastPageGenerator.instance.generate(mobi_file_path, real_count)
+
+    def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages:
+        """
+        A more accurate but much more resource intensive and slower
+        method to calculate the page length.
+
+        Parses the uncompressed text. In an average paper back book
+        There are 32 lines per page and a maximum of 70 characters
+        per line.
+
+        Each paragraph starts a new line and every 70 characters
+        (minus markup) in a paragraph starts a new line. The
+        position after every 32 lines will be marked as a new
+        page.
+
+        This can be made more accurate by accounting for
+        <div class="mbp_pagebreak" /> as a new page marker.
+        And <br> elements as an empty line.
+        """
+        pages = []
+
+        html = self.mobi_html(mobi_file_path)
+
+        # States
+        in_tag = False
+        in_p = False
+        check_p = False
+        closing = False
+        p_char_count = 0
+
+        # Get positions of every line
+        # A line is either a paragraph starting
+        # or every 70 characters in a paragraph.
+        lines = []
+        pos = -1
+        # We want this to be as fast as possible so we
+        # are going to do one pass across the text. re
+        # and string functions will parse the text each
+        # time they are called.
+        #
+        # We can use .lower() here because we are
+        # not modifying the text. In this case the case
+        # doesn't matter just the absolute character and
+        # the position within the stream.
+        data = bytearray(html)
+        slash, p, lt, gt = map(ord, '/p<>')
+        for c in data:
+            pos += 1
+
+            # Check if we are starting or stopping a p tag.
+            if check_p:
+                if c == slash:
+                    closing = True
+                    continue
+                elif c == p:
+                    if closing:
+                        in_p = False
+                    else:
+                        in_p = True
+                        lines.append(pos - 2)
+                check_p = False
+                closing = False
+                continue
+
+            if c == lt:
+                in_tag = True
+                check_p = True
+                continue
+            elif c == gt:
+                in_tag = False
+                check_p = False
+                continue
+
+            if in_p and not in_tag:
+                p_char_count += 1
+                if p_char_count == 70:
+                    lines.append(pos)
+                    p_char_count = 0
+
+        # Every 32 lines is a new page
+        for i in range(0, len(lines), 32):
+            pages.append(lines[i])
+
+        return Pages(pages)
+
+
+AccuratePageGenerator.instance = AccuratePageGenerator()
--- a/src/calibre/devices/kindle/apnx_page_generator/generators/aria_pagebreak_page_generator.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/generators/aria_pagebreak_page_generator.py
@ -0,0 +1,84 @@
+__license__ = 'GPL v3'
+__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
+from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator
+from calibre.devices.kindle.apnx_page_generator.page_number_type import PageNumberTypes
+from calibre.devices.kindle.apnx_page_generator.pages import Pages
+from calibre.devices.kindle.apnx_page_generator.page_group import PageGroup
+import re
+
+roman_numeral_map = (('m', 1000), ('cm', 900), ('d', 500), ('cd', 400), ('c', 100), ('xc', 90), ('l', 50), ('xl', 40),
+                     ('x', 10), ('ix', 9), ('v', 5), ('iv', 4), ('i', 1))
+
+roman_numeral_pattern = re.compile("""^m{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|V?i{0,3})$""", re.VERBOSE)
+
+
+def from_roman(s: str) -> int:
+    """convert Roman numeral to integer"""
+    if not s:
+        raise ValueError('Input can not be blank')
+    if not roman_numeral_pattern.match(s):
+        raise ValueError('Invalid Roman numeral: %s' % s)
+
+    result = 0
+    index = 0
+    for numeral, integer in roman_numeral_map:
+        while s[index:index + len(numeral)] == numeral:
+            result += integer
+            index += len(numeral)
+    return result
+
+
+class LabelDescriptor:
+    def __init__(self, label: str, value: int, label_type: PageNumberTypes):
+        self.label: str = label
+        self.value: int = value
+        self.label_type: PageNumberTypes = label_type
+
+
+class AriaPagebreakPageGenerator(IPageGenerator):
+
+    def name(self) -> str:
+        return "aria_pagebreak"
+
+    def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages:
+        return FastPageGenerator.instance.generate(mobi_file_path, real_count)
+
+    def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages:
+        html = self.mobi_html(mobi_file_path)
+        pages = Pages()
+
+        for m in re.finditer(b'<[^>]*role="doc-pagebreak"[^>]*aria-label="([^"|]+)"[^>]*>', html):
+            label_descriptor = self.get_label(m.group(1))
+            if pages.number_of_pages == 0:
+                pages.append(PageGroup(m.end(), label_descriptor.label_type, label_descriptor.value,
+                                       label_descriptor.label))
+            elif (
+                    pages.last_group.last_value == label_descriptor.value - 1 or label_descriptor.label_type ==
+                    PageNumberTypes.Custom) and pages.last_group.page_number_types == label_descriptor.label_type:
+
+                if label_descriptor.label_type != PageNumberTypes.Custom:
+                    pages.last_group.append(m.end())
+                else:
+                    pages.last_group.append((m.end(), label_descriptor.label))
+            else:
+                pages.append(PageGroup(m.end(), label_descriptor.label_type, label_descriptor.value,
+                                       label_descriptor.label))
+
+        return pages
+
+    @staticmethod
+    def get_label(label: bytes) -> LabelDescriptor:
+        label_string = label.decode()
+        try:
+            return LabelDescriptor(label_string, int(label_string), PageNumberTypes.Arabic)
+        except ValueError:
+            try:
+                return LabelDescriptor(label_string, from_roman(label_string), PageNumberTypes.Roman)
+            except ValueError:
+                return LabelDescriptor(label_string, 0, PageNumberTypes.Custom)
+
+
+AriaPagebreakPageGenerator.instance = AriaPagebreakPageGenerator()
--- a/src/calibre/devices/kindle/apnx_page_generator/generators/exact_page_generator.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/generators/exact_page_generator.py
@ -0,0 +1,41 @@
+__license__ = 'GPL v3'
+__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
+from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator
+from calibre.devices.kindle.apnx_page_generator.pages import Pages
+
+
+class ExactPageGenerator(IPageGenerator):
+
+    def name(self) -> str:
+        return "exact"
+
+    def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages:
+        return FastPageGenerator.instance.generate(mobi_file_path, real_count)
+
+    def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages:
+        """
+        Given a specified page count (such as from a custom column),
+        create our array of pages for the apnx file by dividing by
+        the content size of the book.
+        """
+        pages = []
+        count = 0
+
+        text_length = self.mobi_html_length(mobi_file_path)
+
+        chars_per_page = int(text_length // real_count)
+        while count < text_length:
+            pages.append(count)
+            count += chars_per_page
+
+        if len(pages) > real_count:
+            # Rounding created extra page entries
+            pages = pages[:real_count]
+
+        return Pages(pages)
+
+
+ExactPageGenerator.instance = ExactPageGenerator()
--- a/src/calibre/devices/kindle/apnx_page_generator/generators/fast_page_generator.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/generators/fast_page_generator.py
@ -0,0 +1,46 @@
+__license__ = 'GPL v3'
+__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator
+from calibre.devices.kindle.apnx_page_generator.pages import Pages
+
+
+class FastPageGenerator(IPageGenerator):
+
+    def name(self) -> str:
+        return "fast"
+
+    def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages:
+        raise Exception("Fast calculation impossible.")
+
+    def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages:
+        """
+        2300 characters of uncompressed text per page. This is
+        not meant to map 1 to 1 to a print book but to be a
+        close enough measure.
+
+        A test book was chosen and the characters were counted
+        on one page. This number was round to 2240 then 60
+        characters of markup were added to the total giving
+        2300.
+
+        Uncompressed text length is used because it's easily
+        accessible in MOBI files (part of the header). Also,
+        It's faster to work off of the length than to
+        decompress and parse the actual text.
+        """
+
+        pages = []
+        count = 0
+
+        text_length = self.mobi_html_length(mobi_file_path)
+
+        while count < text_length:
+            pages.append(count)
+            count += 2300
+
+        return Pages(pages)
+
+
+FastPageGenerator.instance = FastPageGenerator()
--- a/src/calibre/devices/kindle/apnx_page_generator/generators/pagebreak_page_generator.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/generators/pagebreak_page_generator.py
@ -0,0 +1,29 @@
+__license__ = 'GPL v3'
+__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
+from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator
+from calibre.devices.kindle.apnx_page_generator.pages import Pages
+import re
+
+
+class PagebreakPageGenerator(IPageGenerator):
+
+    def name(self) -> str:
+        return "pagebreak"
+
+    def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages:
+        return FastPageGenerator.instance.generate(mobi_file_path, real_count)
+
+    def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages:
+        """ Determine pages based on the presence of <*pagebreak*/>. """
+        html = self.mobi_html(mobi_file_path)
+        pages = []
+        for m in re.finditer(b'<[^>]*pagebreak[^>]*>', html):
+            pages.append(m.end())
+
+        return Pages(pages)
+
+
+PagebreakPageGenerator.instance = PagebreakPageGenerator()
--- a/src/calibre/devices/kindle/apnx_page_generator/i_page_generator.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/i_page_generator.py
@ -0,0 +1,53 @@
+__license__ = 'GPL v3'
+__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
+__docformat__ = 'restructuredtext en'
+
+import struct
+from abc import abstractmethod, ABCMeta
+from calibre.devices.kindle.apnx_page_generator.pages import Pages
+from calibre.ebooks.mobi.reader.mobi6 import MobiReader
+from calibre.utils.logging import default_log
+from polyglot.builtins import as_bytes
+from calibre.ebooks.pdb.header import PdbHeaderReader
+
+
+class IPageGenerator(metaclass=ABCMeta):
+
+    @abstractmethod
+    def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages:
+        pass
+
+    @abstractmethod
+    def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages:
+        pass
+
+    def generate(self, mobi_file_path: str, real_count: int | None) -> Pages:
+        try:
+            result = self._generate(mobi_file_path, real_count)
+            if result.number_of_pages > 0:
+                return result
+            return self._generate_fallback(mobi_file_path, real_count)
+        except Exception as e:
+            if self.__class__.__name__ == "FastPageGenerator":
+                raise e
+            return self._generate_fallback(mobi_file_path, real_count)
+
+    @abstractmethod
+    def name(self) -> str:
+        pass
+
+    @staticmethod
+    def mobi_html(mobi_file_path: str) -> bytes:
+        mr = MobiReader(mobi_file_path, default_log)
+        if mr.book_header.encryption_type != 0:
+            raise Exception("DRMed book")
+        mr.extract_text()
+        return as_bytes(mr.mobi_html.lower())
+
+    @staticmethod
+    def mobi_html_length(mobi_file_path: str) -> int:
+        with lopen(mobi_file_path, 'rb') as mf:
+            pdb_header = PdbHeaderReader(mf)
+            r0 = pdb_header.section_data(0)
+            return struct.unpack('>I', r0[4:8])[0]
+
--- a/src/calibre/devices/kindle/apnx_page_generator/page_group.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/page_group.py
@ -0,0 +1,55 @@
+__license__ = 'GPL v3'
+__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.devices.kindle.apnx_page_generator.page_number_type import PageNumberTypes
+
+
+class PageGroup:
+    """Simulate constructor overloading"""
+    def __init__(self, page_locations: int | list[int], page_number_type: PageNumberTypes, first_value: int,
+                 page_labels: str | list[str] | None = None):
+        if page_locations.__class__ == int:
+            self.page_locations: list[int] = [page_locations]
+        else:
+            self.page_locations: list[int] = page_locations
+        self.__page_number_type: PageNumberTypes = page_number_type
+        self.__first_value = first_value
+        if page_number_type == PageNumberTypes.Custom:
+            assert(page_labels is not None)
+            if page_labels.__class__ == str:
+                assert (1 == len(self.page_locations) and len(page_labels) > 0)
+                self.__page_number_labels: list[str] = [page_labels]
+            else:
+                assert (len(page_labels) == len(self.page_locations))
+                assert(all(len(label) > 0 for label in page_labels))
+                self.__page_number_labels: list[str] = page_labels
+
+    def append(self, page_location: int | tuple[int, str]) -> None:
+        if page_location.__class__ == int:
+            assert (self.__page_number_type != PageNumberTypes.Custom)
+            self.page_locations.append(page_location)
+        else:
+            assert (self.__page_number_type == PageNumberTypes.Custom)
+            self.page_locations.append(page_location[0])
+            self.__page_number_labels.append(page_location[1])
+        return
+
+    @property
+    def page_number_types(self) -> PageNumberTypes:
+        return self.__page_number_type
+
+    @property
+    def number_of_pages(self) -> int:
+        return len(self.page_locations)
+
+    @property
+    def last_value(self) -> int:
+        return self.__first_value + len(self.page_locations) - 1
+
+    def get_page_map(self, starting_location: int) -> str:
+        if self.__page_number_type != PageNumberTypes.Custom:
+            values = str(self.__first_value)
+        else:
+            values = "|".join(self.__page_number_labels)
+        return "(%s,%s,%s)" % (starting_location, self.__page_number_type.value, values)
--- a/src/calibre/devices/kindle/apnx_page_generator/page_number_type.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/page_number_type.py
@ -0,0 +1,11 @@
+__license__ = 'GPL v3'
+__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
+__docformat__ = 'restructuredtext en'
+
+import enum
+
+
+class PageNumberTypes(str, enum.Enum):
+    Arabic = "a"
+    Roman = "r"
+    Custom = 'c'
--- a/src/calibre/devices/kindle/apnx_page_generator/pages.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/pages.py
@ -0,0 +1,43 @@
+__license__ = 'GPL v3'
+__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
+__docformat__ = 'restructuredtext en'
+
+import itertools
+
+from calibre.devices.kindle.apnx_page_generator.page_group import PageGroup
+from calibre.devices.kindle.apnx_page_generator.page_number_type import PageNumberTypes
+
+
+class Pages:
+    def __init__(self, page_locations: list[int] | None = None):
+        if page_locations.__class__ == list:
+            self.__pages_groups: list[PageGroup] = [PageGroup(page_locations, PageNumberTypes.Arabic, 1)]
+        else:
+            self.__pages_groups: list[PageGroup] = []
+
+    def append(self, page_location: PageGroup) -> None:
+        self.__pages_groups.append(page_location)
+        return
+
+    @property
+    def last_group(self) -> PageGroup:
+        return self.__pages_groups[-1]
+
+    @property
+    def page_maps(self) -> str:
+        location = 1
+        result = []
+        for group in self.__pages_groups:
+            result.append(group.get_page_map(location))
+            location += group.number_of_pages
+        return ",".join(result)
+
+    @property
+    def page_locations(self) -> list[int]:
+        return list(itertools.chain.from_iterable(list(map(lambda pg: pg.page_locations, self.__pages_groups))))
+
+    @property
+    def number_of_pages(self) -> int:
+        return sum(list(map(lambda pg: len(pg.page_locations), self.__pages_groups)))
+
+
--- a/src/calibre/devices/kindle/driver.py
+++ b/src/calibre/devices/kindle/driver.py
@ -2,6 +2,8 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john at nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

+from calibre.devices.kindle.apnx import APNXBuilder
+
 '''
 Device driver for Amazon's Kindle
 '''
@ -409,7 +411,7 @@ class KINDLE2(KINDLE):
    OPT_APNX_CUST_COL        = 2
    OPT_APNX_METHOD_COL      = 3
    OPT_APNX_OVERWRITE       = 4
-    EXTRA_CUSTOMIZATION_CHOICES = {OPT_APNX_METHOD:{'fast', 'accurate', 'pagebreak'}}
+    EXTRA_CUSTOMIZATION_CHOICES = {OPT_APNX_METHOD: APNXBuilder.generators.keys()}

    # x330 on the PaperWhite
    # x262 on the Touch. Doesn't choke on x330, though.