Added a way to create custom-labeled page breaks in the APNX file using ARIA (Accessible Rich Internet Applications) attributes in HTML.

Using <any_html_element *** pagebreak *** aria-label="some_label" ***> will produce a page break location. Possible usage: <span id="pg159" role="doc-pagebreak" aria-label="159"/> <h1 id="pg_header1" role="doc-pagebreak" aria-label="Header 1">Header</h1> <p role="doc-pagebreak" aria-label="§ 1 part 4 page 6 of 9">Text</p>
2025-07-09 03:04:10 -04:00 · 2022-08-24 00:36:01 +02:00 · 2022-08-24 00:36:01 +02:00 · c6bcce78b2
commit c6bcce78b2
parent 8f0226d8b1
12 changed files with 526 additions and 217 deletions
--- a/src/calibre/devices/kindle/apnx.py
+++ b/src/calibre/devices/kindle/apnx.py
@ -6,10 +6,8 @@ __docformat__ = 'restructuredtext en'
 Generates and writes an APNX page mapping file.
 '''
 import re
 import struct
 from calibre.ebooks.mobi.reader.mobi6 import MobiReader
 from calibre.ebooks.pdb.header import PdbHeaderReader
 from calibre.ebooks.mobi.reader.headers import MetadataHeader
 from calibre.utils.logging import default_log
@ -17,29 +15,69 @@ from calibre import prints, fsync
 from calibre.constants import DEBUG
 from polyglot.builtins import as_unicode, as_bytes
 from calibre.devices.kindle.apnx_page_generator.generators.accurate_page_generator import AccuratePageGenerator
 from calibre.devices.kindle.apnx_page_generator.generators.pagebreak_page_generator import PagebreakPageGenerator
 from calibre.devices.kindle.apnx_page_generator.generators.aria_pagebreak_page_generator import \
    AriaPagebreakPageGenerator
 from calibre.devices.kindle.apnx_page_generator.generators.exact_page_generator import ExactPageGenerator
 from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
 from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator
 from calibre.devices.kindle.apnx_page_generator.pages import Pages
 class APNXBuilder:
-    '''
+    """
    Create an APNX file using a pseudo page mapping.
-    '''
+    """
-    def write_apnx(self, mobi_file_path, apnx_path, method=None, page_count=0):
+    generators: dict[str, IPageGenerator] = {
-        '''
+        FastPageGenerator.instance.name(): FastPageGenerator.instance,
        AccuratePageGenerator.instance.name(): AccuratePageGenerator.instance,
        PagebreakPageGenerator.instance.name(): PagebreakPageGenerator.instance,
        AriaPagebreakPageGenerator.instance.name(): AriaPagebreakPageGenerator.instance,
        # ExactPageGenerator.instance.name(): ExactPageGenerator.instance,
    }
    def write_apnx(self, mobi_file_path: str, apnx_path: str, method: str | None = None, page_count: int = 0):
        """
        Build a page mapping for a MOBI file and write it out as an APNX file.

        If you want a fixed number of pages (such as from a custom column) then
        pass in a value to page_count, otherwise a count will be estimated by
        the generator registered under ``method``. A missing or unknown
        method name selects the fast generator.

        Raises an Exception when the chosen generator yields no pages.
        """
        apnx_meta = self.get_apnx_meta(mobi_file_path)
        if page_count:
            generator: IPageGenerator = ExactPageGenerator.instance
        else:
            # Look up with get(), not setdefault(): setdefault() would insert
            # every unknown method name (None included) into the class-level
            # registry, which is shared by all builders.
            generator: IPageGenerator = self.generators.get(method, FastPageGenerator.instance)
        pages = generator.generate(mobi_file_path, page_count)
        if pages.number_of_pages == 0:
            raise Exception(_('Could not generate page mapping.'))
        # Generate the APNX file from the page mapping.
        apnx = self.generate_apnx(pages, apnx_meta)
        # Write the APNX.
        with lopen(apnx_path, 'wb') as apnxf:
            apnxf.write(apnx)
            fsync(apnxf)
    @staticmethod
    def get_apnx_meta(mobi_file_path) -> dict[str, str]:
        import uuid
        apnx_meta = {
            'guid': str(uuid.uuid4()).replace('-', '')[:8],
            'asin': '',
            'cdetype': 'EBOK',
            'format': 'MOBI_7',
            'acr': ''
        }
        with lopen(mobi_file_path, 'rb') as mf:
            ident = PdbHeaderReader(mf).identity()
            if as_bytes(ident) != b'BOOKMOBI':
                # Check that this is really a MOBI file.
                raise Exception(_('Not a valid MOBI file. Reports identity of %s') % ident)
            apnx_meta['acr'] = as_unicode(PdbHeaderReader(mf).name(), errors='replace')
        # We'll need the PDB name, the MOBI version, and some metadata to make FW 3.4 happy with KF8 files...
        with lopen(mobi_file_path, 'rb') as mf:
            mh = MetadataHeader(mf, default_log)
@ -55,41 +93,10 @@ class APNXBuilder:
                apnx_meta['asin'] = ''
            else:
                apnx_meta['asin'] = str(mh.exth.uuid)
        return apnx_meta
-        # Get the pages depending on the chosen parser
+    @staticmethod
-        pages = []
+    def generate_apnx(pages: Pages, apnx_meta) -> bytes:
        if page_count:
            pages = self.get_pages_exact(mobi_file_path, page_count)
        else:
            try:
                if method == 'accurate':
                    pages = self.get_pages_accurate(mobi_file_path)
                elif method == 'pagebreak':
                    pages = self.get_pages_pagebreak_tag(mobi_file_path)
                    if not pages:
                        pages = self.get_pages_accurate(mobi_file_path)
                else:
                    raise Exception('%r is not a valid apnx generation method' % method)
            except:
                # Fall back to the fast parser if we can't
                # use the accurate one. Typically this is
                # due to the file having DRM.
                pages = self.get_pages_fast(mobi_file_path)
        if not pages:
            pages = self.get_pages_fast(mobi_file_path)
        if not pages:
            raise Exception(_('Could not generate page mapping.'))
        # Generate the APNX file from the page mapping.
        apnx = self.generate_apnx(pages, apnx_meta)
        # Write the APNX.
        with lopen(apnx_path, 'wb') as apnxf:
            apnxf.write(apnx)
            fsync(apnxf)
    def generate_apnx(self, pages, apnx_meta):
        apnx = b''
        if DEBUG:
@ -107,8 +114,8 @@ class APNXBuilder:
            # legacy mobi files, too. But, since they still handle this one too, let's
            # try not to break old devices, and keep using the simple header ;).
            content_header = '{"contentGuid":"%(guid)s","asin":"%(asin)s","cdeType":"%(cdetype)s","fileRevisionId":"1"}' % apnx_meta
-        page_header = '{"asin":"%(asin)s","pageMap":"(1,a,1)"}' % apnx_meta
+        page_header = '{"asin":"%(asin)s","pageMap":"' % apnx_meta
-
+        page_header += pages.page_maps + '"}'
        if DEBUG:
            prints('APNX Content Header:', content_header)
        content_header = as_bytes(content_header)
@ -120,177 +127,12 @@ class APNXBuilder:
        apnx += content_header
        apnx += struct.pack('>H', 1)
        apnx += struct.pack('>H', len(page_header))
-        apnx += struct.pack('>H', len(pages))
+        apnx += struct.pack('>H', pages.number_of_pages)
        apnx += struct.pack('>H', 32)
        apnx += page_header
        # Write page values to APNX.
-        for page in pages:
+        for location in pages.page_locations:
-            apnx += struct.pack('>I', page)
+            apnx += struct.pack('>I', location)
        return apnx
    def get_pages_exact(self, mobi_file_path, page_count):
        '''
        Given a specified page count (such as from a custom column),
        create our array of pages for the apnx file by dividing by
        the content size of the book.

        Returns a list of at most page_count text offsets, evenly spaced
        over the uncompressed text length read from record 0.
        '''
        with lopen(mobi_file_path, 'rb') as mf:
            phead = PdbHeaderReader(mf)
            r0 = phead.section_data(0)
            text_length = struct.unpack('>I', r0[4:8])[0]
        # A page count larger than the text length would give a step of 0,
        # and the offsets would never advance; clamp the step to 1.
        chars_per_page = max(1, text_length // page_count)
        pages = list(range(0, text_length, chars_per_page))
        # Rounding can create extra page entries; keep only page_count of them.
        return pages[:page_count]
    def get_pages_fast(self, mobi_file_path):
        '''
        Estimate pages at 2300 characters of uncompressed text each.

        This is not meant to map 1 to 1 to a print book but to be a
        close enough measure. The figure comes from counting the
        characters on one page of a test book (rounded to 2240) and
        adding 60 characters of markup.

        Only the uncompressed text length stored in record 0 is used,
        so the text itself is never decompressed or parsed.
        '''
        with lopen(mobi_file_path, 'rb') as mf:
            record0 = PdbHeaderReader(mf).section_data(0)
        (text_length,) = struct.unpack('>I', record0[4:8])
        # One page start every 2300 characters, beginning at offset 0.
        return list(range(0, text_length, 2300))
    def get_pages_accurate(self, mobi_file_path):
        '''
        A more accurate but much more resource intensive and slower
        method to calculate the page length.
        Parses the uncompressed text. In an average paperback book
        there are 32 lines per page and a maximum of 70 characters
        per line.
        Each paragraph starts a new line and every 70 characters
        (minus markup) in a paragraph starts a new line. The
        position of every 32nd line will be marked as a new
        page.
        This can be made more accurate by accounting for
        <div class="mbp_pagebreak" /> as a new page marker.
        And <br> elements as an empty line.

        Returns a list of byte offsets into the lower-cased MOBI html.
        DRMed books are delegated to get_pages_fast().
        '''
        pages = []
        # Get the MOBI html.
        mr = MobiReader(mobi_file_path, default_log)
        if mr.book_header.encryption_type != 0:
            # DRMed book
            return self.get_pages_fast(mobi_file_path)
        mr.extract_text()
        # States
        in_tag = False
        in_p = False
        check_p = False
        closing = False
        p_char_count = 0
        # Get positions of every line
        # A line is either a paragraph starting
        # or every 70 characters in a paragraph.
        lines = []
        pos = -1
        # We want this to be as fast as possible so we
        # are going to do one pass across the text. re
        # and string functions will parse the text each
        # time they are called.
        #
        # We can use .lower() here because we are
        # not modifying the text. In this case the case
        # doesn't matter just the absolute character and
        # the position within the stream.
        data = bytearray(as_bytes(mr.mobi_html.lower()))
        slash, p, lt, gt = map(ord, '/p<>')
        for c in data:
            pos += 1
            # Check if we are starting or stopping a p tag.
            # Only the first character after '<' (or after '</') is
            # inspected, so any tag whose name starts with 'p' counts.
            if check_p:
                if c == slash:
                    closing = True
                    continue
                elif c == p:
                    if closing:
                        in_p = False
                    else:
                        in_p = True
                        # pos is the 'p'; the '<' sits at pos - 1.
                        lines.append(pos - 2)
                check_p = False
                closing = False
                continue
            if c == lt:
                in_tag = True
                check_p = True
                continue
            elif c == gt:
                in_tag = False
                check_p = False
                continue
            if in_p and not in_tag:
                # NOTE(review): p_char_count is only reset when it reaches
                # 70, not when a new paragraph starts.
                p_char_count += 1
                if p_char_count == 70:
                    lines.append(pos)
                    p_char_count = 0
        # Every 32 lines is a new page
        for i in range(0, len(lines), 32):
            pages.append(lines[i])
        return pages
    def get_pages_pagebreak_tag(self, mobi_file_path):
        '''
        Determine pages based on the presence of
        <mbp:pagebreak>.

        Each page starts right after a tag containing "pagebreak".
        DRMed books are delegated to get_pages_fast().
        '''
        reader = MobiReader(mobi_file_path, default_log)
        if reader.book_header.encryption_type != 0:
            # DRMed book
            return self.get_pages_fast(mobi_file_path)
        reader.extract_text()
        lowered_html = as_bytes(reader.mobi_html.lower())
        return [match.end() for match in re.finditer(b'<[^>]*pagebreak[^>]*>', lowered_html)]
--- a/src/calibre/devices/kindle/apnx_page_generator/init.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/init.py
--- a/src/calibre/devices/kindle/apnx_page_generator/generators/accurate_page_generator.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/generators/accurate_page_generator.py
@ -0,0 +1,103 @@
 __license__ = 'GPL v3'
 __copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
 __docformat__ = 'restructuredtext en'
 from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
 from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator
 from calibre.devices.kindle.apnx_page_generator.pages import Pages
 class AccuratePageGenerator(IPageGenerator):
    """Page generator that estimates pages by counting text lines in the MOBI html."""
    def name(self) -> str:
        # Key under which this generator is registered.
        return "accurate"
    def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages:
        # Delegate to the fast generator.
        return FastPageGenerator.instance.generate(mobi_file_path, real_count)
    def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages:
        """
        A more accurate but much more resource intensive and slower
        method to calculate the page length.
        Parses the uncompressed text. In an average paperback book
        there are 32 lines per page and a maximum of 70 characters
        per line.
        Each paragraph starts a new line and every 70 characters
        (minus markup) in a paragraph starts a new line. The
        position of every 32nd line will be marked as a new
        page.
        This can be made more accurate by accounting for
        <div class="mbp_pagebreak" /> as a new page marker.
        And <br> elements as an empty line.

        real_count is not used by this generator.
        """
        pages = []
        html = self.mobi_html(mobi_file_path)
        # States
        in_tag = False
        in_p = False
        check_p = False
        closing = False
        p_char_count = 0
        # Get positions of every line
        # A line is either a paragraph starting
        # or every 70 characters in a paragraph.
        lines = []
        pos = -1
        # We want this to be as fast as possible so we
        # are going to do one pass across the text. re
        # and string functions will parse the text each
        # time they are called.
        #
        # The html comes from mobi_html(); only the absolute
        # character and its position within the stream matter
        # here, not the case of the text.
        data = bytearray(html)
        slash, p, lt, gt = map(ord, '/p<>')
        for c in data:
            pos += 1
            # Check if we are starting or stopping a p tag.
            # Only the first character after '<' (or after '</') is
            # inspected, so any tag whose name starts with 'p' counts.
            if check_p:
                if c == slash:
                    closing = True
                    continue
                elif c == p:
                    if closing:
                        in_p = False
                    else:
                        in_p = True
                        # pos is the 'p'; the '<' sits at pos - 1.
                        lines.append(pos - 2)
                check_p = False
                closing = False
                continue
            if c == lt:
                in_tag = True
                check_p = True
                continue
            elif c == gt:
                in_tag = False
                check_p = False
                continue
            if in_p and not in_tag:
                # NOTE(review): p_char_count is only reset when it reaches
                # 70, not when a new paragraph starts.
                p_char_count += 1
                if p_char_count == 70:
                    lines.append(pos)
                    p_char_count = 0
        # Every 32 lines is a new page
        for i in range(0, len(lines), 32):
            pages.append(lines[i])
        return Pages(pages)
 # Shared instance, referenced by other modules as AccuratePageGenerator.instance.
 AccuratePageGenerator.instance = AccuratePageGenerator()
--- a/src/calibre/devices/kindle/apnx_page_generator/generators/aria_pagebreak_page_generator.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/generators/aria_pagebreak_page_generator.py
@ -0,0 +1,84 @@
 __license__ = 'GPL v3'
 __copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
 __docformat__ = 'restructuredtext en'
 from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
 from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator
 from calibre.devices.kindle.apnx_page_generator.page_number_type import PageNumberTypes
 from calibre.devices.kindle.apnx_page_generator.pages import Pages
 from calibre.devices.kindle.apnx_page_generator.page_group import PageGroup
 import re
 # Numeral/value pairs ordered from largest to smallest, subtractive forms included.
 roman_numeral_map = (('m', 1000), ('cm', 900), ('d', 500), ('cd', 400), ('c', 100), ('xc', 90), ('l', 50), ('xl', 40),
                     ('x', 10), ('ix', 9), ('v', 5), ('iv', 4), ('i', 1))
 # Lower-case only. The units group must use a lower-case 'v': with an
 # upper-case 'V' the numerals v, vi, vii and viii were rejected while 'V'
 # was accepted and converted to 0.
 roman_numeral_pattern = re.compile(r"""^m{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$""", re.VERBOSE)
 def from_roman(s: str) -> int:
    """
    Convert a lower-case Roman numeral to an integer.

    Raises ValueError when s is blank or not a well-formed lower-case
    Roman numeral.
    """
    if not s:
        raise ValueError('Input can not be blank')
    if not roman_numeral_pattern.match(s):
        raise ValueError('Invalid Roman numeral: %s' % s)
    result = 0
    index = 0
    # Consume the string left to right, taking each numeral as often as it repeats.
    for numeral, integer in roman_numeral_map:
        width = len(numeral)
        while s[index:index + width] == numeral:
            result += integer
            index += width
    return result
 class LabelDescriptor:
    """A parsed page label: its text, its numeric value and its numbering type."""
    def __init__(self, label: str, value: int, label_type: PageNumberTypes):
        # Stored as plain public attributes.
        self.label, self.value, self.label_type = label, value, label_type
 class AriaPagebreakPageGenerator(IPageGenerator):
    """Page generator driven by role="doc-pagebreak" tags and their aria-label values."""
    def name(self) -> str:
        return "aria_pagebreak"
    def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages:
        # Delegate to the fast generator.
        return FastPageGenerator.instance.generate(mobi_file_path, real_count)
    def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages:
        """
        Collect one page per matching tag, grouping consecutive pages.

        A page joins the previous group when the numbering type is the same
        and either its value follows the group's last value or the label is
        a custom one. Otherwise it starts a new group.
        """
        html = self.mobi_html(mobi_file_path)
        pages = Pages()
        for m in re.finditer(b'<[^>]*role="doc-pagebreak"[^>]*aria-label="([^"|]+)"[^>]*>', html):
            descriptor = self.get_label(m.group(1))
            location = m.end()
            is_custom = descriptor.label_type == PageNumberTypes.Custom
            if pages.number_of_pages != 0:
                group = pages.last_group
                follows_on = group.last_value == descriptor.value - 1 or is_custom
                if follows_on and group.page_number_types == descriptor.label_type:
                    # Custom groups also need the label text of each page.
                    group.append((location, descriptor.label) if is_custom else location)
                    continue
            pages.append(PageGroup(location, descriptor.label_type, descriptor.value,
                                   descriptor.label))
        return pages
    @staticmethod
    def get_label(label: bytes) -> LabelDescriptor:
        """Classify a raw label as Arabic, Roman or Custom, tried in that order."""
        label_string = label.decode()
        try:
            return LabelDescriptor(label_string, int(label_string), PageNumberTypes.Arabic)
        except ValueError:
            pass
        try:
            return LabelDescriptor(label_string, from_roman(label_string), PageNumberTypes.Roman)
        except ValueError:
            pass
        # Neither a number nor a Roman numeral: keep the text as a custom label.
        return LabelDescriptor(label_string, 0, PageNumberTypes.Custom)
 AriaPagebreakPageGenerator.instance = AriaPagebreakPageGenerator()
--- a/src/calibre/devices/kindle/apnx_page_generator/generators/exact_page_generator.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/generators/exact_page_generator.py
@ -0,0 +1,41 @@
 __license__ = 'GPL v3'
 __copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
 __docformat__ = 'restructuredtext en'
 from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
 from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator
 from calibre.devices.kindle.apnx_page_generator.pages import Pages
 class ExactPageGenerator(IPageGenerator):
    """Page generator that spreads a known page count evenly over the text."""
    def name(self) -> str:
        return "exact"
    def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages:
        # Delegate to the fast generator.
        return FastPageGenerator.instance.generate(mobi_file_path, real_count)
    def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages:
        """
        Given a specified page count (such as from a custom column),
        create our array of pages for the apnx file by dividing by
        the content size of the book.

        Returns at most real_count page offsets. A real_count of 0 or None
        raises (ZeroDivisionError / TypeError) out of this method.
        """
        text_length = self.mobi_html_length(mobi_file_path)
        # A page count larger than the text length would give a step of 0,
        # and the offsets would never advance; clamp the step to 1.
        chars_per_page = max(1, text_length // real_count)
        pages = list(range(0, text_length, chars_per_page))
        # Rounding can create extra page entries; keep only real_count of them.
        return Pages(pages[:real_count])
 ExactPageGenerator.instance = ExactPageGenerator()
--- a/src/calibre/devices/kindle/apnx_page_generator/generators/fast_page_generator.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/generators/fast_page_generator.py
@ -0,0 +1,46 @@
 __license__ = 'GPL v3'
 __copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
 __docformat__ = 'restructuredtext en'
 from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator
 from calibre.devices.kindle.apnx_page_generator.pages import Pages
 class FastPageGenerator(IPageGenerator):
    """Page generator that uses only the uncompressed text length."""
    def name(self) -> str:
        return "fast"
    def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages:
        # There is nothing simpler to fall back to.
        raise Exception("Fast calculation impossible.")
    def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages:
        """
        Estimate pages at 2300 characters of uncompressed text each.

        This is not meant to map 1 to 1 to a print book but to be a
        close enough measure. The figure comes from counting the
        characters on one page of a test book (rounded to 2240) and
        adding 60 characters of markup.

        Only the text length from mobi_html_length() is used, so the
        text itself is never decompressed or parsed.
        """
        text_length = self.mobi_html_length(mobi_file_path)
        # One page start every 2300 characters, beginning at offset 0.
        return Pages(list(range(0, text_length, 2300)))
 FastPageGenerator.instance = FastPageGenerator()
--- a/src/calibre/devices/kindle/apnx_page_generator/generators/pagebreak_page_generator.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/generators/pagebreak_page_generator.py
@ -0,0 +1,29 @@
 __license__ = 'GPL v3'
 __copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
 __docformat__ = 'restructuredtext en'
 from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
 from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator
 from calibre.devices.kindle.apnx_page_generator.pages import Pages
 import re
 class PagebreakPageGenerator(IPageGenerator):
    """Page generator that starts a page after every tag containing "pagebreak"."""
    def name(self) -> str:
        return "pagebreak"
    def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages:
        # Delegate to the fast generator.
        return FastPageGenerator.instance.generate(mobi_file_path, real_count)
    def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages:
        """ Determine pages based on the presence of <*pagebreak*/>. """
        html = self.mobi_html(mobi_file_path)
        # Each page begins at the offset right after the matching tag.
        return Pages([match.end() for match in re.finditer(b'<[^>]*pagebreak[^>]*>', html)])
 PagebreakPageGenerator.instance = PagebreakPageGenerator()
--- a/src/calibre/devices/kindle/apnx_page_generator/i_page_generator.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/i_page_generator.py
@ -0,0 +1,53 @@
 __license__ = 'GPL v3'
 __copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
 __docformat__ = 'restructuredtext en'
 import struct
 from abc import abstractmethod, ABCMeta
 from calibre.devices.kindle.apnx_page_generator.pages import Pages
 from calibre.ebooks.mobi.reader.mobi6 import MobiReader
 from calibre.utils.logging import default_log
 from polyglot.builtins import as_bytes
 from calibre.ebooks.pdb.header import PdbHeaderReader
 class IPageGenerator(metaclass=ABCMeta):
    """Base class for page generators: subclasses supply a strategy and a fallback."""
    @abstractmethod
    def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages:
        pass
    @abstractmethod
    def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages:
        pass
    def generate(self, mobi_file_path: str, real_count: int | None) -> Pages:
        """
        Run _generate(); use _generate_fallback() when it yields no pages or raises.

        A class named FastPageGenerator re-raises instead of falling back.
        """
        try:
            result = self._generate(mobi_file_path, real_count)
            if not result.number_of_pages > 0:
                result = self._generate_fallback(mobi_file_path, real_count)
            return result
        except Exception as e:
            if type(self).__name__ == "FastPageGenerator":
                raise e
            return self._generate_fallback(mobi_file_path, real_count)
    @abstractmethod
    def name(self) -> str:
        pass
    @staticmethod
    def mobi_html(mobi_file_path: str) -> bytes:
        """Return the book's html, lower-cased, as bytes. Raises for DRMed books."""
        reader = MobiReader(mobi_file_path, default_log)
        if reader.book_header.encryption_type != 0:
            raise Exception("DRMed book")
        reader.extract_text()
        lowered = reader.mobi_html.lower()
        return as_bytes(lowered)
    @staticmethod
    def mobi_html_length(mobi_file_path: str) -> int:
        """Return the big-endian unsigned int stored in bytes 4-8 of section 0."""
        with lopen(mobi_file_path, 'rb') as mf:
            record0 = PdbHeaderReader(mf).section_data(0)
            (length,) = struct.unpack('>I', record0[4:8])
            return length
--- a/src/calibre/devices/kindle/apnx_page_generator/page_group.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/page_group.py
@ -0,0 +1,55 @@
 __license__ = 'GPL v3'
 __copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
 __docformat__ = 'restructuredtext en'
 from calibre.devices.kindle.apnx_page_generator.page_number_type import PageNumberTypes
 class PageGroup:
    """A run of page locations that share one numbering type."""
    def __init__(self, page_locations: int | list[int], page_number_type: PageNumberTypes, first_value: int,
                 page_labels: str | list[str] | None = None):
        # Simulate constructor overloading: accept one location or a list.
        if type(page_locations) is int:
            page_locations = [page_locations]
        self.page_locations: list[int] = page_locations
        self.__page_number_type: PageNumberTypes = page_number_type
        self.__first_value = first_value
        if page_number_type != PageNumberTypes.Custom:
            return
        # Custom groups carry one non-empty label per location.
        assert(page_labels is not None)
        if type(page_labels) is str:
            assert (1 == len(self.page_locations) and len(page_labels) > 0)
            page_labels = [page_labels]
        else:
            assert (len(page_labels) == len(self.page_locations))
            assert(all(len(label) > 0 for label in page_labels))
        self.__page_number_labels: list[str] = page_labels
    def append(self, page_location: int | tuple[int, str]) -> None:
        """Add a page: a bare location, or a (location, label) pair for custom groups."""
        if type(page_location) is int:
            assert (self.__page_number_type != PageNumberTypes.Custom)
            self.page_locations.append(page_location)
            return
        assert (self.__page_number_type == PageNumberTypes.Custom)
        self.page_locations.append(page_location[0])
        self.__page_number_labels.append(page_location[1])
    @property
    def page_number_types(self) -> PageNumberTypes:
        return self.__page_number_type
    @property
    def number_of_pages(self) -> int:
        return len(self.page_locations)
    @property
    def last_value(self) -> int:
        # The value of the last page, counting up from the first value.
        return self.__first_value + len(self.page_locations) - 1
    def get_page_map(self, starting_location: int) -> str:
        """Return this group's "(start,type,values)" page map entry."""
        is_custom = self.__page_number_type == PageNumberTypes.Custom
        values = "|".join(self.__page_number_labels) if is_custom else str(self.__first_value)
        return "(%s,%s,%s)" % (starting_location, self.__page_number_type.value, values)
--- a/src/calibre/devices/kindle/apnx_page_generator/page_number_type.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/page_number_type.py
@ -0,0 +1,11 @@
 __license__ = 'GPL v3'
 __copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
 __docformat__ = 'restructuredtext en'
 import enum
 class PageNumberTypes(str, enum.Enum):
    """Page numbering types; each value is the one-letter code written into the page map."""
    Arabic = 'a'
    Roman = 'r'
    Custom = 'c'
--- a/src/calibre/devices/kindle/apnx_page_generator/pages.py
+++ b/src/calibre/devices/kindle/apnx_page_generator/pages.py
@ -0,0 +1,43 @@
 __license__ = 'GPL v3'
 __copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
 __docformat__ = 'restructuredtext en'
 import itertools
 from calibre.devices.kindle.apnx_page_generator.page_group import PageGroup
 from calibre.devices.kindle.apnx_page_generator.page_number_type import PageNumberTypes
 class Pages:
    """An ordered collection of page groups."""
    def __init__(self, page_locations: list[int] | None = None):
        # A plain list of locations becomes one Arabic group starting at 1.
        groups: list[PageGroup] = []
        if type(page_locations) is list:
            groups.append(PageGroup(page_locations, PageNumberTypes.Arabic, 1))
        self.__pages_groups: list[PageGroup] = groups
    def append(self, page_location: PageGroup) -> None:
        self.__pages_groups.append(page_location)
    @property
    def last_group(self) -> PageGroup:
        return self.__pages_groups[-1]
    @property
    def page_maps(self) -> str:
        """Comma-joined page map entries, each starting at its 1-based page index."""
        entries = []
        next_page = 1
        for group in self.__pages_groups:
            entries.append(group.get_page_map(next_page))
            next_page += group.number_of_pages
        return ",".join(entries)
    @property
    def page_locations(self) -> list[int]:
        # Flatten the locations of all groups, in order.
        return [location for group in self.__pages_groups for location in group.page_locations]
    @property
    def number_of_pages(self) -> int:
        return sum(len(group.page_locations) for group in self.__pages_groups)
--- a/src/calibre/devices/kindle/driver.py
+++ b/src/calibre/devices/kindle/driver.py
@ -2,6 +2,8 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john at nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 from calibre.devices.kindle.apnx import APNXBuilder
 '''
 Device driver for Amazon's Kindle
 '''
@ -409,7 +411,7 @@ class KINDLE2(KINDLE):
    OPT_APNX_CUST_COL        = 2
    OPT_APNX_METHOD_COL      = 3
    OPT_APNX_OVERWRITE       = 4
-    EXTRA_CUSTOMIZATION_CHOICES = {OPT_APNX_METHOD:{'fast', 'accurate', 'pagebreak'}}
+    EXTRA_CUSTOMIZATION_CHOICES = {OPT_APNX_METHOD: APNXBuilder.generators.keys()}
    # x330 on the PaperWhite
    # x262 on the Touch. Doesn't choke on x330, though.