From c6bcce78b2c5527c90fd67a2ef2f82518fff4d70 Mon Sep 17 00:00:00 2001 From: Vaso Peras-Likodric Date: Wed, 24 Aug 2022 00:36:01 +0200 Subject: [PATCH 1/4] Added a way to make custom-labeled page breaks in the APNX file using an ARIA (Accessible Rich Internet Applications) tag in HTML. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using a tag that has role="doc-pagebreak" and an aria-label attribute (for example <span role="doc-pagebreak" aria-label="3"></span>) will produce a page break location labeled with the aria-label value. Possible usage:

Header

Text --- src/calibre/devices/kindle/apnx.py | 274 ++++-------------- .../kindle/apnx_page_generator/__init__.py | 0 .../generators/accurate_page_generator.py | 103 +++++++ .../aria_pagebreak_page_generator.py | 84 ++++++ .../generators/exact_page_generator.py | 41 +++ .../generators/fast_page_generator.py | 46 +++ .../generators/pagebreak_page_generator.py | 29 ++ .../apnx_page_generator/i_page_generator.py | 53 ++++ .../kindle/apnx_page_generator/page_group.py | 55 ++++ .../apnx_page_generator/page_number_type.py | 11 + .../kindle/apnx_page_generator/pages.py | 43 +++ src/calibre/devices/kindle/driver.py | 4 +- 12 files changed, 526 insertions(+), 217 deletions(-) create mode 100644 src/calibre/devices/kindle/apnx_page_generator/__init__.py create mode 100644 src/calibre/devices/kindle/apnx_page_generator/generators/accurate_page_generator.py create mode 100644 src/calibre/devices/kindle/apnx_page_generator/generators/aria_pagebreak_page_generator.py create mode 100644 src/calibre/devices/kindle/apnx_page_generator/generators/exact_page_generator.py create mode 100644 src/calibre/devices/kindle/apnx_page_generator/generators/fast_page_generator.py create mode 100644 src/calibre/devices/kindle/apnx_page_generator/generators/pagebreak_page_generator.py create mode 100644 src/calibre/devices/kindle/apnx_page_generator/i_page_generator.py create mode 100644 src/calibre/devices/kindle/apnx_page_generator/page_group.py create mode 100644 src/calibre/devices/kindle/apnx_page_generator/page_number_type.py create mode 100644 src/calibre/devices/kindle/apnx_page_generator/pages.py diff --git a/src/calibre/devices/kindle/apnx.py b/src/calibre/devices/kindle/apnx.py index 21176374a9..941bd5afcc 100644 --- a/src/calibre/devices/kindle/apnx.py +++ b/src/calibre/devices/kindle/apnx.py @@ -1,4 +1,4 @@ -__license__ = 'GPL v3' +__license__ = 'GPL v3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' @@ -6,10 +6,8 @@ __docformat__ = 'restructuredtext en' 
Generates and writes an APNX page mapping file. ''' -import re import struct -from calibre.ebooks.mobi.reader.mobi6 import MobiReader from calibre.ebooks.pdb.header import PdbHeaderReader from calibre.ebooks.mobi.reader.headers import MetadataHeader from calibre.utils.logging import default_log @@ -17,29 +15,69 @@ from calibre import prints, fsync from calibre.constants import DEBUG from polyglot.builtins import as_unicode, as_bytes +from calibre.devices.kindle.apnx_page_generator.generators.accurate_page_generator import AccuratePageGenerator +from calibre.devices.kindle.apnx_page_generator.generators.pagebreak_page_generator import PagebreakPageGenerator +from calibre.devices.kindle.apnx_page_generator.generators.aria_pagebreak_page_generator import \ + AriaPagebreakPageGenerator +from calibre.devices.kindle.apnx_page_generator.generators.exact_page_generator import ExactPageGenerator +from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator +from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator +from calibre.devices.kindle.apnx_page_generator.pages import Pages + class APNXBuilder: - ''' + """ Create an APNX file using a pseudo page mapping. 
- ''' + """ - def write_apnx(self, mobi_file_path, apnx_path, method=None, page_count=0): - ''' + generators: dict[str, IPageGenerator] = { + FastPageGenerator.instance.name(): FastPageGenerator.instance, + AccuratePageGenerator.instance.name(): AccuratePageGenerator.instance, + PagebreakPageGenerator.instance.name(): PagebreakPageGenerator.instance, + AriaPagebreakPageGenerator.instance.name(): AriaPagebreakPageGenerator.instance, + # ExactPageGenerator.instance.name(): ExactPageGenerator.instance, + } + + def write_apnx(self, mobi_file_path: str, apnx_path: str, method: str | None = None, page_count: int = 0): + """ If you want a fixed number of pages (such as from a custom column) then pass in a value to page_count, otherwise a count will be estimated using either the fast or accurate algorithm. - ''' - import uuid - apnx_meta = {'guid': str(uuid.uuid4()).replace('-', '')[:8], 'asin': - '', 'cdetype': 'EBOK', 'format': 'MOBI_7', 'acr': ''} + """ + apnx_meta = self.get_apnx_meta(mobi_file_path) + if page_count: + generator: IPageGenerator = ExactPageGenerator.instance + else: + generator: IPageGenerator = self.generators.setdefault(method, FastPageGenerator.instance) + + pages = generator.generate(mobi_file_path, page_count) + if pages.number_of_pages == 0: + raise Exception(_('Could not generate page mapping.')) + # Generate the APNX file from the page mapping. + apnx = self.generate_apnx(pages, apnx_meta) + + # Write the APNX. + with lopen(apnx_path, 'wb') as apnxf: + apnxf.write(apnx) + fsync(apnxf) + + @staticmethod + def get_apnx_meta(mobi_file_path) -> dict[str, str]: + import uuid + apnx_meta = { + 'guid': str(uuid.uuid4()).replace('-', '')[:8], + 'asin': '', + 'cdetype': 'EBOK', + 'format': 'MOBI_7', + 'acr': '' + } with lopen(mobi_file_path, 'rb') as mf: ident = PdbHeaderReader(mf).identity() if as_bytes(ident) != b'BOOKMOBI': # Check that this is really a MOBI file. raise Exception(_('Not a valid MOBI file. 
Reports identity of %s') % ident) apnx_meta['acr'] = as_unicode(PdbHeaderReader(mf).name(), errors='replace') - # We'll need the PDB name, the MOBI version, and some metadata to make FW 3.4 happy with KF8 files... with lopen(mobi_file_path, 'rb') as mf: mh = MetadataHeader(mf, default_log) @@ -55,41 +93,10 @@ class APNXBuilder: apnx_meta['asin'] = '' else: apnx_meta['asin'] = str(mh.exth.uuid) + return apnx_meta - # Get the pages depending on the chosen parser - pages = [] - if page_count: - pages = self.get_pages_exact(mobi_file_path, page_count) - else: - try: - if method == 'accurate': - pages = self.get_pages_accurate(mobi_file_path) - elif method == 'pagebreak': - pages = self.get_pages_pagebreak_tag(mobi_file_path) - if not pages: - pages = self.get_pages_accurate(mobi_file_path) - else: - raise Exception('%r is not a valid apnx generation method' % method) - except: - # Fall back to the fast parser if we can't - # use the accurate one. Typically this is - # due to the file having DRM. - pages = self.get_pages_fast(mobi_file_path) - - if not pages: - pages = self.get_pages_fast(mobi_file_path) - if not pages: - raise Exception(_('Could not generate page mapping.')) - - # Generate the APNX file from the page mapping. - apnx = self.generate_apnx(pages, apnx_meta) - - # Write the APNX. - with lopen(apnx_path, 'wb') as apnxf: - apnxf.write(apnx) - fsync(apnxf) - - def generate_apnx(self, pages, apnx_meta): + @staticmethod + def generate_apnx(pages: Pages, apnx_meta) -> bytes: apnx = b'' if DEBUG: @@ -107,8 +114,8 @@ class APNXBuilder: # legacy mobi files, too. But, since they still handle this one too, let's # try not to break old devices, and keep using the simple header ;). 
content_header = '{"contentGuid":"%(guid)s","asin":"%(asin)s","cdeType":"%(cdetype)s","fileRevisionId":"1"}' % apnx_meta - page_header = '{"asin":"%(asin)s","pageMap":"(1,a,1)"}' % apnx_meta - + page_header = '{"asin":"%(asin)s","pageMap":"' % apnx_meta + page_header += pages.page_maps + '"}' if DEBUG: prints('APNX Content Header:', content_header) content_header = as_bytes(content_header) @@ -120,177 +127,12 @@ class APNXBuilder: apnx += content_header apnx += struct.pack('>H', 1) apnx += struct.pack('>H', len(page_header)) - apnx += struct.pack('>H', len(pages)) + apnx += struct.pack('>H', pages.number_of_pages) apnx += struct.pack('>H', 32) apnx += page_header # Write page values to APNX. - for page in pages: - apnx += struct.pack('>I', page) + for location in pages.page_locations: + apnx += struct.pack('>I', location) return apnx - - def get_pages_exact(self, mobi_file_path, page_count): - ''' - Given a specified page count (such as from a custom column), - create our array of pages for the apnx file by dividing by - the content size of the book. - ''' - pages = [] - count = 0 - - with lopen(mobi_file_path, 'rb') as mf: - phead = PdbHeaderReader(mf) - r0 = phead.section_data(0) - text_length = struct.unpack('>I', r0[4:8])[0] - - chars_per_page = int(text_length // page_count) - while count < text_length: - pages.append(count) - count += chars_per_page - - if len(pages) > page_count: - # Rounding created extra page entries - pages = pages[:page_count] - - return pages - - def get_pages_fast(self, mobi_file_path): - ''' - 2300 characters of uncompressed text per page. This is - not meant to map 1 to 1 to a print book but to be a - close enough measure. - - A test book was chosen and the characters were counted - on one page. This number was round to 2240 then 60 - characters of markup were added to the total giving - 2300. - - Uncompressed text length is used because it's easily - accessible in MOBI files (part of the header). 
Also, - It's faster to work off of the length then to - decompress and parse the actual text. - ''' - text_length = 0 - pages = [] - count = 0 - - with lopen(mobi_file_path, 'rb') as mf: - phead = PdbHeaderReader(mf) - r0 = phead.section_data(0) - text_length = struct.unpack('>I', r0[4:8])[0] - - while count < text_length: - pages.append(count) - count += 2300 - - return pages - - def get_pages_accurate(self, mobi_file_path): - ''' - A more accurate but much more resource intensive and slower - method to calculate the page length. - - Parses the uncompressed text. In an average paper back book - There are 32 lines per page and a maximum of 70 characters - per line. - - Each paragraph starts a new line and every 70 characters - (minus markup) in a paragraph starts a new line. The - position after every 30 lines will be marked as a new - page. - - This can be make more accurate by accounting for -

as a new page marker. - And
elements as an empty line. - ''' - pages = [] - - # Get the MOBI html. - mr = MobiReader(mobi_file_path, default_log) - if mr.book_header.encryption_type != 0: - # DRMed book - return self.get_pages_fast(mobi_file_path) - mr.extract_text() - - # States - in_tag = False - in_p = False - check_p = False - closing = False - p_char_count = 0 - - # Get positions of every line - # A line is either a paragraph starting - # or every 70 characters in a paragraph. - lines = [] - pos = -1 - # We want this to be as fast as possible so we - # are going to do one pass across the text. re - # and string functions will parse the text each - # time they are called. - # - # We can can use .lower() here because we are - # not modifying the text. In this case the case - # doesn't matter just the absolute character and - # the position within the stream. - data = bytearray(as_bytes(mr.mobi_html.lower())) - slash, p, lt, gt = map(ord, '/p<>') - for c in data: - pos += 1 - - # Check if we are starting or stopping a p tag. - if check_p: - if c == slash: - closing = True - continue - elif c == p: - if closing: - in_p = False - else: - in_p = True - lines.append(pos - 2) - check_p = False - closing = False - continue - - if c == lt: - in_tag = True - check_p = True - continue - elif c == gt: - in_tag = False - check_p = False - continue - - if in_p and not in_tag: - p_char_count += 1 - if p_char_count == 70: - lines.append(pos) - p_char_count = 0 - - # Every 30 lines is a new page - for i in range(0, len(lines), 32): - pages.append(lines[i]) - - return pages - - def get_pages_pagebreak_tag(self, mobi_file_path): - ''' - Determine pages based on the presence of - . - ''' - pages = [] - - # Get the MOBI html. 
- mr = MobiReader(mobi_file_path, default_log) - if mr.book_header.encryption_type != 0: - # DRMed book - return self.get_pages_fast(mobi_file_path) - mr.extract_text() - - html = as_bytes(mr.mobi_html.lower()) - for m in re.finditer(b'<[^>]*pagebreak[^>]*>', html): - pages.append(m.end()) - - return pages diff --git a/src/calibre/devices/kindle/apnx_page_generator/__init__.py b/src/calibre/devices/kindle/apnx_page_generator/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/calibre/devices/kindle/apnx_page_generator/generators/accurate_page_generator.py b/src/calibre/devices/kindle/apnx_page_generator/generators/accurate_page_generator.py new file mode 100644 index 0000000000..24cf5eac9d --- /dev/null +++ b/src/calibre/devices/kindle/apnx_page_generator/generators/accurate_page_generator.py @@ -0,0 +1,103 @@ +__license__ = 'GPL v3' +__copyright__ = '2022, Vaso Peras-Likodric ' +__docformat__ = 'restructuredtext en' + +from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator +from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator +from calibre.devices.kindle.apnx_page_generator.pages import Pages + + +class AccuratePageGenerator(IPageGenerator): + + def name(self) -> str: + return "accurate" + + def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages: + return FastPageGenerator.instance.generate(mobi_file_path, real_count) + + def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages: + """ + A more accurate but much more resource intensive and slower + method to calculate the page length. + + Parses the uncompressed text. In an average paper back book + There are 32 lines per page and a maximum of 70 characters + per line. + + Each paragraph starts a new line and every 70 characters + (minus markup) in a paragraph starts a new line. The + position after every 30 lines will be marked as a new + page. 
+ + This can be make more accurate by accounting for +
as a new page marker. + And
elements as an empty line. + """ + pages = [] + + html = self.mobi_html(mobi_file_path) + + # States + in_tag = False + in_p = False + check_p = False + closing = False + p_char_count = 0 + + # Get positions of every line + # A line is either a paragraph starting + # or every 70 characters in a paragraph. + lines = [] + pos = -1 + # We want this to be as fast as possible so we + # are going to do one pass across the text. re + # and string functions will parse the text each + # time they are called. + # + # We can can use .lower() here because we are + # not modifying the text. In this case the case + # doesn't matter just the absolute character and + # the position within the stream. + data = bytearray(html) + slash, p, lt, gt = map(ord, '/p<>') + for c in data: + pos += 1 + + # Check if we are starting or stopping a p tag. + if check_p: + if c == slash: + closing = True + continue + elif c == p: + if closing: + in_p = False + else: + in_p = True + lines.append(pos - 2) + check_p = False + closing = False + continue + + if c == lt: + in_tag = True + check_p = True + continue + elif c == gt: + in_tag = False + check_p = False + continue + + if in_p and not in_tag: + p_char_count += 1 + if p_char_count == 70: + lines.append(pos) + p_char_count = 0 + + # Every 30 lines is a new page + for i in range(0, len(lines), 32): + pages.append(lines[i]) + + return Pages(pages) + + +AccuratePageGenerator.instance = AccuratePageGenerator() diff --git a/src/calibre/devices/kindle/apnx_page_generator/generators/aria_pagebreak_page_generator.py b/src/calibre/devices/kindle/apnx_page_generator/generators/aria_pagebreak_page_generator.py new file mode 100644 index 0000000000..9ea708597f --- /dev/null +++ b/src/calibre/devices/kindle/apnx_page_generator/generators/aria_pagebreak_page_generator.py @@ -0,0 +1,84 @@ +__license__ = 'GPL v3' +__copyright__ = '2022, Vaso Peras-Likodric ' +__docformat__ = 'restructuredtext en' + +from 
calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator +from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator +from calibre.devices.kindle.apnx_page_generator.page_number_type import PageNumberTypes +from calibre.devices.kindle.apnx_page_generator.pages import Pages +from calibre.devices.kindle.apnx_page_generator.page_group import PageGroup +import re + +roman_numeral_map = (('m', 1000), ('cm', 900), ('d', 500), ('cd', 400), ('c', 100), ('xc', 90), ('l', 50), ('xl', 40), + ('x', 10), ('ix', 9), ('v', 5), ('iv', 4), ('i', 1)) + +roman_numeral_pattern = re.compile("""^m{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|V?i{0,3})$""", re.VERBOSE) + + +def from_roman(s: str) -> int: + """convert Roman numeral to integer""" + if not s: + raise ValueError('Input can not be blank') + if not roman_numeral_pattern.match(s): + raise ValueError('Invalid Roman numeral: %s' % s) + + result = 0 + index = 0 + for numeral, integer in roman_numeral_map: + while s[index:index + len(numeral)] == numeral: + result += integer + index += len(numeral) + return result + + +class LabelDescriptor: + def __init__(self, label: str, value: int, label_type: PageNumberTypes): + self.label: str = label + self.value: int = value + self.label_type: PageNumberTypes = label_type + + +class AriaPagebreakPageGenerator(IPageGenerator): + + def name(self) -> str: + return "aria_pagebreak" + + def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages: + return FastPageGenerator.instance.generate(mobi_file_path, real_count) + + def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages: + html = self.mobi_html(mobi_file_path) + pages = Pages() + + for m in re.finditer(b'<[^>]*role="doc-pagebreak"[^>]*aria-label="([^"|]+)"[^>]*>', html): + label_descriptor = self.get_label(m.group(1)) + if pages.number_of_pages == 0: + pages.append(PageGroup(m.end(), label_descriptor.label_type, 
label_descriptor.value, + label_descriptor.label)) + elif ( + pages.last_group.last_value == label_descriptor.value - 1 or label_descriptor.label_type == + PageNumberTypes.Custom) and pages.last_group.page_number_types == label_descriptor.label_type: + + if label_descriptor.label_type != PageNumberTypes.Custom: + pages.last_group.append(m.end()) + else: + pages.last_group.append((m.end(), label_descriptor.label)) + else: + pages.append(PageGroup(m.end(), label_descriptor.label_type, label_descriptor.value, + label_descriptor.label)) + + return pages + + @staticmethod + def get_label(label: bytes) -> LabelDescriptor: + label_string = label.decode() + try: + return LabelDescriptor(label_string, int(label_string), PageNumberTypes.Arabic) + except ValueError: + try: + return LabelDescriptor(label_string, from_roman(label_string), PageNumberTypes.Roman) + except ValueError: + return LabelDescriptor(label_string, 0, PageNumberTypes.Custom) + + +AriaPagebreakPageGenerator.instance = AriaPagebreakPageGenerator() diff --git a/src/calibre/devices/kindle/apnx_page_generator/generators/exact_page_generator.py b/src/calibre/devices/kindle/apnx_page_generator/generators/exact_page_generator.py new file mode 100644 index 0000000000..a30d481ba4 --- /dev/null +++ b/src/calibre/devices/kindle/apnx_page_generator/generators/exact_page_generator.py @@ -0,0 +1,41 @@ +__license__ = 'GPL v3' +__copyright__ = '2022, Vaso Peras-Likodric ' +__docformat__ = 'restructuredtext en' + +from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator +from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator +from calibre.devices.kindle.apnx_page_generator.pages import Pages + + +class ExactPageGenerator(IPageGenerator): + + def name(self) -> str: + return "exact" + + def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages: + return FastPageGenerator.instance.generate(mobi_file_path, real_count) + + def 
_generate(self, mobi_file_path: str, real_count: int | None) -> Pages: + """ + Given a specified page count (such as from a custom column), + create our array of pages for the apnx file by dividing by + the content size of the book. + """ + pages = [] + count = 0 + + text_length = self.mobi_html_length(mobi_file_path) + + chars_per_page = int(text_length // real_count) + while count < text_length: + pages.append(count) + count += chars_per_page + + if len(pages) > real_count: + # Rounding created extra page entries + pages = pages[:real_count] + + return Pages(pages) + + +ExactPageGenerator.instance = ExactPageGenerator() diff --git a/src/calibre/devices/kindle/apnx_page_generator/generators/fast_page_generator.py b/src/calibre/devices/kindle/apnx_page_generator/generators/fast_page_generator.py new file mode 100644 index 0000000000..23320dacdf --- /dev/null +++ b/src/calibre/devices/kindle/apnx_page_generator/generators/fast_page_generator.py @@ -0,0 +1,46 @@ +__license__ = 'GPL v3' +__copyright__ = '2022, Vaso Peras-Likodric ' +__docformat__ = 'restructuredtext en' + +from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator +from calibre.devices.kindle.apnx_page_generator.pages import Pages + + +class FastPageGenerator(IPageGenerator): + + def name(self) -> str: + return "fast" + + def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages: + raise Exception("Fast calculation impossible.") + + def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages: + """ + 2300 characters of uncompressed text per page. This is + not meant to map 1 to 1 to a print book but to be a + close enough measure. + + A test book was chosen and the characters were counted + on one page. This number was round to 2240 then 60 + characters of markup were added to the total giving + 2300. + + Uncompressed text length is used because it's easily + accessible in MOBI files (part of the header). 
Also, + It's faster to work off of the length then to + decompress and parse the actual text. + """ + + pages = [] + count = 0 + + text_length = self.mobi_html_length(mobi_file_path) + + while count < text_length: + pages.append(count) + count += 2300 + + return Pages(pages) + + +FastPageGenerator.instance = FastPageGenerator() diff --git a/src/calibre/devices/kindle/apnx_page_generator/generators/pagebreak_page_generator.py b/src/calibre/devices/kindle/apnx_page_generator/generators/pagebreak_page_generator.py new file mode 100644 index 0000000000..bf591480f2 --- /dev/null +++ b/src/calibre/devices/kindle/apnx_page_generator/generators/pagebreak_page_generator.py @@ -0,0 +1,29 @@ +__license__ = 'GPL v3' +__copyright__ = '2022, Vaso Peras-Likodric ' +__docformat__ = 'restructuredtext en' + +from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator +from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator +from calibre.devices.kindle.apnx_page_generator.pages import Pages +import re + + +class PagebreakPageGenerator(IPageGenerator): + + def name(self) -> str: + return "pagebreak" + + def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages: + return FastPageGenerator.instance.generate(mobi_file_path, real_count) + + def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages: + """ Determine pages based on the presence of <*pagebreak*/>. 
""" + html = self.mobi_html(mobi_file_path) + pages = [] + for m in re.finditer(b'<[^>]*pagebreak[^>]*>', html): + pages.append(m.end()) + + return Pages(pages) + + +PagebreakPageGenerator.instance = PagebreakPageGenerator() diff --git a/src/calibre/devices/kindle/apnx_page_generator/i_page_generator.py b/src/calibre/devices/kindle/apnx_page_generator/i_page_generator.py new file mode 100644 index 0000000000..8de2eb05a8 --- /dev/null +++ b/src/calibre/devices/kindle/apnx_page_generator/i_page_generator.py @@ -0,0 +1,53 @@ +__license__ = 'GPL v3' +__copyright__ = '2022, Vaso Peras-Likodric ' +__docformat__ = 'restructuredtext en' + +import struct +from abc import abstractmethod, ABCMeta +from calibre.devices.kindle.apnx_page_generator.pages import Pages +from calibre.ebooks.mobi.reader.mobi6 import MobiReader +from calibre.utils.logging import default_log +from polyglot.builtins import as_bytes +from calibre.ebooks.pdb.header import PdbHeaderReader + + +class IPageGenerator(metaclass=ABCMeta): + + @abstractmethod + def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages: + pass + + @abstractmethod + def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages: + pass + + def generate(self, mobi_file_path: str, real_count: int | None) -> Pages: + try: + result = self._generate(mobi_file_path, real_count) + if result.number_of_pages > 0: + return result + return self._generate_fallback(mobi_file_path, real_count) + except Exception as e: + if self.__class__.__name__ == "FastPageGenerator": + raise e + return self._generate_fallback(mobi_file_path, real_count) + + @abstractmethod + def name(self) -> str: + pass + + @staticmethod + def mobi_html(mobi_file_path: str) -> bytes: + mr = MobiReader(mobi_file_path, default_log) + if mr.book_header.encryption_type != 0: + raise Exception("DRMed book") + mr.extract_text() + return as_bytes(mr.mobi_html.lower()) + + @staticmethod + def mobi_html_length(mobi_file_path: str) -> int: + with 
lopen(mobi_file_path, 'rb') as mf: + pdb_header = PdbHeaderReader(mf) + r0 = pdb_header.section_data(0) + return struct.unpack('>I', r0[4:8])[0] + diff --git a/src/calibre/devices/kindle/apnx_page_generator/page_group.py b/src/calibre/devices/kindle/apnx_page_generator/page_group.py new file mode 100644 index 0000000000..f8ea2488c4 --- /dev/null +++ b/src/calibre/devices/kindle/apnx_page_generator/page_group.py @@ -0,0 +1,55 @@ +__license__ = 'GPL v3' +__copyright__ = '2022, Vaso Peras-Likodric ' +__docformat__ = 'restructuredtext en' + +from calibre.devices.kindle.apnx_page_generator.page_number_type import PageNumberTypes + + +class PageGroup: + """Simulate constructor overloading""" + def __init__(self, page_locations: int | list[int], page_number_type: PageNumberTypes, first_value: int, + page_labels: str | list[str] | None = None): + if page_locations.__class__ == int: + self.page_locations: list[int] = [page_locations] + else: + self.page_locations: list[int] = page_locations + self.__page_number_type: PageNumberTypes = page_number_type + self.__first_value = first_value + if page_number_type == PageNumberTypes.Custom: + assert(page_labels is not None) + if page_labels.__class__ == str: + assert (1 == len(self.page_locations) and len(page_labels) > 0) + self.__page_number_labels: list[str] = [page_labels] + else: + assert (len(page_labels) == len(self.page_locations)) + assert(all(len(label) > 0 for label in page_labels)) + self.__page_number_labels: list[str] = page_labels + + def append(self, page_location: int | tuple[int, str]) -> None: + if page_location.__class__ == int: + assert (self.__page_number_type != PageNumberTypes.Custom) + self.page_locations.append(page_location) + else: + assert (self.__page_number_type == PageNumberTypes.Custom) + self.page_locations.append(page_location[0]) + self.__page_number_labels.append(page_location[1]) + return + + @property + def page_number_types(self) -> PageNumberTypes: + return self.__page_number_type + + 
@property + def number_of_pages(self) -> int: + return len(self.page_locations) + + @property + def last_value(self) -> int: + return self.__first_value + len(self.page_locations) - 1 + + def get_page_map(self, starting_location: int) -> str: + if self.__page_number_type != PageNumberTypes.Custom: + values = str(self.__first_value) + else: + values = "|".join(self.__page_number_labels) + return "(%s,%s,%s)" % (starting_location, self.__page_number_type.value, values) diff --git a/src/calibre/devices/kindle/apnx_page_generator/page_number_type.py b/src/calibre/devices/kindle/apnx_page_generator/page_number_type.py new file mode 100644 index 0000000000..93650522e9 --- /dev/null +++ b/src/calibre/devices/kindle/apnx_page_generator/page_number_type.py @@ -0,0 +1,11 @@ +__license__ = 'GPL v3' +__copyright__ = '2022, Vaso Peras-Likodric ' +__docformat__ = 'restructuredtext en' + +import enum + + +class PageNumberTypes(str, enum.Enum): + Arabic = "a" + Roman = "r" + Custom = 'c' diff --git a/src/calibre/devices/kindle/apnx_page_generator/pages.py b/src/calibre/devices/kindle/apnx_page_generator/pages.py new file mode 100644 index 0000000000..ff20943060 --- /dev/null +++ b/src/calibre/devices/kindle/apnx_page_generator/pages.py @@ -0,0 +1,43 @@ +__license__ = 'GPL v3' +__copyright__ = '2022, Vaso Peras-Likodric ' +__docformat__ = 'restructuredtext en' + +import itertools + +from calibre.devices.kindle.apnx_page_generator.page_group import PageGroup +from calibre.devices.kindle.apnx_page_generator.page_number_type import PageNumberTypes + + +class Pages: + def __init__(self, page_locations: list[int] | None = None): + if page_locations.__class__ == list: + self.__pages_groups: list[PageGroup] = [PageGroup(page_locations, PageNumberTypes.Arabic, 1)] + else: + self.__pages_groups: list[PageGroup] = [] + + def append(self, page_location: PageGroup) -> None: + self.__pages_groups.append(page_location) + return + + @property + def last_group(self) -> PageGroup: + return 
self.__pages_groups[-1] + + @property + def page_maps(self) -> str: + location = 1 + result = [] + for group in self.__pages_groups: + result.append(group.get_page_map(location)) + location += group.number_of_pages + return ",".join(result) + + @property + def page_locations(self) -> list[int]: + return list(itertools.chain.from_iterable(list(map(lambda pg: pg.page_locations, self.__pages_groups)))) + + @property + def number_of_pages(self) -> int: + return sum(list(map(lambda pg: len(pg.page_locations), self.__pages_groups))) + + diff --git a/src/calibre/devices/kindle/driver.py b/src/calibre/devices/kindle/driver.py index 56285e7e4a..051d887cf3 100644 --- a/src/calibre/devices/kindle/driver.py +++ b/src/calibre/devices/kindle/driver.py @@ -2,6 +2,8 @@ __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' +from calibre.devices.kindle.apnx import APNXBuilder + ''' Device driver for Amazon's Kindle ''' @@ -409,7 +411,7 @@ class KINDLE2(KINDLE): OPT_APNX_CUST_COL = 2 OPT_APNX_METHOD_COL = 3 OPT_APNX_OVERWRITE = 4 - EXTRA_CUSTOMIZATION_CHOICES = {OPT_APNX_METHOD:{'fast', 'accurate', 'pagebreak'}} + EXTRA_CUSTOMIZATION_CHOICES = {OPT_APNX_METHOD: APNXBuilder.generators.keys()} # x330 on the PaperWhite # x262 on the Touch. Doesn't choke on x330, though. From ddb68daae148dbc5e38a54e028da12c6d3fecfe8 Mon Sep 17 00:00:00 2001 From: Vaso Peras-Likodric Date: Wed, 24 Aug 2022 00:44:58 +0200 Subject: [PATCH 2/4] Copyright change. 
--- src/calibre/devices/kindle/apnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/devices/kindle/apnx.py b/src/calibre/devices/kindle/apnx.py index 941bd5afcc..ca90465278 100644 --- a/src/calibre/devices/kindle/apnx.py +++ b/src/calibre/devices/kindle/apnx.py @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2011, John Schember ' +__copyright__ = '2011, John Schember , refactored: 2022, Vaso Peras-Likodric ' __docformat__ = 'restructuredtext en' ''' From 4a6d9d8b2be7642f4969383fa074d21a0dfed4c2 Mon Sep 17 00:00:00 2001 From: Vaso Peras-Likodric Date: Mon, 29 Aug 2022 12:08:32 +0200 Subject: [PATCH 3/4] Remove APNX aria pagebreak generation --- src/calibre/devices/kindle/apnx.py | 1 - .../aria_pagebreak_page_generator.py | 84 ------------------- 2 files changed, 85 deletions(-) delete mode 100644 src/calibre/devices/kindle/apnx_page_generator/generators/aria_pagebreak_page_generator.py diff --git a/src/calibre/devices/kindle/apnx.py b/src/calibre/devices/kindle/apnx.py index ca90465278..19a7bfdff5 100644 --- a/src/calibre/devices/kindle/apnx.py +++ b/src/calibre/devices/kindle/apnx.py @@ -34,7 +34,6 @@ class APNXBuilder: FastPageGenerator.instance.name(): FastPageGenerator.instance, AccuratePageGenerator.instance.name(): AccuratePageGenerator.instance, PagebreakPageGenerator.instance.name(): PagebreakPageGenerator.instance, - AriaPagebreakPageGenerator.instance.name(): AriaPagebreakPageGenerator.instance, # ExactPageGenerator.instance.name(): ExactPageGenerator.instance, } diff --git a/src/calibre/devices/kindle/apnx_page_generator/generators/aria_pagebreak_page_generator.py b/src/calibre/devices/kindle/apnx_page_generator/generators/aria_pagebreak_page_generator.py deleted file mode 100644 index 9ea708597f..0000000000 --- a/src/calibre/devices/kindle/apnx_page_generator/generators/aria_pagebreak_page_generator.py +++ /dev/null @@ -1,84 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2022, Vaso Peras-Likodric ' 
-__docformat__ = 'restructuredtext en' - -from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator -from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator -from calibre.devices.kindle.apnx_page_generator.page_number_type import PageNumberTypes -from calibre.devices.kindle.apnx_page_generator.pages import Pages -from calibre.devices.kindle.apnx_page_generator.page_group import PageGroup -import re - -roman_numeral_map = (('m', 1000), ('cm', 900), ('d', 500), ('cd', 400), ('c', 100), ('xc', 90), ('l', 50), ('xl', 40), - ('x', 10), ('ix', 9), ('v', 5), ('iv', 4), ('i', 1)) - -roman_numeral_pattern = re.compile("""^m{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|V?i{0,3})$""", re.VERBOSE) - - -def from_roman(s: str) -> int: - """convert Roman numeral to integer""" - if not s: - raise ValueError('Input can not be blank') - if not roman_numeral_pattern.match(s): - raise ValueError('Invalid Roman numeral: %s' % s) - - result = 0 - index = 0 - for numeral, integer in roman_numeral_map: - while s[index:index + len(numeral)] == numeral: - result += integer - index += len(numeral) - return result - - -class LabelDescriptor: - def __init__(self, label: str, value: int, label_type: PageNumberTypes): - self.label: str = label - self.value: int = value - self.label_type: PageNumberTypes = label_type - - -class AriaPagebreakPageGenerator(IPageGenerator): - - def name(self) -> str: - return "aria_pagebreak" - - def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages: - return FastPageGenerator.instance.generate(mobi_file_path, real_count) - - def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages: - html = self.mobi_html(mobi_file_path) - pages = Pages() - - for m in re.finditer(b'<[^>]*role="doc-pagebreak"[^>]*aria-label="([^"|]+)"[^>]*>', html): - label_descriptor = self.get_label(m.group(1)) - if pages.number_of_pages == 0: - pages.append(PageGroup(m.end(), 
label_descriptor.label_type, label_descriptor.value, - label_descriptor.label)) - elif ( - pages.last_group.last_value == label_descriptor.value - 1 or label_descriptor.label_type == - PageNumberTypes.Custom) and pages.last_group.page_number_types == label_descriptor.label_type: - - if label_descriptor.label_type != PageNumberTypes.Custom: - pages.last_group.append(m.end()) - else: - pages.last_group.append((m.end(), label_descriptor.label)) - else: - pages.append(PageGroup(m.end(), label_descriptor.label_type, label_descriptor.value, - label_descriptor.label)) - - return pages - - @staticmethod - def get_label(label: bytes) -> LabelDescriptor: - label_string = label.decode() - try: - return LabelDescriptor(label_string, int(label_string), PageNumberTypes.Arabic) - except ValueError: - try: - return LabelDescriptor(label_string, from_roman(label_string), PageNumberTypes.Roman) - except ValueError: - return LabelDescriptor(label_string, 0, PageNumberTypes.Custom) - - -AriaPagebreakPageGenerator.instance = AriaPagebreakPageGenerator() From ca45bcaaf54affce0a8786ab1487eb41981da0b4 Mon Sep 17 00:00:00 2001 From: Vaso Peras-Likodric Date: Wed, 14 Sep 2022 17:45:48 +0200 Subject: [PATCH 4/4] Make code compatible with python 3.7, removed unused imports, change static methods. 
--- src/calibre/devices/kindle/apnx.py | 10 +++--- .../generators/accurate_page_generator.py | 14 +++++--- .../generators/exact_page_generator.py | 12 ++++--- .../generators/fast_page_generator.py | 10 +++--- .../generators/pagebreak_page_generator.py | 10 +++--- .../apnx_page_generator/i_page_generator.py | 33 ++++++++++--------- .../kindle/apnx_page_generator/page_group.py | 16 +++++---- .../apnx_page_generator/page_number_type.py | 4 +-- .../kindle/apnx_page_generator/pages.py | 9 ++--- src/calibre/devices/kindle/driver.py | 2 +- 10 files changed, 68 insertions(+), 52 deletions(-) diff --git a/src/calibre/devices/kindle/apnx.py b/src/calibre/devices/kindle/apnx.py index 19a7bfdff5..e2c5856d25 100644 --- a/src/calibre/devices/kindle/apnx.py +++ b/src/calibre/devices/kindle/apnx.py @@ -2,6 +2,8 @@ __license__ = 'GPL v3' __copyright__ = '2011, John Schember , refactored: 2022, Vaso Peras-Likodric ' __docformat__ = 'restructuredtext en' +from typing import Optional, Dict + ''' Generates and writes an APNX page mapping file. ''' @@ -17,8 +19,6 @@ from polyglot.builtins import as_unicode, as_bytes from calibre.devices.kindle.apnx_page_generator.generators.accurate_page_generator import AccuratePageGenerator from calibre.devices.kindle.apnx_page_generator.generators.pagebreak_page_generator import PagebreakPageGenerator -from calibre.devices.kindle.apnx_page_generator.generators.aria_pagebreak_page_generator import \ - AriaPagebreakPageGenerator from calibre.devices.kindle.apnx_page_generator.generators.exact_page_generator import ExactPageGenerator from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator @@ -30,14 +30,14 @@ class APNXBuilder: Create an APNX file using a pseudo page mapping. 
""" - generators: dict[str, IPageGenerator] = { + generators: Dict[str, IPageGenerator] = { FastPageGenerator.instance.name(): FastPageGenerator.instance, AccuratePageGenerator.instance.name(): AccuratePageGenerator.instance, PagebreakPageGenerator.instance.name(): PagebreakPageGenerator.instance, # ExactPageGenerator.instance.name(): ExactPageGenerator.instance, } - def write_apnx(self, mobi_file_path: str, apnx_path: str, method: str | None = None, page_count: int = 0): + def write_apnx(self, mobi_file_path: str, apnx_path: str, method: Optional[str] = None, page_count: int = 0): """ If you want a fixed number of pages (such as from a custom column) then pass in a value to page_count, otherwise a count will be estimated @@ -62,7 +62,7 @@ class APNXBuilder: fsync(apnxf) @staticmethod - def get_apnx_meta(mobi_file_path) -> dict[str, str]: + def get_apnx_meta(mobi_file_path) -> Dict[str, str]: import uuid apnx_meta = { 'guid': str(uuid.uuid4()).replace('-', '')[:8], diff --git a/src/calibre/devices/kindle/apnx_page_generator/generators/accurate_page_generator.py b/src/calibre/devices/kindle/apnx_page_generator/generators/accurate_page_generator.py index 24cf5eac9d..b76ac6cff9 100644 --- a/src/calibre/devices/kindle/apnx_page_generator/generators/accurate_page_generator.py +++ b/src/calibre/devices/kindle/apnx_page_generator/generators/accurate_page_generator.py @@ -2,20 +2,24 @@ __license__ = 'GPL v3' __copyright__ = '2022, Vaso Peras-Likodric ' __docformat__ = 'restructuredtext en' +from typing import Optional + from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator -from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator +from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator, mobi_html from calibre.devices.kindle.apnx_page_generator.pages import Pages class AccuratePageGenerator(IPageGenerator): + instance = None + def name(self) -> str: return 
"accurate" - def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages: + def _generate_fallback(self, mobi_file_path: str, real_count: Optional[int]) -> Pages: return FastPageGenerator.instance.generate(mobi_file_path, real_count) - def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages: + def _generate(self, mobi_file_path: str, real_count: Optional[int]) -> Pages: """ A more accurate but much more resource intensive and slower method to calculate the page length. @@ -35,7 +39,7 @@ class AccuratePageGenerator(IPageGenerator): """ pages = [] - html = self.mobi_html(mobi_file_path) + html = mobi_html(mobi_file_path) # States in_tag = False @@ -54,7 +58,7 @@ class AccuratePageGenerator(IPageGenerator): # and string functions will parse the text each # time they are called. # - # We can can use .lower() here because we are + # We can use .lower() here because we are # not modifying the text. In this case the case # doesn't matter just the absolute character and # the position within the stream. 
diff --git a/src/calibre/devices/kindle/apnx_page_generator/generators/exact_page_generator.py b/src/calibre/devices/kindle/apnx_page_generator/generators/exact_page_generator.py index a30d481ba4..864ad08eae 100644 --- a/src/calibre/devices/kindle/apnx_page_generator/generators/exact_page_generator.py +++ b/src/calibre/devices/kindle/apnx_page_generator/generators/exact_page_generator.py @@ -2,20 +2,24 @@ __license__ = 'GPL v3' __copyright__ = '2022, Vaso Peras-Likodric ' __docformat__ = 'restructuredtext en' +from typing import Optional + from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator -from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator +from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator, mobi_html_length from calibre.devices.kindle.apnx_page_generator.pages import Pages class ExactPageGenerator(IPageGenerator): + instance = None + def name(self) -> str: return "exact" - def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages: + def _generate_fallback(self, mobi_file_path: str, real_count: Optional[int]) -> Pages: return FastPageGenerator.instance.generate(mobi_file_path, real_count) - def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages: + def _generate(self, mobi_file_path: str, real_count: Optional[int]) -> Pages: """ Given a specified page count (such as from a custom column), create our array of pages for the apnx file by dividing by @@ -24,7 +28,7 @@ class ExactPageGenerator(IPageGenerator): pages = [] count = 0 - text_length = self.mobi_html_length(mobi_file_path) + text_length = mobi_html_length(mobi_file_path) chars_per_page = int(text_length // real_count) while count < text_length: diff --git a/src/calibre/devices/kindle/apnx_page_generator/generators/fast_page_generator.py b/src/calibre/devices/kindle/apnx_page_generator/generators/fast_page_generator.py index 
23320dacdf..5247a17bfd 100644 --- a/src/calibre/devices/kindle/apnx_page_generator/generators/fast_page_generator.py +++ b/src/calibre/devices/kindle/apnx_page_generator/generators/fast_page_generator.py @@ -2,7 +2,9 @@ __license__ = 'GPL v3' __copyright__ = '2022, Vaso Peras-Likodric ' __docformat__ = 'restructuredtext en' -from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator +from typing import Optional + +from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator, mobi_html_length from calibre.devices.kindle.apnx_page_generator.pages import Pages @@ -11,10 +13,10 @@ class FastPageGenerator(IPageGenerator): def name(self) -> str: return "fast" - def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages: + def _generate_fallback(self, mobi_file_path: str, real_count: Optional[int]) -> Pages: raise Exception("Fast calculation impossible.") - def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages: + def _generate(self, mobi_file_path: str, real_count: Optional[int]) -> Pages: """ 2300 characters of uncompressed text per page. 
This is not meant to map 1 to 1 to a print book but to be a @@ -34,7 +36,7 @@ class FastPageGenerator(IPageGenerator): pages = [] count = 0 - text_length = self.mobi_html_length(mobi_file_path) + text_length = mobi_html_length(mobi_file_path) while count < text_length: pages.append(count) diff --git a/src/calibre/devices/kindle/apnx_page_generator/generators/pagebreak_page_generator.py b/src/calibre/devices/kindle/apnx_page_generator/generators/pagebreak_page_generator.py index bf591480f2..f80b131556 100644 --- a/src/calibre/devices/kindle/apnx_page_generator/generators/pagebreak_page_generator.py +++ b/src/calibre/devices/kindle/apnx_page_generator/generators/pagebreak_page_generator.py @@ -2,8 +2,10 @@ __license__ = 'GPL v3' __copyright__ = '2022, Vaso Peras-Likodric ' __docformat__ = 'restructuredtext en' +from typing import Optional + from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator -from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator +from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator, mobi_html from calibre.devices.kindle.apnx_page_generator.pages import Pages import re @@ -13,12 +15,12 @@ class PagebreakPageGenerator(IPageGenerator): def name(self) -> str: return "pagebreak" - def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages: + def _generate_fallback(self, mobi_file_path: str, real_count: Optional[int]) -> Pages: return FastPageGenerator.instance.generate(mobi_file_path, real_count) - def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages: + def _generate(self, mobi_file_path: str, real_count: Optional[int]) -> Pages: """ Determine pages based on the presence of <*pagebreak*/>. 
""" - html = self.mobi_html(mobi_file_path) + html = mobi_html(mobi_file_path) pages = [] for m in re.finditer(b'<[^>]*pagebreak[^>]*>', html): pages.append(m.end()) diff --git a/src/calibre/devices/kindle/apnx_page_generator/i_page_generator.py b/src/calibre/devices/kindle/apnx_page_generator/i_page_generator.py index 8de2eb05a8..62b2265c4b 100644 --- a/src/calibre/devices/kindle/apnx_page_generator/i_page_generator.py +++ b/src/calibre/devices/kindle/apnx_page_generator/i_page_generator.py @@ -4,6 +4,8 @@ __docformat__ = 'restructuredtext en' import struct from abc import abstractmethod, ABCMeta +from typing import Optional + from calibre.devices.kindle.apnx_page_generator.pages import Pages from calibre.ebooks.mobi.reader.mobi6 import MobiReader from calibre.utils.logging import default_log @@ -14,14 +16,14 @@ from calibre.ebooks.pdb.header import PdbHeaderReader class IPageGenerator(metaclass=ABCMeta): @abstractmethod - def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages: + def _generate(self, mobi_file_path: str, real_count: Optional[int]) -> Pages: pass @abstractmethod - def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages: + def _generate_fallback(self, mobi_file_path: str, real_count: Optional[int]) -> Pages: pass - def generate(self, mobi_file_path: str, real_count: int | None) -> Pages: + def generate(self, mobi_file_path: str, real_count: Optional[int]) -> Pages: try: result = self._generate(mobi_file_path, real_count) if result.number_of_pages > 0: @@ -36,18 +38,17 @@ class IPageGenerator(metaclass=ABCMeta): def name(self) -> str: pass - @staticmethod - def mobi_html(mobi_file_path: str) -> bytes: - mr = MobiReader(mobi_file_path, default_log) - if mr.book_header.encryption_type != 0: - raise Exception("DRMed book") - mr.extract_text() - return as_bytes(mr.mobi_html.lower()) - @staticmethod - def mobi_html_length(mobi_file_path: str) -> int: - with lopen(mobi_file_path, 'rb') as mf: - pdb_header = 
PdbHeaderReader(mf) - r0 = pdb_header.section_data(0) - return struct.unpack('>I', r0[4:8])[0] +def mobi_html(mobi_file_path: str) -> bytes: + mr = MobiReader(mobi_file_path, default_log) + if mr.book_header.encryption_type != 0: + raise Exception("DRMed book") + mr.extract_text() + return as_bytes(mr.mobi_html.lower()) + +def mobi_html_length(mobi_file_path: str) -> int: + with lopen(mobi_file_path, 'rb') as mf: + pdb_header = PdbHeaderReader(mf) + r0 = pdb_header.section_data(0) + return struct.unpack('>I', r0[4:8])[0] diff --git a/src/calibre/devices/kindle/apnx_page_generator/page_group.py b/src/calibre/devices/kindle/apnx_page_generator/page_group.py index f8ea2488c4..b99e639023 100644 --- a/src/calibre/devices/kindle/apnx_page_generator/page_group.py +++ b/src/calibre/devices/kindle/apnx_page_generator/page_group.py @@ -2,30 +2,32 @@ __license__ = 'GPL v3' __copyright__ = '2022, Vaso Peras-Likodric ' __docformat__ = 'restructuredtext en' +from typing import Union, List, Tuple + from calibre.devices.kindle.apnx_page_generator.page_number_type import PageNumberTypes class PageGroup: """Simulate constructor overloading""" - def __init__(self, page_locations: int | list[int], page_number_type: PageNumberTypes, first_value: int, - page_labels: str | list[str] | None = None): + def __init__(self, page_locations: Union[int, List[int]], page_number_type: PageNumberTypes, first_value: int, + page_labels: Union[str, List[str], None] = None): if page_locations.__class__ == int: - self.page_locations: list[int] = [page_locations] + self.page_locations: List[int] = [page_locations] else: - self.page_locations: list[int] = page_locations + self.page_locations: List[int] = page_locations self.__page_number_type: PageNumberTypes = page_number_type self.__first_value = first_value if page_number_type == PageNumberTypes.Custom: assert(page_labels is not None) if page_labels.__class__ == str: assert (1 == len(self.page_locations) and len(page_labels) > 0) - 
self.__page_number_labels: list[str] = [page_labels] + self.__page_number_labels: List[str] = [page_labels] else: assert (len(page_labels) == len(self.page_locations)) assert(all(len(label) > 0 for label in page_labels)) - self.__page_number_labels: list[str] = page_labels + self.__page_number_labels: List[str] = page_labels - def append(self, page_location: int | tuple[int, str]) -> None: + def append(self, page_location: Union[int, Tuple[int, str]]) -> None: if page_location.__class__ == int: assert (self.__page_number_type != PageNumberTypes.Custom) self.page_locations.append(page_location) diff --git a/src/calibre/devices/kindle/apnx_page_generator/page_number_type.py b/src/calibre/devices/kindle/apnx_page_generator/page_number_type.py index 93650522e9..4f468ab204 100644 --- a/src/calibre/devices/kindle/apnx_page_generator/page_number_type.py +++ b/src/calibre/devices/kindle/apnx_page_generator/page_number_type.py @@ -5,7 +5,7 @@ __docformat__ = 'restructuredtext en' import enum -class PageNumberTypes(str, enum.Enum): +class PageNumberTypes(enum.Enum): Arabic = "a" Roman = "r" - Custom = 'c' + Custom = "c" diff --git a/src/calibre/devices/kindle/apnx_page_generator/pages.py b/src/calibre/devices/kindle/apnx_page_generator/pages.py index ff20943060..6edeeb875a 100644 --- a/src/calibre/devices/kindle/apnx_page_generator/pages.py +++ b/src/calibre/devices/kindle/apnx_page_generator/pages.py @@ -3,17 +3,18 @@ __copyright__ = '2022, Vaso Peras-Likodric ' __docformat__ = 'restructuredtext en' import itertools +from typing import Optional, List from calibre.devices.kindle.apnx_page_generator.page_group import PageGroup from calibre.devices.kindle.apnx_page_generator.page_number_type import PageNumberTypes class Pages: - def __init__(self, page_locations: list[int] | None = None): + def __init__(self, page_locations: Optional[List[int]] = None): if page_locations.__class__ == list: - self.__pages_groups: list[PageGroup] = [PageGroup(page_locations, 
PageNumberTypes.Arabic, 1)] + self.__pages_groups: List[PageGroup] = [PageGroup(page_locations, PageNumberTypes.Arabic, 1)] else: - self.__pages_groups: list[PageGroup] = [] + self.__pages_groups: List[PageGroup] = [] def append(self, page_location: PageGroup) -> None: self.__pages_groups.append(page_location) @@ -33,7 +34,7 @@ class Pages: return ",".join(result) @property - def page_locations(self) -> list[int]: + def page_locations(self) -> List[int]: return list(itertools.chain.from_iterable(list(map(lambda pg: pg.page_locations, self.__pages_groups)))) @property diff --git a/src/calibre/devices/kindle/driver.py b/src/calibre/devices/kindle/driver.py index 051d887cf3..647498f754 100644 --- a/src/calibre/devices/kindle/driver.py +++ b/src/calibre/devices/kindle/driver.py @@ -411,7 +411,7 @@ class KINDLE2(KINDLE): OPT_APNX_CUST_COL = 2 OPT_APNX_METHOD_COL = 3 OPT_APNX_OVERWRITE = 4 - EXTRA_CUSTOMIZATION_CHOICES = {OPT_APNX_METHOD: APNXBuilder.generators.keys()} + EXTRA_CUSTOMIZATION_CHOICES = {OPT_APNX_METHOD: set(APNXBuilder.generators.keys())} # x330 on the PaperWhite # x262 on the Touch. Doesn't choke on x330, though.