Remove APNX aria pagebreak generation

This commit is contained in:
Vaso Peras-Likodric 2022-08-29 12:08:32 +02:00
parent ddb68daae1
commit 4a6d9d8b2b
2 changed files with 0 additions and 85 deletions

View File

@ -34,7 +34,6 @@ class APNXBuilder:
FastPageGenerator.instance.name(): FastPageGenerator.instance,
AccuratePageGenerator.instance.name(): AccuratePageGenerator.instance,
PagebreakPageGenerator.instance.name(): PagebreakPageGenerator.instance,
AriaPagebreakPageGenerator.instance.name(): AriaPagebreakPageGenerator.instance,
# ExactPageGenerator.instance.name(): ExactPageGenerator.instance,
}

View File

@ -1,84 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
__docformat__ = 'restructuredtext en'
from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator
from calibre.devices.kindle.apnx_page_generator.page_number_type import PageNumberTypes
from calibre.devices.kindle.apnx_page_generator.pages import Pages
from calibre.devices.kindle.apnx_page_generator.page_group import PageGroup
import re
# Lowercase Roman numeral symbols paired with their integer values, ordered
# from the largest value to the smallest so that a greedy left-to-right scan
# can decode a numeral.
roman_numeral_map = (
    ('m', 1000), ('cm', 900), ('d', 500), ('cd', 400),
    ('c', 100), ('xc', 90), ('l', 50), ('xl', 40),
    ('x', 10), ('ix', 9), ('v', 5), ('iv', 4), ('i', 1),
)

# Validates a well-formed lowercase Roman numeral.
# The units group previously used an uppercase 'V', which rejected valid
# lowercase numerals such as 'v' or 'viii' and accepted 'V', a symbol that
# roman_numeral_map (lowercase only) cannot decode.
roman_numeral_pattern = re.compile(r"""
    ^
    m{0,4}              # thousands: zero to four 'm'
    (cm|cd|d?c{0,3})    # hundreds: 900, 400, or optional 500 plus 0-3 hundreds
    (xc|xl|l?x{0,3})    # tens: 90, 40, or optional 50 plus 0-3 tens
    (ix|iv|v?i{0,3})    # units: 9, 4, or optional 5 plus 0-3 ones
    $
    """, re.VERBOSE)
def from_roman(s: str) -> int:
    """Return the integer value of the Roman numeral ``s``.

    The input is first validated against ``roman_numeral_pattern`` and then
    decoded by repeatedly consuming symbols from ``roman_numeral_map``.

    :param s: Roman numeral text; matching is case-sensitive and the symbol
        map holds lowercase symbols only.
    :return: The sum of the values of the consumed symbols.
    :raises ValueError: If ``s`` is empty or does not match
        ``roman_numeral_pattern``.
    """
    if not s:
        raise ValueError('Input can not be blank')
    if roman_numeral_pattern.match(s) is None:
        raise ValueError('Invalid Roman numeral: %s' % s)

    total = 0
    position = 0
    for symbol, amount in roman_numeral_map:
        width = len(symbol)
        # Consume every consecutive occurrence of this symbol before moving
        # on to the next entry of the map.
        while s.startswith(symbol, position):
            total += amount
            position += width
    return total
class LabelDescriptor:
    """Plain value holder describing one page label.

    Attributes:
        label: The label text.
        value: The integer value associated with the label.
        label_type: The numbering type the label belongs to.
    """

    label: str
    value: int
    label_type: PageNumberTypes

    def __init__(self, label: str, value: int, label_type: PageNumberTypes):
        """Store the three fields unchanged."""
        self.label = label
        self.value = value
        self.label_type = label_type
class AriaPagebreakPageGenerator(IPageGenerator):
    """Page generator that reads page labels from ``role="doc-pagebreak"`` tags."""

    def name(self) -> str:
        """Return the identifier of this generator."""
        return "aria_pagebreak"

    def _generate_fallback(self, mobi_file_path: str, real_count: int | None) -> Pages:
        """Produce pages by delegating to the shared ``FastPageGenerator`` instance."""
        fallback_generator = FastPageGenerator.instance
        return fallback_generator.generate(mobi_file_path, real_count)

    def _generate(self, mobi_file_path: str, real_count: int | None) -> Pages:
        """Build ``Pages`` from the page-break tags found in the book's HTML.

        Every tag carrying ``role="doc-pagebreak"`` followed by an
        ``aria-label`` attribute contributes one page located at the end
        offset of the tag. A page extends the current group when the group has
        the same numbering type and the label either continues the numeric
        sequence or is a custom label; otherwise a new group is started.

        ``real_count`` is not used by this method.
        """
        markup = self.mobi_html(mobi_file_path)
        marker_pattern = b'<[^>]*role="doc-pagebreak"[^>]*aria-label="([^"|]+)"[^>]*>'
        pages = Pages()
        for marker in re.finditer(marker_pattern, markup):
            descriptor = self.get_label(marker.group(1))
            offset = marker.end()

            # The very first page always opens a group; later pages open one
            # whenever they cannot continue the last group.
            opens_new_group = pages.number_of_pages == 0 or not (
                (
                    pages.last_group.last_value == descriptor.value - 1
                    or descriptor.label_type == PageNumberTypes.Custom
                )
                and pages.last_group.page_number_types == descriptor.label_type
            )
            if opens_new_group:
                pages.append(PageGroup(offset, descriptor.label_type, descriptor.value,
                                       descriptor.label))
                continue

            # Custom pages carry their label along with the offset; numbered
            # pages only need the offset.
            if descriptor.label_type != PageNumberTypes.Custom:
                entry = offset
            else:
                entry = (offset, descriptor.label)
            pages.last_group.append(entry)
        return pages

    @staticmethod
    def get_label(label: bytes) -> LabelDescriptor:
        """Classify a raw label as Arabic, Roman or custom.

        The decoded text is tried as an integer first, then as a Roman
        numeral via ``from_roman``; if both fail it is treated as a custom
        label with value 0.
        """
        text = label.decode()
        try:
            number = int(text)
        except ValueError:
            pass
        else:
            return LabelDescriptor(text, number, PageNumberTypes.Arabic)

        try:
            number = from_roman(text)
        except ValueError:
            return LabelDescriptor(text, 0, PageNumberTypes.Custom)
        return LabelDescriptor(text, number, PageNumberTypes.Roman)


# Shared instance of the generator.
AriaPagebreakPageGenerator.instance = AriaPagebreakPageGenerator()