mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge branch 'master' of https://github.com/Vasolik/calibre
This commit is contained in:
commit
30ef660cb9
@ -1,15 +1,15 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, John Schember <john at nachtimwald.com>'
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, John Schember <john at nachtimwald.com>, refactored: 2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from typing import Optional, Dict
|
||||
|
||||
'''
|
||||
Generates and writes an APNX page mapping file.
|
||||
'''
|
||||
|
||||
import re
|
||||
import struct
|
||||
|
||||
from calibre.ebooks.mobi.reader.mobi6 import MobiReader
|
||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||
from calibre.ebooks.mobi.reader.headers import MetadataHeader
|
||||
from calibre.utils.logging import default_log
|
||||
@ -17,29 +17,66 @@ from calibre import prints, fsync
|
||||
from calibre.constants import DEBUG
|
||||
from polyglot.builtins import as_unicode, as_bytes
|
||||
|
||||
from calibre.devices.kindle.apnx_page_generator.generators.accurate_page_generator import AccuratePageGenerator
|
||||
from calibre.devices.kindle.apnx_page_generator.generators.pagebreak_page_generator import PagebreakPageGenerator
|
||||
from calibre.devices.kindle.apnx_page_generator.generators.exact_page_generator import ExactPageGenerator
|
||||
from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
|
||||
from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator
|
||||
from calibre.devices.kindle.apnx_page_generator.pages import Pages
|
||||
|
||||
|
||||
class APNXBuilder:
|
||||
'''
|
||||
"""
|
||||
Create an APNX file using a pseudo page mapping.
|
||||
'''
|
||||
"""
|
||||
|
||||
def write_apnx(self, mobi_file_path, apnx_path, method=None, page_count=0):
|
||||
'''
|
||||
generators: Dict[str, IPageGenerator] = {
|
||||
FastPageGenerator.instance.name(): FastPageGenerator.instance,
|
||||
AccuratePageGenerator.instance.name(): AccuratePageGenerator.instance,
|
||||
PagebreakPageGenerator.instance.name(): PagebreakPageGenerator.instance,
|
||||
# ExactPageGenerator.instance.name(): ExactPageGenerator.instance,
|
||||
}
|
||||
|
||||
def write_apnx(self, mobi_file_path: str, apnx_path: str, method: Optional[str] = None, page_count: int = 0):
    """
    Generate a page mapping for a MOBI file and write it out as an APNX file.

    If you want a fixed number of pages (such as from a custom column) then
    pass in a value to page_count, otherwise a count will be estimated
    by the generator registered under ``method`` in ``self.generators``.
    An unknown or missing ``method`` selects the fast generator.

    :param mobi_file_path: path of the MOBI file to paginate.
    :param apnx_path: path the generated APNX data is written to.
    :param method: key into ``self.generators``.
    :param page_count: wanted number of pages; a falsy value means "estimate".
    :raises Exception: if the selected generator produced no pages.
    """
    apnx_meta = self.get_apnx_meta(mobi_file_path)

    generator: IPageGenerator
    if page_count:
        generator = ExactPageGenerator.instance
    else:
        # Plain lookup with a default. setdefault() would insert every
        # unknown method name (including None) into the generators dict,
        # which is shared by all users of the class.
        generator = self.generators.get(method, FastPageGenerator.instance)

    pages = generator.generate(mobi_file_path, page_count)
    if pages.number_of_pages == 0:
        raise Exception(_('Could not generate page mapping.'))

    # Generate the APNX file from the page mapping.
    apnx = self.generate_apnx(pages, apnx_meta)

    # Write the APNX.
    with lopen(apnx_path, 'wb') as apnxf:
        apnxf.write(apnx)
        fsync(apnxf)
|
||||
|
||||
@staticmethod
|
||||
def get_apnx_meta(mobi_file_path) -> Dict[str, str]:
|
||||
import uuid
|
||||
apnx_meta = {
|
||||
'guid': str(uuid.uuid4()).replace('-', '')[:8],
|
||||
'asin': '',
|
||||
'cdetype': 'EBOK',
|
||||
'format': 'MOBI_7',
|
||||
'acr': ''
|
||||
}
|
||||
with lopen(mobi_file_path, 'rb') as mf:
|
||||
ident = PdbHeaderReader(mf).identity()
|
||||
if as_bytes(ident) != b'BOOKMOBI':
|
||||
# Check that this is really a MOBI file.
|
||||
raise Exception(_('Not a valid MOBI file. Reports identity of %s') % ident)
|
||||
apnx_meta['acr'] = as_unicode(PdbHeaderReader(mf).name(), errors='replace')
|
||||
|
||||
# We'll need the PDB name, the MOBI version, and some metadata to make FW 3.4 happy with KF8 files...
|
||||
with lopen(mobi_file_path, 'rb') as mf:
|
||||
mh = MetadataHeader(mf, default_log)
|
||||
@ -55,41 +92,10 @@ class APNXBuilder:
|
||||
apnx_meta['asin'] = ''
|
||||
else:
|
||||
apnx_meta['asin'] = str(mh.exth.uuid)
|
||||
return apnx_meta
|
||||
|
||||
# Get the pages depending on the chosen parser
|
||||
pages = []
|
||||
if page_count:
|
||||
pages = self.get_pages_exact(mobi_file_path, page_count)
|
||||
else:
|
||||
try:
|
||||
if method == 'accurate':
|
||||
pages = self.get_pages_accurate(mobi_file_path)
|
||||
elif method == 'pagebreak':
|
||||
pages = self.get_pages_pagebreak_tag(mobi_file_path)
|
||||
if not pages:
|
||||
pages = self.get_pages_accurate(mobi_file_path)
|
||||
else:
|
||||
raise Exception('%r is not a valid apnx generation method' % method)
|
||||
except:
|
||||
# Fall back to the fast parser if we can't
|
||||
# use the accurate one. Typically this is
|
||||
# due to the file having DRM.
|
||||
pages = self.get_pages_fast(mobi_file_path)
|
||||
|
||||
if not pages:
|
||||
pages = self.get_pages_fast(mobi_file_path)
|
||||
if not pages:
|
||||
raise Exception(_('Could not generate page mapping.'))
|
||||
|
||||
# Generate the APNX file from the page mapping.
|
||||
apnx = self.generate_apnx(pages, apnx_meta)
|
||||
|
||||
# Write the APNX.
|
||||
with lopen(apnx_path, 'wb') as apnxf:
|
||||
apnxf.write(apnx)
|
||||
fsync(apnxf)
|
||||
|
||||
def generate_apnx(self, pages, apnx_meta):
|
||||
@staticmethod
|
||||
def generate_apnx(pages: Pages, apnx_meta) -> bytes:
|
||||
apnx = b''
|
||||
|
||||
if DEBUG:
|
||||
@ -107,8 +113,8 @@ class APNXBuilder:
|
||||
# legacy mobi files, too. But, since they still handle this one too, let's
|
||||
# try not to break old devices, and keep using the simple header ;).
|
||||
content_header = '{"contentGuid":"%(guid)s","asin":"%(asin)s","cdeType":"%(cdetype)s","fileRevisionId":"1"}' % apnx_meta
|
||||
page_header = '{"asin":"%(asin)s","pageMap":"(1,a,1)"}' % apnx_meta
|
||||
|
||||
page_header = '{"asin":"%(asin)s","pageMap":"' % apnx_meta
|
||||
page_header += pages.page_maps + '"}'
|
||||
if DEBUG:
|
||||
prints('APNX Content Header:', content_header)
|
||||
content_header = as_bytes(content_header)
|
||||
@ -120,177 +126,12 @@ class APNXBuilder:
|
||||
apnx += content_header
|
||||
apnx += struct.pack('>H', 1)
|
||||
apnx += struct.pack('>H', len(page_header))
|
||||
apnx += struct.pack('>H', len(pages))
|
||||
apnx += struct.pack('>H', pages.number_of_pages)
|
||||
apnx += struct.pack('>H', 32)
|
||||
apnx += page_header
|
||||
|
||||
# Write page values to APNX.
|
||||
for page in pages:
|
||||
apnx += struct.pack('>I', page)
|
||||
for location in pages.page_locations:
|
||||
apnx += struct.pack('>I', location)
|
||||
|
||||
return apnx
|
||||
|
||||
def get_pages_exact(self, mobi_file_path, page_count):
|
||||
'''
|
||||
Given a specified page count (such as from a custom column),
|
||||
create our array of pages for the apnx file by dividing by
|
||||
the content size of the book.
|
||||
'''
|
||||
pages = []
|
||||
count = 0
|
||||
|
||||
with lopen(mobi_file_path, 'rb') as mf:
|
||||
phead = PdbHeaderReader(mf)
|
||||
r0 = phead.section_data(0)
|
||||
text_length = struct.unpack('>I', r0[4:8])[0]
|
||||
|
||||
chars_per_page = int(text_length // page_count)
|
||||
while count < text_length:
|
||||
pages.append(count)
|
||||
count += chars_per_page
|
||||
|
||||
if len(pages) > page_count:
|
||||
# Rounding created extra page entries
|
||||
pages = pages[:page_count]
|
||||
|
||||
return pages
|
||||
|
||||
def get_pages_fast(self, mobi_file_path):
|
||||
'''
|
||||
2300 characters of uncompressed text per page. This is
|
||||
not meant to map 1 to 1 to a print book but to be a
|
||||
close enough measure.
|
||||
|
||||
A test book was chosen and the characters were counted
|
||||
on one page. This number was round to 2240 then 60
|
||||
characters of markup were added to the total giving
|
||||
2300.
|
||||
|
||||
Uncompressed text length is used because it's easily
|
||||
accessible in MOBI files (part of the header). Also,
|
||||
It's faster to work off of the length then to
|
||||
decompress and parse the actual text.
|
||||
'''
|
||||
text_length = 0
|
||||
pages = []
|
||||
count = 0
|
||||
|
||||
with lopen(mobi_file_path, 'rb') as mf:
|
||||
phead = PdbHeaderReader(mf)
|
||||
r0 = phead.section_data(0)
|
||||
text_length = struct.unpack('>I', r0[4:8])[0]
|
||||
|
||||
while count < text_length:
|
||||
pages.append(count)
|
||||
count += 2300
|
||||
|
||||
return pages
|
||||
|
||||
def get_pages_accurate(self, mobi_file_path):
|
||||
'''
|
||||
A more accurate but much more resource intensive and slower
|
||||
method to calculate the page length.
|
||||
|
||||
Parses the uncompressed text. In an average paper back book
|
||||
There are 32 lines per page and a maximum of 70 characters
|
||||
per line.
|
||||
|
||||
Each paragraph starts a new line and every 70 characters
|
||||
(minus markup) in a paragraph starts a new line. The
|
||||
position after every 30 lines will be marked as a new
|
||||
page.
|
||||
|
||||
This can be make more accurate by accounting for
|
||||
<div class="mbp_pagebreak" /> as a new page marker.
|
||||
And <br> elements as an empty line.
|
||||
'''
|
||||
pages = []
|
||||
|
||||
# Get the MOBI html.
|
||||
mr = MobiReader(mobi_file_path, default_log)
|
||||
if mr.book_header.encryption_type != 0:
|
||||
# DRMed book
|
||||
return self.get_pages_fast(mobi_file_path)
|
||||
mr.extract_text()
|
||||
|
||||
# States
|
||||
in_tag = False
|
||||
in_p = False
|
||||
check_p = False
|
||||
closing = False
|
||||
p_char_count = 0
|
||||
|
||||
# Get positions of every line
|
||||
# A line is either a paragraph starting
|
||||
# or every 70 characters in a paragraph.
|
||||
lines = []
|
||||
pos = -1
|
||||
# We want this to be as fast as possible so we
|
||||
# are going to do one pass across the text. re
|
||||
# and string functions will parse the text each
|
||||
# time they are called.
|
||||
#
|
||||
# We can use .lower() here because we are
|
||||
# not modifying the text. In this case the case
|
||||
# doesn't matter just the absolute character and
|
||||
# the position within the stream.
|
||||
data = bytearray(as_bytes(mr.mobi_html.lower()))
|
||||
slash, p, lt, gt = map(ord, '/p<>')
|
||||
for c in data:
|
||||
pos += 1
|
||||
|
||||
# Check if we are starting or stopping a p tag.
|
||||
if check_p:
|
||||
if c == slash:
|
||||
closing = True
|
||||
continue
|
||||
elif c == p:
|
||||
if closing:
|
||||
in_p = False
|
||||
else:
|
||||
in_p = True
|
||||
lines.append(pos - 2)
|
||||
check_p = False
|
||||
closing = False
|
||||
continue
|
||||
|
||||
if c == lt:
|
||||
in_tag = True
|
||||
check_p = True
|
||||
continue
|
||||
elif c == gt:
|
||||
in_tag = False
|
||||
check_p = False
|
||||
continue
|
||||
|
||||
if in_p and not in_tag:
|
||||
p_char_count += 1
|
||||
if p_char_count == 70:
|
||||
lines.append(pos)
|
||||
p_char_count = 0
|
||||
|
||||
# Every 32 lines is a new page
|
||||
for i in range(0, len(lines), 32):
|
||||
pages.append(lines[i])
|
||||
|
||||
return pages
|
||||
|
||||
def get_pages_pagebreak_tag(self, mobi_file_path):
|
||||
'''
|
||||
Determine pages based on the presence of
|
||||
<mbp:pagebreak>.
|
||||
'''
|
||||
pages = []
|
||||
|
||||
# Get the MOBI html.
|
||||
mr = MobiReader(mobi_file_path, default_log)
|
||||
if mr.book_header.encryption_type != 0:
|
||||
# DRMed book
|
||||
return self.get_pages_fast(mobi_file_path)
|
||||
mr.extract_text()
|
||||
|
||||
html = as_bytes(mr.mobi_html.lower())
|
||||
for m in re.finditer(b'<[^>]*pagebreak[^>]*>', html):
|
||||
pages.append(m.end())
|
||||
|
||||
return pages
|
||||
|
@ -0,0 +1,107 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
|
||||
from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator, mobi_html
|
||||
from calibre.devices.kindle.apnx_page_generator.pages import Pages
|
||||
|
||||
|
||||
class AccuratePageGenerator(IPageGenerator):
    """Page generator that estimates pages by counting lines in the book's HTML."""

    # Shared instance; assigned right after the class definition.
    instance = None

    def name(self) -> str:
        """Return the identifier of this generation method."""
        return "accurate"

    def _generate_fallback(self, mobi_file_path: str, real_count: Optional[int]) -> Pages:
        """Delegate to the fast generator when this one cannot produce pages."""
        return FastPageGenerator.instance.generate(mobi_file_path, real_count)

    def _generate(self, mobi_file_path: str, real_count: Optional[int]) -> Pages:
        """
        A more accurate but much more resource intensive and slower
        method to calculate the page length.

        Parses the uncompressed text. In an average paperback book
        there are 32 lines per page and a maximum of 70 characters
        per line.

        Each paragraph starts a new line and every 70 characters
        (minus markup) in a paragraph starts a new line. The
        position of every 32nd line is marked as a new page.

        This can be made more accurate by accounting for
        <div class="mbp_pagebreak" /> as a new page marker.
        And <br> elements as an empty line.

        :param mobi_file_path: path of the MOBI file to read.
        :param real_count: not used by this generator.
        :return: a Pages object built from the computed page offsets.
        """
        html = mobi_html(mobi_file_path)

        # States
        in_tag = False
        in_p = False
        check_p = False
        closing = False
        p_char_count = 0

        # Get positions of every line
        # A line is either a paragraph starting
        # or every 70 characters in a paragraph.
        lines = []
        # We want this to be as fast as possible so we
        # are going to do one pass across the text. re
        # and string functions will parse the text each
        # time they are called.
        #
        # The text is already lower-cased by mobi_html(); case
        # doesn't matter here, just the absolute character and
        # the position within the stream.
        slash, p, lt, gt = map(ord, '/p<>')
        for pos, c in enumerate(bytearray(html)):
            # Check if we are starting or stopping a p tag.
            if check_p:
                if c == slash:
                    closing = True
                    continue
                elif c == p:
                    if closing:
                        in_p = False
                    else:
                        in_p = True
                        # pos is the index of the 'p', so pos - 2 is the
                        # byte just before the '<'. Clamp at 0: text that
                        # opens with '<p' would otherwise record -1, which
                        # cannot be packed as an unsigned offset.
                        lines.append(max(pos - 2, 0))
                check_p = False
                closing = False
                continue

            if c == lt:
                in_tag = True
                check_p = True
                continue
            elif c == gt:
                in_tag = False
                check_p = False
                continue

            if in_p and not in_tag:
                p_char_count += 1
                if p_char_count == 70:
                    lines.append(pos)
                    p_char_count = 0

        # Every 32 lines is a new page
        return Pages(lines[::32])


AccuratePageGenerator.instance = AccuratePageGenerator()
|
@ -0,0 +1,45 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
|
||||
from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator, mobi_html_length
|
||||
from calibre.devices.kindle.apnx_page_generator.pages import Pages
|
||||
|
||||
|
||||
class ExactPageGenerator(IPageGenerator):
    """Page generator that spreads a caller-supplied page count over the text."""

    # Shared instance; assigned right after the class definition.
    instance = None

    def name(self) -> str:
        """Return the identifier of this generation method."""
        return "exact"

    def _generate_fallback(self, mobi_file_path: str, real_count: Optional[int]) -> Pages:
        """Delegate to the fast generator when this one cannot produce pages."""
        return FastPageGenerator.instance.generate(mobi_file_path, real_count)

    def _generate(self, mobi_file_path: str, real_count: Optional[int]) -> Pages:
        """
        Given a specified page count (such as from a custom column),
        create our array of pages for the apnx file by dividing by
        the content size of the book.

        :param mobi_file_path: path of the MOBI file to read.
        :param real_count: wanted number of pages; must be a positive number.
        :return: a Pages object holding at most ``real_count`` page offsets.
        :raises ValueError: if ``real_count`` is missing or not positive.
        """
        if real_count is None or real_count <= 0:
            # A non-positive count can never be honoured (a negative step
            # used to loop forever); generate() turns this into a fallback.
            raise ValueError('A positive page count is required, got %r' % (real_count,))

        text_length = mobi_html_length(mobi_file_path)

        # Never step by less than one character: when more pages are
        # requested than there are characters the integer division gives 0,
        # and a zero step would never reach the end of the text.
        chars_per_page = max(1, int(text_length // real_count))
        pages = list(range(0, text_length, chars_per_page))

        if len(pages) > real_count:
            # Rounding created extra page entries
            pages = pages[:real_count]

        return Pages(pages)


ExactPageGenerator.instance = ExactPageGenerator()
|
@ -0,0 +1,48 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator, mobi_html_length
|
||||
from calibre.devices.kindle.apnx_page_generator.pages import Pages
|
||||
|
||||
|
||||
class FastPageGenerator(IPageGenerator):
    """Page generator that places a page every fixed number of characters."""

    def name(self) -> str:
        """Return the identifier of this generation method."""
        return "fast"

    def _generate_fallback(self, mobi_file_path: str, real_count: Optional[int]) -> Pages:
        """There is nothing cheaper to fall back to, so always fail."""
        raise Exception("Fast calculation impossible.")

    def _generate(self, mobi_file_path: str, real_count: Optional[int]) -> Pages:
        """
        Place one page every 2300 characters of uncompressed text.

        This is not meant to map 1 to 1 to a print book but to be a
        close enough measure. A test book was chosen and the characters
        on one page were counted; that number was rounded to 2240 and
        60 characters of markup were added, giving 2300.

        The text length reported by mobi_html_length() is used because
        it is cheaper to work off the length than to decompress and
        parse the actual text.

        :param mobi_file_path: path of the MOBI file to read.
        :param real_count: not used by this generator.
        :return: a Pages object with offsets 0, 2300, 4600, ... below the text length.
        """
        text_length = mobi_html_length(mobi_file_path)
        page_starts = list(range(0, text_length, 2300))
        return Pages(page_starts)


FastPageGenerator.instance = FastPageGenerator()
|
@ -0,0 +1,31 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
|
||||
from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator, mobi_html
|
||||
from calibre.devices.kindle.apnx_page_generator.pages import Pages
|
||||
import re
|
||||
|
||||
|
||||
class PagebreakPageGenerator(IPageGenerator):
    """Page generator that starts a new page after every pagebreak tag."""

    def name(self) -> str:
        """Return the identifier of this generation method."""
        return "pagebreak"

    def _generate_fallback(self, mobi_file_path: str, real_count: Optional[int]) -> Pages:
        """Delegate to the fast generator when this one cannot produce pages."""
        return FastPageGenerator.instance.generate(mobi_file_path, real_count)

    def _generate(self, mobi_file_path: str, real_count: Optional[int]) -> Pages:
        """
        Determine pages based on the presence of tags containing "pagebreak".

        Each page offset is the position just past the closing '>' of a
        matching tag in the lower-cased book HTML.

        :param mobi_file_path: path of the MOBI file to read.
        :param real_count: not used by this generator.
        """
        markup = mobi_html(mobi_file_path)
        page_ends = [match.end() for match in re.finditer(b'<[^>]*pagebreak[^>]*>', markup)]
        return Pages(page_ends)


PagebreakPageGenerator.instance = PagebreakPageGenerator()
|
@ -0,0 +1,54 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import struct
|
||||
from abc import abstractmethod, ABCMeta
|
||||
from typing import Optional
|
||||
|
||||
from calibre.devices.kindle.apnx_page_generator.pages import Pages
|
||||
from calibre.ebooks.mobi.reader.mobi6 import MobiReader
|
||||
from calibre.utils.logging import default_log
|
||||
from polyglot.builtins import as_bytes
|
||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||
|
||||
|
||||
class IPageGenerator(metaclass=ABCMeta):
    """
    Base class of the APNX page generators.

    Subclasses implement ``_generate`` (the actual algorithm),
    ``_generate_fallback`` (what to do when the algorithm fails or finds
    no pages) and ``name``. Callers use ``generate``.
    """

    @abstractmethod
    def _generate(self, mobi_file_path: str, real_count: Optional[int]) -> "Pages":
        """Compute the pages of the book at ``mobi_file_path``."""

    @abstractmethod
    def _generate_fallback(self, mobi_file_path: str, real_count: Optional[int]) -> "Pages":
        """Compute the pages some other way after ``_generate`` failed."""

    def generate(self, mobi_file_path: str, real_count: Optional[int]) -> "Pages":
        """
        Return the pages of a book, falling back if generation fails.

        The result of ``_generate`` is returned when it contains at least
        one page. If it is empty, or if ``_generate`` raised, the result of
        ``_generate_fallback`` is returned instead. The fallback is invoked
        exactly once and its exceptions propagate to the caller.

        :param mobi_file_path: path of the MOBI file to paginate.
        :param real_count: wanted page count, passed through to the generator.
        :raises Exception: whatever ``_generate`` raised when this is the
            fast generator, which has nothing left to fall back to.
        """
        try:
            result = self._generate(mobi_file_path, real_count)
            if result.number_of_pages > 0:
                return result
        except Exception:
            # The fast generator is the last resort: report its own error
            # instead of hiding it behind the fallback's error.
            if self.__class__.__name__ == "FastPageGenerator":
                raise
        # Deliberately outside the try block so that a failing fallback is
        # not caught above and run a second time.
        return self._generate_fallback(mobi_file_path, real_count)

    @abstractmethod
    def name(self) -> str:
        """Return the identifier under which this generator is registered."""
|
||||
|
||||
|
||||
def mobi_html(mobi_file_path: str) -> bytes:
    """
    Return the lower-cased HTML of a MOBI book as bytes.

    :param mobi_file_path: path of the MOBI file to read.
    :raises Exception: if the book header reports a non-zero encryption type.
    """
    reader = MobiReader(mobi_file_path, default_log)
    is_encrypted = reader.book_header.encryption_type != 0
    if is_encrypted:
        raise Exception("DRMed book")
    reader.extract_text()
    lowered = reader.mobi_html.lower()
    return as_bytes(lowered)
|
||||
|
||||
|
||||
def mobi_html_length(mobi_file_path: str) -> int:
    """
    Return the length field stored in the first PDB section of a MOBI file.

    The value is the big-endian unsigned 32-bit integer at bytes 4-8 of
    section 0; the page generators use it as the length of the book text.

    :param mobi_file_path: path of the MOBI file to read.
    """
    with lopen(mobi_file_path, 'rb') as stream:
        record0 = PdbHeaderReader(stream).section_data(0)
    (length,) = struct.unpack('>I', record0[4:8])
    return length
|
57
src/calibre/devices/kindle/apnx_page_generator/page_group.py
Normal file
57
src/calibre/devices/kindle/apnx_page_generator/page_group.py
Normal file
@ -0,0 +1,57 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from typing import Union, List, Tuple
|
||||
|
||||
from calibre.devices.kindle.apnx_page_generator.page_number_type import PageNumberTypes
|
||||
|
||||
|
||||
class PageGroup:
    """
    A run of consecutive pages that share one page numbering type.

    For the Custom numbering type every page carries its own label; for the
    other types only the number of the first page is stored.
    """

    def __init__(self, page_locations: Union[int, List[int]], page_number_type: PageNumberTypes, first_value: int,
                 page_labels: Union[str, List[str], None] = None):
        """
        Simulate constructor overloading: accept one page or a list of pages.

        :param page_locations: a single page location, or a list of them.
            A list is stored as-is, not copied.
        :param page_number_type: numbering type of every page in the group.
        :param first_value: number of the first page of the group.
        :param page_labels: required for the Custom type only: one non-empty
            label (for a single page) or a list with one non-empty label per
            page location.
        """
        # isinstance() rather than comparing __class__, so that subclasses
        # of int/str are recognised as a single value as well.
        self.page_locations: List[int] = (
            [page_locations] if isinstance(page_locations, int) else page_locations)
        self.__page_number_type: PageNumberTypes = page_number_type
        self.__first_value = first_value
        if page_number_type == PageNumberTypes.Custom:
            assert page_labels is not None
            if isinstance(page_labels, str):
                assert 1 == len(self.page_locations) and len(page_labels) > 0
                self.__page_number_labels: List[str] = [page_labels]
            else:
                assert len(page_labels) == len(self.page_locations)
                assert all(len(label) > 0 for label in page_labels)
                self.__page_number_labels: List[str] = page_labels

    def append(self, page_location: Union[int, Tuple[int, str]]) -> None:
        """
        Add one page to the end of the group.

        :param page_location: a bare location for non-Custom groups, or a
            ``(location, label)`` pair for Custom groups.
        """
        if isinstance(page_location, int):
            assert self.__page_number_type != PageNumberTypes.Custom
            self.page_locations.append(page_location)
        else:
            assert self.__page_number_type == PageNumberTypes.Custom
            self.page_locations.append(page_location[0])
            self.__page_number_labels.append(page_location[1])

    @property
    def page_number_types(self) -> PageNumberTypes:
        """Numbering type of the pages in this group."""
        return self.__page_number_type

    @property
    def number_of_pages(self) -> int:
        """Number of page locations in this group."""
        return len(self.page_locations)

    @property
    def last_value(self) -> int:
        """Number of the last page, counting up from the first value."""
        return self.__first_value + len(self.page_locations) - 1

    def get_page_map(self, starting_location: int) -> str:
        """
        Return this group's page map entry, "(start,type,values)".

        :param starting_location: value written as the first field of the entry.
        :return: the entry; ``values`` is the first page number, or for the
            Custom type the labels joined with "|".
        """
        if self.__page_number_type != PageNumberTypes.Custom:
            values = str(self.__first_value)
        else:
            values = "|".join(self.__page_number_labels)
        return "(%s,%s,%s)" % (starting_location, self.__page_number_type.value, values)
|
@ -0,0 +1,11 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import enum
|
||||
|
||||
|
||||
class PageNumberTypes(enum.Enum):
    """Page numbering types; each value is the code written into a page map entry."""

    # Plain numbers.
    Arabic = "a"
    # Roman numerals.
    Roman = "r"
    # Explicit label for every page.
    Custom = "c"
|
44
src/calibre/devices/kindle/apnx_page_generator/pages.py
Normal file
44
src/calibre/devices/kindle/apnx_page_generator/pages.py
Normal file
@ -0,0 +1,44 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import itertools
|
||||
from typing import Optional, List
|
||||
|
||||
from calibre.devices.kindle.apnx_page_generator.page_group import PageGroup
|
||||
from calibre.devices.kindle.apnx_page_generator.page_number_type import PageNumberTypes
|
||||
|
||||
|
||||
class Pages:
    """An ordered collection of page groups describing all pages of a book."""

    def __init__(self, page_locations: Optional[List[int]] = None):
        """
        Create the collection, optionally from a flat list of locations.

        :param page_locations: when a list is given, it becomes a single
            Arabic-numbered group starting at page 1; otherwise the
            collection starts without any group.
        """
        # isinstance() rather than comparing __class__, so that list
        # subclasses are accepted as well.
        if isinstance(page_locations, list):
            self.__pages_groups: List[PageGroup] = [PageGroup(page_locations, PageNumberTypes.Arabic, 1)]
        else:
            self.__pages_groups: List[PageGroup] = []

    def append(self, page_location: PageGroup) -> None:
        """Add a page group to the end of the collection."""
        self.__pages_groups.append(page_location)

    @property
    def last_group(self) -> PageGroup:
        """The most recently added group; raises IndexError when there is none."""
        return self.__pages_groups[-1]

    @property
    def page_maps(self) -> str:
        """Comma-separated page map entries of all groups.

        The first group starts at 1 and every following group starts after
        the pages of the groups before it.
        """
        location = 1
        result = []
        for group in self.__pages_groups:
            result.append(group.get_page_map(location))
            location += group.number_of_pages
        return ",".join(result)

    @property
    def page_locations(self) -> List[int]:
        """Page locations of all groups, flattened in group order."""
        return list(itertools.chain.from_iterable(pg.page_locations for pg in self.__pages_groups))

    @property
    def number_of_pages(self) -> int:
        """Total number of page locations over all groups."""
        return sum(len(pg.page_locations) for pg in self.__pages_groups)
|
||||
|
||||
|
@ -2,6 +2,8 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john at nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.devices.kindle.apnx import APNXBuilder
|
||||
|
||||
'''
|
||||
Device driver for Amazon's Kindle
|
||||
'''
|
||||
@ -409,7 +411,7 @@ class KINDLE2(KINDLE):
|
||||
OPT_APNX_CUST_COL = 2
|
||||
OPT_APNX_METHOD_COL = 3
|
||||
OPT_APNX_OVERWRITE = 4
|
||||
EXTRA_CUSTOMIZATION_CHOICES = {OPT_APNX_METHOD:{'fast', 'accurate', 'pagebreak'}}
|
||||
EXTRA_CUSTOMIZATION_CHOICES = {OPT_APNX_METHOD: set(APNXBuilder.generators.keys())}
|
||||
|
||||
# x330 on the PaperWhite
|
||||
# x262 on the Touch. Doesn't choke on x330, though.
|
||||
|
Loading…
x
Reference in New Issue
Block a user