mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge branch 'master' of https://github.com/Vasolik/calibre
This commit is contained in:
commit
30ef660cb9
@ -1,15 +1,15 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011, John Schember <john at nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john at nachtimwald.com>, refactored: 2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from typing import Optional, Dict
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Generates and writes an APNX page mapping file.
|
Generates and writes an APNX page mapping file.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
|
||||||
import struct
|
import struct
|
||||||
|
|
||||||
from calibre.ebooks.mobi.reader.mobi6 import MobiReader
|
|
||||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||||
from calibre.ebooks.mobi.reader.headers import MetadataHeader
|
from calibre.ebooks.mobi.reader.headers import MetadataHeader
|
||||||
from calibre.utils.logging import default_log
|
from calibre.utils.logging import default_log
|
||||||
@ -17,29 +17,66 @@ from calibre import prints, fsync
|
|||||||
from calibre.constants import DEBUG
|
from calibre.constants import DEBUG
|
||||||
from polyglot.builtins import as_unicode, as_bytes
|
from polyglot.builtins import as_unicode, as_bytes
|
||||||
|
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.generators.accurate_page_generator import AccuratePageGenerator
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.generators.pagebreak_page_generator import PagebreakPageGenerator
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.generators.exact_page_generator import ExactPageGenerator
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.pages import Pages
|
||||||
|
|
||||||
|
|
||||||
class APNXBuilder:
|
class APNXBuilder:
|
||||||
'''
|
"""
|
||||||
Create an APNX file using a pseudo page mapping.
|
Create an APNX file using a pseudo page mapping.
|
||||||
'''
|
"""
|
||||||
|
|
||||||
def write_apnx(self, mobi_file_path, apnx_path, method=None, page_count=0):
|
generators: Dict[str, IPageGenerator] = {
|
||||||
'''
|
FastPageGenerator.instance.name(): FastPageGenerator.instance,
|
||||||
|
AccuratePageGenerator.instance.name(): AccuratePageGenerator.instance,
|
||||||
|
PagebreakPageGenerator.instance.name(): PagebreakPageGenerator.instance,
|
||||||
|
# ExactPageGenerator.instance.name(): ExactPageGenerator.instance,
|
||||||
|
}
|
||||||
|
|
||||||
|
def write_apnx(self, mobi_file_path: str, apnx_path: str, method: Optional[str] = None, page_count: int = 0):
|
||||||
|
"""
|
||||||
If you want a fixed number of pages (such as from a custom column) then
|
If you want a fixed number of pages (such as from a custom column) then
|
||||||
pass in a value to page_count, otherwise a count will be estimated
|
pass in a value to page_count, otherwise a count will be estimated
|
||||||
using either the fast or accurate algorithm.
|
using either the fast or accurate algorithm.
|
||||||
'''
|
"""
|
||||||
import uuid
|
apnx_meta = self.get_apnx_meta(mobi_file_path)
|
||||||
apnx_meta = {'guid': str(uuid.uuid4()).replace('-', '')[:8], 'asin':
|
|
||||||
'', 'cdetype': 'EBOK', 'format': 'MOBI_7', 'acr': ''}
|
|
||||||
|
|
||||||
|
if page_count:
|
||||||
|
generator: IPageGenerator = ExactPageGenerator.instance
|
||||||
|
else:
|
||||||
|
generator: IPageGenerator = self.generators.setdefault(method, FastPageGenerator.instance)
|
||||||
|
|
||||||
|
pages = generator.generate(mobi_file_path, page_count)
|
||||||
|
if pages.number_of_pages == 0:
|
||||||
|
raise Exception(_('Could not generate page mapping.'))
|
||||||
|
# Generate the APNX file from the page mapping.
|
||||||
|
apnx = self.generate_apnx(pages, apnx_meta)
|
||||||
|
|
||||||
|
# Write the APNX.
|
||||||
|
with lopen(apnx_path, 'wb') as apnxf:
|
||||||
|
apnxf.write(apnx)
|
||||||
|
fsync(apnxf)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_apnx_meta(mobi_file_path) -> Dict[str, str]:
|
||||||
|
import uuid
|
||||||
|
apnx_meta = {
|
||||||
|
'guid': str(uuid.uuid4()).replace('-', '')[:8],
|
||||||
|
'asin': '',
|
||||||
|
'cdetype': 'EBOK',
|
||||||
|
'format': 'MOBI_7',
|
||||||
|
'acr': ''
|
||||||
|
}
|
||||||
with lopen(mobi_file_path, 'rb') as mf:
|
with lopen(mobi_file_path, 'rb') as mf:
|
||||||
ident = PdbHeaderReader(mf).identity()
|
ident = PdbHeaderReader(mf).identity()
|
||||||
if as_bytes(ident) != b'BOOKMOBI':
|
if as_bytes(ident) != b'BOOKMOBI':
|
||||||
# Check that this is really a MOBI file.
|
# Check that this is really a MOBI file.
|
||||||
raise Exception(_('Not a valid MOBI file. Reports identity of %s') % ident)
|
raise Exception(_('Not a valid MOBI file. Reports identity of %s') % ident)
|
||||||
apnx_meta['acr'] = as_unicode(PdbHeaderReader(mf).name(), errors='replace')
|
apnx_meta['acr'] = as_unicode(PdbHeaderReader(mf).name(), errors='replace')
|
||||||
|
|
||||||
# We'll need the PDB name, the MOBI version, and some metadata to make FW 3.4 happy with KF8 files...
|
# We'll need the PDB name, the MOBI version, and some metadata to make FW 3.4 happy with KF8 files...
|
||||||
with lopen(mobi_file_path, 'rb') as mf:
|
with lopen(mobi_file_path, 'rb') as mf:
|
||||||
mh = MetadataHeader(mf, default_log)
|
mh = MetadataHeader(mf, default_log)
|
||||||
@ -55,41 +92,10 @@ class APNXBuilder:
|
|||||||
apnx_meta['asin'] = ''
|
apnx_meta['asin'] = ''
|
||||||
else:
|
else:
|
||||||
apnx_meta['asin'] = str(mh.exth.uuid)
|
apnx_meta['asin'] = str(mh.exth.uuid)
|
||||||
|
return apnx_meta
|
||||||
|
|
||||||
# Get the pages depending on the chosen parser
|
@staticmethod
|
||||||
pages = []
|
def generate_apnx(pages: Pages, apnx_meta) -> bytes:
|
||||||
if page_count:
|
|
||||||
pages = self.get_pages_exact(mobi_file_path, page_count)
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
if method == 'accurate':
|
|
||||||
pages = self.get_pages_accurate(mobi_file_path)
|
|
||||||
elif method == 'pagebreak':
|
|
||||||
pages = self.get_pages_pagebreak_tag(mobi_file_path)
|
|
||||||
if not pages:
|
|
||||||
pages = self.get_pages_accurate(mobi_file_path)
|
|
||||||
else:
|
|
||||||
raise Exception('%r is not a valid apnx generation method' % method)
|
|
||||||
except:
|
|
||||||
# Fall back to the fast parser if we can't
|
|
||||||
# use the accurate one. Typically this is
|
|
||||||
# due to the file having DRM.
|
|
||||||
pages = self.get_pages_fast(mobi_file_path)
|
|
||||||
|
|
||||||
if not pages:
|
|
||||||
pages = self.get_pages_fast(mobi_file_path)
|
|
||||||
if not pages:
|
|
||||||
raise Exception(_('Could not generate page mapping.'))
|
|
||||||
|
|
||||||
# Generate the APNX file from the page mapping.
|
|
||||||
apnx = self.generate_apnx(pages, apnx_meta)
|
|
||||||
|
|
||||||
# Write the APNX.
|
|
||||||
with lopen(apnx_path, 'wb') as apnxf:
|
|
||||||
apnxf.write(apnx)
|
|
||||||
fsync(apnxf)
|
|
||||||
|
|
||||||
def generate_apnx(self, pages, apnx_meta):
|
|
||||||
apnx = b''
|
apnx = b''
|
||||||
|
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
@ -107,8 +113,8 @@ class APNXBuilder:
|
|||||||
# legacy mobi files, too. But, since they still handle this one too, let's
|
# legacy mobi files, too. But, since they still handle this one too, let's
|
||||||
# try not to break old devices, and keep using the simple header ;).
|
# try not to break old devices, and keep using the simple header ;).
|
||||||
content_header = '{"contentGuid":"%(guid)s","asin":"%(asin)s","cdeType":"%(cdetype)s","fileRevisionId":"1"}' % apnx_meta
|
content_header = '{"contentGuid":"%(guid)s","asin":"%(asin)s","cdeType":"%(cdetype)s","fileRevisionId":"1"}' % apnx_meta
|
||||||
page_header = '{"asin":"%(asin)s","pageMap":"(1,a,1)"}' % apnx_meta
|
page_header = '{"asin":"%(asin)s","pageMap":"' % apnx_meta
|
||||||
|
page_header += pages.page_maps + '"}'
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
prints('APNX Content Header:', content_header)
|
prints('APNX Content Header:', content_header)
|
||||||
content_header = as_bytes(content_header)
|
content_header = as_bytes(content_header)
|
||||||
@ -120,177 +126,12 @@ class APNXBuilder:
|
|||||||
apnx += content_header
|
apnx += content_header
|
||||||
apnx += struct.pack('>H', 1)
|
apnx += struct.pack('>H', 1)
|
||||||
apnx += struct.pack('>H', len(page_header))
|
apnx += struct.pack('>H', len(page_header))
|
||||||
apnx += struct.pack('>H', len(pages))
|
apnx += struct.pack('>H', pages.number_of_pages)
|
||||||
apnx += struct.pack('>H', 32)
|
apnx += struct.pack('>H', 32)
|
||||||
apnx += page_header
|
apnx += page_header
|
||||||
|
|
||||||
# Write page values to APNX.
|
# Write page values to APNX.
|
||||||
for page in pages:
|
for location in pages.page_locations:
|
||||||
apnx += struct.pack('>I', page)
|
apnx += struct.pack('>I', location)
|
||||||
|
|
||||||
return apnx
|
return apnx
|
||||||
|
|
||||||
def get_pages_exact(self, mobi_file_path, page_count):
|
|
||||||
'''
|
|
||||||
Given a specified page count (such as from a custom column),
|
|
||||||
create our array of pages for the apnx file by dividing by
|
|
||||||
the content size of the book.
|
|
||||||
'''
|
|
||||||
pages = []
|
|
||||||
count = 0
|
|
||||||
|
|
||||||
with lopen(mobi_file_path, 'rb') as mf:
|
|
||||||
phead = PdbHeaderReader(mf)
|
|
||||||
r0 = phead.section_data(0)
|
|
||||||
text_length = struct.unpack('>I', r0[4:8])[0]
|
|
||||||
|
|
||||||
chars_per_page = int(text_length // page_count)
|
|
||||||
while count < text_length:
|
|
||||||
pages.append(count)
|
|
||||||
count += chars_per_page
|
|
||||||
|
|
||||||
if len(pages) > page_count:
|
|
||||||
# Rounding created extra page entries
|
|
||||||
pages = pages[:page_count]
|
|
||||||
|
|
||||||
return pages
|
|
||||||
|
|
||||||
def get_pages_fast(self, mobi_file_path):
|
|
||||||
'''
|
|
||||||
2300 characters of uncompressed text per page. This is
|
|
||||||
not meant to map 1 to 1 to a print book but to be a
|
|
||||||
close enough measure.
|
|
||||||
|
|
||||||
A test book was chosen and the characters were counted
|
|
||||||
on one page. This number was round to 2240 then 60
|
|
||||||
characters of markup were added to the total giving
|
|
||||||
2300.
|
|
||||||
|
|
||||||
Uncompressed text length is used because it's easily
|
|
||||||
accessible in MOBI files (part of the header). Also,
|
|
||||||
It's faster to work off of the length then to
|
|
||||||
decompress and parse the actual text.
|
|
||||||
'''
|
|
||||||
text_length = 0
|
|
||||||
pages = []
|
|
||||||
count = 0
|
|
||||||
|
|
||||||
with lopen(mobi_file_path, 'rb') as mf:
|
|
||||||
phead = PdbHeaderReader(mf)
|
|
||||||
r0 = phead.section_data(0)
|
|
||||||
text_length = struct.unpack('>I', r0[4:8])[0]
|
|
||||||
|
|
||||||
while count < text_length:
|
|
||||||
pages.append(count)
|
|
||||||
count += 2300
|
|
||||||
|
|
||||||
return pages
|
|
||||||
|
|
||||||
def get_pages_accurate(self, mobi_file_path):
|
|
||||||
'''
|
|
||||||
A more accurate but much more resource intensive and slower
|
|
||||||
method to calculate the page length.
|
|
||||||
|
|
||||||
Parses the uncompressed text. In an average paper back book
|
|
||||||
There are 32 lines per page and a maximum of 70 characters
|
|
||||||
per line.
|
|
||||||
|
|
||||||
Each paragraph starts a new line and every 70 characters
|
|
||||||
(minus markup) in a paragraph starts a new line. The
|
|
||||||
position after every 30 lines will be marked as a new
|
|
||||||
page.
|
|
||||||
|
|
||||||
This can be make more accurate by accounting for
|
|
||||||
<div class="mbp_pagebreak" /> as a new page marker.
|
|
||||||
And <br> elements as an empty line.
|
|
||||||
'''
|
|
||||||
pages = []
|
|
||||||
|
|
||||||
# Get the MOBI html.
|
|
||||||
mr = MobiReader(mobi_file_path, default_log)
|
|
||||||
if mr.book_header.encryption_type != 0:
|
|
||||||
# DRMed book
|
|
||||||
return self.get_pages_fast(mobi_file_path)
|
|
||||||
mr.extract_text()
|
|
||||||
|
|
||||||
# States
|
|
||||||
in_tag = False
|
|
||||||
in_p = False
|
|
||||||
check_p = False
|
|
||||||
closing = False
|
|
||||||
p_char_count = 0
|
|
||||||
|
|
||||||
# Get positions of every line
|
|
||||||
# A line is either a paragraph starting
|
|
||||||
# or every 70 characters in a paragraph.
|
|
||||||
lines = []
|
|
||||||
pos = -1
|
|
||||||
# We want this to be as fast as possible so we
|
|
||||||
# are going to do one pass across the text. re
|
|
||||||
# and string functions will parse the text each
|
|
||||||
# time they are called.
|
|
||||||
#
|
|
||||||
# We can can use .lower() here because we are
|
|
||||||
# not modifying the text. In this case the case
|
|
||||||
# doesn't matter just the absolute character and
|
|
||||||
# the position within the stream.
|
|
||||||
data = bytearray(as_bytes(mr.mobi_html.lower()))
|
|
||||||
slash, p, lt, gt = map(ord, '/p<>')
|
|
||||||
for c in data:
|
|
||||||
pos += 1
|
|
||||||
|
|
||||||
# Check if we are starting or stopping a p tag.
|
|
||||||
if check_p:
|
|
||||||
if c == slash:
|
|
||||||
closing = True
|
|
||||||
continue
|
|
||||||
elif c == p:
|
|
||||||
if closing:
|
|
||||||
in_p = False
|
|
||||||
else:
|
|
||||||
in_p = True
|
|
||||||
lines.append(pos - 2)
|
|
||||||
check_p = False
|
|
||||||
closing = False
|
|
||||||
continue
|
|
||||||
|
|
||||||
if c == lt:
|
|
||||||
in_tag = True
|
|
||||||
check_p = True
|
|
||||||
continue
|
|
||||||
elif c == gt:
|
|
||||||
in_tag = False
|
|
||||||
check_p = False
|
|
||||||
continue
|
|
||||||
|
|
||||||
if in_p and not in_tag:
|
|
||||||
p_char_count += 1
|
|
||||||
if p_char_count == 70:
|
|
||||||
lines.append(pos)
|
|
||||||
p_char_count = 0
|
|
||||||
|
|
||||||
# Every 30 lines is a new page
|
|
||||||
for i in range(0, len(lines), 32):
|
|
||||||
pages.append(lines[i])
|
|
||||||
|
|
||||||
return pages
|
|
||||||
|
|
||||||
def get_pages_pagebreak_tag(self, mobi_file_path):
|
|
||||||
'''
|
|
||||||
Determine pages based on the presence of
|
|
||||||
<mbp:pagebreak>.
|
|
||||||
'''
|
|
||||||
pages = []
|
|
||||||
|
|
||||||
# Get the MOBI html.
|
|
||||||
mr = MobiReader(mobi_file_path, default_log)
|
|
||||||
if mr.book_header.encryption_type != 0:
|
|
||||||
# DRMed book
|
|
||||||
return self.get_pages_fast(mobi_file_path)
|
|
||||||
mr.extract_text()
|
|
||||||
|
|
||||||
html = as_bytes(mr.mobi_html.lower())
|
|
||||||
for m in re.finditer(b'<[^>]*pagebreak[^>]*>', html):
|
|
||||||
pages.append(m.end())
|
|
||||||
|
|
||||||
return pages
|
|
||||||
|
@ -0,0 +1,107 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator, mobi_html
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.pages import Pages
|
||||||
|
|
||||||
|
|
||||||
|
class AccuratePageGenerator(IPageGenerator):
    """Page generator that derives page starts from line positions counted in the book's HTML."""

    # Shared generator object; assigned right after the class body.
    instance = None

    def name(self) -> str:
        """Return the identifier of this generation method: ``"accurate"``."""
        return "accurate"

    def _generate_fallback(self, mobi_file_path: str, real_count: Optional[int]) -> Pages:
        """Delegate to the shared ``FastPageGenerator`` instance."""
        return FastPageGenerator.instance.generate(mobi_file_path, real_count)

    def _generate(self, mobi_file_path: str, real_count: Optional[int]) -> Pages:
        """
        A more accurate but much more resource intensive and slower
        method to calculate the page length.

        Parses the uncompressed text. The model is an average paperback
        with 32 lines per page and a maximum of 70 characters per line.

        Each paragraph starts a new line and every 70 characters
        (minus markup) in a paragraph starts a new line. Every 32nd
        recorded line position is marked as the start of a new page.

        This could be made more accurate by accounting for
        <div class="mbp_pagebreak" /> as a new page marker,
        and <br> elements as an empty line.

        ``real_count`` is accepted for interface compatibility and is not
        read by this method.
        """
        pages = []

        html = mobi_html(mobi_file_path)

        # States
        in_tag = False  # between '<' and '>'
        in_p = False  # inside an element whose tag name starts with 'p'
        check_p = False  # the previous byte was '<' (or '</'): inspect the tag name
        closing = False  # a '/' directly followed the '<'
        p_char_count = 0  # characters counted towards the current 70-character line

        # Get positions of every line
        # A line is either a paragraph starting
        # or every 70 characters in a paragraph.
        lines = []
        pos = -1
        # We want this to be as fast as possible so we
        # are going to do one pass across the text. re
        # and string functions will parse the text each
        # time they are called.
        #
        # Only the absolute characters and their positions
        # within the stream matter here, not their case.
        data = bytearray(html)
        slash, p, lt, gt = map(ord, '/p<>')
        for c in data:
            pos += 1

            # Check if we are starting or stopping a p tag.
            # Only the first byte of the tag name is compared, so any tag
            # beginning with 'p' is treated like <p>.
            if check_p:
                if c == slash:
                    closing = True
                    continue
                elif c == p:
                    if closing:
                        in_p = False
                    else:
                        in_p = True
                        # NOTE(review): for a bare "<p" the '<' is at pos - 1, so this
                        # records the byte before it -- confirm the intended offset.
                        lines.append(pos - 2)
                check_p = False
                closing = False
                continue

            if c == lt:
                in_tag = True
                check_p = True
                continue
            elif c == gt:
                in_tag = False
                check_p = False
                continue

            if in_p and not in_tag:
                p_char_count += 1
                # The counter is only reset here, not when a new paragraph starts.
                if p_char_count == 70:
                    lines.append(pos)
                    p_char_count = 0

        # Every 32 lines is a new page
        for i in range(0, len(lines), 32):
            pages.append(lines[i])

        return Pages(pages)


AccuratePageGenerator.instance = AccuratePageGenerator()
|
@ -0,0 +1,45 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator, mobi_html_length
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.pages import Pages
|
||||||
|
|
||||||
|
|
||||||
|
class ExactPageGenerator(IPageGenerator):
    """Page generator that spreads a caller-supplied page count evenly over the book text."""

    # Shared generator object; assigned right after the class body.
    instance = None

    def name(self) -> str:
        """Return the identifier of this generation method: ``"exact"``."""
        return "exact"

    def _generate_fallback(self, mobi_file_path: str, real_count: Optional[int]) -> Pages:
        """Delegate to the shared ``FastPageGenerator`` instance."""
        return FastPageGenerator.instance.generate(mobi_file_path, real_count)

    def _generate(self, mobi_file_path: str, real_count: Optional[int]) -> Pages:
        """
        Given a specified page count (such as from a custom column),
        create our array of pages for the apnx file by dividing the
        content size of the book by that count.

        :param mobi_file_path: path of the MOBI file whose text length is read.
        :param real_count: number of pages wanted; must be a positive number.
        :return: a ``Pages`` object holding at most ``real_count`` offsets.
        :raises ValueError: if ``real_count`` is missing or not positive.
        """
        # A missing, zero or negative count cannot be divided into pages. A negative
        # count used to drive the offset downwards forever.
        if real_count is None or real_count <= 0:
            raise ValueError('A positive page count is required, got %r' % (real_count,))

        text_length = mobi_html_length(mobi_file_path)

        # When more pages are requested than there are characters the quotient is 0,
        # which previously made the loop append the same offset without end.
        chars_per_page = max(1, int(text_length // real_count))

        pages = list(range(0, text_length, chars_per_page))

        if len(pages) > real_count:
            # Rounding created extra page entries
            pages = pages[:real_count]

        return Pages(pages)


ExactPageGenerator.instance = ExactPageGenerator()
|
@ -0,0 +1,48 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator, mobi_html_length
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.pages import Pages
|
||||||
|
|
||||||
|
|
||||||
|
class FastPageGenerator(IPageGenerator):
    """Page generator that places a page every fixed number of characters."""

    # Shared generator object; declared here like in the other generators and
    # assigned right after the class body.
    instance = None

    # Characters of uncompressed text (including a markup allowance) per page.
    CHARS_PER_PAGE = 2300

    def name(self) -> str:
        """Return the identifier of this generation method: ``"fast"``."""
        return "fast"

    def _generate_fallback(self, mobi_file_path: str, real_count: Optional[int]) -> Pages:
        """Always raise: this generator has nothing to fall back to."""
        raise Exception("Fast calculation impossible.")

    def _generate(self, mobi_file_path: str, real_count: Optional[int]) -> Pages:
        """
        2300 characters of uncompressed text per page. This is
        not meant to map 1 to 1 to a print book but to be a
        close enough measure.

        A test book was chosen and the characters were counted
        on one page. This number was rounded to 2240, then 60
        characters of markup were added to the total, giving
        2300.

        Uncompressed text length is used because it's easily
        accessible in MOBI files (part of the header). Also,
        it's faster to work off of the length than to
        decompress and parse the actual text.

        ``real_count`` is accepted for interface compatibility and is not
        read by this method.
        """
        text_length = mobi_html_length(mobi_file_path)

        # One page start at offset 0 and then every CHARS_PER_PAGE characters.
        return Pages(list(range(0, text_length, self.CHARS_PER_PAGE)))


FastPageGenerator.instance = FastPageGenerator()
|
@ -0,0 +1,31 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.generators.fast_page_generator import FastPageGenerator
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.i_page_generator import IPageGenerator, mobi_html
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.pages import Pages
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class PagebreakPageGenerator(IPageGenerator):
    """Page generator that starts a page after every tag containing ``pagebreak``."""

    # Shared generator object; declared here like in the other generators and
    # assigned right after the class body.
    instance = None

    # Any tag whose text contains "pagebreak"; compiled once for all calls.
    _PAGEBREAK_TAG = re.compile(b'<[^>]*pagebreak[^>]*>')

    def name(self) -> str:
        """Return the identifier of this generation method: ``"pagebreak"``."""
        return "pagebreak"

    def _generate_fallback(self, mobi_file_path: str, real_count: Optional[int]) -> Pages:
        """Delegate to the shared ``FastPageGenerator`` instance."""
        return FastPageGenerator.instance.generate(mobi_file_path, real_count)

    def _generate(self, mobi_file_path: str, real_count: Optional[int]) -> Pages:
        """
        Determine pages based on the presence of <*pagebreak*/>.

        Each page location is the offset just past a matching tag.
        ``real_count`` is accepted for interface compatibility and is not
        read by this method.
        """
        html = mobi_html(mobi_file_path)
        return Pages([match.end() for match in self._PAGEBREAK_TAG.finditer(html)])


PagebreakPageGenerator.instance = PagebreakPageGenerator()
|
@ -0,0 +1,54 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import struct
|
||||||
|
from abc import abstractmethod, ABCMeta
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.pages import Pages
|
||||||
|
from calibre.ebooks.mobi.reader.mobi6 import MobiReader
|
||||||
|
from calibre.utils.logging import default_log
|
||||||
|
from polyglot.builtins import as_bytes
|
||||||
|
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||||
|
|
||||||
|
|
||||||
|
class IPageGenerator(metaclass=ABCMeta):
    """
    Base class of the page mapping generators.

    Subclasses implement ``_generate`` (their own method), ``_generate_fallback``
    (what to do when that method yields nothing or fails) and ``name``.
    Callers use ``generate``.
    """

    @abstractmethod
    def _generate(self, mobi_file_path: str, real_count: Optional[int]) -> "Pages":
        """Build the page mapping with this generator's own method."""

    @abstractmethod
    def _generate_fallback(self, mobi_file_path: str, real_count: Optional[int]) -> "Pages":
        """Build the page mapping when ``_generate`` produced no pages or raised."""

    def generate(self, mobi_file_path: str, real_count: Optional[int]) -> "Pages":
        """
        Return the page mapping for the given file.

        The result of ``_generate`` is returned when it contains at least one
        page. When it is empty, or when ``_generate`` raises, the result of
        ``_generate_fallback`` is returned instead. The fallback is invoked
        exactly once and its exceptions propagate to the caller.

        A class named ``FastPageGenerator`` is the end of the fallback chain:
        an exception raised by its ``_generate`` is re-raised unchanged.
        """
        try:
            result = self._generate(mobi_file_path, real_count)
            if result.number_of_pages > 0:
                return result
        except Exception:
            # Compared by name because the concrete generator modules import this one.
            if self.__class__.__name__ == "FastPageGenerator":
                raise
        # Kept outside the try block so that a failing fallback is not run twice.
        return self._generate_fallback(mobi_file_path, real_count)

    @abstractmethod
    def name(self) -> str:
        """Return the identifier under which this generator is selected."""
|
||||||
|
|
||||||
|
|
||||||
|
def mobi_html(mobi_file_path: str) -> bytes:
    """
    Return the extracted HTML of a MOBI file as lower-cased bytes.

    Raises ``Exception("DRMed book")`` when the book header reports a
    non-zero encryption type.
    """
    reader = MobiReader(mobi_file_path, default_log)
    encrypted = reader.book_header.encryption_type != 0
    if encrypted:
        raise Exception("DRMed book")
    reader.extract_text()
    lowered = reader.mobi_html.lower()
    return as_bytes(lowered)
|
||||||
|
|
||||||
|
|
||||||
|
def mobi_html_length(mobi_file_path: str) -> int:
    """
    Return the big-endian unsigned 32-bit integer stored in bytes 4-8 of
    section 0 of the file, which the page generators use as the text length.
    """
    with lopen(mobi_file_path, 'rb') as stream:
        record0 = PdbHeaderReader(stream).section_data(0)
    (length,) = struct.unpack('>I', record0[4:8])
    return length
|
57
src/calibre/devices/kindle/apnx_page_generator/page_group.py
Normal file
57
src/calibre/devices/kindle/apnx_page_generator/page_group.py
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from typing import Union, List, Tuple
|
||||||
|
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.page_number_type import PageNumberTypes
|
||||||
|
|
||||||
|
|
||||||
|
class PageGroup:
    """
    A run of consecutive pages that share one numbering type.

    The constructor simulates overloading: ``page_locations`` may be a single
    location or a list of locations, and ``page_labels`` may be a single label
    or a list of labels (labels are only used for ``PageNumberTypes.Custom``).
    """

    def __init__(self, page_locations: Union[int, List[int]], page_number_type: PageNumberTypes, first_value: int,
                 page_labels: Union[str, List[str], None] = None):
        """
        :param page_locations: one location, or the list of locations of the group.
            A list is stored as given, not copied.
        :param page_number_type: numbering type of every page in the group.
        :param first_value: number of the first page of the group.
        :param page_labels: for custom numbering, one non-empty label per location.
        :raises ValueError: if custom numbering is requested and the labels are
            missing, empty, or do not match the number of locations.
        """
        if isinstance(page_locations, int):
            self.page_locations: List[int] = [page_locations]
        else:
            self.page_locations: List[int] = page_locations
        self.__page_number_type: PageNumberTypes = page_number_type
        self.__first_value = first_value
        # Defined for every numbering type so the attribute always exists.
        self.__page_number_labels: List[str] = []
        if page_number_type == PageNumberTypes.Custom:
            # Explicit checks instead of assert statements, which are removed
            # when Python runs with -O.
            if page_labels is None:
                raise ValueError('Custom page numbering requires page labels')
            if isinstance(page_labels, str):
                if len(self.page_locations) != 1 or not page_labels:
                    raise ValueError('A single non-empty label requires exactly one page location')
                self.__page_number_labels = [page_labels]
            else:
                if len(page_labels) != len(self.page_locations):
                    raise ValueError('Number of page labels must match the number of page locations')
                if not all(len(label) > 0 for label in page_labels):
                    raise ValueError('Page labels must not be empty')
                self.__page_number_labels = page_labels

    def append(self, page_location: Union[int, Tuple[int, str]]) -> None:
        """
        Add one page to the group.

        :param page_location: a location for non-custom groups, or a
            ``(location, label)`` pair for custom groups.
        :raises ValueError: if the argument form does not match the numbering type.
        """
        is_custom = self.__page_number_type == PageNumberTypes.Custom
        if isinstance(page_location, int):
            if is_custom:
                raise ValueError('A custom numbered group requires a (location, label) pair')
            self.page_locations.append(page_location)
        else:
            if not is_custom:
                raise ValueError('Only a custom numbered group accepts a (location, label) pair')
            # Read both items first so a malformed pair leaves the group untouched.
            location, label = page_location[0], page_location[1]
            self.page_locations.append(location)
            self.__page_number_labels.append(label)

    @property
    def page_number_types(self) -> PageNumberTypes:
        """Numbering type of the group."""
        return self.__page_number_type

    @property
    def number_of_pages(self) -> int:
        """Number of locations in the group."""
        return len(self.page_locations)

    @property
    def last_value(self) -> int:
        """Number of the last page: the first value plus the page count, minus one."""
        return self.__first_value + len(self.page_locations) - 1

    def get_page_map(self, starting_location: int) -> str:
        """
        Return the page map entry ``(start,type,values)`` of the group.

        ``values`` is the first page number, or for custom numbering the
        labels joined with ``|``.
        """
        if self.__page_number_type != PageNumberTypes.Custom:
            values = str(self.__first_value)
        else:
            values = "|".join(self.__page_number_labels)
        return "(%s,%s,%s)" % (starting_location, self.__page_number_type.value, values)
|
@ -0,0 +1,11 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import enum
|
||||||
|
|
||||||
|
|
||||||
|
class PageNumberTypes(enum.Enum):
    # Numbering types of a page group. Each value is the one-letter code that
    # PageGroup.get_page_map writes as the middle field of a page map entry.
    Arabic = "a"  # presumably Arabic numerals -- confirm against the APNX format
    Roman = "r"  # presumably Roman numerals -- confirm against the APNX format
    Custom = "c"  # explicit labels supplied per page (see PageGroup)
|
44
src/calibre/devices/kindle/apnx_page_generator/pages.py
Normal file
44
src/calibre/devices/kindle/apnx_page_generator/pages.py
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2022, Vaso Peras-Likodric <vaso at vipl.in.rs>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import itertools
|
||||||
|
from typing import Optional, List
|
||||||
|
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.page_group import PageGroup
|
||||||
|
from calibre.devices.kindle.apnx_page_generator.page_number_type import PageNumberTypes
|
||||||
|
|
||||||
|
|
||||||
|
class Pages:
    """Ordered collection of page groups that together form a page mapping."""

    def __init__(self, page_locations: Optional[List[int]] = None):
        """
        :param page_locations: when given, the locations of a single group
            numbered with Arabic numbers starting at 1; when ``None`` the
            collection starts empty.
        """
        if page_locations is None:
            self.__pages_groups: List[PageGroup] = []
        else:
            # Any iterable of locations is accepted. Previously everything that
            # was not exactly a list was silently dropped, giving an empty mapping.
            if not isinstance(page_locations, list):
                page_locations = list(page_locations)
            self.__pages_groups: List[PageGroup] = [PageGroup(page_locations, PageNumberTypes.Arabic, 1)]

    def append(self, page_location: PageGroup) -> None:
        """Add a group after the existing ones."""
        self.__pages_groups.append(page_location)

    @property
    def last_group(self) -> PageGroup:
        """The most recently added group; raises ``IndexError`` when there is none."""
        return self.__pages_groups[-1]

    @property
    def page_maps(self) -> str:
        """
        The page map entries of all groups joined with commas.

        The starting position of the first group is 1 and each following group
        starts after the pages of the groups before it.
        """
        location = 1
        result = []
        for group in self.__pages_groups:
            result.append(group.get_page_map(location))
            location += group.number_of_pages
        return ",".join(result)

    @property
    def page_locations(self) -> List[int]:
        """The locations of all groups as one flat list, in group order."""
        return list(itertools.chain.from_iterable(group.page_locations for group in self.__pages_groups))

    @property
    def number_of_pages(self) -> int:
        """Total number of locations over all groups."""
        return sum(len(group.page_locations) for group in self.__pages_groups)
|
||||||
|
|
||||||
|
|
@ -2,6 +2,8 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, John Schember <john at nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john at nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from calibre.devices.kindle.apnx import APNXBuilder
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Device driver for Amazon's Kindle
|
Device driver for Amazon's Kindle
|
||||||
'''
|
'''
|
||||||
@ -409,7 +411,7 @@ class KINDLE2(KINDLE):
|
|||||||
OPT_APNX_CUST_COL = 2
|
OPT_APNX_CUST_COL = 2
|
||||||
OPT_APNX_METHOD_COL = 3
|
OPT_APNX_METHOD_COL = 3
|
||||||
OPT_APNX_OVERWRITE = 4
|
OPT_APNX_OVERWRITE = 4
|
||||||
EXTRA_CUSTOMIZATION_CHOICES = {OPT_APNX_METHOD:{'fast', 'accurate', 'pagebreak'}}
|
EXTRA_CUSTOMIZATION_CHOICES = {OPT_APNX_METHOD: set(APNXBuilder.generators.keys())}
|
||||||
|
|
||||||
# x330 on the PaperWhite
|
# x330 on the PaperWhite
|
||||||
# x262 on the Touch. Doesn't choke on x330, though.
|
# x262 on the Touch. Doesn't choke on x330, though.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user