mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Kindle APNX: Add calculation using pagebreak tag.
This commit is contained in:
parent
e7387aab1a
commit
c15e049684
@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
Generates and writes an APNX page mapping file.
|
Generates and writes an APNX page mapping file.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
import struct
|
import struct
|
||||||
|
|
||||||
from calibre.ebooks.mobi.reader.mobi6 import MobiReader
|
from calibre.ebooks.mobi.reader.mobi6 import MobiReader
|
||||||
@ -22,7 +23,7 @@ class APNXBuilder(object):
|
|||||||
Create an APNX file using a pseudo page mapping.
|
Create an APNX file using a pseudo page mapping.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
def write_apnx(self, mobi_file_path, apnx_path, accurate=True, page_count=0):
|
def write_apnx(self, mobi_file_path, apnx_path, method=None, page_count=0):
|
||||||
'''
|
'''
|
||||||
If you want a fixed number of pages (such as from a custom column) then
|
If you want a fixed number of pages (such as from a custom column) then
|
||||||
pass in a value to page_count, otherwise a count will be estimated
|
pass in a value to page_count, otherwise a count will be estimated
|
||||||
@ -60,15 +61,17 @@ class APNXBuilder(object):
|
|||||||
if page_count:
|
if page_count:
|
||||||
pages = self.get_pages_exact(mobi_file_path, page_count)
|
pages = self.get_pages_exact(mobi_file_path, page_count)
|
||||||
else:
|
else:
|
||||||
if accurate:
|
try:
|
||||||
try:
|
if method='accurate':
|
||||||
pages = self.get_pages_accurate(mobi_file_path)
|
pages = self.get_pages_accurate(mobi_file_path)
|
||||||
except:
|
elif method='pagebreak':
|
||||||
# Fall back to the fast parser if we can't
|
pages = self.get_pages_pagebreak_tag(mobi_file_path)
|
||||||
# use the accurate one. Typically this is
|
else:
|
||||||
# due to the file having DRM.
|
raise('no valid accurate method chosen use fast')
|
||||||
pages = self.get_pages_fast(mobi_file_path)
|
except:
|
||||||
else:
|
# Fall back to the fast parser if we can't
|
||||||
|
# use the accurate one. Typically this is
|
||||||
|
# due to the file having DRM.
|
||||||
pages = self.get_pages_fast(mobi_file_path)
|
pages = self.get_pages_fast(mobi_file_path)
|
||||||
|
|
||||||
if not pages:
|
if not pages:
|
||||||
@ -261,3 +264,23 @@ class APNXBuilder(object):
|
|||||||
pages.append(lines[i])
|
pages.append(lines[i])
|
||||||
|
|
||||||
return pages
|
return pages
|
||||||
|
|
||||||
|
def get_pages_pagebreak_tag(self, mobi_file_path):
    '''
    Determine pages based on the presence of
    <mbp:pagebreak>.

    mobi_file_path is handed to MobiReader to obtain the book's HTML.

    Returns a list of offsets into the lower-cased MOBI HTML, one per
    page break tag found, each pointing just past the end of the tag.
    If the book header reports a non-zero encryption type the result of
    get_pages_fast() is returned instead.
    '''
    # Get the MOBI html.
    mr = MobiReader(mobi_file_path, default_log)
    if mr.book_header.encryption_type != 0:
        # DRMed book
        return self.get_pages_fast(mobi_file_path)
    mr.extract_text()

    # Lower-case the markup so the tag match is case-insensitive.
    html = mr.mobi_html.lower()

    # The subject string must be passed to finditer; calling it with the
    # pattern alone raises TypeError and no page would ever be recorded.
    pagebreak_tag = r'<\s*(mbp:)?pagebreak[^>]*>'
    return [m.end() for m in re.finditer(pagebreak_tag, html)]
|
||||||
|
@ -311,6 +311,14 @@ class KINDLE2(KINDLE):
|
|||||||
'generator will produce pages that correspond better to a printed book. '
|
'generator will produce pages that correspond better to a printed book. '
|
||||||
'However, this method is slower and will slow down sending files '
|
'However, this method is slower and will slow down sending files '
|
||||||
'to the Kindle.'),
|
'to the Kindle.'),
|
||||||
|
_('Accurate calculation method') +
|
||||||
|
':::' +
|
||||||
|
_('There are multiple methods to accuratly calculate the page numbers. "accurate" which '
|
||||||
|
'is an estimation based on the number of chapters, paragraphs, and visible lines in the book. '
|
||||||
|
'This method is designed to simulate an average paperback book where there are 32 lines per '
|
||||||
|
'page and a maximum of 70 characters per line. \n\n'
|
||||||
|
'The "pagebreak" method uses the presense of <mbp:pagebreak> tags within the book to '
|
||||||
|
'determine pages.'),
|
||||||
_('Custom column name to retrieve page counts from') +
|
_('Custom column name to retrieve page counts from') +
|
||||||
':::' +
|
':::' +
|
||||||
_('If you have a custom column in your library that you use to '
|
_('If you have a custom column in your library that you use to '
|
||||||
@ -322,13 +330,15 @@ class KINDLE2(KINDLE):
|
|||||||
EXTRA_CUSTOMIZATION_DEFAULT = [
|
EXTRA_CUSTOMIZATION_DEFAULT = [
|
||||||
True,
|
True,
|
||||||
False,
|
False,
|
||||||
|
'accurate',
|
||||||
'',
|
'',
|
||||||
]
|
]
|
||||||
OPT_APNX = 0
|
OPT_APNX = 0
|
||||||
OPT_APNX_ACCURATE = 1
|
OPT_APNX_ACCURATE = 1
|
||||||
OPT_APNX_CUST_COL = 2
|
OPT_APNX_ACCURATE_METHOD = 2
|
||||||
|
OPT_APNX_CUST_COL = 3
|
||||||
# x330 on the PaperWhite
|
# x330 on the PaperWhite
|
||||||
THUMBNAIL_HEIGHT = 330
|
THUMBNAIL_HEIGHT = 330
|
||||||
# x262 on the Touch. Doesn't choke on x330, though.
|
# x262 on the Touch. Doesn't choke on x330, though.
|
||||||
|
|
||||||
def formats_to_scan_for(self):
|
def formats_to_scan_for(self):
|
||||||
@ -441,8 +451,11 @@ class KINDLE2(KINDLE):
|
|||||||
apnx_path = '%s.apnx' % os.path.join(path, filename)
|
apnx_path = '%s.apnx' % os.path.join(path, filename)
|
||||||
apnx_builder = APNXBuilder()
|
apnx_builder = APNXBuilder()
|
||||||
try:
|
try:
|
||||||
|
method = None
|
||||||
|
if opts.extra_customization[self.OPT_APNX_ACCURATE]:
|
||||||
|
method = opts.extra_customization[self.OPT_APNX_ACCURATE_METHOD]
|
||||||
apnx_builder.write_apnx(filepath, apnx_path,
|
apnx_builder.write_apnx(filepath, apnx_path,
|
||||||
accurate=opts.extra_customization[self.OPT_APNX_ACCURATE],
|
method=method,
|
||||||
page_count=custom_page_count)
|
page_count=custom_page_count)
|
||||||
except:
|
except:
|
||||||
print 'Failed to generate APNX'
|
print 'Failed to generate APNX'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user