Kindle APNX: Add calculation using pagebreak tag.

This commit is contained in:
John Schember 2014-05-22 09:26:11 -04:00
parent e7387aab1a
commit c15e049684
2 changed files with 50 additions and 14 deletions

View File

@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
Generates and writes an APNX page mapping file.
'''
import re
import struct
from calibre.ebooks.mobi.reader.mobi6 import MobiReader
@ -22,7 +23,7 @@ class APNXBuilder(object):
Create an APNX file using a pseudo page mapping.
'''
def write_apnx(self, mobi_file_path, apnx_path, accurate=True, page_count=0):
def write_apnx(self, mobi_file_path, apnx_path, method=None, page_count=0):
'''
If you want a fixed number of pages (such as from a custom column) then
pass in a value to page_count, otherwise a count will be estimated
@ -60,16 +61,18 @@ class APNXBuilder(object):
if page_count:
pages = self.get_pages_exact(mobi_file_path, page_count)
else:
if accurate:
try:
if method='accurate':
pages = self.get_pages_accurate(mobi_file_path)
elif method='pagebreak':
pages = self.get_pages_pagebreak_tag(mobi_file_path)
else:
raise('no valid accurate method chosen use fast')
except:
# Fall back to the fast parser if we can't
# use the accurate one. Typically this is
# due to the file having DRM.
pages = self.get_pages_fast(mobi_file_path)
else:
pages = self.get_pages_fast(mobi_file_path)
if not pages:
raise Exception(_('Could not generate page mapping.'))
@ -261,3 +264,23 @@ class APNXBuilder(object):
pages.append(lines[i])
return pages
def get_pages_pagebreak_tag(self, mobi_file_path):
    '''
    Determine pages based on the presence of
    <mbp:pagebreak>.

    :param mobi_file_path: Path of the MOBI file to read.
    :return: A list holding the end offset of every pagebreak tag found
        in the lower-cased MOBI html. For a DRMed book the result of
        get_pages_fast() is returned instead.
    '''
    # Get the MOBI html.
    mr = MobiReader(mobi_file_path, default_log)
    if mr.book_header.encryption_type != 0:
        # DRMed book
        return self.get_pages_fast(mobi_file_path)
    mr.extract_text()
    # Lower-case the markup so tags match regardless of their case.
    html = mr.mobi_html.lower()
    # re.finditer needs both the pattern and the text to scan; a page
    # boundary is recorded at the position right after each tag.
    return [m.end() for m in re.finditer(r'<\s*(mbp:)?pagebreak[^>]*>', html)]

View File

@ -311,6 +311,14 @@ class KINDLE2(KINDLE):
'generator will produce pages that correspond better to a printed book. '
'However, this method is slower and will slow down sending files '
'to the Kindle.'),
_('Accurate calculation method') +
':::' +
_('There are multiple methods to accuratly calculate the page numbers. "accurate" which '
'is an estimation based on the number of chapters, paragraphs, and visible lines in the book. '
'This method is designed to simulate an average paperback book where there are 32 lines per '
'page and a maximum of 70 characters per line. \n\n'
'The "pagebreak" method uses the presense of <mbp:pagebreak> tags within the book to '
'determine pages.'),
_('Custom column name to retrieve page counts from') +
':::' +
_('If you have a custom column in your library that you use to '
@ -322,11 +330,13 @@ class KINDLE2(KINDLE):
EXTRA_CUSTOMIZATION_DEFAULT = [
True,
False,
'accurate',
'',
]
OPT_APNX = 0
OPT_APNX_ACCURATE = 1
OPT_APNX_CUST_COL = 2
OPT_APNX_ACCURATE_METHOD = 2
OPT_APNX_CUST_COL = 3
# x330 on the PaperWhite
THUMBNAIL_HEIGHT = 330
# x262 on the Touch. Doesn't choke on x330, though.
@ -441,8 +451,11 @@ class KINDLE2(KINDLE):
apnx_path = '%s.apnx' % os.path.join(path, filename)
apnx_builder = APNXBuilder()
try:
method = None
if opts.extra_customization[self.OPT_APNX_ACCURATE]:
method = opts.extra_customization[self.OPT_APNX_ACCURATE_METHOD]
apnx_builder.write_apnx(filepath, apnx_path,
accurate=opts.extra_customization[self.OPT_APNX_ACCURATE],
method=method,
page_count=custom_page_count)
except:
print 'Failed to generate APNX'