diff --git a/src/calibre/devices/kindle/apnx.py b/src/calibre/devices/kindle/apnx.py
index faad3c6cd7..8e8773e058 100644
--- a/src/calibre/devices/kindle/apnx.py
+++ b/src/calibre/devices/kindle/apnx.py
@@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
 Generates and writes an APNX page mapping file.
 '''
 
+import re
 import struct
 
 from calibre.ebooks.mobi.reader.mobi6 import MobiReader
@@ -22,7 +23,7 @@ class APNXBuilder(object):
     Create an APNX file using a pseudo page mapping.
     '''
 
-    def write_apnx(self, mobi_file_path, apnx_path, accurate=True, page_count=0):
+    def write_apnx(self, mobi_file_path, apnx_path, method=None, page_count=0):
         '''
         If you want a fixed number of pages (such as from a custom column) then
         pass in a value to page_count, otherwise a count will be estimated
@@ -60,15 +61,17 @@ class APNXBuilder(object):
         if page_count:
             pages = self.get_pages_exact(mobi_file_path, page_count)
         else:
-            if accurate:
-                try:
+            try:
+                if method == 'accurate':
                     pages = self.get_pages_accurate(mobi_file_path)
-                except:
-                    # Fall back to the fast parser if we can't
-                    # use the accurate one. Typically this is
-                    # due to the file having DRM.
-                    pages = self.get_pages_fast(mobi_file_path)
-            else:
+                elif method == 'pagebreak':
+                    pages = self.get_pages_pagebreak_tag(mobi_file_path)
+                else:
+                    raise Exception('no valid accurate method chosen use fast')
+            except:
+                # Fall back to the fast parser if we can't
+                # use the accurate one. Typically this is
+                # due to the file having DRM.
                 pages = self.get_pages_fast(mobi_file_path)
 
         if not pages:
@@ -261,3 +264,23 @@ class APNXBuilder(object):
                 pages.append(lines[i])
 
         return pages
+
+    def get_pages_pagebreak_tag(self, mobi_file_path):
+        '''
+        Determine pages based on the presence of
+        <mbp:pagebreak> (or bare <pagebreak>) tags.
+        '''
+        pages = []
+
+        # Get the MOBI html.
+        mr = MobiReader(mobi_file_path, default_log)
+        if mr.book_header.encryption_type != 0:
+            # DRMed book
+            return self.get_pages_fast(mobi_file_path)
+        mr.extract_text()
+
+        html = mr.mobi_html.lower()
+        for m in re.finditer(r'<\s*(mbp:)?pagebreak[^>]*>', html):
+            pages.append(m.end())
+
+        return pages
diff --git a/src/calibre/devices/kindle/driver.py b/src/calibre/devices/kindle/driver.py
index 214f8cc65c..be64a5045d 100644
--- a/src/calibre/devices/kindle/driver.py
+++ b/src/calibre/devices/kindle/driver.py
@@ -311,6 +311,14 @@ class KINDLE2(KINDLE):
                 'generator will produce pages that correspond better to a printed book. '
                 'However, this method is slower and will slow down sending files '
                 'to the Kindle.'),
+        _('Accurate calculation method') +
+        ':::' +
+        _('There are multiple methods to accurately calculate the page numbers. "accurate" which '
+                'is an estimation based on the number of chapters, paragraphs, and visible lines in the book. '
+                'This method is designed to simulate an average paperback book where there are 32 lines per '
+                'page and a maximum of 70 characters per line. \n\n'
+                'The "pagebreak" method uses the presence of <mbp:pagebreak> tags within the book to '
+                'determine pages.'),
         _('Custom column name to retrieve page counts from') +
         ':::' +
         _('If you have a custom column in your library that you use to '
@@ -322,13 +330,15 @@ class KINDLE2(KINDLE):
     EXTRA_CUSTOMIZATION_DEFAULT = [
         True,
         False,
+        'accurate',
         '',
     ]
-    OPT_APNX          = 0
-    OPT_APNX_ACCURATE = 1
-    OPT_APNX_CUST_COL = 2
+    OPT_APNX                 = 0
+    OPT_APNX_ACCURATE        = 1
+    OPT_APNX_ACCURATE_METHOD = 2
+    OPT_APNX_CUST_COL        = 3
     # x330 on the PaperWhite
-    THUMBNAIL_HEIGHT = 330
+    THUMBNAIL_HEIGHT         = 330
     # x262 on the Touch. Doesn't choke on x330, though.
 
     def formats_to_scan_for(self):
@@ -441,8 +451,11 @@ class KINDLE2(KINDLE):
         apnx_path = '%s.apnx' % os.path.join(path, filename)
         apnx_builder = APNXBuilder()
         try:
+            method = None
+            if opts.extra_customization[self.OPT_APNX_ACCURATE]:
+                method = opts.extra_customization[self.OPT_APNX_ACCURATE_METHOD]
             apnx_builder.write_apnx(filepath, apnx_path,
-                accurate=opts.extra_customization[self.OPT_APNX_ACCURATE],
+                method=method,
                 page_count=custom_page_count)
         except:
             print 'Failed to generate APNX'