From c15e0496844a5ed31e54f6eaff652f485e26c08f Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 22 May 2014 09:26:11 -0400 Subject: [PATCH 1/6] Kindle APNX: Add calculation using pagebreak tag. --- src/calibre/devices/kindle/apnx.py | 41 ++++++++++++++++++++++------ src/calibre/devices/kindle/driver.py | 23 ++++++++++++---- 2 files changed, 50 insertions(+), 14 deletions(-) diff --git a/src/calibre/devices/kindle/apnx.py b/src/calibre/devices/kindle/apnx.py index faad3c6cd7..8e8773e058 100644 --- a/src/calibre/devices/kindle/apnx.py +++ b/src/calibre/devices/kindle/apnx.py @@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en' Generates and writes an APNX page mapping file. ''' +import re import struct from calibre.ebooks.mobi.reader.mobi6 import MobiReader @@ -22,7 +23,7 @@ class APNXBuilder(object): Create an APNX file using a pseudo page mapping. ''' - def write_apnx(self, mobi_file_path, apnx_path, accurate=True, page_count=0): + def write_apnx(self, mobi_file_path, apnx_path, method=None, page_count=0): ''' If you want a fixed number of pages (such as from a custom column) then pass in a value to page_count, otherwise a count will be estimated @@ -60,15 +61,17 @@ class APNXBuilder(object): if page_count: pages = self.get_pages_exact(mobi_file_path, page_count) else: - if accurate: - try: + try: + if method='accurate': pages = self.get_pages_accurate(mobi_file_path) - except: - # Fall back to the fast parser if we can't - # use the accurate one. Typically this is - # due to the file having DRM. - pages = self.get_pages_fast(mobi_file_path) - else: + elif method='pagebreak': + pages = self.get_pages_pagebreak_tag(mobi_file_path) + else: + raise('no valid accurate method chosen use fast') + except: + # Fall back to the fast parser if we can't + # use the accurate one. Typically this is + # due to the file having DRM. 
pages = self.get_pages_fast(mobi_file_path) if not pages: @@ -261,3 +264,23 @@ class APNXBuilder(object): pages.append(lines[i]) return pages + + def get_pages_pagebreak_tag(self, mobi_file_path): + ''' + Determine pages based on the presense of + . + ''' + pages = [] + + # Get the MOBI html. + mr = MobiReader(mobi_file_path, default_log) + if mr.book_header.encryption_type != 0: + # DRMed book + return self.get_pages_fast(mobi_file_path) + mr.extract_text() + + html = mr.mobi_html.lower() + for m in re.finditer('<\s*(mbp:)?pagebreak[^>]*>'): + pages.append(m.end()) + + return pages diff --git a/src/calibre/devices/kindle/driver.py b/src/calibre/devices/kindle/driver.py index 214f8cc65c..be64a5045d 100644 --- a/src/calibre/devices/kindle/driver.py +++ b/src/calibre/devices/kindle/driver.py @@ -311,6 +311,14 @@ class KINDLE2(KINDLE): 'generator will produce pages that correspond better to a printed book. ' 'However, this method is slower and will slow down sending files ' 'to the Kindle.'), + _('Accurate calculation method') + + ':::' + + _('There are multiple methods to accuratly calculate the page numbers. "accurate" which ' + 'is an estimation based on the number of chapters, paragraphs, and visible lines in the book. ' + 'This method is designed to simulate an average paperback book where there are 32 lines per ' + 'page and a maximum of 70 characters per line. \n\n' + 'The "pagebreak" method uses the presense of tags within the book to ' + 'determine pages.'), _('Custom column name to retrieve page counts from') + ':::' + _('If you have a custom column in your library that you use to ' @@ -322,13 +330,15 @@ class KINDLE2(KINDLE): EXTRA_CUSTOMIZATION_DEFAULT = [ True, False, + 'accurate', '', ] - OPT_APNX = 0 - OPT_APNX_ACCURATE = 1 - OPT_APNX_CUST_COL = 2 + OPT_APNX = 0 + OPT_APNX_ACCURATE = 1 + OPT_APNX_ACCURATE_METHOD = 2 + OPT_APNX_CUST_COL = 3 # x330 on the PaperWhite - THUMBNAIL_HEIGHT = 330 + THUMBNAIL_HEIGHT = 330 # x262 on the Touch. 
Doesn't choke on x330, though. def formats_to_scan_for(self): @@ -441,8 +451,11 @@ class KINDLE2(KINDLE): apnx_path = '%s.apnx' % os.path.join(path, filename) apnx_builder = APNXBuilder() try: + method = None + if opts.extra_customization[self.OPT_APNX_ACCURATE]: + method = opts.extra_customization[self.OPT_APNX_ACCURATE_METHOD] apnx_builder.write_apnx(filepath, apnx_path, - accurate=opts.extra_customization[self.OPT_APNX_ACCURATE], + method=method, page_count=custom_page_count) except: print 'Failed to generate APNX' From a01eec3779103ac2f219648b10f53ce33eaed553 Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 22 May 2014 13:26:39 -0400 Subject: [PATCH 2/6] fix equality. --- src/calibre/devices/kindle/apnx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/devices/kindle/apnx.py b/src/calibre/devices/kindle/apnx.py index 8e8773e058..19d34c0a5f 100644 --- a/src/calibre/devices/kindle/apnx.py +++ b/src/calibre/devices/kindle/apnx.py @@ -62,9 +62,9 @@ class APNXBuilder(object): pages = self.get_pages_exact(mobi_file_path, page_count) else: try: - if method='accurate': + if method == 'accurate': pages = self.get_pages_accurate(mobi_file_path) - elif method='pagebreak': + elif method == 'pagebreak': pages = self.get_pages_pagebreak_tag(mobi_file_path) else: raise('no valid accurate method chosen use fast') From 7c7c94df612cdef698ed85fb60b43f7a1a63f56f Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 22 May 2014 13:47:23 -0400 Subject: [PATCH 3/6] fix indentation. 
--- src/calibre/devices/kindle/apnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/devices/kindle/apnx.py b/src/calibre/devices/kindle/apnx.py index 19d34c0a5f..684dfb91e4 100644 --- a/src/calibre/devices/kindle/apnx.py +++ b/src/calibre/devices/kindle/apnx.py @@ -281,6 +281,6 @@ class APNXBuilder(object): html = mr.mobi_html.lower() for m in re.finditer('<\s*(mbp:)?pagebreak[^>]*>'): - pages.append(m.end()) + pages.append(m.end()) return pages From ba61122d1ff3c6220207430f5f15a92679794c6d Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 22 May 2014 14:04:53 -0400 Subject: [PATCH 4/6] fix finding pagebreak tags. --- src/calibre/devices/kindle/apnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/devices/kindle/apnx.py b/src/calibre/devices/kindle/apnx.py index 684dfb91e4..9f378cada1 100644 --- a/src/calibre/devices/kindle/apnx.py +++ b/src/calibre/devices/kindle/apnx.py @@ -280,7 +280,7 @@ class APNXBuilder(object): mr.extract_text() html = mr.mobi_html.lower() - for m in re.finditer('<\s*(mbp:)?pagebreak[^>]*>'): + for m in re.finditer('<\s*(mbp:)?pagebreak[^>]*>', html): pages.append(m.end()) return pages From 6ea252106e6ecc3fbf99ad77b31c669700a3be69 Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 22 May 2014 16:09:54 -0400 Subject: [PATCH 5/6] page apnx algorithm to search for all tags with pagebreak in them.
--- src/calibre/devices/kindle/apnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/devices/kindle/apnx.py b/src/calibre/devices/kindle/apnx.py index 9f378cada1..223fa90ea4 100644 --- a/src/calibre/devices/kindle/apnx.py +++ b/src/calibre/devices/kindle/apnx.py @@ -280,7 +280,7 @@ class APNXBuilder(object): mr.extract_text() html = mr.mobi_html.lower() - for m in re.finditer('<\s*(mbp:)?pagebreak[^>]*>', html): + for m in re.finditer('<[^>]*pagebreak[^>]*>', html): pages.append(m.end()) return pages From 189cbd41fed12ee66b567061354d59fab97727f3 Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 22 May 2014 16:59:32 -0400 Subject: [PATCH 6/6] Put in some more fallbacks for when a chosen parser fails. --- src/calibre/devices/kindle/apnx.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/calibre/devices/kindle/apnx.py b/src/calibre/devices/kindle/apnx.py index 223fa90ea4..a34b811b02 100644 --- a/src/calibre/devices/kindle/apnx.py +++ b/src/calibre/devices/kindle/apnx.py @@ -66,6 +66,8 @@ class APNXBuilder(object): pages = self.get_pages_accurate(mobi_file_path) elif method == 'pagebreak': pages = self.get_pages_pagebreak_tag(mobi_file_path) + if not pages: + pages = self.get_pages_accurate(mobi_file_path) else: raise('no valid accurate method chosen use fast') except: @@ -74,6 +76,8 @@ class APNXBuilder(object): # due to the file having DRM. pages = self.get_pages_fast(mobi_file_path) + if not pages: + pages = self.get_pages_fast(mobi_file_path) if not pages: raise Exception(_('Could not generate page mapping.'))