From 944310a7e23e47e47e329b213424db74c9131ad1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 16 Jul 2014 10:00:51 +0530 Subject: [PATCH] AZW3: Preserve the page-progression-direction property when converting/editing/polishing AZW3 files --- src/calibre/ebooks/mobi/reader/headers.py | 8 ++++++++ src/calibre/ebooks/mobi/reader/mobi8.py | 11 ++++------- src/calibre/ebooks/mobi/utils.py | 11 +---------- src/calibre/ebooks/mobi/writer8/exth.py | 10 +++++++++- src/calibre/ebooks/mobi/writer8/mobi.py | 22 +++++++++++++--------- 5 files changed, 35 insertions(+), 27 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader/headers.py b/src/calibre/ebooks/mobi/reader/headers.py index 9dbbfb9826..1674d7566c 100644 --- a/src/calibre/ebooks/mobi/reader/headers.py +++ b/src/calibre/ebooks/mobi/reader/headers.py @@ -32,6 +32,7 @@ class EXTHHeader(object): # {{{ left = self.num_items self.kf8_header = None self.uuid = self.cdetype = None + self.page_progression_direction = None self.decode = lambda x : clean_ascii_chars(x.decode(codec, 'replace')) @@ -81,6 +82,13 @@ class EXTHHeader(object): # {{{ self.mi.language = lang except: pass + elif idx == 527: + try: + ppd = content.decode(codec) + if ppd: + self.page_progression_direction = ppd + except Exception: + pass # else: # print 'unknown record', idx, repr(content) if title: diff --git a/src/calibre/ebooks/mobi/reader/mobi8.py b/src/calibre/ebooks/mobi/reader/mobi8.py index 835a24b299..df15c76822 100644 --- a/src/calibre/ebooks/mobi/reader/mobi8.py +++ b/src/calibre/ebooks/mobi/reader/mobi8.py @@ -20,7 +20,7 @@ from calibre.ebooks.mobi.reader.ncx import read_ncx, build_toc from calibre.ebooks.mobi.reader.markup import expand_mobi8_markup from calibre.ebooks.metadata.opf2 import Guide, OPFCreator from calibre.ebooks.metadata.toc import TOC -from calibre.ebooks.mobi.utils import read_font_record, read_resc_record +from calibre.ebooks.mobi.utils import read_font_record from calibre.ebooks.oeb.parse_utils import parse_html 
from calibre.ebooks.oeb.base import XPath, XHTML, xml2text from calibre.utils.imghdr import what @@ -66,7 +66,6 @@ class Mobi8Reader(object): self.mobi6_reader, self.log = mobi6_reader, log self.header = mobi6_reader.book_header self.encrypted_fonts = [] - self.resc_data = {} def __call__(self): self.mobi6_reader.check_for_drm() @@ -398,10 +397,8 @@ class Mobi8Reader(object): typ = data[:4] href = None if typ in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'BOUN', - b'FDST', b'DATP', b'AUDI', b'VIDE'}: + b'FDST', b'DATP', b'AUDI', b'VIDE', b'RESC', b'CMET'}: pass # Ignore these records - elif typ == b'RESC': - self.resc_data = read_resc_record(data) elif typ == b'FONT': font = read_font_record(data) href = "fonts/%05d.%s" % (fname_idx, font['ext']) @@ -480,8 +477,8 @@ class Mobi8Reader(object): entry.mime_type = 'application/xhtml+xml' opf.create_spine(spine) opf.set_toc(toc) - ppd = self.resc_data.get('page-progression-direction', None) - if ppd: + ppd = getattr(self.header.exth, 'page_progression_direction', None) + if ppd in {'ltr', 'rtl', 'default'}: opf.page_progression_direction = ppd with open('metadata.opf', 'wb') as of, open('toc.ncx', 'wb') as ncx: diff --git a/src/calibre/ebooks/mobi/utils.py b/src/calibre/ebooks/mobi/utils.py index 39a79b006a..d20fc5bcba 100644 --- a/src/calibre/ebooks/mobi/utils.py +++ b/src/calibre/ebooks/mobi/utils.py @@ -7,7 +7,7 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import struct, string, zlib, os, re +import struct, string, zlib, os from collections import OrderedDict from io import BytesIO @@ -395,15 +395,6 @@ def mobify_image(data): data = im.export('gif') return data -def read_resc_record(data): - ans = {} - match = re.search(br'''<spine [^>]*page-progression-direction=['"](.+?)['"]''', data) - if match is not None: - ppd = match.group(1).lower() - if ppd in {b'ltr', b'rtl'}: - ans['page-progression-direction'] = ppd.decode('ascii') - return ans - # Font records {{{ def 
read_font_record(data, extent=1040): ''' diff --git a/src/calibre/ebooks/mobi/writer8/exth.py b/src/calibre/ebooks/mobi/writer8/exth.py index dc3f850dba..31bc6edfe3 100644 --- a/src/calibre/ebooks/mobi/writer8/exth.py +++ b/src/calibre/ebooks/mobi/writer8/exth.py @@ -40,6 +40,7 @@ EXTH_CODES = { 'lastupdatetime': 502, 'title': 503, 'language': 524, + 'page_progression_direction': 527, } COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+') @@ -47,7 +48,8 @@ COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+') def build_exth(metadata, prefer_author_sort=False, is_periodical=False, share_not_sync=True, cover_offset=None, thumbnail_offset=None, start_offset=None, mobi_doctype=2, num_of_resources=None, - kf8_unknown_count=0, be_kindlegen2=False, kf8_header_index=None): + kf8_unknown_count=0, be_kindlegen2=False, kf8_header_index=None, + page_progression_direction=None): exth = BytesIO() nrecs = 0 @@ -205,6 +207,12 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False, kf8_unknown_count)) nrecs += 1 + if page_progression_direction in {'rtl', 'ltr', 'default'}: + ppd = bytes(page_progression_direction) + exth.write(pack(b'>II', EXTH_CODES['page_progression_direction'], len(ppd) + 8)) + exth.write(ppd) + nrecs += 1 + exth = exth.getvalue() trail = len(exth) % 4 pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte diff --git a/src/calibre/ebooks/mobi/writer8/mobi.py b/src/calibre/ebooks/mobi/writer8/mobi.py index 891f45f58c..f12f1d682e 100644 --- a/src/calibre/ebooks/mobi/writer8/mobi.py +++ b/src/calibre/ebooks/mobi/writer8/mobi.py @@ -219,6 +219,7 @@ class KF8Book(object): def __init__(self, writer, for_joint=False): self.build_records(writer, for_joint) self.used_images = writer.used_images + self.page_progression_direction = writer.oeb.spine.page_progression_direction def build_records(self, writer, for_joint): metadata = writer.oeb.metadata @@ -297,15 +298,18 @@ class KF8Book(object): code to customize various values after build_records() has been called''' opts = 
self.opts - self.exth = build_exth(self.metadata, - prefer_author_sort=opts.prefer_author_sort, - is_periodical=opts.mobi_periodical, - share_not_sync=opts.share_not_sync, - cover_offset=self.cover_offset, - thumbnail_offset=self.thumbnail_offset, - num_of_resources=self.num_of_resources, - kf8_unknown_count=self.kuc, be_kindlegen2=True, - start_offset=self.start_offset, mobi_doctype=self.book_type) + self.exth = build_exth( + self.metadata, + prefer_author_sort=opts.prefer_author_sort, + is_periodical=opts.mobi_periodical, + share_not_sync=opts.share_not_sync, + cover_offset=self.cover_offset, + thumbnail_offset=self.thumbnail_offset, + num_of_resources=self.num_of_resources, + kf8_unknown_count=self.kuc, be_kindlegen2=True, + start_offset=self.start_offset, mobi_doctype=self.book_type, + page_progression_direction=self.page_progression_direction + ) kwargs = {field:getattr(self, field) for field in HEADER_FIELDS} return MOBIHeader()(**kwargs)