From 50f642ec07c12338e12e98b58b66cd7672583953 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 19 Jul 2011 21:15:12 -0600 Subject: [PATCH 1/6] Los Andes by Darko Miletic. Fixes #813278 (New recipe for Argentinian newspaper Los Andes) --- recipes/icons/losandes.png | Bin 0 -> 285 bytes recipes/losandes.recipe | 78 +++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 recipes/icons/losandes.png create mode 100644 recipes/losandes.recipe diff --git a/recipes/icons/losandes.png b/recipes/icons/losandes.png new file mode 100644 index 0000000000000000000000000000000000000000..635217e7279702682fa4ae5de202e79b02f20267 GIT binary patch literal 285 zcmV+&0pk9NP)=GF*fShy}P<8#i2j_~wg&oFKC54QC$% z#S537{qXe{iUtFDAs{I)!Xv=VhOA-Z*~bnVVjsT$2I@r7fE55u-Ej8F{8URId*P9L zgf#$71^VZ~o39EYTnBEvdhq%S0Syy(-2i&`!1b378lYfUaQGf}4M3MD7Oe-G_u<>G z{uq;nAie%*6R@l9LKJ|4)v#vkg-0I*I9Uzk1ur~&FTl+T*7 Date: Tue, 19 Jul 2011 23:00:13 -0600 Subject: [PATCH 2/6] Mobi debug: Interpret the TBS index entries for book type documents --- src/calibre/ebooks/mobi/debug.py | 69 ++++++++++++++++++++++-- src/calibre/ebooks/mobi/writer2/utils.py | 5 +- 2 files changed, 69 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 2dbe363e7c..9bc587c527 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -618,6 +618,13 @@ class IndexEntry(object): # {{{ vals.append(val) self.tags.append(Tag(tag, vals, self.entry_type, cncx)) + @property + def label(self): + for tag in self.tags: + if tag.attr == 'label_offset': + return tag.cncx_value + return '' + def __str__(self): ans = ['Index Entry(index=%s, entry_type=%s, length=%d)'%( self.index, self.entry_type, len(self.tags))] @@ -731,7 +738,8 @@ class CNCX(object) : # {{{ class TextRecord(object): # {{{ - def __init__(self, idx, record, extra_data_flags, decompress): + def __init__(self, idx, record, extra_data_flags, decompress, index_record, + doc_type): self.trailing_data, self.raw = get_trailing_data(record.raw, extra_data_flags) self.raw = decompress(self.raw) if 0 in self.trailing_data: @@ -743,6 +751,60 @@ class TextRecord(object): # {{{ self.idx = idx + if 'indexing' in self.trailing_data and index_record is not None: + self.interpret_indexing(doc_type, index_record.indices) + + def interpret_indexing(self, doc_type, indices): + raw = self.trailing_data['indexing'] + ident, consumed = decint(raw) + raw = raw[consumed:] + entry_type = ident & 0b111 + index_entry_idx = ident >> 3 + index_entry = None + for i in indices: + if i.index == index_entry_idx: + index_entry = i.label + break + self.trailing_data['interpreted_indexing'] = ( + 'Type: %s, Index Entry: %s'%(entry_type, index_entry)) + if doc_type == 2: # Book + self.interpret_book_indexing(raw, entry_type) + + def interpret_book_indexing(self, raw, entry_type): + arg1, consumed = decint(raw) + raw = raw[consumed:] + if arg1 != 0: + raise ValueError('TBS index entry has unknown arg1: %d'% + arg1) + if entry_type == 2: + desc = ('This record has only a single starting or a single' + ' ending point') + if raw: + raise ValueError('TBS index entry has unknown extra bytes:' + ' %r'%raw) + elif entry_type == 3: + desc = ('This record is spanned by a single node (i.e. it' + ' has no start or end points)') + arg2, consumed = decint(raw) + if arg2 != 0: + raise ValueError('TBS index entry has unknown arg2: %d'% + arg2) + elif entry_type == 6: + if len(raw) != 1: + raise ValueError('TBS index entry has unknown extra bytes:' + ' %r'%raw) + num = ord(raw[0]) + # An unmatched starting or ending point each contributes 1 to + # this count. A matched pair of starting and ending points + # together contribute 1 to this count. Note that you can only + # ever have either 1 unmatched start point or 1 unmatched end + # point, never both (logically impossible). + desc = ('This record has %d starting/ending points and/or complete' + ' nodes.')%num + else: + raise ValueError('Unknown TBS index entry type: %d for book'%entry_type) + self.trailing_data['interpreted_indexing'] += ' :: ' + desc + def dump(self, folder): name = '%06d'%self.idx with open(os.path.join(folder, name+'.txt'), 'wb') as f: @@ -828,7 +890,7 @@ class MOBIFile(object): # {{{ else: decompress = lambda x: x - self.index_header = None + self.index_header = self.index_record = None self.indexing_record_nums = set() pir = self.mobi_header.primary_index_record if pir != 0xffffffff: @@ -848,7 +910,8 @@ class MOBIFile(object): # {{{ if fntbr == 0xffffffff: fntbr = len(self.records) self.text_records = [TextRecord(r, self.records[r], - self.mobi_header.extra_data_flags, decompress) for r in xrange(1, + self.mobi_header.extra_data_flags, decompress, self.index_record, + self.mobi_header.type_raw) for r in xrange(1, min(len(self.records), ntr+1))] self.image_records, self.binary_records = [], [] for i in xrange(fntbr, len(self.records)): diff --git a/src/calibre/ebooks/mobi/writer2/utils.py b/src/calibre/ebooks/mobi/writer2/utils.py index 708b9152d4..1c2d3a110d 100644 --- a/src/calibre/ebooks/mobi/writer2/utils.py +++ b/src/calibre/ebooks/mobi/writer2/utils.py @@ -170,7 +170,8 @@ def get_trailing_data(record, extra_data_flags): consumed = 1 else: sz, consumed = decint(record, forward=False) - data[i] = record[-(sz+consumed):-consumed] - record = record[:-(sz+consumed)] + if sz > consumed: + data[i] = record[-sz:-consumed] + record = record[:-sz] return data, record From 7f5651e0bd61b9fbf4efecb65e8d433f8fc38b1b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 19 Jul 2011 23:11:03 -0600 Subject: [PATCH 3/6] ... --- src/calibre/ebooks/mobi/writer2/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/writer2/utils.py b/src/calibre/ebooks/mobi/writer2/utils.py index 1c2d3a110d..dc9526eb77 100644 --- a/src/calibre/ebooks/mobi/writer2/utils.py +++ b/src/calibre/ebooks/mobi/writer2/utils.py @@ -166,7 +166,7 @@ def get_trailing_data(record, extra_data_flags): if i == 0: # Only the first two bits are used for the size since there can # never be more than 3 trailing multibyte chars - sz = ord(record[-1]) & 0b11 + sz = (ord(record[-1]) & 0b11) + 1 consumed = 1 else: sz, consumed = decint(record, forward=False) From ddae0ca7964bd21d0d40e6902b24ff903a66467d Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Wed, 20 Jul 2011 08:51:40 +0100 Subject: [PATCH 4/6] Make the conversion chain accept series numbers larger than 9999.99 --- src/calibre/gui2/convert/metadata.ui | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/convert/metadata.ui b/src/calibre/gui2/convert/metadata.ui index 95ccac6890..478f65e870 100644 --- a/src/calibre/gui2/convert/metadata.ui +++ b/src/calibre/gui2/convert/metadata.ui @@ -240,7 +240,7 @@ Book - 9999.989999999999782 + 9999999999.99 1.000000000000000 From 96d51719af4816fe50c6f6315c1a9bf9982559b7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 20 Jul 2011 13:30:13 -0600 Subject: [PATCH 5/6] ... --- src/calibre/ebooks/mobi/writer2/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/writer2/utils.py b/src/calibre/ebooks/mobi/writer2/utils.py index dc9526eb77..cd0ee453c3 100644 --- a/src/calibre/ebooks/mobi/writer2/utils.py +++ b/src/calibre/ebooks/mobi/writer2/utils.py @@ -161,7 +161,7 @@ def get_trailing_data(record, extra_data_flags): ''' data = OrderedDict() for i in xrange(16, -1, -1): - flag = 2**i + flag = 1 << i # 2**i if flag & extra_data_flags: if i == 0: # Only the first two bits are used for the size since there can From 629da2b324ff5017e3a316bed0722cb591259258 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 20 Jul 2011 14:01:41 -0600 Subject: [PATCH 6/6] ... --- src/calibre/ebooks/mobi/debug.py | 62 +------------------ .../ebooks/mobi/{writer2 => }/utils.py | 0 src/calibre/ebooks/mobi/writer2/main.py | 2 +- 3 files changed, 4 insertions(+), 60 deletions(-) rename src/calibre/ebooks/mobi/{writer2 => }/utils.py (100%) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 9bc587c527..971f037479 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -11,7 +11,7 @@ import struct, datetime, sys, os, shutil from collections import OrderedDict from calibre.utils.date import utc_tz from calibre.ebooks.mobi.langcodes import main_language, sub_language -from calibre.ebooks.mobi.writer2.utils import (decode_hex_number, decint, +from calibre.ebooks.mobi.utils import (decode_hex_number, decint, get_trailing_data) from calibre.utils.magick.draw import identify_data @@ -738,8 +738,7 @@ class CNCX(object) : # {{{ class TextRecord(object): # {{{ - def __init__(self, idx, record, extra_data_flags, decompress, index_record, - doc_type): + def __init__(self, idx, record, extra_data_flags, decompress): self.trailing_data, self.raw = get_trailing_data(record.raw, extra_data_flags) self.raw = decompress(self.raw) if 0 in self.trailing_data: @@ -751,60 +750,6 @@ class TextRecord(object): # {{{ self.idx = idx - if 'indexing' in self.trailing_data and index_record is not None: - self.interpret_indexing(doc_type, index_record.indices) - - def interpret_indexing(self, doc_type, indices): - raw = self.trailing_data['indexing'] - ident, consumed = decint(raw) - raw = raw[consumed:] - entry_type = ident & 0b111 - index_entry_idx = ident >> 3 - index_entry = None - for i in indices: - if i.index == index_entry_idx: - index_entry = i.label - break - self.trailing_data['interpreted_indexing'] = ( - 'Type: %s, Index Entry: %s'%(entry_type, index_entry)) - if doc_type == 2: # Book - self.interpret_book_indexing(raw, entry_type) - - def interpret_book_indexing(self, raw, entry_type): - arg1, consumed = decint(raw) - raw = raw[consumed:] - if arg1 != 0: - raise ValueError('TBS index entry has unknown arg1: %d'% - arg1) - if entry_type == 2: - desc = ('This record has only a single starting or a single' - ' ending point') - if raw: - raise ValueError('TBS index entry has unknown extra bytes:' - ' %r'%raw) - elif entry_type == 3: - desc = ('This record is spanned by a single node (i.e. it' - ' has no start or end points)') - arg2, consumed = decint(raw) - if arg2 != 0: - raise ValueError('TBS index entry has unknown arg2: %d'% - arg2) - elif entry_type == 6: - if len(raw) != 1: - raise ValueError('TBS index entry has unknown extra bytes:' - ' %r'%raw) - num = ord(raw[0]) - # An unmatched starting or ending point each contributes 1 to - # this count. A matched pair of starting and ending points - # together contribute 1 to this count. Note that you can only - # ever have either 1 unmatched start point or 1 unmatched end - # point, never both (logically impossible). - desc = ('This record has %d starting/ending points and/or complete' - ' nodes.')%num - else: - raise ValueError('Unknown TBS index entry type: %d for book'%entry_type) - self.trailing_data['interpreted_indexing'] += ' :: ' + desc - def dump(self, folder): name = '%06d'%self.idx with open(os.path.join(folder, name+'.txt'), 'wb') as f: @@ -910,8 +855,7 @@ class MOBIFile(object): # {{{ if fntbr == 0xffffffff: fntbr = len(self.records) self.text_records = [TextRecord(r, self.records[r], - self.mobi_header.extra_data_flags, decompress, self.index_record, - self.mobi_header.type_raw) for r in xrange(1, + self.mobi_header.extra_data_flags, decompress) for r in xrange(1, min(len(self.records), ntr+1))] self.image_records, self.binary_records = [], [] for i in xrange(fntbr, len(self.records)): diff --git a/src/calibre/ebooks/mobi/writer2/utils.py b/src/calibre/ebooks/mobi/utils.py similarity index 100% rename from src/calibre/ebooks/mobi/writer2/utils.py rename to src/calibre/ebooks/mobi/utils.py diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index 76976ce81e..2e9d31458a 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -18,7 +18,7 @@ from calibre.ebooks.compression.palmdoc import compress_doc from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.utils.filenames import ascii_filename from calibre.ebooks.mobi.writer2 import PALMDOC, UNCOMPRESSED -from calibre.ebooks.mobi.writer2.utils import (rescale_image, encint) +from calibre.ebooks.mobi.utils import (rescale_image, encint) EXTH_CODES = { 'creator': 100,