From 34d3ce25aa9cad874549cbf4e539e745e343582e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 22 Jul 2011 20:16:05 -0600 Subject: [PATCH 01/51] ... --- src/calibre/ebooks/mobi/writer2/indexer.py | 36 +++++++++++++++---- src/calibre/ebooks/mobi/writer2/main.py | 9 +++-- src/calibre/ebooks/mobi/writer2/serializer.py | 1 + 3 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index c28b91e63a..e586c446b4 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -12,6 +12,7 @@ from cStringIO import StringIO from collections import OrderedDict from calibre.ebooks import normalize +from calibre.ebook.mobi.writer2 import RECORD_SIZE from calibre.ebooks.mobi.utils import encint def utf8_text(text): @@ -37,7 +38,6 @@ def align_block(raw, multiple=4, pad=b'\0'): if extra == 0: return raw return raw + pad*(multiple - extra) - class CNCX(object): # {{{ ''' @@ -53,17 +53,15 @@ class CNCX(object): # {{{ for item in toc: if item is self.toc: continue - label = item.title - klass = item.klass + self.strings[item.title] = 0 if opts.mobi_periodical: + self.strings[item.klass] = 0 if item.description: self.strings[item.description] = 0 if item.author: self.string[item.author] = 0 - self.strings[label] = self.strings[klass] = 0 self.records = [] - offset = 0 buf = StringIO() for key in tuple(self.strings.iterkeys()): @@ -92,17 +90,41 @@ class CNCX(object): # {{{ class Indexer(object): - def __init__(self, serializer, number_of_text_records, opts, oeb): + def __init__(self, serializer, number_of_text_records, + size_of_last_text_record, opts, oeb): self.serializer = serializer self.number_of_text_records = number_of_text_records + self.text_size = (RECORD_SIZE * (self.number_of_text_records-1) + + size_of_last_text_record) self.oeb = oeb self.log = oeb.log self.opts = opts - self.cncx = CNCX(oeb.toc, opts) + self.is_periodical = 
opts.mobi_periodical + self.is_flat_periodical = False + if opts.mobi_periodical: + periodical_node = iter(oeb.toc).next() + sections = tuple(periodical_node) + self.is_flat_periodical = len(sections) == 1 self.records = [] + self.cncx = CNCX(oeb.toc, opts) + + if self.is_periodical: + self.create_periodical_index() + else: + raise NotImplementedError() + + def create_periodical_index(self): + periodical_node = iter(self.oeb.toc).next() + sections = tuple(periodical_node) + periodical_node_offset = self.serializer.body_start_offset + periodical_node_size = (self.serializer.body_end_offset - + periodical_node_offset) + periodical_node_size + sections + def create_header(self): buf = StringIO() diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index 088326a876..ea67007168 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -20,6 +20,7 @@ from calibre.utils.filenames import ascii_filename from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE) from calibre.ebooks.mobi.utils import (rescale_image, encint, encode_trailing_data) +from calibre.ebooks.mobi.writer2.indexer import Indexer EXTH_CODES = { 'creator': 100, @@ -87,6 +88,11 @@ class MobiWriter(object): # Indexing {{{ def generate_index(self): self.primary_index_record_idx = None + try: + self.indexer = Indexer(self.serializer, self.last_text_record_idx, + self.opts, self.oeb) + except: + self.log.exception('Failed to generate MOBI index:') # }}} def write_uncrossable_breaks(self): # {{{ @@ -202,7 +208,6 @@ class MobiWriter(object): record.write(overlap) record.write(pack(b'>B', len(overlap))) - self.last_text_record_idx = nrecords def read_text_record(self, text): @@ -265,8 +270,6 @@ class MobiWriter(object): # EOF record self.records.append('\xE9\x8E\x0D\x0A') - self.generate_end_records() - record0 = StringIO() # The MOBI Header record0.write(pack(b'>HHIHHHH', diff --git 
a/src/calibre/ebooks/mobi/writer2/serializer.py b/src/calibre/ebooks/mobi/writer2/serializer.py index d6878bee4a..881937ce73 100644 --- a/src/calibre/ebooks/mobi/writer2/serializer.py +++ b/src/calibre/ebooks/mobi/writer2/serializer.py @@ -143,6 +143,7 @@ class Serializer(object): spine.extend([item for item in self.oeb.spine if not item.linear]) for item in spine: self.serialize_item(item) + self.body_end_offset = buf.tell() buf.write(b'') def serialize_item(self, item): From 9800c93daa693194b7a429c6e6fc9bed28600117 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 23 Jul 2011 01:04:49 -0600 Subject: [PATCH 02/51] ... --- src/calibre/ebooks/mobi/debug.py | 34 +++- src/calibre/ebooks/mobi/writer2/indexer.py | 214 ++++++++++++++++++++- 2 files changed, 233 insertions(+), 15 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index d861e69cbf..5b0ecf6f8e 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -406,7 +406,7 @@ class IndexHeader(object): # {{{ self.unknown1 = raw[8:16] self.index_type, = struct.unpack('>I', raw[16:20]) self.index_type_desc = {0: 'normal', 2: - 'inflection'}.get(self.index_type, 'unknown') + 'inflection', 6: 'calibre'}.get(self.index_type, 'unknown') self.idxt_start, = struct.unpack('>I', raw[20:24]) self.index_count, = struct.unpack('>I', raw[24:28]) self.index_encoding_num, = struct.unpack('>I', raw[28:32]) @@ -596,10 +596,11 @@ class IndexEntry(object): # {{{ 0x3f : 'article', } - def __init__(self, ident, entry_type, raw, cncx, tagx_entries): + def __init__(self, ident, entry_type, raw, cncx, tagx_entries, flags=0): self.index = ident self.raw = raw self.tags = [] + self.entry_type_raw = entry_type try: self.entry_type = self.TYPES[entry_type] @@ -619,6 +620,24 @@ class IndexEntry(object): # {{{ vals.append(val) self.tags.append(Tag(tag, vals, self.entry_type, cncx)) + if flags & 0b10: + # Look for optional description and author + desc_tag = [t for t in 
tagx_entries if t.tag == 22] + if desc_tag and raw: + val, consumed = decint(raw) + raw = raw[consumed:] + if val: + self.tags.append(Tag(desc_tag[0], [val], self.entry_type, + cncx)) + if flags & 0b100: + aut_tag = [t for t in tagx_entries if t.tag == 23] + if aut_tag and raw: + val, consumed = decint(raw) + raw = raw[consumed:] + if val: + self.tags.append(Tag(aut_tag[0], [val], self.entry_type, + cncx)) + @property def label(self): for tag in self.tags: @@ -669,8 +688,8 @@ class IndexEntry(object): # {{{ return -1 def __str__(self): - ans = ['Index Entry(index=%s, entry_type=%s, length=%d)'%( - self.index, self.entry_type, len(self.tags))] + ans = ['Index Entry(index=%s, entry_type=%s (%s), length=%d)'%( + self.index, self.entry_type, bin(self.entry_type_raw)[2:], len(self.tags))] for tag in self.tags: ans.append('\t'+str(tag)) if self.first_child_index != -1: @@ -723,8 +742,13 @@ class IndexRecord(object): # {{{ next_off = len(indxt) index, consumed = decode_hex_number(indxt[off:]) entry_type = ord(indxt[off+consumed]) + d = 1 + if index_header.index_type == 6: + flags = ord(indxt[off+consumed+d]) + d += 1 self.indices.append(IndexEntry(index, entry_type, - indxt[off+consumed+1:next_off], cncx, index_header.tagx_entries)) + indxt[off+consumed+d:next_off], cncx, + index_header.tagx_entries, flags=flags)) index = self.indices[-1] def get_parent(self, index): diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index e586c446b4..2296a40d14 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -2,6 +2,7 @@ # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai from __future__ import (unicode_literals, division, absolute_import, print_function) +from future_builtins import filter __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' @@ -13,7 +14,7 @@ from collections import OrderedDict from calibre.ebooks import normalize from calibre.ebook.mobi.writer2 import 
RECORD_SIZE -from calibre.ebooks.mobi.utils import encint +from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex) def utf8_text(text): ''' @@ -56,10 +57,6 @@ class CNCX(object): # {{{ self.strings[item.title] = 0 if opts.mobi_periodical: self.strings[item.klass] = 0 - if item.description: - self.strings[item.description] = 0 - if item.author: - self.string[item.author] = 0 self.records = [] offset = 0 @@ -88,6 +85,69 @@ class CNCX(object): # {{{ return self.strings[string] # }}} +class IndexEntry(object): + + TAG_VALUES = { + 'offset': 1, + 'size': 2, + 'label_offset': 3, + 'depth': 4, + 'class_offset': 5, + 'parent_index': 21, + 'first_child_index': 22, + 'last_child_index': 23, + } + RTAG_MAP = dict(TAG_VALUES.itervalues(), TAG_VALUES.iterkeys()) + + BITMASKS = [1, 2, 3, 4, 5, 21, 22, 23,] + + def __init__(self, offset, label_offset, depth=0, class_offset=None): + self.offset, self.label_offset = offset, label_offset + self.depth, self.class_offset = depth, class_offset + + self.length = 0 + self.index = 0 + + self.parent_index = None + self.first_child_index = None + self.last_child_index = None + + @property + def next_offset(self): + return self.offset + self.length + + @property + def tag_nums(self): + for i in range(1, 5): + yield i + for attr in ('class_offset', 'parent_index', 'first_child_index', + 'last_child_index'): + if getattr(self, attr) is not None: + yield self.TAG_VALUES[attr] + + @property + def entry_type(self): + ans = 0 + for tag in self.tag_nums: + ans |= (1 << self.BITMASKS[tag]) # 1 << x == 2**x + return ans + + @property + def bytestring(self): + buf = StringIO() + buf.write(encode_number_as_hex(self.index)) + et = self.entry_type + buf.write(bytes(bytearray([et]))) + + for tag in self.tag_nums: + attr = self.RTAG_MAP[tag] + val = getattr(self, attr) + buf.write(encint(val)) + + ans = buf.get_value() + return ans + + class Indexer(object): def __init__(self, serializer, number_of_text_records, @@ -112,18 +172,152 @@ class 
Indexer(object): self.cncx = CNCX(oeb.toc, opts) if self.is_periodical: - self.create_periodical_index() + indices = self.create_periodical_index() + indices else: raise NotImplementedError() - def create_periodical_index(self): + def create_periodical_index(self): # {{{ periodical_node = iter(self.oeb.toc).next() - sections = tuple(periodical_node) periodical_node_offset = self.serializer.body_start_offset periodical_node_size = (self.serializer.body_end_offset - periodical_node_offset) - periodical_node_size - sections + + normalized_sections = [] + + id_offsets = self.serializer.id_offsets + + periodical = IndexEntry(periodical_node_offset, + self.cncx[periodical_node.title], + class_offset=self.cncx[periodical_node.klass]) + periodical.length = periodical_node_size + periodical.first_child_index = 1 + + seen_sec_offsets = set() + seen_art_offsets = set() + + for sec in periodical_node: + normalized_articles = [] + try: + offset = id_offsets[sec.href] + label = self.cncx[sec.title] + klass = self.cncx[sec.klass] + except: + continue + if offset in seen_sec_offsets: + continue + seen_sec_offsets.add(offset) + section = IndexEntry(offset, label, class_offset=klass, depth=1) + section.parent_index = 0 + for art in sec: + try: + offset = id_offsets[art.href] + label = self.cncx[art.title] + klass = self.cncx[art.klass] + except: + continue + if offset in seen_art_offsets: + continue + seen_art_offsets.add(offset) + article = IndexEntry(offset, label, class_offset=klass, + depth=2) + normalized_articles.append(article) + if normalized_articles: + normalized_articles.sort(key=lambda x:x.offset) + normalized_sections.append((section, normalized_articles)) + + normalized_sections.sort(key=lambda x:x[0].offset) + + # Set lengths + for s, x in enumerate(normalized_sections): + sec, normalized_articles = x + try: + sec.length = normalized_sections[s+1].offset - sec.offset + except: + sec.length = self.serializer.body_end_offset - sec.offset + for i, art in 
enumerate(normalized_articles): + try: + art.length = normalized_articles[i+1].offset - art.offset + except: + art.length = sec.offset + sec.length - art.offset + + # Filter + for i, x in list(enumerate(normalized_sections)): + sec, normalized_articles = x + normalized_articles = list(filter(lambda x: x.length > 0, + normalized_articles)) + normalized_sections[i] = (sec, normalized_articles) + + normalized_sections = list(filter(lambda x: x[0].size > 0 and x[1], + normalized_sections)) + + # Set indices + i = 0 + for sec, normalized_articles in normalized_sections: + i += 1 + sec.index = i + + for sec, normalized_articles in normalized_sections: + for art in normalized_articles: + i += 1 + art.index = i + art.parent_index = sec.index + + for sec, normalized_articles in normalized_sections: + sec.first_child_index = normalized_articles[0].index + sec.last_child_index = normalized_articles[-1].index + + # Set lengths again to close up any gaps left by filtering + for s, x in enumerate(normalized_sections): + sec, articles = x + try: + next_offset = normalized_sections[s+1].offset + except: + next_offset = self.serializer.body_end_offset + sec.length = next_offset - sec.offset + + for a, art in enumerate(articles): + try: + next_offset = articles[a+1].offset + except: + next_offset = sec.next_offset + art.length = next_offset - art.offset + + # Sanity check + for s, x in enumerate(normalized_sections): + sec, articles = x + try: + next_sec = normalized_sections[s+1] + except: + if (sec.length == 0 or sec.next_offset != + self.serializer.body_end_offset): + raise ValueError('Invalid section layout') + else: + if next_sec.offset != sec.next_offset or sec.length == 0: + raise ValueError('Invalid section layout') + for a, art in enumerate(articles): + try: + next_art = articles[a+1] + except: + if (art.length == 0 or art.next_offset != + sec.next_offset): + raise ValueError('Invalid article layout') + else: + if art.length == 0 or art.next_offset != next_art.offset: + 
raise ValueError('Invalid article layout') + + # Flatten + indices = [periodical] + for sec, articles in normalized_sections: + indices.append(sec) + periodical.last_child_index = sec.index + + for sec, articles in normalized_sections: + for a in articles: + indices.append(a) + + return indices + # }}} def create_header(self): buf = StringIO() From 07230e3f964f1558073d52fba9e4eb3e826a7998 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 23 Jul 2011 01:06:29 -0600 Subject: [PATCH 03/51] ... --- src/calibre/ebooks/mobi/debug.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 5b0ecf6f8e..790c8dd45b 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -709,6 +709,7 @@ class IndexRecord(object): # {{{ def __init__(self, record, index_header, cncx): self.record = record raw = self.record.raw + if raw[:4] != b'INDX': raise ValueError('Invalid Primary Index Record') @@ -742,7 +743,7 @@ class IndexRecord(object): # {{{ next_off = len(indxt) index, consumed = decode_hex_number(indxt[off:]) entry_type = ord(indxt[off+consumed]) - d = 1 + d, flags = 1, 0 if index_header.index_type == 6: flags = ord(indxt[off+consumed+d]) d += 1 From fe733d66cf29152d113c6ad5ef60473cfd3f042b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 23 Jul 2011 01:14:55 -0600 Subject: [PATCH 04/51] ... 
--- src/calibre/ebooks/mobi/debug.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 790c8dd45b..b6c7991739 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -638,6 +638,9 @@ class IndexEntry(object): # {{{ self.tags.append(Tag(aut_tag[0], [val], self.entry_type, cncx)) + if raw.replace(b'\x00', b''): + raise ValueError('Extra bytes in INDX table entry %d: %r'%(self.index, raw)) + @property def label(self): for tag in self.tags: From c137537298257c164b9adff1962b0fc0fab1afbe Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sat, 23 Jul 2011 09:27:00 +0100 Subject: [PATCH 05/51] Remove "no items" print statement in quickview --- src/calibre/gui2/dialogs/quickview.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/calibre/gui2/dialogs/quickview.py b/src/calibre/gui2/dialogs/quickview.py index 2215a3cc95..926c884773 100644 --- a/src/calibre/gui2/dialogs/quickview.py +++ b/src/calibre/gui2/dialogs/quickview.py @@ -183,7 +183,6 @@ class Quickview(QDialog, Ui_Quickview): self.items.blockSignals(False) def indicate_no_items(self): - print 'no items' self.no_valid_items = True self.items.clear() self.items.addItem(QListWidgetItem(_('**No items found**'))) From e5469824e211b269f6f104b82711eea8c273ed89 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 23 Jul 2011 09:28:45 -0600 Subject: [PATCH 06/51] Fix an error in the book details panel if the user sets the default author link to blank --- src/calibre/gui2/book_details.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/gui2/book_details.py b/src/calibre/gui2/book_details.py index 07a963c81b..c65b6b5d14 100644 --- a/src/calibre/gui2/book_details.py +++ b/src/calibre/gui2/book_details.py @@ -133,6 +133,7 @@ def render_data(mi, use_roman_numbers=True, all_fields=False): authors = [] formatter = EvalFormatter() for aut in mi.authors: + link = '' if mi.author_link_map[aut]: link = 
mi.author_link_map[aut] elif gprefs.get('default_author_link'): From 18a81ee8be12839639d411318d885e7555356ffd Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 23 Jul 2011 09:29:25 -0600 Subject: [PATCH 07/51] ... --- src/calibre/devices/android/driver.py | 2 +- src/calibre/ebooks/mobi/debug.py | 7 +++- src/calibre/ebooks/mobi/writer2/indexer.py | 44 +++++++++++++++++++++- 3 files changed, 49 insertions(+), 4 deletions(-) diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index d26489c42f..a12f37c7eb 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -128,7 +128,7 @@ class ANDROID(USBMS): '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2', 'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK', 'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612', - 'GT-S5830_CARD'] + 'GT-S5830_CARD', 'GT-S5570_CARD'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index b6c7991739..514cf9c246 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -638,7 +638,7 @@ class IndexEntry(object): # {{{ self.tags.append(Tag(aut_tag[0], [val], self.entry_type, cncx)) - if raw.replace(b'\x00', b''): + if raw.replace(b'\x00', b''): # There can be padding null bytes raise ValueError('Extra bytes in INDX table entry %d: %r'%(self.index, raw)) @property @@ -736,6 +736,9 @@ class IndexRecord(object): # {{{ for i in range(self.idxt_count): off, = u(b'>H', indices[i*2:(i+1)*2]) self.index_offsets.append(off-192) + rest = indices[(i+1)*2:] + if rest.replace(b'\0', ''): # There can be padding null bytes + raise ValueError('Extra bytes after IDXT table: %r'%rest) indxt = raw[192:self.idxt_offset] self.indices = [] @@ -772,7 
+775,7 @@ class IndexRecord(object): # {{{ len(w), not bool(w.replace(b'\0', b'')) )) a('Header length: %d'%self.header_length) u(self.unknown1) - a('Header Type: %d'%self.header_type) + a('Unknown (header type? index record number? always 1?): %d'%self.header_type) u(self.unknown2) a('IDXT Offset: %d'%self.idxt_offset) a('IDXT Count: %d'%self.idxt_count) diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index 2296a40d14..48b1d82c04 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -173,10 +173,52 @@ class Indexer(object): if self.is_periodical: indices = self.create_periodical_index() - indices else: raise NotImplementedError() + self.records.append(self.create_index_record(indices)) + + def create_index_record(self, indices): + header_length = 192 + buf = StringIO() + + # Write index entries + offsets = [] + for i in indices: + offsets.append(buf.tell()) + buf.write(i.bytestring) + index_block = align_block(buf.getvalue()) + + # Write offsets to index entries as an IDXT block + idxt_block = b'IDXT' + buf.truncate(0) + for offset in offsets: + buf.write(pack(b'>H', header_length+offset)) + idxt_block = align_block(idxt_block + buf.getvalue()) + body = index_block + idxt_block + + header = b'INDX' + buf.truncate(0) + buf.write(pack(b'>I', header_length)) + buf.write(b'\0'*4) # Unknown + buf.write(pack(b'>I', 1)) # Header type? Or index record number? 
+ buf.write(b'\0'*4) # Unknown + # IDXT block offset + buf.write(pack(b'>I', header_length + len(index_block))) + # Number of index entries + buf.write(pack(b'>I', len(offsets))) + # Unknown + buf.write(b'\xff'*8) + # Unknown + buf.write(b'\0'*156) + + header += buf.getvalue() + + ans = header + body + if len(ans) > 0x10000: + raise ValueError('Too many entries (%d) in the TOC'%len(offsets)) + return ans + def create_periodical_index(self): # {{{ periodical_node = iter(self.oeb.toc).next() periodical_node_offset = self.serializer.body_start_offset From e0be0d844d091bed368f5e7d01f961836922e5f8 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sat, 23 Jul 2011 16:37:01 +0100 Subject: [PATCH 08/51] Fix exception arising when someone empties the author link template box. --- src/calibre/gui2/book_details.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/gui2/book_details.py b/src/calibre/gui2/book_details.py index 07a963c81b..ebb7c9d5ed 100644 --- a/src/calibre/gui2/book_details.py +++ b/src/calibre/gui2/book_details.py @@ -143,6 +143,8 @@ def render_data(mi, use_roman_numbers=True, all_fields=False): vals['author_sort'] = aut.replace(' ', '+') link = formatter.safe_format( gprefs.get('default_author_link'), vals, '', vals) + else: + link = '' if link: link = prepare_string_for_xml(link) authors.append(u'%s'%(link, aut)) From 16ad2c9f02795042a01d7214508b20c464c95f2c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 23 Jul 2011 12:07:41 -0600 Subject: [PATCH 09/51] ... 
--- recipes/guardian.recipe | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/recipes/guardian.recipe b/recipes/guardian.recipe index c5021cb91d..124820d0a1 100644 --- a/recipes/guardian.recipe +++ b/recipes/guardian.recipe @@ -12,7 +12,7 @@ from datetime import date class Guardian(BasicNewsRecipe): - title = u'The Guardian / The Observer' + title = u'The Guardian and The Observer' if date.today().weekday() == 6: base_url = "http://www.guardian.co.uk/theobserver" else: @@ -28,7 +28,7 @@ class Guardian(BasicNewsRecipe): # List of section titles to ignore # For example: ['Sport'] ignore_sections = [] - + timefmt = ' [%a, %d %b %Y]' keep_only_tags = [ dict(name='div', attrs={'id':["content","article_header","main-article-info",]}), @@ -94,7 +94,7 @@ class Guardian(BasicNewsRecipe): prefix = section_title + ': ' for subsection in s.parent.findAll('a', attrs={'class':'book-section'}): yield (prefix + self.tag_to_string(subsection), subsection['href']) - + def find_articles(self, url): soup = self.index_to_soup(url) div = soup.find('div', attrs={'class':'book-index'}) @@ -115,7 +115,7 @@ class Guardian(BasicNewsRecipe): 'title': title, 'url':url, 'description':desc, 'date' : strftime('%a, %d %b'), } - + def parse_index(self): try: feeds = [] From 3f226c85bbb52cd11b631c88641b13137e82436a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 23 Jul 2011 16:44:35 -0600 Subject: [PATCH 10/51] Fix #815224 (UnicodeEncodeError on start due to store StoreChitankaStore name value) --- src/calibre/gui2/store/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/store/__init__.py b/src/calibre/gui2/store/__init__.py index d58ccbda84..ae42d82032 100644 --- a/src/calibre/gui2/store/__init__.py +++ b/src/calibre/gui2/store/__init__.py @@ -6,6 +6,8 @@ __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' +from calibre.utils.filenames import ascii_filename + class 
StorePlugin(object): # {{{ ''' A plugin representing an online ebook repository (store). The store can @@ -43,7 +45,7 @@ class StorePlugin(object): # {{{ The easiest way to handle affiliate money payouts is to randomly select between the author's affiliate id and calibre's affiliate id so that 70% of the time the author's id is used. - + See declined.txt for a list of stores that do not want to be included. ''' @@ -53,7 +55,7 @@ class StorePlugin(object): # {{{ self.gui = gui self.name = name self.base_plugin = None - self.config = JSONConfig('store/stores/' + self.name) + self.config = JSONConfig('store/stores/' + ascii_filename(self.name)) def open(self, gui, parent=None, detail_item=None, external=False): ''' From 1443197753943df0c1e1846d97cd6df60a6cbf4b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 23 Jul 2011 18:06:18 -0600 Subject: [PATCH 11/51] ... --- src/calibre/gui2/update.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/gui2/update.py b/src/calibre/gui2/update.py index f76d4b8e65..caa1d3f3dc 100644 --- a/src/calibre/gui2/update.py +++ b/src/calibre/gui2/update.py @@ -15,6 +15,7 @@ from calibre.gui2 import config, dynamic, open_url from calibre.gui2.dialogs.plugin_updater import get_plugin_updates_available URL = 'http://status.calibre-ebook.com/latest' +#URL = 'http://localhost:8000/latest' NO_CALIBRE_UPDATE = '-0.0.0' VSEP = '|' From 38ef36d69eb894f281952330dc87042392d4935a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 23 Jul 2011 23:06:48 -0600 Subject: [PATCH 12/51] Fix #814964 (error message when doing bulk edit) --- src/calibre/db/tables.py | 7 +++++-- src/calibre/library/sqlite.py | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/calibre/db/tables.py b/src/calibre/db/tables.py index b75effff4b..fa7b001851 100644 --- a/src/calibre/db/tables.py +++ b/src/calibre/db/tables.py @@ -12,7 +12,7 @@ from datetime import datetime from dateutil.tz import tzoffset from calibre.constants import 
plugins -from calibre.utils.date import parse_date, local_tz +from calibre.utils.date import parse_date, local_tz, UNDEFINED_DATE from calibre.ebooks.metadata import author_to_author_sort _c_speedup = plugins['speedup'][0] @@ -29,8 +29,11 @@ def _c_convert_timestamp(val): if ret is None: return parse_date(val, as_utc=False) year, month, day, hour, minutes, seconds, tzsecs = ret - return datetime(year, month, day, hour, minutes, seconds, + try: + return datetime(year, month, day, hour, minutes, seconds, tzinfo=tzoffset(None, tzsecs)).astimezone(local_tz) + except OverflowError: + return UNDEFINED_DATE.astimezone(local_tz) class Table(object): diff --git a/src/calibre/library/sqlite.py b/src/calibre/library/sqlite.py index a2a85806f5..b5917f1a55 100644 --- a/src/calibre/library/sqlite.py +++ b/src/calibre/library/sqlite.py @@ -17,7 +17,7 @@ from datetime import datetime from functools import partial from calibre.ebooks.metadata import title_sort, author_to_author_sort -from calibre.utils.date import parse_date, isoformat, local_tz +from calibre.utils.date import parse_date, isoformat, local_tz, UNDEFINED_DATE from calibre import isbytestring, force_unicode from calibre.constants import iswindows, DEBUG, plugins from calibre.utils.icu import strcmp @@ -39,8 +39,11 @@ def _c_convert_timestamp(val): if ret is None: return parse_date(val, as_utc=False) year, month, day, hour, minutes, seconds, tzsecs = ret - return datetime(year, month, day, hour, minutes, seconds, + try: + return datetime(year, month, day, hour, minutes, seconds, tzinfo=tzoffset(None, tzsecs)).astimezone(local_tz) + except OverflowError: + return UNDEFINED_DATE.astimezone(local_tz) def _py_convert_timestamp(val): if val: From d212512317d476b4b4f39700c247c16f408db1fe Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sun, 24 Jul 2011 16:29:42 +0100 Subject: [PATCH 13/51] Fix typo in search section --- src/calibre/manual/gui.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/calibre/manual/gui.rst b/src/calibre/manual/gui.rst index 520206343f..23813a27a8 100755 --- a/src/calibre/manual/gui.rst +++ b/src/calibre/manual/gui.rst @@ -401,7 +401,7 @@ with undefined values in the column. Searching for ``true`` will find all books values in the column. Searching for ``yes`` or ``checked`` will find all books with ``Yes`` in the column. Searching for ``no`` or ``unchecked`` will find all books with ``No`` in the column. -Hierarchical items (e.g. A.B.C) use an extended syntax to match initial parts of the hierarchy. This is done by adding a period between the exact match indicator (=) and the text. For example, the query ``tags:=.A`` will find the tags `A` and `A.B`, but will not find the tags `AA` or `AA.B`. The query ``tags:=.A.B`` will find the tags `A.B` and `A.C`, but not the tag `A`. +Hierarchical items (e.g. A.B.C) use an extended syntax to match initial parts of the hierarchy. This is done by adding a period between the exact match indicator (=) and the text. For example, the query ``tags:=.A`` will find the tags `A` and `A.B`, but will not find the tags `AA` or `AA.B`. The query ``tags:=.A.B`` will find the tags `A.B` and `A.B.C`, but not the tag `A`. Identifiers (e.g., isbn, doi, lccn etc) also use an extended syntax. First, note that an identifier has the form ``type:value``, as in ``isbn:123456789``. The extended syntax permits you to specify independently which type and value to search for. Both the type and the value parts of the query can use `equality`, `contains`, or `regular expression` matches. 
Examples: From 49d99ff2006de14d10b3aa00a28d8365729dde90 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 24 Jul 2011 14:25:22 -0600 Subject: [PATCH 14/51] Updated Corren --- recipes/corren2.recipe | 63 +++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 34 deletions(-) diff --git a/recipes/corren2.recipe b/recipes/corren2.recipe index 494be88f10..f53da20fd1 100644 --- a/recipes/corren2.recipe +++ b/recipes/corren2.recipe @@ -1,39 +1,34 @@ +# -*- coding: utf-8 -*- + +__license__ = 'GPLv3' + from calibre.web.feeds.news import BasicNewsRecipe -class AdvancedUserRecipe1255797795(BasicNewsRecipe): - title = u'Corren' - language = 'sv' - __author__ = 'Jonas Svensson' - simultaneous_downloads = 1 - no_stylesheets = True - oldest_article = 7 +class AdvancedUserRecipe1311446032(BasicNewsRecipe): + title = 'Corren' + __author__ = 'Jonas Svensson' + description = 'News from Sweden' + publisher = 'Corren' + category = 'news, politics, Sweden' + oldest_article = 2 + delay = 1 max_articles_per_feed = 100 - remove_attributes = ['onload'] - timefmt = '' + no_stylesheets = True + use_embedded_content = False + encoding = 'iso-8859-1' + language = 'sv' - feeds = [ - (u'Toppnyheter (alla kategorier)', u'http://www.corren.se/inc/RssHandler.ashx?id=4122151&ripurl=http://www.corren.se/nyheter/'), - (u'Bostad', u'http://www.corren.se/inc/RssHandler.ashx?id=4122174&ripurl=http://www.corren.se/bostad/'), - (u'Ekonomi & Jobb', u'http://www.corren.se/inc/RssHandler.ashx?id=4122176&ripurl=http://www.corren.se/ekonomi/'), - (u'Kultur & Nöje', u'http://www.corren.se/inc/RssHandler.ashx?id=4122192&ripurl=http://www.corren.se/kultur/'), - (u'Mat & dryck', u'http://www.corren.se/inc/RssHandler.ashx?id=4122201&ripurl=http://www.corren.se/mat-dryck/'), - (u'Motor', u'http://www.corren.se/inc/RssHandler.ashx?id=4122203&ripurl=http://www.corren.se/motor/'), - (u'Sport', u'http://www.corren.se/inc/RssHandler.ashx?id=4122206&ripurl=http://www.corren.se/sport/'), - 
(u'Åsikter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122223&ripurl=http://www.corren.se/asikter/'), - (u'Mjölby', u'http://www.corren.se/inc/RssHandler.ashx?id=4122235&ripurl=http://www.corren.se/ostergotland/mjolby/'), - (u'Motala', u'http://www.corren.se/inc/RssHandler.ashx?id=4122236&ripurl=http://www.corren.se/ostergotland/motala/') - ] - - def print_version(self, url): - url = url.replace("ekonomi/artikel.aspx", "Print.aspx") - url = url.replace("bostad/artikel.aspx", "Print.aspx") - url = url.replace("kultur/artikel.aspx", "Print.aspx") - url = url.replace("motor/artikel.aspx", "Print.aspx") - url = url.replace("mat-dryck/artikel.aspx", "Print.aspx") - url = url.replace("sport/artikel.aspx", "Print.aspx") - url = url.replace("asikter/artikel.aspx", "Print.aspx") - url = url.replace("mat-dryck/artikel.aspx", "Print.aspx") - url = url.replace("ostergotland/mjolby/artikel.aspx", "Print.aspx") - url = url.replace("ostergotland/motala/artikel.aspx", "Print.aspx") - return url.replace("nyheter/artikel.aspx", "Print.aspx") + feeds = [ + (u'Toppnyheter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122151&ripurl=http://www.corren.se/nyheter/') + ,(u'Ekonomi', u'http://www.corren.se/inc/RssHandler.ashx?id=4122176&ripurl=http://www.corren.se/ekonomi/') + ,(u'Link\xf6ping', u'http://www.corren.se/inc/RssHandler.ashx?id=4122234') + ,(u'Åsikter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122223,4122224,4122226,4122227,4122228,4122229,4122230') + ] + keep_only_tags = [dict(name='div', attrs={'id':'article'}),dict(name='div', attrs={'class':'body'})] + remove_tags = [ + dict(name='ul',attrs={'class':'functions'}) + ,dict(name='a',attrs={'href':'javascript*'}) + ,dict(name='div',attrs={'class':'box'}) + ,dict(name='div',attrs={'class':'functionsbottom'}) + ] From 48b50a2e478108b26b1991477396c70e69231689 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 24 Jul 2011 14:28:13 -0600 Subject: [PATCH 15/51] Dagens Industri by Jonas Svensson --- 
recipes/dagens_industri.recipe | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 recipes/dagens_industri.recipe diff --git a/recipes/dagens_industri.recipe b/recipes/dagens_industri.recipe new file mode 100644 index 0000000000..c9b60c72b1 --- /dev/null +++ b/recipes/dagens_industri.recipe @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- + +__license__ = 'GPLv3' + +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1311450855(BasicNewsRecipe): + title = u'Dagens Industri' + __author__ = 'Jonas Svensson' + description = 'Economy news from Sweden' + publisher = 'DI' + category = 'news, politics, Sweden' + oldest_article = 2 + delay = 1 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + language = 'sv' + + feeds = [(u'DI', u'http://di.se/rss')] + + keep_only_tags = [dict(name='h1', attrs={'id':'ctl00_ExtraWideContentRegion_WideContentRegion_MainRegion_MainContentRegion_MainBodyRegion_headlineNormal'}),dict(name='div', attrs={'id':'articleBody'})] + + remove_tags = [ + dict(name='div',attrs={'class':'article-actions clear'}) + ,dict(name='div',attrs={'class':'article-action-popup'}) + ,dict(name='div',attrs={'class':'header'}) + ,dict(name='div',attrs={'class':'content clear'}) + ,dict(name='div',attrs={'id':'articleAdvertisementDiv'}) + ,dict(name='ul',attrs={'class':'action-list'}) + ] From 5e7c3b5b3586061f663170c3a59eccfa4b93d243 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 24 Jul 2011 15:49:08 -0600 Subject: [PATCH 16/51] New MOBI writer: Finish up creation of all index records for periodicals --- src/calibre/ebooks/mobi/debug.py | 1 + src/calibre/ebooks/mobi/writer2/indexer.py | 124 +++++++++++++++++++-- src/calibre/ebooks/mobi/writer2/main.py | 3 + 3 files changed, 119 insertions(+), 9 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 514cf9c246..a848f11355 100644 --- 
a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -399,6 +399,7 @@ class IndexHeader(object): # {{{ def __init__(self, record): self.record = record raw = self.record.raw + #open('/t/index_header.bin', 'wb').write(raw) if raw[:4] != b'INDX': raise ValueError('Invalid Primary Index Record') diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index 48b1d82c04..e2428a2dd2 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -15,6 +15,7 @@ from collections import OrderedDict from calibre.ebooks import normalize from calibre.ebook.mobi.writer2 import RECORD_SIZE from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex) +from calibre.ebooks.mobi.langcodes import iana2mobi def utf8_text(text): ''' @@ -85,7 +86,7 @@ class CNCX(object): # {{{ return self.strings[string] # }}} -class IndexEntry(object): +class IndexEntry(object): # {{{ TAG_VALUES = { 'offset': 1, @@ -112,6 +113,35 @@ class IndexEntry(object): self.first_child_index = None self.last_child_index = None + @classmethod + def tagx_block(cls, for_periodical=True): + buf = bytearray() + + def add_tag(tag, num_values=1): + buf.append(tag) + buf.append(num_values) + # bitmask + buf.append(1 << (cls.BITMASKS.index(tag))) + # eof + buf.append(0) + + for tag in xrange(1, 5): + add_tag(tag) + + if for_periodical: + for tag in (5, 21, 22, 23): + add_tag(tag) + + # End of TAGX record + for i in xrange(3): buf.append(0) + buf.append(1) + + header = b'TAGX' + header += pack(b'>I', len(buf)) # table length + header += pack(b'>I', 1) # control byte count + + return header + bytes(buf) + @property def next_offset(self): return self.offset + self.length @@ -147,6 +177,7 @@ class IndexEntry(object): ans = buf.get_value() return ans +# }}} class Indexer(object): @@ -172,15 +203,18 @@ class Indexer(object): self.cncx = CNCX(oeb.toc, opts) if self.is_periodical: - indices = 
self.create_periodical_index() + self.indices = self.create_periodical_index() else: raise NotImplementedError() - self.records.append(self.create_index_record(indices)) + self.records.append(self.create_index_record()) + self.records.insert(0, self.create_header()) + self.records.extend(self.cncx.records) - def create_index_record(self, indices): + def create_index_record(self): # {{{ header_length = 192 buf = StringIO() + indices = self.indices # Write index entries offsets = [] @@ -218,6 +252,7 @@ class Indexer(object): if len(ans) > 0x10000: raise ValueError('Too many entries (%d) in the TOC'%len(offsets)) return ans + # }}} def create_periodical_index(self): # {{{ periodical_node = iter(self.oeb.toc).next() @@ -361,14 +396,85 @@ class Indexer(object): return indices # }}} - def create_header(self): + def create_header(self): # {{{ buf = StringIO() + tagx_block = IndexEntry.tagx_block(self.is_periodical) + header_length = 192 - # Ident + # Ident 0 - 4 buf.write(b'INDX') - # Header length - buf.write(pack(b'>I', 192)) + # Header length 4 - 8 + buf.write(pack(b'>I', header_length)) - # Index type: 0 - normal, 2 - inflection + # Unknown 8-16 + buf.write(b'\0'*8) + + # Index type: 0 - normal, 2 - inflection 16 - 20 buf.write(pack(b'>I', 2)) + + # IDXT offset 20-24 + buf.write(pack(b'>I', 0)) # Filled in later + + # Number of index records 24-28 + buf.write(pack('b>I', len(self.records))) + + # Index Encoding 28-32 + buf.write(pack(b'>I', 65001)) # utf-8 + + # Index language 32-36 + buf.write(iana2mobi( + str(self.oeb.metadata.language[0]))) + + # Number of index entries 36-40 + buf.write(pack(b'>I', len(self.indices))) + + # ORDT offset 40-44 + buf.write(pack(b'>I', 0)) + + # LIGT offset 44-48 + buf.write(pack(b'>I', 0)) + + # Number of LIGT entries 48-52 + buf.write(pack(b'>I', 0)) + + # Number of CNCX records 52-56 + buf.write(pack(b'>I', len(self.cncx.records))) + + # Unknown 56-180 + buf.write(b'\0'*124) + + # TAGX offset 180-184 + buf.write(pack(b'>I', 
header_length)) + + # Unknown 184-192 + buf.write(b'\0'*8) + + # TAGX block + buf.write(tagx_block) + + num = len(self.indices) + + # The index of the last entry in the NCX + buf.write(encode_number_as_hex(num-1)) + + # The number of entries in the NCX + buf.write(pack(b'>H', num)) + + # Padding + pad = (4 - (buf.tell()%4))%4 + if pad: + buf.write(b'\0'*pad) + + idxt_offset = buf.tell() + + buf.write(b'IDXT') + buf.write(header_length + len(tagx_block)) + buf.write(b'\0') + buf.seek(20) + buf.write(pack(b'>I', idxt_offset)) + + return align_block(buf.getvalue()) + # }}} + + diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index ea67007168..a031e2e957 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -93,6 +93,9 @@ class MobiWriter(object): self.opts, self.oeb) except: self.log.exception('Failed to generate MOBI index:') + else: + self.primary_index_record_idx = len(self.records) + self.records.extend(self.indexer.records) # }}} def write_uncrossable_breaks(self): # {{{ From 8f40166b9dc52ba867611a60e565bdf0b2d242ee Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 24 Jul 2011 16:04:33 -0600 Subject: [PATCH 17/51] New MOBI writer: Create index records for books --- src/calibre/ebooks/mobi/writer2/indexer.py | 212 +++++++++++++-------- 1 file changed, 129 insertions(+), 83 deletions(-) diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index e2428a2dd2..501b23113f 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -205,7 +205,7 @@ class Indexer(object): if self.is_periodical: self.indices = self.create_periodical_index() else: - raise NotImplementedError() + self.indices = self.create_book_index() self.records.append(self.create_index_record()) self.records.insert(0, self.create_header()) @@ -254,6 +254,134 @@ class Indexer(object): return ans # }}} + def 
create_header(self): # {{{ + buf = StringIO() + tagx_block = IndexEntry.tagx_block(self.is_periodical) + header_length = 192 + + # Ident 0 - 4 + buf.write(b'INDX') + + # Header length 4 - 8 + buf.write(pack(b'>I', header_length)) + + # Unknown 8-16 + buf.write(b'\0'*8) + + # Index type: 0 - normal, 2 - inflection 16 - 20 + buf.write(pack(b'>I', 2)) + + # IDXT offset 20-24 + buf.write(pack(b'>I', 0)) # Filled in later + + # Number of index records 24-28 + buf.write(pack('b>I', len(self.records))) + + # Index Encoding 28-32 + buf.write(pack(b'>I', 65001)) # utf-8 + + # Index language 32-36 + buf.write(iana2mobi( + str(self.oeb.metadata.language[0]))) + + # Number of index entries 36-40 + buf.write(pack(b'>I', len(self.indices))) + + # ORDT offset 40-44 + buf.write(pack(b'>I', 0)) + + # LIGT offset 44-48 + buf.write(pack(b'>I', 0)) + + # Number of LIGT entries 48-52 + buf.write(pack(b'>I', 0)) + + # Number of CNCX records 52-56 + buf.write(pack(b'>I', len(self.cncx.records))) + + # Unknown 56-180 + buf.write(b'\0'*124) + + # TAGX offset 180-184 + buf.write(pack(b'>I', header_length)) + + # Unknown 184-192 + buf.write(b'\0'*8) + + # TAGX block + buf.write(tagx_block) + + num = len(self.indices) + + # The index of the last entry in the NCX + buf.write(encode_number_as_hex(num-1)) + + # The number of entries in the NCX + buf.write(pack(b'>H', num)) + + # Padding + pad = (4 - (buf.tell()%4))%4 + if pad: + buf.write(b'\0'*pad) + + idxt_offset = buf.tell() + + buf.write(b'IDXT') + buf.write(header_length + len(tagx_block)) + buf.write(b'\0') + buf.seek(20) + buf.write(pack(b'>I', idxt_offset)) + + return align_block(buf.getvalue()) + # }}} + + def create_book_index(self): # {{{ + indices = [] + seen = set() + id_offsets = self.serializer.id_offsets + + for node in self.oeb.toc.iterdescendants(): + try: + offset = id_offsets[node.href] + label = self.cncx[node.title] + except: + self.log.warn('TOC item %s not found in document'%node.href) + continue + if offset in seen: + 
continue + seen.add(offset) + index = IndexEntry(offset, label) + self.indices.append(index) + + indices.sort(key=lambda x:x.offset) + + # Set lengths + for i, index in indices: + try: + next_offset = indices[i+1].offset + except: + next_offset = self.serializer.body_end_offset + index.length = next_offset - index.offset + + # Remove empty nodes + indices = [i for i in indices if i.length > 0] + + # Set index values + for i, index in indices: + index.index = i + + # Set lengths again to close up any gaps left by filtering + for i, index in indices: + try: + next_offset = indices[i+1].offset + except: + next_offset = self.serializer.body_end_offset + index.length = next_offset - index.offset + + return indices + + # }}} + def create_periodical_index(self): # {{{ periodical_node = iter(self.oeb.toc).next() periodical_node_offset = self.serializer.body_start_offset @@ -396,85 +524,3 @@ class Indexer(object): return indices # }}} - def create_header(self): # {{{ - buf = StringIO() - tagx_block = IndexEntry.tagx_block(self.is_periodical) - header_length = 192 - - # Ident 0 - 4 - buf.write(b'INDX') - - # Header length 4 - 8 - buf.write(pack(b'>I', header_length)) - - # Unknown 8-16 - buf.write(b'\0'*8) - - # Index type: 0 - normal, 2 - inflection 16 - 20 - buf.write(pack(b'>I', 2)) - - # IDXT offset 20-24 - buf.write(pack(b'>I', 0)) # Filled in later - - # Number of index records 24-28 - buf.write(pack('b>I', len(self.records))) - - # Index Encoding 28-32 - buf.write(pack(b'>I', 65001)) # utf-8 - - # Index language 32-36 - buf.write(iana2mobi( - str(self.oeb.metadata.language[0]))) - - # Number of index entries 36-40 - buf.write(pack(b'>I', len(self.indices))) - - # ORDT offset 40-44 - buf.write(pack(b'>I', 0)) - - # LIGT offset 44-48 - buf.write(pack(b'>I', 0)) - - # Number of LIGT entries 48-52 - buf.write(pack(b'>I', 0)) - - # Number of CNCX records 52-56 - buf.write(pack(b'>I', len(self.cncx.records))) - - # Unknown 56-180 - buf.write(b'\0'*124) - - # TAGX offset 
180-184 - buf.write(pack(b'>I', header_length)) - - # Unknown 184-192 - buf.write(b'\0'*8) - - # TAGX block - buf.write(tagx_block) - - num = len(self.indices) - - # The index of the last entry in the NCX - buf.write(encode_number_as_hex(num-1)) - - # The number of entries in the NCX - buf.write(pack(b'>H', num)) - - # Padding - pad = (4 - (buf.tell()%4))%4 - if pad: - buf.write(b'\0'*pad) - - idxt_offset = buf.tell() - - buf.write(b'IDXT') - buf.write(header_length + len(tagx_block)) - buf.write(b'\0') - buf.seek(20) - buf.write(pack(b'>I', idxt_offset)) - - return align_block(buf.getvalue()) - # }}} - - From 9b1f09cc9e750728df535b9bb50ff0441c22ea59 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 25 Jul 2011 00:48:08 -0600 Subject: [PATCH 18/51] ... --- src/calibre/ebooks/mobi/writer2/indexer.py | 59 +++++++++++++++++++++- src/calibre/ebooks/mobi/writer2/main.py | 6 +++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index 501b23113f..41c5d2ec91 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -179,7 +179,27 @@ class IndexEntry(object): # {{{ # }}} -class Indexer(object): +class TBS(object): # {{{ + + ''' + Take the list of index nodes starting/ending on a record and calculate the + trailing byte sequence for the record. 
+ ''' + + def __init__(self, data, is_periodical): + if is_periodical: + self.periodical_tbs(data) + else: + self.book_tbs(data) + + def periodical_tbs(self, data): + self.bytestring = b'' + + def book_tbs(self, data): + self.bytestring = b'' +# }}} + +class Indexer(object): # {{{ def __init__(self, serializer, number_of_text_records, size_of_last_text_record, opts, oeb): @@ -211,6 +231,8 @@ class Indexer(object): self.records.insert(0, self.create_header()) self.records.extend(self.cncx.records) + self.calculate_trailing_byte_sequences() + def create_index_record(self): # {{{ header_length = 192 buf = StringIO() @@ -524,3 +546,38 @@ class Indexer(object): return indices # }}} + def calculate_trailing_byte_sequences(self): + self.tbs_map = {} + for i in xrange(self.number_of_text_records): + offset = i * RECORD_SIZE + next_offset = offset + RECORD_SIZE + data = OrderedDict([('ends',[]), ('completes',[]), ('starts',[]), + ('spans', None)]) + for index in self.indices: + if index.offset >= next_offset: + # Node starts after current record + break + if index.next_offset <= offset: + # Node ends before current record + continue + if index.offset >= offset: + # Node starts in current record + if index.next_offset <= next_offset: + # Node ends in current record + data['completes'].append(index) + else: + data['starts'].append(index) + else: + # Node starts before current records + if index.next_offset <= next_offset: + # Node ends in current record + data['ends'].append(index) + else: + data['spans'] = index + self.tbs_map[i+1] = TBS(data, self.is_periodical) + + def get_trailing_byte_sequence(self, num): + return self.tbs_map[num].bytestring + +# }}} + diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index a031e2e957..06572f48c4 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -95,7 +95,13 @@ class MobiWriter(object): self.log.exception('Failed to generate MOBI index:') 
else: self.primary_index_record_idx = len(self.records) + for i in xrange(len(self.records)): + if i == 0: continue + tbs = self.indexer.get_trailing_byte_sequence(i) + self.records[i] += tbs self.records.extend(self.indexer.records) + + # }}} def write_uncrossable_breaks(self): # {{{ From 68632c18531168ed5cb169e7eccb5eacf0036232 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 25 Jul 2011 03:08:03 -0600 Subject: [PATCH 19/51] ... --- src/calibre/ebooks/mobi/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/utils.py b/src/calibre/ebooks/mobi/utils.py index cf03c613f4..ae1241e2f1 100644 --- a/src/calibre/ebooks/mobi/utils.py +++ b/src/calibre/ebooks/mobi/utils.py @@ -39,7 +39,7 @@ def encode_number_as_hex(num): The bytes that follow are simply the hexadecimal representation of the number. ''' - num = bytes(hex(num)[2:]) + num = bytes(hex(num)[2:].upper()) ans = bytearray(num) ans.insert(0, len(num)) return bytes(ans) From 6867bde932f59d68f1e223d4428270e1bc4fd5c2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 25 Jul 2011 09:52:08 -0600 Subject: [PATCH 20/51] Fix #815971 (Garbage in Job Detail List) --- src/calibre/utils/ipc/job.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/utils/ipc/job.py b/src/calibre/utils/ipc/job.py index f4b54aee95..e75884d387 100644 --- a/src/calibre/utils/ipc/job.py +++ b/src/calibre/utils/ipc/job.py @@ -141,7 +141,8 @@ class BaseJob(object): def log_file(self): if self.log_path: return open(self.log_path, 'rb') - return cStringIO.StringIO(_('No details available.')) + return cStringIO.StringIO(_('No details available.').encode('utf-8', + 'replace')) @property def details(self): From c59e0cb9aa5526877c8e92ad646ef6b864fb4c0b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 25 Jul 2011 10:47:16 -0600 Subject: [PATCH 21/51] Improved Instapaper --- recipes/instapaper.recipe | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git 
a/recipes/instapaper.recipe b/recipes/instapaper.recipe index c6175a783f..d182e556a2 100644 --- a/recipes/instapaper.recipe +++ b/recipes/instapaper.recipe @@ -43,7 +43,7 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe): lfeeds = self.get_feeds() for feedobj in lfeeds: feedtitle, feedurl = feedobj - self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) + self.report_progress(0, 'Fetching feed'+' %s...'%(feedtitle if feedtitle else feedurl)) articles = [] soup = self.index_to_soup(feedurl) for item in soup.findAll('div', attrs={'class':'cornerControls'}): @@ -63,3 +63,8 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe): def populate_article_metadata(self, article, soup, first): article.title = soup.find('title').contents[0].strip() + def postprocess_html(self, soup, first_fetch): + for link_tag in soup.findAll(attrs={"id" : "story"}): + link_tag.insert(0,'

'+soup.find('title').contents[0].strip()+'

') + + return soup From 1f59369d64f4640a8e40bbae3ac55a478faa32fa Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 25 Jul 2011 14:41:48 -0600 Subject: [PATCH 22/51] ... --- src/calibre/ebooks/mobi/debug.py | 85 +++++++++++++++++++------------- 1 file changed, 51 insertions(+), 34 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index a848f11355..b85d73f55c 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -12,7 +12,7 @@ from collections import OrderedDict, defaultdict from calibre.utils.date import utc_tz from calibre.ebooks.mobi.langcodes import main_language, sub_language from calibre.ebooks.mobi.utils import (decode_hex_number, decint, - get_trailing_data) + get_trailing_data, decode_fvwi) from calibre.utils.magick.draw import identify_data # PalmDB {{{ @@ -964,7 +964,8 @@ class TBSIndexing(object): # {{{ byts = byts[consumed:] ans.append('Unknown (vwi: always 0?): %d'%arg1) if self.doc_type in (257, 259): # Hierarchical periodical - byts, a = self.interpret_periodical(tbs_type, byts) + byts, a = self.interpret_periodical(tbs_type, byts, + dat['geom'][0]) ans += a if byts: sbyts = tuple(hex(b)[2:] for b in byts) @@ -973,7 +974,7 @@ class TBSIndexing(object): # {{{ ans.append('') return tbs_type, ans - def interpret_periodical(self, tbs_type, byts): + def interpret_periodical(self, tbs_type, byts, record_offset): ans = [] def tbs_type_6(byts, psi=None, msg=None, fmsg='Unknown'): # {{{ @@ -1014,6 +1015,50 @@ class TBSIndexing(object): # {{{ # }}} + def read_section_transitions(byts, psi=None): # {{{ + if psi is None: + # Assume parent section is 1 + psi = self.get_index(1) + + while byts: + ai, flags, consumed = decode_fvwi(byts) + byts = byts[consumed:] + if flags & 0b1000: + nsi = self.get_index(psi.index+1) + ans.append('Last article in this record of section %d' + ' (relative to next section index [%d]): ' + '%d [%d absolute index]'%(psi.index, nsi.index, ai, + 
ai+nsi.index)) + psi = nsi + continue + + ans.append('First article in this record of section %d' + ' (relative to its parent section): ' + '%d [%d absolute index]'%(psi.index, ai, ai+psi.index)) + + if flags == 0: + ans.append('The section %d has only one article' + ' in this record'%psi.index) + continue + + if flags & 0b0100: + num = byts[0] + byts = byts[1:] + ans.append('Number of articles in this record of ' + 'section %d: %d'%(psi.index, num)) + + if flags & 0b0010: + raise ValueError( + 'Dont know how to interpret the 0b0010 flag') + + if flags & 0b0001: + arg, consumed = decint(byts) + byts = byts[consumed:] + ans.append('->Offset to start of next section (%d) from start' + ' of record: %d [%d absolute offset]'%(psi.index+1, + arg, arg+record_offset)) + # }}} + if tbs_type == 3: # {{{ arg2, consumed = decint(byts) byts = byts[consumed:] @@ -1025,7 +1070,7 @@ class TBSIndexing(object): # {{{ flags = arg3 & 0b1111 ans.append('First section index (fvwi): %d'%fsi) psi = self.get_index(fsi) - ans.append('Flags (flag: always 0?): %d'%flags) + ans.append('Flags: %d'%flags) if flags == 4: ans.append('Number of articles in this section: %d'%byts[0]) byts = byts[1:] @@ -1033,35 +1078,7 @@ class TBSIndexing(object): # {{{ pass else: raise ValueError('Unknown flags value: %d'%flags) - - - if byts: - byts = tbs_type_6(byts, psi=psi, - msg=('First article of ending section, relative to its' - ' parent\'s index'), - fmsg=('->Offset from start of record to beginning of' - ' last starting section')) - while byts: - # We have a transition not just an opening first section - psi = self.get_index(psi.index+1) - arg, consumed = decint(byts) - off = arg >> 4 - byts = byts[consumed:] - flags = arg & 0b1111 - ans.append('Last article of ending section w.r.t. 
starting' - ' section offset (fvwi): %d [%d absolute]'%(off, - psi.index+off)) - ans.append('Flags (always 8?): %d'%flags) - byts = tbs_type_6(byts, psi=psi) - if byts: - # Ended with flag 1,and not EOF, which means there's - # another section transition in this record - arg, consumed = decint(byts) - byts = byts[consumed:] - ans.append('->Offset from start of record to beginning of ' - 'last starting section: %d'%(arg)) - else: - break + byts = read_section_transitions(byts, psi) # }}} @@ -1124,7 +1141,7 @@ class TBSIndexing(object): # {{{ elif flags == 0: byts = tbs_type_6(byts, psi=psi) else: - raise ValueError('Unkown flags: %d'%flags) + raise ValueError('Unknown flags: %d'%flags) # }}} return byts, ans From 0c5a37fbc0120cbb262700ead7f13385b713a758 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 25 Jul 2011 15:45:16 -0600 Subject: [PATCH 23/51] ... --- src/calibre/ebooks/metadata/sources/isbndb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/sources/isbndb.py b/src/calibre/ebooks/metadata/sources/isbndb.py index b33a625ca7..31c5e69d65 100644 --- a/src/calibre/ebooks/metadata/sources/isbndb.py +++ b/src/calibre/ebooks/metadata/sources/isbndb.py @@ -151,7 +151,7 @@ class ISBNDB(Source): bl = feed.find('BookList') if bl is None: - err = tostring(etree.find('errormessage')) + err = tostring(feed.find('errormessage')) raise ValueError('ISBNDb query failed:' + err) total_results = int(bl.get('total_results')) shown_results = int(bl.get('shown_results')) From 5fa4e3ced420fbf608bb17ddcd2f93d3d18a5437 Mon Sep 17 00:00:00 2001 From: John Schember Date: Mon, 25 Jul 2011 18:26:09 -0400 Subject: [PATCH 24/51] Store: Remove EpubBud due to complaints about the amount of copyright infringment on the service. Remove EpubBuy plugin due to them asking not to be included. 
--- src/calibre/customize/builtins.py | 24 +----- .../gui2/store/stores/epubbud_plugin.py | 27 ------- .../gui2/store/stores/epubbuy_de_plugin.py | 80 ------------------- 3 files changed, 1 insertion(+), 130 deletions(-) delete mode 100644 src/calibre/gui2/store/stores/epubbud_plugin.py delete mode 100644 src/calibre/gui2/store/stores/epubbuy_de_plugin.py diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index a79078988a..1499dbe3f4 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -1228,17 +1228,6 @@ class StoreEbookscomStore(StoreBase): formats = ['EPUB', 'LIT', 'MOBI', 'PDF'] affiliate = True -#class StoreEPubBuyDEStore(StoreBase): -# name = 'EPUBBuy DE' -# author = 'Charles Haley' -# description = u'Bei EPUBBuy.com finden Sie ausschliesslich eBooks im weitverbreiteten EPUB-Format und ohne DRM. So haben Sie die freie Wahl, wo Sie Ihr eBook lesen: Tablet, eBook-Reader, Smartphone oder einfach auf Ihrem PC. So macht eBook-Lesen Spaß!' -# actual_plugin = 'calibre.gui2.store.stores.epubbuy_de_plugin:EPubBuyDEStore' -# -# drm_free_only = True -# headquarters = 'DE' -# formats = ['EPUB'] -# affiliate = True - class StoreEBookShoppeUKStore(StoreBase): name = 'ebookShoppe UK' author = u'Charles Haley' @@ -1266,16 +1255,7 @@ class StoreEKnigiStore(StoreBase): headquarters = 'BG' formats = ['EPUB', 'PDF', 'HTML'] - #affiliate = True - -class StoreEpubBudStore(StoreBase): - name = 'ePub Bud' - description = 'Well, it\'s pretty much just "YouTube for Children\'s eBooks. A not-for-profit organization devoted to brining self published childrens books to the world.' 
- actual_plugin = 'calibre.gui2.store.stores.epubbud_plugin:EpubBudStore' - - drm_free_only = True - headquarters = 'US' - formats = ['EPUB'] + affiliate = True class StoreFeedbooksStore(StoreBase): name = 'Feedbooks' @@ -1491,10 +1471,8 @@ plugins += [ StoreEbookNLStore, StoreEbookscomStore, StoreEBookShoppeUKStore, -# StoreEPubBuyDEStore, StoreEHarlequinStore, StoreEKnigiStore, - StoreEpubBudStore, StoreFeedbooksStore, StoreFoylesUKStore, StoreGandalfStore, diff --git a/src/calibre/gui2/store/stores/epubbud_plugin.py b/src/calibre/gui2/store/stores/epubbud_plugin.py deleted file mode 100644 index 029b2b3fc9..0000000000 --- a/src/calibre/gui2/store/stores/epubbud_plugin.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- - -from __future__ import (unicode_literals, division, absolute_import, print_function) - -__license__ = 'GPL 3' -__copyright__ = '2011, John Schember ' -__docformat__ = 'restructuredtext en' - -from calibre.gui2.store.basic_config import BasicStoreConfig -from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore -from calibre.gui2.store.search_result import SearchResult - -class EpubBudStore(BasicStoreConfig, OpenSearchOPDSStore): - - open_search_url = 'http://www.epubbud.com/feeds/opensearch.xml' - web_url = 'http://www.epubbud.com/' - - # http://www.epubbud.com/feeds/catalog.atom - - def search(self, query, max_results=10, timeout=60): - for s in OpenSearchOPDSStore.search(self, query, max_results, timeout): - s.price = '$0.00' - s.drm = SearchResult.DRM_UNLOCKED - s.formats = 'EPUB' - # Download links are broken for this store. 
- s.downloads = {} - yield s diff --git a/src/calibre/gui2/store/stores/epubbuy_de_plugin.py b/src/calibre/gui2/store/stores/epubbuy_de_plugin.py deleted file mode 100644 index 242ef76793..0000000000 --- a/src/calibre/gui2/store/stores/epubbuy_de_plugin.py +++ /dev/null @@ -1,80 +0,0 @@ -# -*- coding: utf-8 -*- - -from __future__ import (unicode_literals, division, absolute_import, print_function) - -__license__ = 'GPL 3' -__copyright__ = '2011, John Schember ' -__docformat__ = 'restructuredtext en' - -import urllib2 -from contextlib import closing - -from lxml import html - -from PyQt4.Qt import QUrl - -from calibre import browser -from calibre.gui2 import open_url -from calibre.gui2.store import StorePlugin -from calibre.gui2.store.basic_config import BasicStoreConfig -from calibre.gui2.store.search_result import SearchResult -from calibre.gui2.store.web_store_dialog import WebStoreDialog - -class EPubBuyDEStore(BasicStoreConfig, StorePlugin): - - def open(self, parent=None, detail_item=None, external=False): - url = 'http://klick.affiliwelt.net/klick.php?bannerid=47653&pid=32307&prid=2627' - url_details = ('http://klick.affiliwelt.net/klick.php?bannerid=47653' - '&pid=32307&prid=2627&prodid={0}') - - if external or self.config.get('open_external', False): - if detail_item: - url = url_details.format(detail_item) - open_url(QUrl(url)) - else: - detail_url = None - if detail_item: - detail_url = url_details.format(detail_item) - d = WebStoreDialog(self.gui, url, parent, detail_url) - d.setWindowTitle(self.name) - d.set_tags(self.config.get('tags', '')) - d.exec_() - - def search(self, query, max_results=10, timeout=60): - url = 'http://www.epubbuy.com/search.php?search_query=' + urllib2.quote(query) - br = browser() - - counter = max_results - with closing(br.open(url, timeout=timeout)) as f: - doc = html.fromstring(f.read()) - for data in doc.xpath('//li[contains(@class, "ajax_block_product")]'): - if counter <= 0: - break - - id = 
''.join(data.xpath('./div[@class="center_block"]' - '/p[contains(text(), "artnr:")]/text()')).strip() - if not id: - continue - id = id[6:].strip() - if not id: - continue - cover_url = ''.join(data.xpath('./div[@class="center_block"]' - '/a[@class="product_img_link"]/img/@src')) - if cover_url: - cover_url = 'http://www.epubbuy.com' + cover_url - title = ''.join(data.xpath('./div[@class="center_block"]' - '/a[@class="product_img_link"]/@title')) - author = ''.join(data.xpath('./div[@class="center_block"]/a[2]/text()')) - price = ''.join(data.xpath('.//span[@class="price"]/text()')) - counter -= 1 - - s = SearchResult() - s.cover_url = cover_url - s.title = title.strip() - s.author = author.strip() - s.price = price - s.drm = SearchResult.DRM_UNLOCKED - s.detail_item = id - s.formats = 'ePub' - - yield s From 00287231073cb9dc39d02a80ab5bbd6e6ad8f34a Mon Sep 17 00:00:00 2001 From: John Schember Date: Mon, 25 Jul 2011 18:50:07 -0400 Subject: [PATCH 25/51] Store: ozon ru store, RU translation updates from Roman Mukhin. 
--- src/calibre/customize/builtins.py | 12 ++ src/calibre/ebooks/metadata/fb2.py | 17 +- src/calibre/gui2/store/search/models.py | 8 +- src/calibre/translations/ru.po | 225 ++++++++++++------------ 4 files changed, 142 insertions(+), 120 deletions(-) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 1499dbe3f4..31e771313e 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -1374,6 +1374,17 @@ class StoreOReillyStore(StoreBase): headquarters = 'US' formats = ['APK', 'DAISY', 'EPUB', 'MOBI', 'PDF'] +class StoreOzonRUStore(StoreBase): + name = 'OZON.ru' + description = u'ebooks from OZON.ru' + actual_plugin = 'calibre.gui2.store.stores.ozon_ru_plugin:OzonRUStore' + author = 'Roman Mukhin' + + drm_free_only = True + headquarters = 'RU' + formats = ['TXT', 'PDF', 'DJVU', 'RTF', 'DOC', 'JAR', 'FB2'] + affiliate = True + class StorePragmaticBookshelfStore(StoreBase): name = 'Pragmatic Bookshelf' description = u'The Pragmatic Bookshelf\'s collection of programming and tech books avaliable as ebooks.' 
@@ -1486,6 +1497,7 @@ plugins += [ StoreNextoStore, StoreOpenBooksStore, StoreOReillyStore, + StoreOzonRUStore, StorePragmaticBookshelfStore, StoreSmashwordsStore, StoreVirtualoStore, diff --git a/src/calibre/ebooks/metadata/fb2.py b/src/calibre/ebooks/metadata/fb2.py index 4c47d87717..765ac6d009 100644 --- a/src/calibre/ebooks/metadata/fb2.py +++ b/src/calibre/ebooks/metadata/fb2.py @@ -24,10 +24,9 @@ XPath = partial(etree.XPath, namespaces=NAMESPACES) tostring = partial(etree.tostring, method='text', encoding=unicode) def get_metadata(stream): - """ Return fb2 metadata as a L{MetaInformation} object """ + ''' Return fb2 metadata as a L{MetaInformation} object ''' root = _get_fbroot(stream) - book_title = _parse_book_title(root) authors = _parse_authors(root) @@ -166,7 +165,7 @@ def _parse_tags(root, mi): break def _parse_series(root, mi): - #calibri supports only 1 series: use the 1-st one + # calibri supports only 1 series: use the 1-st one # pick up sequence but only from 1 secrion in prefered order # except xp_ti = '//fb2:title-info/fb2:sequence[1]' @@ -181,11 +180,12 @@ def _parse_series(root, mi): def _parse_isbn(root, mi): # some people try to put several isbn in this field, but it is not allowed. try to stick to the 1-st one in this case isbn = XPath('normalize-space(//fb2:publish-info/fb2:isbn/text())')(root) - # some people try to put several isbn in this field, but it is not allowed. try to stick to the 1-st one in this case - if ',' in isbn: - isbn = isbn[:isbn.index(',')] - if check_isbn(isbn): - mi.isbn = isbn + if isbn: + # some people try to put several isbn in this field, but it is not allowed. 
try to stick to the 1-st one in this case + if ',' in isbn: + isbn = isbn[:isbn.index(',')] + if check_isbn(isbn): + mi.isbn = isbn def _parse_comments(root, mi): # pick up annotation but only from 1 secrion ; fallback: @@ -232,4 +232,3 @@ def _get_fbroot(stream): raw = xml_to_unicode(raw, strip_encoding_pats=True)[0] root = etree.fromstring(raw, parser=parser) return root - diff --git a/src/calibre/gui2/store/search/models.py b/src/calibre/gui2/store/search/models.py index 1a2327fc45..b62b581bd8 100644 --- a/src/calibre/gui2/store/search/models.py +++ b/src/calibre/gui2/store/search/models.py @@ -23,7 +23,8 @@ from calibre.utils.search_query_parser import SearchQueryParser def comparable_price(text): text = re.sub(r'[^0-9.,]', '', text) - if len(text) < 3 or text[-3] not in ('.', ','): + delimeter = (',', '.') + if len(text) < 3 or text[-3] not in delimeter: text += '00' text = re.sub(r'\D', '', text) text = text.rjust(6, '0') @@ -334,6 +335,11 @@ class SearchFilter(SearchQueryParser): } for x in ('author', 'download', 'format'): q[x+'s'] = q[x] + + # make the price in query the same format as result + if location == 'price': + query = comparable_price(query) + for sr in self.srs: for locvalue in locations: accessor = q[locvalue] diff --git a/src/calibre/translations/ru.po b/src/calibre/translations/ru.po index fb90bfc5e0..f437b7975a 100644 --- a/src/calibre/translations/ru.po +++ b/src/calibre/translations/ru.po @@ -5541,23 +5541,23 @@ msgstr "Книги с такими же тегами" #: /home/kovid/work/calibre/src/calibre/gui2/actions/store.py:20 msgid "Get books" -msgstr "" +msgstr "Загрузить книги" #: /home/kovid/work/calibre/src/calibre/gui2/actions/store.py:29 msgid "Search for ebooks" -msgstr "" +msgstr "Поиск книг..." 
#: /home/kovid/work/calibre/src/calibre/gui2/actions/store.py:30 msgid "Search for this author" -msgstr "" +msgstr "Поиск по автору" #: /home/kovid/work/calibre/src/calibre/gui2/actions/store.py:31 msgid "Search for this title" -msgstr "" +msgstr "Поиск по названию" #: /home/kovid/work/calibre/src/calibre/gui2/actions/store.py:32 msgid "Search for this book" -msgstr "" +msgstr "Поиск по книге" #: /home/kovid/work/calibre/src/calibre/gui2/actions/store.py:34 #: /home/kovid/work/calibre/src/calibre/gui2/store/search/search_ui.py:135 @@ -5569,21 +5569,21 @@ msgstr "Магазины" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/chooser_dialog.py:18 #: /home/kovid/work/calibre/src/calibre/gui2/store/search/search.py:285 msgid "Choose stores" -msgstr "" +msgstr "Выбрать магазины" #: /home/kovid/work/calibre/src/calibre/gui2/actions/store.py:83 #: /home/kovid/work/calibre/src/calibre/gui2/actions/store.py:102 #: /home/kovid/work/calibre/src/calibre/gui2/actions/store.py:111 msgid "Cannot search" -msgstr "" +msgstr "Поиск не может быть произведён" #: /home/kovid/work/calibre/src/calibre/gui2/actions/store.py:130 msgid "" "Calibre helps you find the ebooks you want by searching the websites of " "various commercial and public domain book sources for you." msgstr "" -"Calibre помогает вам отыскать книги, которые вы хотите найти, предлагая вам " -"найденные веб-сайты различных коммерческих и публичных источников книг." +"Calibre поможет Вам найти книги, предлагая " +"веб-сайты различных коммерческих и публичных источников книг." #: /home/kovid/work/calibre/src/calibre/gui2/actions/store.py:134 msgid "" @@ -5591,6 +5591,8 @@ msgid "" "are looking for, at the best price. You also get DRM status and other useful " "information." msgstr "" +"Используя встроенный поиск Вы можете легко найти магазин предлагающий выгодную цену " +"для интересующей Вас книги. 
Также Вы получите другу полезную инфрмацию" #: /home/kovid/work/calibre/src/calibre/gui2/actions/store.py:138 msgid "" @@ -5608,7 +5610,7 @@ msgstr "Показать снова данное сообщение" #: /home/kovid/work/calibre/src/calibre/gui2/actions/store.py:149 msgid "About Get Books" -msgstr "" +msgstr "О 'Загрузить книги'" #: /home/kovid/work/calibre/src/calibre/gui2/actions/tweak_epub.py:17 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/tweak_epub_ui.py:60 @@ -5617,7 +5619,7 @@ msgstr "Tweak EPUB" #: /home/kovid/work/calibre/src/calibre/gui2/actions/tweak_epub.py:18 msgid "Make small changes to ePub format books" -msgstr "" +msgstr "Внести небольшие изненения ePub в формат книги" #: /home/kovid/work/calibre/src/calibre/gui2/actions/tweak_epub.py:19 msgid "T" @@ -5704,7 +5706,7 @@ msgstr "Не могу открыть папку" #: /home/kovid/work/calibre/src/calibre/gui2/actions/view.py:220 msgid "This book no longer exists in your library" -msgstr "" +msgstr "Эта книга больше не находится в Вашей библиотеке" #: /home/kovid/work/calibre/src/calibre/gui2/actions/view.py:227 #, python-format @@ -9167,11 +9169,11 @@ msgstr "&Показать пароль" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/plugin_updater.py:122 msgid "Restart required" -msgstr "" +msgstr "Требуется перезапуск" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/plugin_updater.py:123 msgid "You must restart Calibre before using this plugin!" -msgstr "" +msgstr "Для использования плагина Вам нужно перезапустить Calibre!" 
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/plugin_updater.py:164 #, python-format @@ -9183,17 +9185,17 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/store/search/search_ui.py:136 #: /home/kovid/work/calibre/src/calibre/gui2/store/search_ui.py:111 msgid "All" -msgstr "" +msgstr "Всё" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/plugin_updater.py:184 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/plugin_updater.py:302 msgid "Installed" -msgstr "" +msgstr "Установленные" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/plugin_updater.py:184 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/plugin_updater.py:397 msgid "Not installed" -msgstr "" +msgstr "Не установленные" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/plugin_updater.py:184 msgid "Update available" @@ -9201,7 +9203,7 @@ msgstr "Доступно обновление" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/plugin_updater.py:302 msgid "Plugin Name" -msgstr "" +msgstr "Название плагина" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/plugin_updater.py:302 #: /home/kovid/work/calibre/src/calibre/gui2/jobs.py:63 @@ -13317,7 +13319,7 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins_ui.py:114 msgid "&Load plugin from file" -msgstr "" +msgstr "Загрузить плагин из файла" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/save_template.py:33 msgid "Any custom field" @@ -13579,11 +13581,11 @@ msgstr "Сбой запуска контент-сервера" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/server.py:106 msgid "Error log:" -msgstr "Лог ошибок:" +msgstr "Журнал ошибок:" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/server.py:113 msgid "Access log:" -msgstr "Лог доступа:" +msgstr "Журнал доступа:" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/server.py:128 msgid "You need to restart the server for changes to take effect" @@ -14053,7 +14055,7 @@ msgstr "Ничего" #: 
/home/kovid/work/calibre/src/calibre/gui2/shortcuts.py:59 msgid "Press a key..." -msgstr "" +msgstr "Нажмите клавишу..." #: /home/kovid/work/calibre/src/calibre/gui2/shortcuts.py:80 msgid "Already assigned" @@ -14108,19 +14110,19 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/store/basic_config_widget_ui.py:38 msgid "Added Tags:" -msgstr "" +msgstr "Добавленные тэги:" #: /home/kovid/work/calibre/src/calibre/gui2/store/basic_config_widget_ui.py:39 msgid "Open store in external web browswer" -msgstr "" +msgstr "Открыть сайт магазина в интернет броузере" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/adv_search_builder_ui.py:219 msgid "&Name:" -msgstr "" +msgstr "&Название" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/adv_search_builder_ui.py:221 msgid "&Description:" -msgstr "" +msgstr "&Описание" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/adv_search_builder_ui.py:222 msgid "&Headquarters:" @@ -14140,7 +14142,7 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/store/search/adv_search_builder_ui.py:217 #: /home/kovid/work/calibre/src/calibre/gui2/store/search/adv_search_builder_ui.py:220 msgid "true" -msgstr "" +msgstr "да" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/adv_search_builder_ui.py:229 #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/adv_search_builder_ui.py:231 @@ -14148,41 +14150,41 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/store/search/adv_search_builder_ui.py:218 #: /home/kovid/work/calibre/src/calibre/gui2/store/search/adv_search_builder_ui.py:221 msgid "false" -msgstr "" +msgstr "нет" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/adv_search_builder_ui.py:232 #: /home/kovid/work/calibre/src/calibre/gui2/store/search/adv_search_builder_ui.py:216 msgid "Affiliate:" -msgstr "" +msgstr "Партнёрство:" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/adv_search_builder_ui.py:235 msgid 
"Nam&e/Description ..." -msgstr "" +msgstr "Названи&е/Описание" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/chooser_widget_ui.py:78 #: /home/kovid/work/calibre/src/calibre/gui2/store/search/search_ui.py:132 #: /home/kovid/work/calibre/src/calibre/gui2/store/search_ui.py:108 msgid "Query:" -msgstr "" +msgstr "Запрос:" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/chooser_widget_ui.py:81 msgid "Enable" -msgstr "" +msgstr "Включить" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/chooser_widget_ui.py:84 #: /home/kovid/work/calibre/src/calibre/gui2/store/search/search_ui.py:137 #: /home/kovid/work/calibre/src/calibre/gui2/store/search_ui.py:112 msgid "Invert" -msgstr "" +msgstr "Инвертировать" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/models.py:21 #: /home/kovid/work/calibre/src/calibre/gui2/store/search/models.py:37 msgid "Affiliate" -msgstr "" +msgstr "Партнерство" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/models.py:21 msgid "Enabled" -msgstr "" +msgstr "Включено" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/models.py:21 msgid "Headquarters" @@ -14190,7 +14192,7 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/models.py:21 msgid "No DRM" -msgstr "" +msgstr "Без DRM" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/models.py:129 msgid "" @@ -14205,13 +14207,14 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/models.py:136 msgid "This store only distributes ebooks without DRM." -msgstr "" +msgstr "Этот магазин распространяет электронные книги исключительно без DRM" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/models.py:138 msgid "" "This store distributes ebooks with DRM. It may have some titles without DRM, " "but you will need to check on a per title basis." -msgstr "" +msgstr "Этот магазин распространяет электронные книги с DRM. 
Возможно, некоторые издания" +" доступны без DRM, но для этого надо проверять каждую книгу в отдельности." #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/models.py:140 #, python-format @@ -14225,46 +14228,46 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/store/search/models.py:211 #, python-format msgid "Buying from this store supports the calibre developer: %s." -msgstr "" +msgstr "Покупая в этом магазине Вы поддерживаете проект calibre и разработчика: %s." #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/models.py:145 #, python-format msgid "This store distributes ebooks in the following formats: %s" -msgstr "" +msgstr "Магазин распространяет эл. книги в следующих фотрматах" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/results_view.py:47 msgid "Configure..." -msgstr "" +msgstr "Настроить..." #: /home/kovid/work/calibre/src/calibre/gui2/store/config/search/search_widget_ui.py:99 #: /home/kovid/work/calibre/src/calibre/gui2/store/config/search_widget_ui.py:99 msgid "Time" -msgstr "" +msgstr "Время" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/search/search_widget_ui.py:100 #: /home/kovid/work/calibre/src/calibre/gui2/store/config/search_widget_ui.py:100 msgid "Number of seconds to wait for a store to respond" -msgstr "" +msgstr "Время ожидания ответа магазина (в секундах)" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/search/search_widget_ui.py:101 #: /home/kovid/work/calibre/src/calibre/gui2/store/config/search_widget_ui.py:101 msgid "Number of seconds to let a store process results" -msgstr "" +msgstr "Допустипое время обработки результата магазином (в секундах)" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/search/search_widget_ui.py:102 #: /home/kovid/work/calibre/src/calibre/gui2/store/config/search_widget_ui.py:102 msgid "Display" -msgstr "" +msgstr "Показать" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/search/search_widget_ui.py:103 #: 
/home/kovid/work/calibre/src/calibre/gui2/store/config/search_widget_ui.py:103 msgid "Maximum number of results to show per store" -msgstr "" +msgstr "Максимальное количество результатов для показа (по каждому магазину)" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/search/search_widget_ui.py:104 #: /home/kovid/work/calibre/src/calibre/gui2/store/config/search_widget_ui.py:104 msgid "Open search result in system browser" -msgstr "" +msgstr "Показывать результаты поиска в системном интернет броузере" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/search/search_widget_ui.py:105 msgid "Threads" @@ -14288,11 +14291,11 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/search_widget_ui.py:105 msgid "Performance" -msgstr "" +msgstr "Производительность" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/search_widget_ui.py:106 msgid "Number of simultaneous searches" -msgstr "" +msgstr "Количество одновременно выполняемых поисков" #: /home/kovid/work/calibre/src/calibre/gui2/store/config/search_widget_ui.py:107 msgid "Number of simultaneous cache updates" @@ -14308,13 +14311,13 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/store/mobileread_store_dialog_ui.py:62 msgid "Search:" -msgstr "" +msgstr "Поиск:" #: /home/kovid/work/calibre/src/calibre/gui2/store/mobileread_store_dialog_ui.py:63 #: /home/kovid/work/calibre/src/calibre/gui2/store/search/search_ui.py:142 #: /home/kovid/work/calibre/src/calibre/gui2/store/stores/mobileread/store_dialog_ui.py:77 msgid "Books:" -msgstr "" +msgstr "Книги:" #: /home/kovid/work/calibre/src/calibre/gui2/store/mobileread_store_dialog_ui.py:65 #: /home/kovid/work/calibre/src/calibre/gui2/store/search/search_ui.py:144 @@ -14323,20 +14326,20 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/store/web_store_dialog_ui.py:63 #: /usr/src/qt-everywhere-opensource-src-4.7.2/src/gui/widgets/qdialogbuttonbox.cpp:661 msgid "Close" -msgstr "" +msgstr "Закрыть" #: 
/home/kovid/work/calibre/src/calibre/gui2/store/search/adv_search_builder_ui.py:212 msgid "&Price:" -msgstr "" +msgstr "&Цена:" #: /home/kovid/work/calibre/src/calibre/gui2/store/search/adv_search_builder_ui.py:219 msgid "Download:" -msgstr "" +msgstr "Скачать" #: /home/kovid/work/calibre/src/calibre/gui2/store/search/adv_search_builder_ui.py:222 #: /home/kovid/work/calibre/src/calibre/gui2/store/stores/mobileread/adv_search_builder_ui.py:187 msgid "Titl&e/Author/Price ..." -msgstr "" +msgstr "Названи&е/Автор/Цена ..." #: /home/kovid/work/calibre/src/calibre/gui2/store/search/models.py:37 msgid "DRM" @@ -14344,11 +14347,11 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/store/search/models.py:37 msgid "Download" -msgstr "" +msgstr "Скачать" #: /home/kovid/work/calibre/src/calibre/gui2/store/search/models.py:37 msgid "Price" -msgstr "" +msgstr "Цена" #: /home/kovid/work/calibre/src/calibre/gui2/store/search/models.py:196 #, python-format @@ -14383,90 +14386,90 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/store/search/models.py:208 #, python-format msgid "The following formats can be downloaded directly: %s." -msgstr "" +msgstr "Форматы доступные для непосредственного скачивания: %s." #: /home/kovid/work/calibre/src/calibre/gui2/store/search/results_view.py:41 msgid "Download..." -msgstr "" +msgstr "Скачать..." #: /home/kovid/work/calibre/src/calibre/gui2/store/search/results_view.py:45 msgid "Goto in store..." -msgstr "" +msgstr "Перейти в магазин..." #: /home/kovid/work/calibre/src/calibre/gui2/store/search/search.py:114 #, python-format msgid "Buying from this store supports the calibre developer: %s

" -msgstr "" +msgstr "Покупая в этом магазине Вы поддерживаете проект calibre и разработчика: %s

" #: /home/kovid/work/calibre/src/calibre/gui2/store/search/search.py:276 msgid "Customize get books search" -msgstr "" +msgstr "Перенастроить под себя поиск книг для скачивания" #: /home/kovid/work/calibre/src/calibre/gui2/store/search/search.py:286 msgid "Configure search" -msgstr "" +msgstr "Настроить поиск" #: /home/kovid/work/calibre/src/calibre/gui2/store/search/search.py:336 msgid "Couldn't find any books matching your query." -msgstr "Ну удалось найти ни одной кники, соотвествующей вашему запросу." +msgstr "Не удалось найти ни одной книги, соотвествующей вашему запросу." #: /home/kovid/work/calibre/src/calibre/gui2/store/search/search.py:350 msgid "Choose format to download to your library." -msgstr "" +msgstr "Выберите формат для скачивания в библиотеку" #: /home/kovid/work/calibre/src/calibre/gui2/store/search/search_ui.py:131 #: /home/kovid/work/calibre/src/calibre/gui2/store/search_ui.py:107 msgid "Get Books" -msgstr "" +msgstr "Скачать книги" #: /home/kovid/work/calibre/src/calibre/gui2/store/search/search_ui.py:140 msgid "Open a selected book in the system's web browser" -msgstr "" +msgstr "Показать выбранную книгу в системном интернет броузере" #: /home/kovid/work/calibre/src/calibre/gui2/store/search/search_ui.py:141 msgid "Open in &external browser" -msgstr "" +msgstr "Показывать в системном интернет броузере" #: /home/kovid/work/calibre/src/calibre/gui2/store/stores/ebooks_com_plugin.py:96 msgid "Not Available" -msgstr "" +msgstr "Недоступно" #: /home/kovid/work/calibre/src/calibre/gui2/store/stores/mobileread/adv_search_builder_ui.py:179 msgid "" "See the User Manual for more help" msgstr "" -"Смотри Пользовательский мануал для помощи" +"Смотрите Руководство пользователя для помощи" #: /home/kovid/work/calibre/src/calibre/gui2/store/stores/mobileread/cache_progress_dialog_ui.py:51 msgid "Updating book cache" -msgstr "" +msgstr "Обноволяется кэш книг" #: /home/kovid/work/calibre/src/calibre/gui2/store/stores/mobileread/cache_update_thread.py:42 
msgid "Checking last download date." -msgstr "" +msgstr "Проверяется врема последнего скачивания" #: /home/kovid/work/calibre/src/calibre/gui2/store/stores/mobileread/cache_update_thread.py:48 msgid "Downloading book list from MobileRead." -msgstr "" +msgstr "Загружается список книг с MobileRead." #: /home/kovid/work/calibre/src/calibre/gui2/store/stores/mobileread/cache_update_thread.py:61 msgid "Processing books." -msgstr "" +msgstr "Книги обрабатываются" #: /home/kovid/work/calibre/src/calibre/gui2/store/stores/mobileread/cache_update_thread.py:71 #, python-format msgid "%(num)s of %(tot)s books processed." -msgstr "" +msgstr "обработано %(num)s из %(tot)." #: /home/kovid/work/calibre/src/calibre/gui2/store/stores/mobileread/mobileread_plugin.py:62 msgid "Updating MobileRead book cache..." -msgstr "" +msgstr "Обноволяется кэщ MobileRead книг..." #: /home/kovid/work/calibre/src/calibre/gui2/store/stores/mobileread/store_dialog_ui.py:74 msgid "&Query:" -msgstr "" +msgstr "&Запрос:" #: /home/kovid/work/calibre/src/calibre/gui2/store/web_control.py:73 msgid "" @@ -14480,15 +14483,15 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/store/web_control.py:86 msgid "File is not a supported ebook type. Save to disk?" -msgstr "" +msgstr "Файл содержит неподдерживаемый формат эл. книги. Сохранить на диске?" #: /home/kovid/work/calibre/src/calibre/gui2/store/web_store_dialog_ui.py:59 msgid "Home" -msgstr "" +msgstr "Главная страница" #: /home/kovid/work/calibre/src/calibre/gui2/store/web_store_dialog_ui.py:60 msgid "Reload" -msgstr "" +msgstr "Перегрузить" #: /home/kovid/work/calibre/src/calibre/gui2/store/web_store_dialog_ui.py:61 msgid "%p%" @@ -14502,22 +14505,24 @@ msgstr "" msgid "" "Changing the authors for several books can take a while. Are you sure?" msgstr "" +"Изменить автора нескольких книг займёт некоторое время. 
Вы согласны" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/model.py:729 msgid "" "Changing the metadata for that many books can take a while. Are you sure?" msgstr "" +"Изменить мета-данные нескольких книг займёт некоторое время. Вы согласны" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/model.py:816 #: /home/kovid/work/calibre/src/calibre/library/database2.py:449 msgid "Searches" -msgstr "" +msgstr "Поиски" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/model.py:881 #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/model.py:901 #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/model.py:910 msgid "Rename user category" -msgstr "" +msgstr "Переименовать пользовательскую категорию" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/model.py:882 msgid "You cannot use periods in the name when renaming user categories" @@ -14540,30 +14545,30 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/ui.py:48 msgid "Manage Authors" -msgstr "" +msgstr "Упорядочнить авторов" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/ui.py:50 msgid "Manage Series" -msgstr "" +msgstr "Упорядочнить серии" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/ui.py:52 msgid "Manage Publishers" -msgstr "" +msgstr "Упорядочнить издателей" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/ui.py:54 msgid "Manage Tags" -msgstr "" +msgstr "Упорядочнить тэги" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/ui.py:56 #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/view.py:465 #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/view.py:469 msgid "Manage User Categories" -msgstr "Управление пользовательскими категориями" +msgstr "Упорядочнить пользовательские категории" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/ui.py:58 #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/view.py:457 msgid "Manage Saved Searches" -msgstr "Управление сохраненными поисками" +msgstr "Упорядочнить 
сохраненные поиски" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/ui.py:66 msgid "Invalid search restriction" @@ -14580,17 +14585,17 @@ msgstr "Новая категория" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/ui.py:134 #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/ui.py:137 msgid "Delete user category" -msgstr "" +msgstr "Удалить пользовательскую категорию" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/ui.py:135 #, python-format msgid "%s is not a user category" -msgstr "" +msgstr "%s не является пользовательской категорией" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/ui.py:138 #, python-format msgid "%s contains items. Do you really want to delete it?" -msgstr "" +msgstr "%s содержит элементы. Вы действительно хотете её удалить?" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/ui.py:159 msgid "Remove category" @@ -14599,16 +14604,16 @@ msgstr "Удалить категорию" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/ui.py:160 #, python-format msgid "User category %s does not exist" -msgstr "" +msgstr "Пользовательская категория %s не существует" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/ui.py:179 msgid "Add to user category" -msgstr "" +msgstr "Добавить в пользовательские категории" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/ui.py:180 #, python-format msgid "A user category %s does not exist" -msgstr "" +msgstr "Пользовательская категория %s не существует" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/ui.py:305 msgid "Find item in tag browser" @@ -14701,7 +14706,7 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/view.py:359 #, python-format msgid "Add %s to user category" -msgstr "" +msgstr "Добавить %s в пользовательские категории" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/view.py:372 #, python-format @@ -14711,7 +14716,7 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/view.py:382 #, python-format 
msgid "Delete search %s" -msgstr "" +msgstr "Удалить поиск %s" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/view.py:387 #, python-format @@ -14721,27 +14726,27 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/view.py:394 #, python-format msgid "Search for %s" -msgstr "" +msgstr "Искать %s" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/view.py:399 #, python-format msgid "Search for everything but %s" -msgstr "" +msgstr "Искать всё кроме %s" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/view.py:411 #, python-format msgid "Add sub-category to %s" -msgstr "" +msgstr "Добавить подкатегорию в %s" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/view.py:415 #, python-format msgid "Delete user category %s" -msgstr "" +msgstr "Удалить пользовательскую категорию %s" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/view.py:420 #, python-format msgid "Hide category %s" -msgstr "" +msgstr "Скрыть категорию %s" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/view.py:424 msgid "Show category" @@ -14750,12 +14755,12 @@ msgstr "Показать категорию" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/view.py:434 #, python-format msgid "Search for books in category %s" -msgstr "" +msgstr "Искать книги в категории %s" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/view.py:440 #, python-format msgid "Search for books not in category %s" -msgstr "" +msgstr "Искать книги НЕ в категории %s" #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/view.py:449 #: /home/kovid/work/calibre/src/calibre/gui2/tag_browser/view.py:454 @@ -14837,7 +14842,7 @@ msgstr "Извлечь подключенное устройство" #: /home/kovid/work/calibre/src/calibre/gui2/ui.py:347 msgid "Debug mode" -msgstr "" +msgstr "Резим отладки" #: /home/kovid/work/calibre/src/calibre/gui2/ui.py:348 #, python-format @@ -14875,7 +14880,7 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/ui.py:630 msgid "Active jobs" -msgstr "" +msgstr 
"Активные задания" #: /home/kovid/work/calibre/src/calibre/gui2/ui.py:698 msgid "" @@ -14898,11 +14903,11 @@ msgstr "Доступно обновление!" #: /home/kovid/work/calibre/src/calibre/gui2/update.py:84 msgid "Show this notification for future updates" -msgstr "" +msgstr "Показвать сообщение о доступности новой версии (обнивления)" #: /home/kovid/work/calibre/src/calibre/gui2/update.py:89 msgid "&Get update" -msgstr "" +msgstr "&Скачать обнивление" #: /home/kovid/work/calibre/src/calibre/gui2/update.py:93 msgid "Update &plugins" @@ -14929,11 +14934,11 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/update.py:187 #, python-format msgid "There are %d plugin updates available" -msgstr "" +msgstr "Доступны обновления для %d плагинов" #: /home/kovid/work/calibre/src/calibre/gui2/update.py:191 msgid "Install and configure user plugins" -msgstr "" +msgstr "Установка и настройка пользовательских плагинов" #: /home/kovid/work/calibre/src/calibre/gui2/viewer/bookmarkmanager.py:43 msgid "Edit bookmark" From 8e5506b1cb03386ce5e4ae21b5c741529de4a6af Mon Sep 17 00:00:00 2001 From: John Schember Date: Mon, 25 Jul 2011 19:49:38 -0400 Subject: [PATCH 26/51] Store: add ozon plugin. Add affiliate codes for Google Books. 
--- src/calibre/customize/builtins.py | 1 + .../gui2/store/stores/google_books_plugin.py | 20 ++- .../gui2/store/stores/ozon_ru_plugin.py | 126 ++++++++++++++++++ 3 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 src/calibre/gui2/store/stores/ozon_ru_plugin.py diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 31e771313e..9a01633cfe 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -1291,6 +1291,7 @@ class StoreGoogleBooksStore(StoreBase): headquarters = 'US' formats = ['EPUB', 'PDF', 'TXT'] + affiliate = True class StoreGutenbergStore(StoreBase): name = 'Project Gutenberg' diff --git a/src/calibre/gui2/store/stores/google_books_plugin.py b/src/calibre/gui2/store/stores/google_books_plugin.py index 938ca70664..4819509c3f 100644 --- a/src/calibre/gui2/store/stores/google_books_plugin.py +++ b/src/calibre/gui2/store/stores/google_books_plugin.py @@ -6,6 +6,7 @@ __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' +import random import urllib from contextlib import closing @@ -23,7 +24,24 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog class GoogleBooksStore(BasicStoreConfig, StorePlugin): def open(self, parent=None, detail_item=None, external=False): - url = 'http://books.google.com/' + aff_id = { + 'lid': '41000000033185143', + 'pubid': '21000000000352219', + 'ganpub': 'k352219', + 'ganclk': 'GOOG_1335334761', + } + # Use Kovid's affiliate id 30% of the time. 
+ if random.randint(1, 10) in (1, 2, 3): + aff_id = { + 'lid': '41000000031855266', + 'pubid': '21000000000352583', + 'ganpub': 'k352583', + 'ganclk': 'GOOG_1335335464', + } + + url = 'http://gan.doubleclick.net/gan_click?lid=%(lid)s&pubid=%(pubid)s' % aff_id + if detail_item: + detail_item += '&ganpub=%(ganpub)s&ganclk=%(ganclk)s' % aff_id if external or self.config.get('open_external', False): open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url))) diff --git a/src/calibre/gui2/store/stores/ozon_ru_plugin.py b/src/calibre/gui2/store/stores/ozon_ru_plugin.py new file mode 100644 index 0000000000..0d513f3dfa --- /dev/null +++ b/src/calibre/gui2/store/stores/ozon_ru_plugin.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- + +from __future__ import (unicode_literals, division, absolute_import, print_function) + +__license__ = 'GPL 3' +__copyright__ = '2011, Roman Mukhin ' +__docformat__ = 'restructuredtext en' + +import random +import re +import urllib2 + +from contextlib import closing +from lxml import etree, html +from PyQt4.Qt import QUrl + +from calibre import browser, url_slash_cleaner +from calibre.ebooks.chardet import xml_to_unicode +from calibre.gui2 import open_url +from calibre.gui2.store import StorePlugin +from calibre.gui2.store.basic_config import BasicStoreConfig +from calibre.gui2.store.search_result import SearchResult +from calibre.gui2.store.web_store_dialog import WebStoreDialog + +class OzonRUStore(BasicStoreConfig, StorePlugin): + shop_url = 'http://www.ozon.ru' + + def open(self, parent=None, detail_item=None, external=False): + + aff_id = '?partner=romuk' + # Use Kovid's affiliate id 30% of the time. 
+ if random.randint(1, 10) in (1, 2, 3): + aff_id = '?partner=kovidgoyal' + + url = self.shop_url + aff_id + detail_url = None + if detail_item: + # http://www.ozon.ru/context/detail/id/3037277/ + detail_url = self.shop_url + '/context/detail/id/' + urllib2.quote(detail_item) + aff_id + + if external or self.config.get('open_external', False): + open_url(QUrl(url_slash_cleaner(detail_url if detail_url else url))) + else: + d = WebStoreDialog(self.gui, url, parent, detail_url) + d.setWindowTitle(self.name) + d.set_tags(self.config.get('tags', '')) + d.exec_() + + + def search(self, query, max_results=10, timeout=60): + search_url = self.shop_url + '/webservice/webservice.asmx/SearchWebService?'\ + 'searchText=%s&searchContext=ebook' % urllib2.quote(query) + + counter = max_results + br = browser() + with closing(br.open(search_url, timeout=timeout)) as f: + raw = xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True)[0] + doc = etree.fromstring(raw) + for data in doc.xpath('//*[local-name() = "SearchItems"]'): + if counter <= 0: + break + counter -= 1 + + xp_template = 'normalize-space(./*[local-name() = "{0}"]/text())' + + s = SearchResult() + s.detail_item = data.xpath(xp_template.format('ID')) + s.title = data.xpath(xp_template.format('Name')) + s.author = data.xpath(xp_template.format('Author')) + s.price = data.xpath(xp_template.format('Price')) + s.cover_url = data.xpath(xp_template.format('Picture')) + if re.match("^\d+?\.\d+?$", s.price): + s.price = u'{:.2F} руб.'.format(float(s.price)) + yield s + + def get_details(self, search_result, timeout=60): + url = self.shop_url + '/context/detail/id/' + urllib2.quote(search_result.detail_item) + br = browser() + + result = False + with closing(br.open(url, timeout=timeout)) as f: + doc = html.fromstring(f.read()) + + # example where we are going to find formats + #
+ # ... + # <b>Доступные форматы:</b> + # <div>.epub, .fb2, .pdf, .pdf, .txt</div> + # ... + #
+ xpt = u'normalize-space(//div[@class="box"]//*[contains(normalize-space(text()), "Доступные форматы:")][1]/following-sibling::div[1]/text())' + formats = doc.xpath(xpt) + if formats: + result = True + search_result.drm = SearchResult.DRM_UNLOCKED + search_result.formats = ', '.join(_parse_ebook_formats(formats)) + # unfortunately no direct links to download books (only buy link) + # search_result.downloads['BF2'] = self.shop_url + '/order/digitalorder.aspx?id=' + + urllib2.quote(search_result.detail_item) + return result + +def _parse_ebook_formats(formatsStr): + ''' + Creates a list with displayable names of the formats + + :param formatsStr: string with comma separated book formats + as it provided by ozon.ru + :return: a list with displayable book formats + ''' + + formatsUnstruct = formatsStr.lower() + formats = [] + if 'epub' in formatsUnstruct: + formats.append('ePub') + if 'pdf' in formatsUnstruct: + formats.append('PDF') + if 'fb2' in formatsUnstruct: + formats.append('FB2') + if 'rtf' in formatsUnstruct: + formats.append('RTF') + if 'txt' in formatsUnstruct: + formats.append('TXT') + if 'djvu' in formatsUnstruct: + formats.append('DjVu') + if 'doc' in formatsUnstruct: + formats.append('DOC') + return formats From 427060533522e005f82e6866046abb8b3ec81dee Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 25 Jul 2011 19:49:57 -0600 Subject: [PATCH 27/51] New MOBI output: Write the TBS sequences for periodicals. Also fully decoded all TBS sequences, only unknown bits left are in the opening sequence that seems to depend on the type of record being indexed. The rules are simple, so I just use them instead of spending more time looking for deeper meaning. 
--- src/calibre/ebooks/mobi/debug.py | 214 ++++++-------------- src/calibre/ebooks/mobi/tbs_periodicals.rst | 89 +++++++- src/calibre/ebooks/mobi/utils.py | 94 +++++++++ src/calibre/ebooks/mobi/writer2/indexer.py | 166 +++++++++++---- 4 files changed, 375 insertions(+), 188 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index b85d73f55c..67f20e691f 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -12,7 +12,7 @@ from collections import OrderedDict, defaultdict from calibre.utils.date import utc_tz from calibre.ebooks.mobi.langcodes import main_language, sub_language from calibre.ebooks.mobi.utils import (decode_hex_number, decint, - get_trailing_data, decode_fvwi) + get_trailing_data, decode_tbs) from calibre.utils.magick.draw import identify_data # PalmDB {{{ @@ -949,20 +949,22 @@ class TBSIndexing(object): # {{{ ans.append(('\t\tIndex Entry: %d (Parent index: %d, ' 'Depth: %d, Offset: %d, Size: %d) [%s]')%( x.index, x.parent_index, x.depth, x.offset, x.size, x.label)) - def bin3(num): + def bin4(num): ans = bin(num)[2:] - return '0'*(3-len(ans)) + ans + return bytes('0'*(4-len(ans)) + ans) + + def repr_extra(x): + return str({bin4(k):v for k, v in extra.iteritems()}) tbs_type = 0 if len(byts): - outer, consumed = decint(byts) + outermost_index, extra, consumed = decode_tbs(byts) byts = byts[consumed:] - tbs_type = outer & 0b111 - ans.append('TBS Type: %s (%d)'%(bin3(tbs_type), tbs_type)) - ans.append('Outer Index entry: %d'%(outer >> 3)) - arg1, consumed = decint(byts) - byts = byts[consumed:] - ans.append('Unknown (vwi: always 0?): %d'%arg1) + for k in extra: + tbs_type |= k + ans.append('\nTBS: %d (%s)'%(tbs_type, bin4(tbs_type))) + ans.append('Outermost index: %d'%outermost_index) + ans.append('Unknown extra start bytes: %s'%repr_extra(extra)) if self.doc_type in (257, 259): # Hierarchical periodical byts, a = self.interpret_periodical(tbs_type, byts, dat['geom'][0]) @@ 
-977,53 +979,21 @@ class TBSIndexing(object): # {{{ def interpret_periodical(self, tbs_type, byts, record_offset): ans = [] - def tbs_type_6(byts, psi=None, msg=None, fmsg='Unknown'): # {{{ - if psi is None: - # Assume parent section is 1 - psi = self.get_index(1) - if msg is None: - msg = ('Article index at start of record or first article' - ' index, relative to parent section') - if byts: - # byts could be empty - arg, consumed = decint(byts) - byts = byts[consumed:] - flags = (arg & 0b1111) - ai = (arg >> 4) - ans.append('%s (fvwi): %d [%d absolute]'%(msg, ai, - ai+psi.index)) - if flags == 1: - arg, consumed = decint(byts) - if arg == 0: - # EOF of record, otherwise ignore and hope someone else - # will deal with these bytes - byts = byts[consumed:] - ans.append('EOF (vwi: should be 0): %d'%arg) - elif flags in (4, 5): - num = byts[0] - byts = byts[1:] - ans.append('Number of article nodes in the record (byte): %d'%num) - if flags == 5: - arg, consumed = decint(byts) - byts = byts[consumed:] - ans.append('%s (vwi)): %d'%(fmsg, arg)) - elif flags == 0: - pass - else: - raise ValueError('Unknown flags: %d'%flags) - return byts - - # }}} - def read_section_transitions(byts, psi=None): # {{{ if psi is None: - # Assume parent section is 1 + # Assume previous section is 1 psi = self.get_index(1) while byts: - ai, flags, consumed = decode_fvwi(byts) + ai, extra, consumed = decode_tbs(byts) byts = byts[consumed:] - if flags & 0b1000: + if extra.get(0b0010, None) is not None: + raise ValueError('Dont know how to interpret flag 0b0010' + ' while reading section transitions') + if extra.get(0b1000, None) is not None: + if len(extra) > 1: + raise ValueError('Dont know how to interpret flags' + ' %r while reading section transitions'%extra) nsi = self.get_index(psi.index+1) ans.append('Last article in this record of section %d' ' (relative to next section index [%d]): ' @@ -1036,113 +1006,57 @@ class TBSIndexing(object): # {{{ ' (relative to its parent section): ' '%d [%d 
absolute index]'%(psi.index, ai, ai+psi.index)) - if flags == 0: - ans.append('The section %d has only one article' - ' in this record'%psi.index) - continue + num = extra.get(0b0100, None) + if num is None: + msg = ('The section %d has at most one article' + ' in this record')%psi.index + else: + msg = ('Number of articles in this record of ' + 'section %d: %d')%(psi.index, num) + ans.append(msg) - if flags & 0b0100: - num = byts[0] - byts = byts[1:] - ans.append('Number of articles in this record of ' - 'section %d: %d'%(psi.index, num)) - - if flags & 0b0010: - raise ValueError( - 'Dont know how to interpret the 0b0010 flag') - - if flags & 0b0001: - arg, consumed = decint(byts) - byts = byts[consumed:] - ans.append('->Offset to start of next section (%d) from start' + offset = extra.get(0b0001, None) + if offset is not None: + if offset == 0: + ans.append('This record is spanned by the article:' + '%d'%(ai+psi.index)) + else: + ans.append('->Offset to start of next section (%d) from start' ' of record: %d [%d absolute offset]'%(psi.index+1, - arg, arg+record_offset)) + offset, offset+record_offset)) + return byts # }}} - if tbs_type == 3: # {{{ - arg2, consumed = decint(byts) + def read_starting_section(byts): # {{{ + si, extra, consumed = decode_tbs(byts) byts = byts[consumed:] - ans.append('Unknown (vwi: always 0?): %d'%arg2) - - arg3, consumed = decint(byts) - byts = byts[consumed:] - fsi = arg3 >> 4 - flags = arg3 & 0b1111 - ans.append('First section index (fvwi): %d'%fsi) - psi = self.get_index(fsi) - ans.append('Flags: %d'%flags) - if flags == 4: - ans.append('Number of articles in this section: %d'%byts[0]) - byts = byts[1:] - elif flags == 0: - pass - else: - raise ValueError('Unknown flags value: %d'%flags) - byts = read_section_transitions(byts, psi) - - # }}} - - elif tbs_type == 7: # {{{ - # This occurs for records that have no section nodes and - # whose parent section's index == 1 - ans.append('Unknown (maybe vwi?): %r'%bytes(byts[:2])) - byts = 
byts[2:] - arg, consumed = decint(byts) - byts = byts[consumed:] - ai = arg >> 4 - flags = arg & 0b1111 - ans.append('Article at start of record (fvwi): %d'%ai) - if flags == 4: - num = byts[0] - byts = byts[1:] - ans.append('Number of articles in record (byte): %d'%num) - elif flags == 0: - pass - elif flags == 1: - arg, consumed = decint(byts) - byts = byts[consumed:] - ans.append('EOF (vwi: should be 0): %d'%arg) - else: - raise ValueError('Unknown flags value: %d'%flags) + if len(extra) > 1 or 0b0010 in extra or 0b1000 in extra: + raise ValueError('Dont know how to interpret flags %r' + ' when reading starting section'%extra) + si = self.get_index(si) + ans.append('The section at the start of this record is:' + ' %d'%si.index) + if 0b0100 in extra: + num = extra[0b0100] + ans.append('The number of articles from the section %d' + ' in this record: %d'%(si.index, num)) + elif 0b0001 in extra: + eof = extra[0b0001] + if eof != 0: + raise ValueError('Unknown eof value %s when reading' + ' starting section'%eof) + ans.append('This record is spanned by an article from' + ' the section: %d'%si.index) + return si, byts # }}} - elif tbs_type == 6: # {{{ - # This is used for records spanned by an article whose parent - # section's index == 1 or for the opening record if it contains the - # periodical start, section 1 start and at least one article. The - # two cases are distinguished by the flags on the article index - # vwi. - unk = byts[0] - byts = byts[1:] - ans.append('Unknown (byte: always 2?): %d'%unk) - byts = tbs_type_6(byts) - # }}} + if tbs_type & 0b0100: + # Starting section is the first section + ssi = self.get_index(1) + else: + ssi, byts = read_starting_section(byts) - elif tbs_type == 2: # {{{ - # This occurs for records with no section nodes and whose parent - # section's index != 1 (undefined (records before the first - # section) or > 1) - # This is also used for records that are spanned by an article - # whose parent section index > 1. 
In this case the flags of the - # vwi referring to the article at the start - # of the record are set to 1 instead of 4. - arg, consumed = decint(byts) - byts = byts[consumed:] - flags = (arg & 0b1111) - psi = (arg >> 4) - ans.append('Parent section index (fvwi): %d'%psi) - psi = self.get_index(psi) - ans.append('Flags: %d'%flags) - if flags == 1: - arg, consumed = decint(byts) - byts = byts[consumed:] - ans.append('Unknown (vwi?: always 0?): %d'%arg) - byts = tbs_type_6(byts, psi=psi) - elif flags == 0: - byts = tbs_type_6(byts, psi=psi) - else: - raise ValueError('Unknown flags: %d'%flags) - # }}} + byts = read_section_transitions(byts, ssi) return byts, ans diff --git a/src/calibre/ebooks/mobi/tbs_periodicals.rst b/src/calibre/ebooks/mobi/tbs_periodicals.rst index d770133625..2fa6ec90f3 100644 --- a/src/calibre/ebooks/mobi/tbs_periodicals.rst +++ b/src/calibre/ebooks/mobi/tbs_periodicals.rst @@ -3,6 +3,20 @@ Reverse engineering the trailing byte sequences for hierarchical periodicals In the following, *vwi* means variable width integer and *fvwi* means a vwi whose lowest four bits are used as a flag. All the following information/inferences are from examining the output of kindlegen on a sample periodical. Given the general level of Amazon's incompetence, there are no guarantees that this information is the *best/most complete* way to do TBS indexing. +Sequence encoding: + +0b1000 : Continuation bit + +First sequences: +0b0010 : 80 +0b0011 : 80 80 +0b0110 : 80 2 +0b0111 : 80 2 80 + +Other sequences: +0b0101 : 4 1a +0b0001 : c b1 + Opening record ---------------- @@ -52,10 +66,60 @@ The text record that contains the opening node for the periodical (depth=0 node If there was only a single article, instead of 2, then the last two bytes would be: c0, i.e. there would be no byte giving the number of articles in the record. 
+ Starting record with two section transitions:: + + Record #1: Starts at: 0 Ends at: 4095 + Contains: 7 index entries (0 ends, 4 complete, 3 starts) + TBS bytes: 86 80 2 c0 b8 c4 3 + Complete: + Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 564, Size: 375) [Ars Technica] + Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 572, Size: 367) [Week in gaming: 3DS review, Crysis 2, George Hotz] + Index Entry: 6 (Parent index: 2, Depth: 2, Offset: 947, Size: 1014) [Max and the Magic Marker for iPad: Review] + Index Entry: 7 (Parent index: 2, Depth: 2, Offset: 1961, Size: 1077) [iPad 2 steers itself into home console gaming territory with Real Racing 2 HD] + Starts: + Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 35372) [j_x's Google reader] + Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 939, Size: 10368) [Neowin.net] + Index Entry: 8 (Parent index: 2, Depth: 2, Offset: 3038, Size: 1082) [Microsoft's Joe Belfiore still working on upcoming Zune hardware] + TBS Type: 110 (6) + Outer Index entry: 0 + Unknown (vwi: always 0?): 0 + Unknown (byte: always 2?): 2 + Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute] + Remaining bytes: b8 c4 3 + + Starting record with three section transitions:: + + Record #1: Starts at: 0 Ends at: 4095 + Contains: 10 index entries (0 ends, 7 complete, 3 starts) + TBS bytes: 86 80 2 c0 b8 c0 b8 c4 4 + Complete: + Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 564, Size: 375) [Ars Technica] + Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 939, Size: 316) [Neowin.net] + Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 572, Size: 367) [Week in gaming: 3DS review, Crysis 2, George Hotz] + Index Entry: 6 (Parent index: 2, Depth: 2, Offset: 947, Size: 308) [Max and the Magic Marker for iPad: Review] + Index Entry: 7 (Parent index: 3, Depth: 2, Offset: 1263, Size: 760) [OSnews Asks on Interrupts: The Results] + Index Entry: 8 (Parent index: 3, Depth: 2, Offset: 
2023, Size: 693) [Apple Ditches SAMBA in Favour of Homegrown Replacement] + Index Entry: 9 (Parent index: 3, Depth: 2, Offset: 2716, Size: 747) [ITC: Apple's Mobile Products Do Not Violate Nokia Patents] + Starts: + Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 25320) [j_x's Google reader] + Index Entry: 3 (Parent index: 0, Depth: 1, Offset: 1255, Size: 6829) [OSNews] + Index Entry: 10 (Parent index: 3, Depth: 2, Offset: 3463, Size: 666) [Transparent Monitor Embedded in Window Glass] + TBS Type: 110 (6) + Outer Index entry: 0 + Unknown (vwi: always 0?): 0 + Unknown (byte: always 2?): 2 + Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute] + Remaining bytes: b8 c0 b8 c4 4 + + + + Records with no nodes ------------------------ +subtype = 010 + These records are spanned by a single article. They are of two types: 1. If the parent section index is 1, TBS type of 6, like this:: @@ -247,7 +311,7 @@ In such a record there is a transition from one section to the next. As such the Last article of ending section w.r.t. 
starting section offset (fvwi): 12 [15 absolute] Flags (always 8?): 8 Article index at start of record or first article index, relative to parent section (fvwi): 13 [16 absolute] - Number of article nodes in the record (byte): 4 + Number of article nodes in the record belonging to the last section (byte): 4 Ending record @@ -274,3 +338,26 @@ Logically, ending records must have at least one article ending, one section end If the record had only a single article end, the last two bytes would be replaced with: f0 +If the last record has multiple section transitions, it is of type 6 and looks like:: + + Record #9: Starts at: 32768 Ends at: 34953 + Contains: 9 index entries (3 ends, 6 complete, 0 starts) + TBS bytes: 86 80 2 1 d0 1 c8 1 d0 1 c8 1 d0 1 c8 1 d0 + Ends: + Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 34739) [j_x's Google reader] + Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 7758, Size: 26279) [Ars Technica] + Index Entry: 14 (Parent index: 1, Depth: 2, Offset: 31929, Size: 2108) [Trademarked keyword sales may soon be restricted in Europe] + Complete: + Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 34037, Size: 316) [Neowin.net] + Index Entry: 3 (Parent index: 0, Depth: 1, Offset: 34353, Size: 282) [OSNews] + Index Entry: 4 (Parent index: 0, Depth: 1, Offset: 34635, Size: 319) [Slashdot] + Index Entry: 15 (Parent index: 2, Depth: 2, Offset: 34045, Size: 308) [Max and the Magic Marker for iPad: Review] + Index Entry: 16 (Parent index: 3, Depth: 2, Offset: 34361, Size: 274) [OSnews Asks on Interrupts: The Results] + Index Entry: 17 (Parent index: 4, Depth: 2, Offset: 34643, Size: 311) [Leonard Nimoy Turns 80] + TBS Type: 110 (6) + Outer Index entry: 0 + Unknown (vwi: always 0?): 0 + Unknown (byte: always 2?): 2 + Article index at start of record or first article index, relative to parent section (fvwi): 13 [14 absolute] + Remaining bytes: 1 c8 1 d0 1 c8 1 d0 1 c8 1 d0 + diff --git a/src/calibre/ebooks/mobi/utils.py
b/src/calibre/ebooks/mobi/utils.py index ae1241e2f1..37d2093066 100644 --- a/src/calibre/ebooks/mobi/utils.py +++ b/src/calibre/ebooks/mobi/utils.py @@ -11,6 +11,7 @@ import struct from collections import OrderedDict from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail +from calibre.ebooks import normalize IMAGE_MAX_SIZE = 10 * 1024 * 1024 @@ -197,3 +198,96 @@ def encode_trailing_data(raw): lsize += 1 return raw + encoded +def encode_fvwi(val, flags): + ''' + Encode the value val and the 4 bit flags flags as a fvwi. This encoding is + used in the trailing byte sequences for indexing. Returns encoded + bytestring. + ''' + ans = (val << 4) | (flags & 0b1111) + return encint(ans) + + +def decode_fvwi(byts): + ''' + Decode encoded fvwi. Returns number, flags, consumed + ''' + arg, consumed = decint(bytes(byts)) + return (arg >> 4), (arg & 0b1111), consumed + +def decode_tbs(byts): + ''' + Trailing byte sequences for indexing consists of series of fvwi numbers. + This function reads the fvwi number and its associated flags. It them uses + the flags to read any more numbers that belong to the series. The flags are + the lowest 4 bits of the vwi (see the encode_fvwi function above). + + Returns the fvwi number, a dictionary mapping flags bits to the associated + data and the number of bytes consumed. + ''' + byts = bytes(byts) + val, flags, consumed = decode_fvwi(byts) + extra = {} + byts = byts[consumed:] + if flags & 0b1000: + extra[0b1000] = True + if flags & 0b0010: + x, consumed2 = decint(byts) + byts = byts[consumed2:] + extra[0b0010] = x + consumed += consumed2 + if flags & 0b0100: + extra[0b0100] = ord(byts[0]) + byts = byts[1:] + consumed += 1 + if flags & 0b0001: + x, consumed2 = decint(byts) + byts = byts[consumed2:] + extra[0b0001] = x + consumed += consumed2 + return val, extra, consumed + +def encode_tbs(val, extra): + ''' + Encode the number val and the extra data in the extra dict as an fvwi. See + decode_tbs above. 
+ ''' + flags = 0 + for flag in extra: + flags |= flag + ans = encode_fvwi(val, flags) + + if 0b0010 in extra: + ans += encint(extra[0b0010]) + if 0b0100 in extra: + ans += bytes(bytearray([extra[0b0100]])) + if 0b0001 in extra: + ans += encint(extra[0b0001]) + return ans + +def utf8_text(text): + ''' + Convert a possibly null string to utf-8 bytes, guaranteeing to return a non + empty, normalized bytestring. + ''' + if text and text.strip(): + text = text.strip() + if not isinstance(text, unicode): + text = text.decode('utf-8', 'replace') + text = normalize(text).encode('utf-8') + else: + text = _('Unknown').encode('utf-8') + return text + +def align_block(raw, multiple=4, pad=b'\0'): + ''' + Return raw with enough pad bytes append to ensure its length is a multiple + of 4. + ''' + extra = len(raw) % multiple + if extra == 0: return raw + return raw + pad*(multiple - extra) + + + + diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index 41c5d2ec91..04387f47f7 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -10,35 +10,13 @@ __docformat__ = 'restructuredtext en' from struct import pack from cStringIO import StringIO -from collections import OrderedDict +from collections import OrderedDict, defaultdict -from calibre.ebooks import normalize -from calibre.ebook.mobi.writer2 import RECORD_SIZE -from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex) +from calibre.ebooks.mobi.writer2 import RECORD_SIZE +from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex, + encode_trailing_data, encode_tbs, align_block, utf8_text) from calibre.ebooks.mobi.langcodes import iana2mobi -def utf8_text(text): - ''' - Convert a possibly null string to utf-8 bytes, guaranteeing to return a non - empty, normalized bytestring. 
- ''' - if text and text.strip(): - text = text.strip() - if not isinstance(text, unicode): - text = text.decode('utf-8', 'replace') - text = normalize(text).encode('utf-8') - else: - text = _('Unknown').encode('utf-8') - return text - -def align_block(raw, multiple=4, pad=b'\0'): - ''' - Return raw with enough pad bytes append to ensure its length is a multiple - of 4. - ''' - extra = len(raw) % multiple - if extra == 0: return raw - return raw + pad*(multiple - extra) class CNCX(object): # {{{ @@ -98,7 +76,7 @@ class IndexEntry(object): # {{{ 'first_child_index': 22, 'last_child_index': 23, } - RTAG_MAP = dict(TAG_VALUES.itervalues(), TAG_VALUES.iterkeys()) + RTAG_MAP = {v:k for k, v in TAG_VALUES.iteritems()} BITMASKS = [1, 2, 3, 4, 5, 21, 22, 23,] @@ -186,17 +164,123 @@ class TBS(object): # {{{ trailing byte sequence for the record. ''' - def __init__(self, data, is_periodical): - if is_periodical: - self.periodical_tbs(data) + def __init__(self, data, is_periodical, first=False, all_sections=[]): + if not data: + self.bytestring = encode_trailing_data(b'') else: - self.book_tbs(data) + self.section_map = OrderedDict((i.index, i) for i in + sorted(all_sections, key=lambda x:x.offset)) - def periodical_tbs(self, data): - self.bytestring = b'' + if is_periodical: + # The starting bytes. + # The value is zero which I think indicates the periodical + # index entry. The values for the various flags seem to be + # unused. If the 0b0100 is present, it means that the record + # deals with section 1 (or is the final record with section + # transitions). 
+ self.type_010 = encode_tbs(0, {0b0010: 0}) + self.type_011 = encode_tbs(0, {0b0010: 0, 0b0001: 0}) + self.type_110 = encode_tbs(0, {0b0100: 2, 0b0010: 0}) + self.type_111 = encode_tbs(0, {0b0100: 2, 0b0010: 0, 0b0001: 0}) - def book_tbs(self, data): - self.bytestring = b'' + depth_map = defaultdict(list) + for x in ('starts', 'ends', 'completes'): + for idx in data[x]: + depth_map[idx.depth].append(idx) + for l in depth_map.itervalues(): + l.sort(key=lambda x:x.offset) + self.periodical_tbs(data, first, depth_map) + else: + self.book_tbs(data, first) + + def periodical_tbs(self, data, first, depth_map): + buf = StringIO() + + has_section_start = (depth_map[1] and depth_map[1][0] in + data['starts']) + spanner = data['spans'] + first_node = None + for nodes in depth_map.values(): + for node in nodes: + if (first_node is None or (node.offset, node.depth) < + (first_node.offset, first_node.depth)): + first_node = node + + parent_section_index = -1 + if depth_map[0]: + # We have a terminal record + typ = (self.type_110 if has_section_start else self.type_010) + if first_node.depth > 0: + parent_section_index = (first_node.index if first_node.depth + == 1 else first_node.parent_index) + else: + if spanner is not None: + # record is spanned by a single article + parent_section_index = spanner.parent_index + typ = (self.type_110 if parent_section_index == 1 else + self.type_010) + elif not depth_map[1]: + # has only article nodes, i.e. 
spanned by a section + parent_section_index = self.depth_map[2][0].parent_index + typ = (self.type_111 if parent_section_index == 1 else + self.type_010) + else: + # has section transitions + parent_section_index = self.depth_map[2][0].parent_index + + buf.write(typ) + + if parent_section_index > 1: + # Write starting section information + if spanner is None: + num_articles = len(depth_map[1]) + extra = {} + if num_articles > 1: + extra = {0b0100: num_articles} + else: + extra = {0b0001: 0} + buf.write(encode_tbs(parent_section_index, extra)) + + if spanner is None: + articles = depth_map[2] + sections = [self.section_map[a.parent_index] for a in articles] + sections.sort(key=lambda x:x.offset) + section_map = {s:[a for a in articles is a.parent_index == + s.index] for s in sections} + for i, section in enumerate(sections): + # All the articles in this record that belong to section + articles = section_map[section] + first_article = articles[0] + last_article = articles[-1] + num = len(articles) + + try: + next_sec = sections[i+1] + except: + next_sec == None + + extra = {} + if num > 1: + extra[0b0100] = num + if i == 0 and next_sec is not None: + # Write offset to next section from start of record + # For some reason kindlegen only writes this offset + # for the first section transition. Imitate it. 
+ extra[0b0001] = next_sec.offset - data['offset'] + + buf.write(encode_tbs(first_article.index-section.index, extra)) + + if next_sec is not None: + buf.write(encode_tbs(last_article.index-next_sec.index, + {0b1000: 0})) + else: + buf.write(encode_tbs(spanner.index - parent_section_index, + {0b0001: 0})) + + self.bytestring = encode_trailing_data(buf.getvalue()) + + def book_tbs(self, data, first): + self.bytestring = encode_trailing_data(b'') # }}} class Indexer(object): # {{{ @@ -548,11 +632,13 @@ class Indexer(object): # {{{ def calculate_trailing_byte_sequences(self): self.tbs_map = {} + found_node = False + sections = [i for i in self.indices if i.depth == 1] for i in xrange(self.number_of_text_records): offset = i * RECORD_SIZE next_offset = offset + RECORD_SIZE data = OrderedDict([('ends',[]), ('completes',[]), ('starts',[]), - ('spans', None)]) + ('spans', None), ('offset', offset)]) for index in self.indices: if index.offset >= next_offset: # Node starts after current record @@ -574,7 +660,13 @@ class Indexer(object): # {{{ data['ends'].append(index) else: data['spans'] = index - self.tbs_map[i+1] = TBS(data, self.is_periodical) + if (data['ends'] or data['completes'] or data['starts'] or + data['spans'] is not None): + self.tbs_map[i+1] = TBS(data, self.is_periodical, first=not + found_node, all_sections=sections) + found_node = True + else: + self.tbs_map[i+1] = TBS({}, self.is_periodical, first=False) def get_trailing_byte_sequence(self, num): return self.tbs_map[num].bytestring From 1297576ee20028ce7302ac180dc6e7c2520ae760 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 25 Jul 2011 20:23:52 -0600 Subject: [PATCH 28/51] New MOBI output: Allow calibre to convert OEB documents with a toc.ncx conforming to the kindlegen periodical specification into periodicals --- src/calibre/ebooks/mobi/writer2/indexer.py | 28 +++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py 
b/src/calibre/ebooks/mobi/writer2/indexer.py index 04387f47f7..0f7a670cff 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -295,7 +295,9 @@ class Indexer(object): # {{{ self.log = oeb.log self.opts = opts - self.is_periodical = opts.mobi_periodical + self.is_periodical = self.detect_periodical() + self.log('Generating MOBI index for a %s'%('periodical' if + self.is_periodical else 'book')) self.is_flat_periodical = False if opts.mobi_periodical: periodical_node = iter(oeb.toc).next() @@ -317,6 +319,28 @@ class Indexer(object): # {{{ self.calculate_trailing_byte_sequences() + def detect_periodical(self): # {{{ + for node in self.oeb.toc.iterdescendants(): + if node.depth() == 1 and node.klass != 'article': + self.log.debug( + 'Not a periodical: Deepest node does not have ' + 'class="article"') + return False + if node.depth() == 2 and node.klass != 'section': + self.log.debug( + 'Not a periodical: Second deepest node does not have' + ' class="section"') + return False + if node.depth() == 3 and node.klass != 'periodical': + self.log.debug('Not a periodical: Third deepest node' + ' does not have class="periodical"') + return False + if node.depth() > 3: + self.log.debug('Not a periodical: Has nodes of depth > 3') + return False + return True + # }}} + def create_index_record(self): # {{{ header_length = 192 buf = StringIO() @@ -630,6 +654,7 @@ class Indexer(object): # {{{ return indices # }}} + # TBS {{{ def calculate_trailing_byte_sequences(self): self.tbs_map = {} found_node = False @@ -670,6 +695,7 @@ class Indexer(object): # {{{ def get_trailing_byte_sequence(self, num): return self.tbs_map[num].bytestring + # }}} # }}} From 586aa592459ea7c3279166393218ceb24463a649 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Tue, 26 Jul 2011 13:53:45 +0100 Subject: [PATCH 29/51] Fix 815573: Series number Tweak will not accept constant value --- resources/default_tweaks.py | 14 +++++++++++++- 
src/calibre/ebooks/metadata/opf2.py | 8 +++++++- src/calibre/library/database2.py | 13 ++++++++----- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index 3e2cc4da57..65cb030f96 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -11,7 +11,7 @@ defaults. ''' #: Auto increment series index -# The algorithm used to assign a new book in an existing series a series number. +# The algorithm used to assign a book added to an existing series a series number. # New series numbers assigned using this tweak are always integer values, except # if a constant non-integer is specified. # Possible values are: @@ -27,7 +27,19 @@ defaults. # series_index_auto_increment = 'next' # series_index_auto_increment = 'next_free' # series_index_auto_increment = 16.5 +# +# Set the use_series_auto_increment_tweak_when_importing tweak to True to +# use the above values when importing/adding books. If this tweak is set to +# False (the default) then the series number will be set to 1 if it is not +# explicitly set to something else during the import. If set to True, then the +# series index will be set according to the series_index_auto_increment setting. +# Note that the use_series_auto_increment_tweak_when_importing tweak is used +# only when a value is not provided during import. If the importing regular +# expression produces a value for series_index, or if you are reading metadata +# from books and the import plugin produces a value, then that value will +# be used irrespective of the setting of the tweak.
series_index_auto_increment = 'next' +use_series_auto_increment_tweak_when_importing = False #: Add separator after completing an author name # Should the completion separator be append diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 186821b0c3..7ad741848e 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -22,6 +22,7 @@ from calibre.utils.date import parse_date, isoformat from calibre.utils.localization import get_lang from calibre import prints, guess_type from calibre.utils.cleantext import clean_ascii_chars +from calibre.utils.config import tweaks class Resource(object): # {{{ ''' @@ -527,7 +528,12 @@ class OPF(object): # {{{ category = MetadataField('type') rights = MetadataField('rights') series = MetadataField('series', is_dc=False) - series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1) + if tweaks['use_series_auto_increment_tweak_when_importing']: + series_index = MetadataField('series_index', is_dc=False, + formatter=float, none_is=None) + else: + series_index = MetadataField('series_index', is_dc=False, + formatter=float, none_is=1) title_sort = TitleSortField('title_sort', is_dc=False) rating = MetadataField('rating', is_dc=False, formatter=int) pubdate = MetadataField('date', formatter=parse_date, diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 8d16ffbc52..9ae8f0569b 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -3023,8 +3023,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): stream.seek(0) mi = get_metadata(stream, format, use_libprs_metadata=False) stream.seek(0) - if not mi.series_index: - mi.series_index = 1.0 + if mi.series_index is None: + mi.series_index = self.get_next_series_num_for(mi.series) mi.tags = [_('News')] if arg['add_title_tag']: mi.tags += [arg['title']] @@ -3076,7 +3076,8 @@ class 
LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self._add_newbook_tag(mi) if not add_duplicates and self.has_book(mi): return None - series_index = 1.0 if mi.series_index is None else mi.series_index + series_index = self.get_next_series_num_for(mi.series) \ + if mi.series_index is None else mi.series_index aus = mi.author_sort if mi.author_sort else self.author_sort_from_authors(mi.authors) title = mi.title if isbytestring(aus): @@ -3123,7 +3124,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): if not add_duplicates and self.has_book(mi): duplicates.append((path, format, mi)) continue - series_index = 1.0 if mi.series_index is None else mi.series_index + series_index = self.get_next_series_num_for(mi.series) \ + if mi.series_index is None else mi.series_index aus = mi.author_sort if mi.author_sort else self.author_sort_from_authors(mi.authors) title = mi.title if isinstance(aus, str): @@ -3157,7 +3159,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def import_book(self, mi, formats, notify=True, import_hooks=True, apply_import_tags=True, preserve_uuid=False): - series_index = 1.0 if mi.series_index is None else mi.series_index + series_index = self.get_next_series_num_for(mi.series) \ + if mi.series_index is None else mi.series_index if apply_import_tags: self._add_newbook_tag(mi) if not mi.title: From e61b86cd243da4ff394fb82d94ab31fbe0bafbc8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Jul 2011 11:25:56 -0600 Subject: [PATCH 30/51] ... 
--- src/calibre/ebooks/mobi/debug.py | 6 +- src/calibre/ebooks/mobi/utils.py | 30 +++++--- src/calibre/ebooks/mobi/writer2/indexer.py | 80 ++++++++++++++-------- src/calibre/ebooks/mobi/writer2/main.py | 2 + 4 files changed, 77 insertions(+), 41 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 67f20e691f..f35d8ac075 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -957,15 +957,17 @@ class TBSIndexing(object): # {{{ return str({bin4(k):v for k, v in extra.iteritems()}) tbs_type = 0 + is_periodical = self.doc_type in (257, 258, 259) if len(byts): - outermost_index, extra, consumed = decode_tbs(byts) + outermost_index, extra, consumed = decode_tbs(byts, flag_size=4 if + is_periodical else 3) byts = byts[consumed:] for k in extra: tbs_type |= k ans.append('\nTBS: %d (%s)'%(tbs_type, bin4(tbs_type))) ans.append('Outermost index: %d'%outermost_index) ans.append('Unknown extra start bytes: %s'%repr_extra(extra)) - if self.doc_type in (257, 259): # Hierarchical periodical + if is_periodical: # Hierarchical periodical byts, a = self.interpret_periodical(tbs_type, byts, dat['geom'][0]) ans += a diff --git a/src/calibre/ebooks/mobi/utils.py b/src/calibre/ebooks/mobi/utils.py index 37d2093066..16aa2a3b64 100644 --- a/src/calibre/ebooks/mobi/utils.py +++ b/src/calibre/ebooks/mobi/utils.py @@ -66,11 +66,14 @@ def encint(value, forward=True): If forward is True the bytes returned are suitable for prepending to the output buffer, otherwise they must be append to the output buffer. 
''' + if value < 0: + raise ValueError('Cannot encode negative numbers as vwi') # Encode vwi byts = bytearray() while True: b = value & 0b01111111 value >>= 7 # shift value to the right by 7 bits + byts.append(b) if value == 0: break @@ -198,24 +201,31 @@ def encode_trailing_data(raw): lsize += 1 return raw + encoded -def encode_fvwi(val, flags): +def encode_fvwi(val, flags, flag_size=4): ''' - Encode the value val and the 4 bit flags flags as a fvwi. This encoding is + Encode the value val and the flag_size bits from flags as a fvwi. This encoding is used in the trailing byte sequences for indexing. Returns encoded bytestring. ''' - ans = (val << 4) | (flags & 0b1111) + ans = val << flag_size + for i in xrange(flag_size): + ans |= (flags & (1 << i)) return encint(ans) -def decode_fvwi(byts): +def decode_fvwi(byts, flag_size=4): ''' Decode encoded fvwi. Returns number, flags, consumed ''' arg, consumed = decint(bytes(byts)) - return (arg >> 4), (arg & 0b1111), consumed + val = arg >> flag_size + flags = 0 + for i in xrange(flag_size): + flags |= (arg & (1 << i)) + return val, flags, consumed -def decode_tbs(byts): + +def decode_tbs(byts, flag_size=4): ''' Trailing byte sequences for indexing consists of series of fvwi numbers. This function reads the fvwi number and its associated flags. It them uses @@ -226,10 +236,10 @@ def decode_tbs(byts): data and the number of bytes consumed. ''' byts = bytes(byts) - val, flags, consumed = decode_fvwi(byts) + val, flags, consumed = decode_fvwi(byts, flag_size=flag_size) extra = {} byts = byts[consumed:] - if flags & 0b1000: + if flags & 0b1000 and flag_size > 3: extra[0b1000] = True if flags & 0b0010: x, consumed2 = decint(byts) @@ -247,7 +257,7 @@ def decode_tbs(byts): consumed += consumed2 return val, extra, consumed -def encode_tbs(val, extra): +def encode_tbs(val, extra, flag_size=4): ''' Encode the number val and the extra data in the extra dict as an fvwi. See decode_tbs above. 
@@ -255,7 +265,7 @@ def encode_tbs(val, extra): flags = 0 for flag in extra: flags |= flag - ans = encode_fvwi(val, flags) + ans = encode_fvwi(val, flags, flag_size=flag_size) if 0b0010 in extra: ans += encint(extra[0b0010]) diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index 0f7a670cff..ece96e3a7c 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -28,13 +28,12 @@ class CNCX(object): # {{{ MAX_STRING_LENGTH = 500 - def __init__(self, toc, opts): + def __init__(self, toc, is_periodical): self.strings = OrderedDict() - for item in toc: - if item is self.toc: continue + for item in toc.iterdescendants(): self.strings[item.title] = 0 - if opts.mobi_periodical: + if is_periodical: self.strings[item.klass] = 0 self.records = [] @@ -91,6 +90,17 @@ class IndexEntry(object): # {{{ self.first_child_index = None self.last_child_index = None + def __repr__(self): + return ('IndexEntry(offset=%r, depth=%r, length=%r, index=%r,' + ' parent_index=%r)')%(self.offset, self.depth, self.length, + self.index, self.parent_index) + + @dynamic_property + def size(self): + def fget(self): return self.length + def fset(self, val): self.length = val + return property(fget=fget, fset=fset, doc='Alias for length') + @classmethod def tagx_block(cls, for_periodical=True): buf = bytearray() @@ -137,7 +147,7 @@ class IndexEntry(object): # {{{ def entry_type(self): ans = 0 for tag in self.tag_nums: - ans |= (1 << self.BITMASKS[tag]) # 1 << x == 2**x + ans |= (1 << self.BITMASKS.index(tag)) # 1 << x == 2**x return ans @property @@ -152,7 +162,7 @@ class IndexEntry(object): # {{{ val = getattr(self, attr) buf.write(encint(val)) - ans = buf.get_value() + ans = buf.getvalue() return ans # }}} @@ -175,13 +185,16 @@ class TBS(object): # {{{ # The starting bytes. # The value is zero which I think indicates the periodical # index entry. The values for the various flags seem to be - # unused. 
If the 0b0100 is present, it means that the record + # unused. If the 0b100 is present, it means that the record # deals with section 1 (or is the final record with section # transitions). - self.type_010 = encode_tbs(0, {0b0010: 0}) - self.type_011 = encode_tbs(0, {0b0010: 0, 0b0001: 0}) - self.type_110 = encode_tbs(0, {0b0100: 2, 0b0010: 0}) - self.type_111 = encode_tbs(0, {0b0100: 2, 0b0010: 0, 0b0001: 0}) + self.type_010 = encode_tbs(0, {0b010: 0}, flag_size=3) + self.type_011 = encode_tbs(0, {0b010: 0, 0b001: 0}, + flag_size=3) + self.type_110 = encode_tbs(0, {0b100: 2, 0b010: 0}, + flag_size=3) + self.type_111 = encode_tbs(0, {0b100: 2, 0b010: 0, 0b001: + 0}, flag_size=3) depth_map = defaultdict(list) for x in ('starts', 'ends', 'completes'): @@ -221,12 +234,18 @@ class TBS(object): # {{{ self.type_010) elif not depth_map[1]: # has only article nodes, i.e. spanned by a section - parent_section_index = self.depth_map[2][0].parent_index + parent_section_index = depth_map[2][0].parent_index typ = (self.type_111 if parent_section_index == 1 else self.type_010) else: # has section transitions - parent_section_index = self.depth_map[2][0].parent_index + if depth_map[2]: + parent_section_index = depth_map[2][0].parent_index + typ = self.type_011 + else: + parent_section_index = depth_map[1][0].index + typ = (self.type_110 if parent_section_index == 1 else + self.type_011) buf.write(typ) @@ -243,9 +262,10 @@ class TBS(object): # {{{ if spanner is None: articles = depth_map[2] - sections = [self.section_map[a.parent_index] for a in articles] - sections.sort(key=lambda x:x.offset) - section_map = {s:[a for a in articles is a.parent_index == + sections = set([self.section_map[a.parent_index] for a in + articles]) + sections = sorted(sections, key=lambda x:x.offset) + section_map = {s:[a for a in articles if a.parent_index == s.index] for s in sections} for i, section in enumerate(sections): # All the articles in this record that belong to section @@ -257,7 +277,7 @@ 
class TBS(object): # {{{ try: next_sec = sections[i+1] except: - next_sec == None + next_sec = None extra = {} if num > 1: @@ -299,14 +319,14 @@ class Indexer(object): # {{{ self.log('Generating MOBI index for a %s'%('periodical' if self.is_periodical else 'book')) self.is_flat_periodical = False - if opts.mobi_periodical: + if self.is_periodical: periodical_node = iter(oeb.toc).next() sections = tuple(periodical_node) self.is_flat_periodical = len(sections) == 1 self.records = [] - self.cncx = CNCX(oeb.toc, opts) + self.cncx = CNCX(oeb.toc, self.is_periodical) if self.is_periodical: self.indices = self.create_periodical_index() @@ -405,7 +425,7 @@ class Indexer(object): # {{{ buf.write(pack(b'>I', 0)) # Filled in later # Number of index records 24-28 - buf.write(pack('b>I', len(self.records))) + buf.write(pack(b'>I', len(self.records))) # Index Encoding 28-32 buf.write(pack(b'>I', 65001)) # utf-8 @@ -457,7 +477,7 @@ class Indexer(object): # {{{ idxt_offset = buf.tell() buf.write(b'IDXT') - buf.write(header_length + len(tagx_block)) + buf.write(pack(b'>H', header_length + len(tagx_block))) buf.write(b'\0') buf.seek(20) buf.write(pack(b'>I', idxt_offset)) @@ -567,7 +587,7 @@ class Indexer(object): # {{{ for s, x in enumerate(normalized_sections): sec, normalized_articles = x try: - sec.length = normalized_sections[s+1].offset - sec.offset + sec.length = normalized_sections[s+1][0].offset - sec.offset except: sec.length = self.serializer.body_end_offset - sec.offset for i, art in enumerate(normalized_articles): @@ -583,17 +603,18 @@ class Indexer(object): # {{{ normalized_articles)) normalized_sections[i] = (sec, normalized_articles) - normalized_sections = list(filter(lambda x: x[0].size > 0 and x[1], + normalized_sections = list(filter(lambda x: x[0].length > 0 and x[1], normalized_sections)) # Set indices i = 0 - for sec, normalized_articles in normalized_sections: + for sec, articles in normalized_sections: i += 1 sec.index = i + sec.parent_index = 0 - for sec, 
normalized_articles in normalized_sections: - for art in normalized_articles: + for sec, articles in normalized_sections: + for art in articles: i += 1 art.index = i art.parent_index = sec.index @@ -606,7 +627,7 @@ class Indexer(object): # {{{ for s, x in enumerate(normalized_sections): sec, articles = x try: - next_offset = normalized_sections[s+1].offset + next_offset = normalized_sections[s+1][0].offset except: next_offset = self.serializer.body_end_offset sec.length = next_offset - sec.offset @@ -622,7 +643,7 @@ class Indexer(object): # {{{ for s, x in enumerate(normalized_sections): sec, articles = x try: - next_sec = normalized_sections[s+1] + next_sec = normalized_sections[s+1][0] except: if (sec.length == 0 or sec.next_offset != self.serializer.body_end_offset): @@ -659,6 +680,7 @@ class Indexer(object): # {{{ self.tbs_map = {} found_node = False sections = [i for i in self.indices if i.depth == 1] + deepest = max(i.depth for i in self.indices) for i in xrange(self.number_of_text_records): offset = i * RECORD_SIZE next_offset = offset + RECORD_SIZE @@ -683,7 +705,7 @@ class Indexer(object): # {{{ if index.next_offset <= next_offset: # Node ends in current record data['ends'].append(index) - else: + elif index.depth == deepest: data['spans'] = index if (data['ends'] or data['completes'] or data['starts'] or data['spans'] is not None): diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index 06572f48c4..a5e80cc3cd 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -55,6 +55,7 @@ class MobiWriter(object): self.last_text_record_idx = 1 def __call__(self, oeb, path_or_stream): + self.log = oeb.log if hasattr(path_or_stream, 'write'): return self.dump_stream(oeb, path_or_stream) with open(path_or_stream, 'w+b') as stream: @@ -90,6 +91,7 @@ class MobiWriter(object): self.primary_index_record_idx = None try: self.indexer = Indexer(self.serializer, 
self.last_text_record_idx, + len(self.records[self.last_text_record_idx]), self.opts, self.oeb) except: self.log.exception('Failed to generate MOBI index:') From abe30422a6986ade37995802b921bb1cf083282e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Jul 2011 11:42:57 -0600 Subject: [PATCH 31/51] ... --- src/calibre/ebooks/mobi/writer2/indexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index ece96e3a7c..311b4220d9 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -125,7 +125,7 @@ class IndexEntry(object): # {{{ buf.append(1) header = b'TAGX' - header += pack(b'>I', len(buf)) # table length + header += pack(b'>I', 12+len(buf)) # table length header += pack(b'>I', 1) # control byte count return header + bytes(buf) From e8cc278b186a653f11e4db07efa4918c1bf8fa32 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Jul 2011 12:06:09 -0600 Subject: [PATCH 32/51] ... 
--- src/calibre/ebooks/mobi/writer2/indexer.py | 3 +-- src/calibre/ebooks/mobi/writer2/main.py | 14 +++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index 311b4220d9..f6add97a53 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -52,11 +52,10 @@ class CNCX(object): # {{{ self.records.append(buf.getvalue()) buf.truncate(0) offset = len(self.records) * 0x10000 - + buf.write(raw) self.strings[key] = offset offset += len(raw) - buf.write(b'\0') # CNCX must end with zero byte self.records.append(align_block(buf.getvalue())) def __getitem__(self, string): diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index a5e80cc3cd..e614567508 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -279,7 +279,7 @@ class MobiWriter(object): last_content_record = len(self.records) - 1 # EOF record - self.records.append('\xE9\x8E\x0D\x0A') + self.records.append(b'\xE9\x8E\x0D\x0A') record0 = StringIO() # The MOBI Header @@ -309,8 +309,15 @@ class MobiWriter(object): # 0x10 - 0x13 : UID # 0x14 - 0x17 : Generator version + bt = 0x002 + if self.primary_index_record_idx is not None: + if self.indexer.is_flat_periodical: + bt = 0x102 + elif self.indexer.is_periodical: + bt = 0x103 + record0.write(pack(b'>IIIII', - 0xe8, 0x002, 65001, uid, 6)) + 0xe8, bt, 65001, uid, 6)) # 0x18 - 0x1f : Unknown record0.write(b'\xff' * 8) @@ -339,7 +346,8 @@ class MobiWriter(object): # 0x58 - 0x5b : Format version # 0x5c - 0x5f : First image record number record0.write(pack(b'>II', - 6, self.first_image_record if self.first_image_record else 0)) + 6, self.first_image_record if self.first_image_record else + len(self.records)-1)) # 0x60 - 0x63 : First HUFF/CDIC record number # 0x64 - 0x67 : Number of HUFF/CDIC records From 
91ac0a879c285b2a57a4cdd1ee298383b0c3753d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Jul 2011 12:13:20 -0600 Subject: [PATCH 33/51] ... --- src/calibre/ebooks/mobi/debug.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index f35d8ac075..6c9a2136b7 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -73,7 +73,7 @@ class PalmDB(object): self.ident = self.type + self.creator if self.ident not in (b'BOOKMOBI', b'TEXTREAD'): raise ValueError('Unknown book ident: %r'%self.ident) - self.uid_seed = self.raw[68:72] + self.uid_seed, = struct.unpack(b'>I', self.raw[68:72]) self.next_rec_list_id = self.raw[72:76] self.number_of_records, = struct.unpack(b'>H', self.raw[76:78]) @@ -290,7 +290,12 @@ class MOBIHeader(object): # {{{ (self.fcis_number, self.fcis_count, self.flis_number, self.flis_count) = struct.unpack(b'>IIII', self.raw[200:216]) - self.unknown6 = self.raw[216:240] + self.unknown6 = self.raw[216:224] + self.srcs_record_index = struct.unpack(b'>I', + self.raw[224:228])[0] + self.num_srcs_records = struct.unpack(b'>I', + self.raw[228:232])[0] + self.unknown7 = self.raw[232:240] self.extra_data_flags = struct.unpack(b'>I', self.raw[240:244])[0] self.has_multibytes = bool(self.extra_data_flags & 0b1) @@ -356,6 +361,9 @@ class MOBIHeader(object): # {{{ ans.append('FLIS number: %d'% self.flis_number) ans.append('FLIS count: %d'% self.flis_count) ans.append('Unknown6: %r'% self.unknown6) + ans.append('SRCS record index: %d'%self.srcs_record_index) + ans.append('Number of SRCS records?: %d'%self.num_srcs_records) + ans.append('Unknown7: %r'%self.unknown7) ans.append(('Extra data flags: %s (has multibyte: %s) ' '(has indexing: %s) (has uncrossable breaks: %s)')%( bin(self.extra_data_flags), self.has_multibytes, From f47f4afe9f2bfd157eb973e69a2a59449aa2cc40 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Jul 2011 
13:41:23 -0600 Subject: [PATCH 34/51] ... --- src/calibre/ebooks/pdf/writer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py index 516509fdd7..dc7f2edba9 100644 --- a/src/calibre/ebooks/pdf/writer.py +++ b/src/calibre/ebooks/pdf/writer.py @@ -165,6 +165,7 @@ class PDFWriter(QObject): # {{{ printer = get_pdf_printer(self.opts) printer.setOutputFileName(item_path) self.view.print_(printer) + printer.abort() self._render_book() def _delete_tmpdir(self): @@ -186,6 +187,7 @@ class PDFWriter(QObject): # {{{ draw_image_page(printer, painter, p, preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio) painter.end() + printer.abort() def _write(self): From dbbde2c494743a78e077e0b42953039efbf25431 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Jul 2011 14:20:37 -0600 Subject: [PATCH 35/51] ... --- src/calibre/ebooks/mobi/writer2/indexer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index f6add97a53..f99f7824d0 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -500,12 +500,12 @@ class Indexer(object): # {{{ continue seen.add(offset) index = IndexEntry(offset, label) - self.indices.append(index) + indices.append(index) indices.sort(key=lambda x:x.offset) # Set lengths - for i, index in indices: + for i, index in enumerate(indices): try: next_offset = indices[i+1].offset except: @@ -516,11 +516,11 @@ class Indexer(object): # {{{ indices = [i for i in indices if i.length > 0] # Set index values - for i, index in indices: + for i, index in enumerate(indices): index.index = i # Set lengths again to close up any gaps left by filtering - for i, index in indices: + for i, index in enumerate(indices): try: next_offset = indices[i+1].offset except: From 3cd3ca6acd13d1cdc25512f56137e6b1c18dbe7e Mon Sep 17 00:00:00 2001 From: 
Kovid Goyal Date: Tue, 26 Jul 2011 15:18:33 -0600 Subject: [PATCH 36/51] Improve Irish Times --- recipes/irish_times.recipe | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/recipes/irish_times.recipe b/recipes/irish_times.recipe index 3efcfc6d29..31ccd306e4 100644 --- a/recipes/irish_times.recipe +++ b/recipes/irish_times.recipe @@ -1,4 +1,4 @@ -__license__ = 'GPL v3' +__license__ = 'GPL v3' __copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns" ''' irishtimes.com @@ -10,7 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class IrishTimes(BasicNewsRecipe): title = u'The Irish Times' encoding = 'ISO-8859-15' - __author__ = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns" + __author__ = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns" language = 'en_IE' timefmt = ' (%A, %B %d, %Y)' @@ -18,6 +18,7 @@ class IrishTimes(BasicNewsRecipe): oldest_article = 1.0 max_articles_per_feed = 100 no_stylesheets = True + simultaneous_downloads= 5 r = re.compile('.*(?Phttp:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*') remove_tags = [dict(name='div', attrs={'class':'footer'})] @@ -25,17 +26,17 @@ class IrishTimes(BasicNewsRecipe): feeds = [ ('Frontpage', 'http://www.irishtimes.com/feeds/rss/newspaper/index.rss'), - ('Ireland', 'http://rss.feedsportal.com/c/851/f/10845/index.rss'), - ('World', 'http://rss.feedsportal.com/c/851/f/10846/index.rss'), - ('Finance', 'http://rss.feedsportal.com/c/851/f/10847/index.rss'), - ('Features', 'http://rss.feedsportal.com/c/851/f/10848/index.rss'), - ('Sport', 'http://rss.feedsportal.com/c/851/f/10849/index.rss'), - ('Opinion', 'http://rss.feedsportal.com/c/851/f/10850/index.rss'), - ('Letters', 'http://rss.feedsportal.com/c/851/f/10851/index.rss'), + ('Ireland', 'http://www.irishtimes.com/feeds/rss/newspaper/ireland.rss'), + ('World', 
'http://www.irishtimes.com/feeds/rss/newspaper/world.rss'), + ('Finance', 'http://www.irishtimes.com/feeds/rss/newspaper/finance.rss'), + ('Features', 'http://www.irishtimes.com/feeds/rss/newspaper/features.rss'), + ('Sport', 'http://www.irishtimes.com/feeds/rss/newspaper/sport.rss'), + ('Opinion', 'http://www.irishtimes.com/feeds/rss/newspaper/opinion.rss'), + ('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'), ('Magazine', 'http://www.irishtimes.com/feeds/rss/newspaper/magazine.rss'), - ('Health', 'http://rss.feedsportal.com/c/851/f/10852/index.rss'), - ('Education & Parenting', 'http://rss.feedsportal.com/c/851/f/10853/index.rss'), - ('Motors', 'http://rss.feedsportal.com/c/851/f/10854/index.rss'), + ('Health', 'http://www.irishtimes.com/feeds/rss/newspaper/health.rss'), + ('Education & Parenting', 'http://www.irishtimes.com/feeds/rss/newspaper/education.rss'), + ('Motors', 'http://www.irishtimes.com/feeds/rss/newspaper/motors.rss'), ('An Teanga Bheo', 'http://www.irishtimes.com/feeds/rss/newspaper/anteangabheo.rss'), ('Commercial Property', 'http://www.irishtimes.com/feeds/rss/newspaper/commercialproperty.rss'), ('Science Today', 'http://www.irishtimes.com/feeds/rss/newspaper/sciencetoday.rss'), @@ -49,10 +50,16 @@ class IrishTimes(BasicNewsRecipe): def print_version(self, url): if url.count('rss.feedsportal.com'): - u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm') + #u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm') + u = url.find('irishtimes') + u = 'http://www.irishtimes.com' + url[u + 12:] + u = u.replace('0C', '/') + u = u.replace('A', '') + u = u.replace('0Bhtml/story01.htm', '_pf.html') else: u = url.replace('.html','_pf.html') return u def get_article_url(self, article): return article.link + From 0ab02460480af5f48be761913ba86b509e33f54b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Jul 2011 18:29:12 -0600 Subject: [PATCH 37/51] ... 
--- src/calibre/ebooks/mobi/writer2/indexer.py | 2 +- src/calibre/ebooks/oeb/base.py | 15 +++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index f99f7824d0..4c428dd38d 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -31,7 +31,7 @@ class CNCX(object): # {{{ def __init__(self, toc, is_periodical): self.strings = OrderedDict() - for item in toc.iterdescendants(): + for item in toc.iterdescendants(breadth_first=True): self.strings[item.title] = 0 if is_periodical: self.strings[item.klass] = 0 diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index fb1910d717..56f4a3ee96 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -1680,11 +1680,18 @@ class TOC(object): return True return False - def iterdescendants(self): + def iterdescendants(self, breadth_first=False): """Iterate over all descendant nodes in depth-first order.""" - for child in self.nodes: - for node in child.iter(): - yield node + if breadth_first: + for child in self.nodes: + yield child + for child in self.nodes: + for node in child.iterdescendants(breadth_first=True): + yield node + else: + for child in self.nodes: + for node in child.iter(): + yield node def __iter__(self): """Iterate over all immediate child nodes.""" From ae6f049792bc62eb43688d0b266a3dbbff450750 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Jul 2011 20:05:32 -0600 Subject: [PATCH 38/51] ... 
--- src/calibre/ebooks/mobi/debug.py | 13 ++-- src/calibre/ebooks/mobi/writer2/indexer.py | 76 +++++++++++++--------- 2 files changed, 48 insertions(+), 41 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 6c9a2136b7..4bf8d356cd 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -424,12 +424,7 @@ class IndexHeader(object): # {{{ if self.index_encoding == 'unknown': raise ValueError( 'Unknown index encoding: %d'%self.index_encoding_num) - self.locale_raw, = struct.unpack(b'>I', raw[32:36]) - langcode = self.locale_raw - langid = langcode & 0xFF - sublangid = (langcode >> 10) & 0xFF - self.language = main_language.get(langid, 'ENGLISH') - self.sublanguage = sub_language.get(sublangid, 'NEUTRAL') + self.possibly_language = raw[32:36] self.num_index_entries, = struct.unpack('>I', raw[36:40]) self.ordt_start, = struct.unpack('>I', raw[40:44]) self.ligt_start, = struct.unpack('>I', raw[44:48]) @@ -489,8 +484,7 @@ class IndexHeader(object): # {{{ a('Number of index records: %d'%self.index_count) a('Index encoding: %s (%d)'%(self.index_encoding, self.index_encoding_num)) - a('Index language: %s - %s (%s)'%(self.language, self.sublanguage, - hex(self.locale_raw))) + a('Unknown (possibly language?): %r'%(self.possibly_language)) a('Number of index entries: %d'% self.num_index_entries) a('ORDT start: %d'%self.ordt_start) a('LIGT start: %d'%self.ligt_start) @@ -1038,6 +1032,7 @@ class TBSIndexing(object): # {{{ # }}} def read_starting_section(byts): # {{{ + orig = byts si, extra, consumed = decode_tbs(byts) byts = byts[consumed:] if len(extra) > 1 or 0b0010 in extra or 0b1000 in extra: @@ -1054,7 +1049,7 @@ class TBSIndexing(object): # {{{ eof = extra[0b0001] if eof != 0: raise ValueError('Unknown eof value %s when reading' - ' starting section'%eof) + ' starting section. 
All bytes: %r'%(eof, orig)) ans.append('This record is spanned by an article from' ' the section: %d'%si.index) return si, byts diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index 4c428dd38d..14c5328622 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -15,7 +15,6 @@ from collections import OrderedDict, defaultdict from calibre.ebooks.mobi.writer2 import RECORD_SIZE from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex, encode_trailing_data, encode_tbs, align_block, utf8_text) -from calibre.ebooks.mobi.langcodes import iana2mobi class CNCX(object): # {{{ @@ -173,28 +172,34 @@ class TBS(object): # {{{ trailing byte sequence for the record. ''' - def __init__(self, data, is_periodical, first=False, all_sections=[]): - if not data: - self.bytestring = encode_trailing_data(b'') - else: - self.section_map = OrderedDict((i.index, i) for i in - sorted(all_sections, key=lambda x:x.offset)) + def __init__(self, data, is_periodical, first=False, all_sections=[], + after_first=False): + self.section_map = OrderedDict((i.index, i) for i in + sorted(all_sections, key=lambda x:x.offset)) - if is_periodical: - # The starting bytes. - # The value is zero which I think indicates the periodical - # index entry. The values for the various flags seem to be - # unused. If the 0b100 is present, it means that the record - # deals with section 1 (or is the final record with section - # transitions). - self.type_010 = encode_tbs(0, {0b010: 0}, flag_size=3) - self.type_011 = encode_tbs(0, {0b010: 0, 0b001: 0}, - flag_size=3) - self.type_110 = encode_tbs(0, {0b100: 2, 0b010: 0}, - flag_size=3) - self.type_111 = encode_tbs(0, {0b100: 2, 0b010: 0, 0b001: - 0}, flag_size=3) + if is_periodical: + # The starting bytes. + # The value is zero which I think indicates the periodical + # index entry. The values for the various flags seem to be + # unused. 
If the 0b100 is present, it means that the record + # deals with section 1 (or is the final record with section + # transitions). + self.type_010 = encode_tbs(0, {0b010: 0}, flag_size=3) + self.type_011 = encode_tbs(0, {0b010: 0, 0b001: 0}, + flag_size=3) + self.type_110 = encode_tbs(0, {0b100: 2, 0b010: 0}, + flag_size=3) + self.type_111 = encode_tbs(0, {0b100: 2, 0b010: 0, 0b001: + 0}, flag_size=3) + if not data: + byts = b'' + if after_first: + # This can happen if a record contains only text between + # the periodical start and the first section + byts = self.type_011 + self.bytestring = encode_trailing_data(byts) + else: depth_map = defaultdict(list) for x in ('starts', 'ends', 'completes'): for idx in data[x]: @@ -202,6 +207,9 @@ class TBS(object): # {{{ for l in depth_map.itervalues(): l.sort(key=lambda x:x.offset) self.periodical_tbs(data, first, depth_map) + else: + if not data: + self.bytestring = encode_trailing_data(b'') else: self.book_tbs(data, first) @@ -240,15 +248,13 @@ class TBS(object): # {{{ # has section transitions if depth_map[2]: parent_section_index = depth_map[2][0].parent_index - typ = self.type_011 else: parent_section_index = depth_map[1][0].index - typ = (self.type_110 if parent_section_index == 1 else - self.type_011) + typ = self.type_011 buf.write(typ) - if parent_section_index > 1: + if typ not in (self.type_110, self.type_111) and parent_section_index > 0: # Write starting section information if spanner is None: num_articles = len(depth_map[1]) @@ -429,9 +435,8 @@ class Indexer(object): # {{{ # Index Encoding 28-32 buf.write(pack(b'>I', 65001)) # utf-8 - # Index language 32-36 - buf.write(iana2mobi( - str(self.oeb.metadata.language[0]))) + # Unknown 32-36 + buf.write(b'\xff'*4) # Number of index entries 36-40 buf.write(pack(b'>I', len(self.indices))) @@ -680,15 +685,20 @@ class Indexer(object): # {{{ found_node = False sections = [i for i in self.indices if i.depth == 1] deepest = max(i.depth for i in self.indices) + for i in 
xrange(self.number_of_text_records): offset = i * RECORD_SIZE next_offset = offset + RECORD_SIZE - data = OrderedDict([('ends',[]), ('completes',[]), ('starts',[]), - ('spans', None), ('offset', offset)]) + data = {'ends':[], 'completes':[], 'starts':[], + 'spans':None, 'offset':offset, 'record_number':i+1} + for index in self.indices: if index.offset >= next_offset: # Node starts after current record - break + if index.depth == deepest: + break + else: + continue if index.next_offset <= offset: # Node ends before current record continue @@ -706,13 +716,15 @@ class Indexer(object): # {{{ data['ends'].append(index) elif index.depth == deepest: data['spans'] = index + if (data['ends'] or data['completes'] or data['starts'] or data['spans'] is not None): self.tbs_map[i+1] = TBS(data, self.is_periodical, first=not found_node, all_sections=sections) found_node = True else: - self.tbs_map[i+1] = TBS({}, self.is_periodical, first=False) + self.tbs_map[i+1] = TBS({}, self.is_periodical, first=False, + after_first=found_node) def get_trailing_byte_sequence(self, num): return self.tbs_map[num].bytestring From 4bbc23d70657bdbd6cfa143d396421c35565fe89 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Jul 2011 20:34:42 -0600 Subject: [PATCH 39/51] ... 
--- src/calibre/ebooks/mobi/debug.py | 28 ++++++++++++++++++++-------- src/calibre/ebooks/mobi/utils.py | 3 +++ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 4bf8d356cd..cb028b9055 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -604,6 +604,9 @@ class IndexEntry(object): # {{{ self.raw = raw self.tags = [] self.entry_type_raw = entry_type + self.byte_size = len(raw) + + orig_raw = raw try: self.entry_type = self.TYPES[entry_type] @@ -641,8 +644,8 @@ class IndexEntry(object): # {{{ self.tags.append(Tag(aut_tag[0], [val], self.entry_type, cncx)) - if raw.replace(b'\x00', b''): # There can be padding null bytes - raise ValueError('Extra bytes in INDX table entry %d: %r'%(self.index, raw)) + self.consumed = len(orig_raw) - len(raw) + self.trailing_bytes = raw @property def label(self): @@ -694,13 +697,16 @@ class IndexEntry(object): # {{{ return -1 def __str__(self): - ans = ['Index Entry(index=%s, entry_type=%s (%s), length=%d)'%( - self.index, self.entry_type, bin(self.entry_type_raw)[2:], len(self.tags))] + ans = ['Index Entry(index=%s, entry_type=%s (%s), length=%d, byte_size=%d)'%( + self.index, self.entry_type, bin(self.entry_type_raw)[2:], + len(self.tags), self.byte_size)] for tag in self.tags: ans.append('\t'+str(tag)) if self.first_child_index != -1: ans.append('\tNumber of children: %d'%(self.last_child_index - self.first_child_index + 1)) + if self.trailing_bytes: + ans.append('\tTrailing bytes: %r'%self.trailing_bytes) return '\n'.join(ans) # }}} @@ -744,6 +750,7 @@ class IndexRecord(object): # {{{ raise ValueError('Extra bytes after IDXT table: %r'%rest) indxt = raw[192:self.idxt_offset] + self.size_of_indxt_block = len(indxt) self.indices = [] for i, off in enumerate(self.index_offsets): try: @@ -756,10 +763,14 @@ class IndexRecord(object): # {{{ if index_header.index_type == 6: flags = ord(indxt[off+consumed+d]) d += 1 + pos 
= off+consumed+d self.indices.append(IndexEntry(index, entry_type, - indxt[off+consumed+d:next_off], cncx, + indxt[pos:next_off], cncx, index_header.tagx_entries, flags=flags)) - index = self.indices[-1] + + rest = indxt[pos+self.indices[-1].consumed:] + if rest.replace(b'\0', ''): # There can be padding null bytes + raise ValueError('Extra bytes after IDXT table: %r'%rest) def get_parent(self, index): if index.depth < 1: @@ -780,12 +791,13 @@ class IndexRecord(object): # {{{ u(self.unknown1) a('Unknown (header type? index record number? always 1?): %d'%self.header_type) u(self.unknown2) - a('IDXT Offset: %d'%self.idxt_offset) + a('IDXT Offset (%d block size): %d'%(self.size_of_indxt_block, + self.idxt_offset)) a('IDXT Count: %d'%self.idxt_count) u(self.unknown3) u(self.unknown4) a('Index offsets: %r'%self.index_offsets) - a('\nIndex Entries:') + a('\nIndex Entries (%d entries):'%len(self.indices)) for entry in self.indices: a(str(entry)+'\n') diff --git a/src/calibre/ebooks/mobi/utils.py b/src/calibre/ebooks/mobi/utils.py index 16aa2a3b64..839374af70 100644 --- a/src/calibre/ebooks/mobi/utils.py +++ b/src/calibre/ebooks/mobi/utils.py @@ -41,6 +41,9 @@ def encode_number_as_hex(num): number. ''' num = bytes(hex(num)[2:].upper()) + nlen = len(num) + if nlen % 2 != 0: + num = b'0'+num ans = bytearray(num) ans.insert(0, len(num)) return bytes(ans) From 8a0a978c526803acf6beb961009a1e3d56aeacee Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Jul 2011 21:02:25 -0600 Subject: [PATCH 40/51] ... 
--- src/calibre/ebooks/mobi/debug.py | 1 + src/calibre/ebooks/mobi/writer2/main.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index cb028b9055..fe1e928dea 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -182,6 +182,7 @@ class EXTHHeader(object): self.records = [] for i in xrange(self.count): pos = self.read_record(pos) + self.records.sort(key=lambda x:x.type) def read_record(self, pos): type_, length = struct.unpack(b'>II', self.raw[pos:pos+8]) diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index e614567508..8925d7f281 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -29,7 +29,6 @@ EXTH_CODES = { 'identifier': 104, 'subject': 105, 'pubdate': 106, - 'date': 106, 'review': 107, 'contributor': 108, 'rights': 109, @@ -479,16 +478,17 @@ class MobiWriter(object): nrecs += 1 # Write cdetype - if not self.opts.mobi_periodical: + if (self.primary_index_record_idx is None or not + self.indexer.is_periodical): data = b'EBOK' exth.write(pack(b'>II', 501, len(data)+8)) exth.write(data) nrecs += 1 # Add a publication date entry - if oeb.metadata['date'] != [] : + if oeb.metadata['date']: datestr = str(oeb.metadata['date'][0]) - elif oeb.metadata['timestamp'] != [] : + elif oeb.metadata['timestamp']: datestr = str(oeb.metadata['timestamp'][0]) if datestr is not None: From 807b7069d7e14d61bf72c5cc4d99c8664a90b4b0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Jul 2011 21:23:56 -0600 Subject: [PATCH 41/51] New mobi output: Make the MOBI header/extra records as similar to the output of kindlegen as possible --- src/calibre/ebooks/mobi/debug.py | 2 +- src/calibre/ebooks/mobi/writer2/main.py | 31 ++++++++++++++++++++----- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug.py 
b/src/calibre/ebooks/mobi/debug.py index fe1e928dea..12bdb41f4b 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -345,7 +345,7 @@ class MOBIHeader(object): # {{{ ans.append('Huffman record offset: %d'%self.huffman_record_offset) ans.append('Huffman record count: %d'%self.huffman_record_count) ans.append('Unknown2: %r'%self.unknown2) - ans.append('EXTH flags: %r (%s)'%(self.exth_flags, self.has_exth)) + ans.append('EXTH flags: %s (%s)'%(bin(self.exth_flags)[2:], self.has_exth)) if self.has_drm_data: ans.append('Unknown3: %r'%self.unknown3) ans.append('DRM Offset: %s'%self.drm_offset) diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index 8925d7f281..476b53cd46 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -102,6 +102,10 @@ class MobiWriter(object): self.records[i] += tbs self.records.extend(self.indexer.records) + @property + def is_periodical(self): + return (self.primary_index_record_idx is None or not + self.indexer.is_periodical) # }}} @@ -277,6 +281,17 @@ class MobiWriter(object): exth = self.build_exth() last_content_record = len(self.records) - 1 + # FCIS/FLIS (Seem to server no purpose) + flis_number = len(self.records) + self.records.append( + b'FLIS\0\0\0\x08\0\x41\0\0\0\0\0\0\xff\xff\xff\xff\0\x01\0\x03\0\0\0\x03\0\0\0\x01'+ + b'\xff'*4) + fcis = b'FCIS\x00\x00\x00\x14\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00' + fcis += pack(b'>I', self.text_length) + fcis += b'\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x08\x00\x01\x00\x01\x00\x00\x00\x00' + fcis_number = len(self.records) + self.records.append(fcis) + # EOF record self.records.append(b'\xE9\x8E\x0D\x0A') @@ -355,7 +370,12 @@ class MobiWriter(object): record0.write(b'\0' * 16) # 0x70 - 0x73 : EXTH flags - record0.write(pack(b'>I', 0x50)) + # Bit 6 (0b1000000) being set indicates the presence of an EXTH header + # The purpose of the other bits is unknown + 
exth_flags = 0b1011000 + if self.is_periodical: + exth_flags |= 0b1000 + record0.write(pack(b'>I', exth_flags)) # 0x74 - 0x93 : Unknown record0.write(b'\0' * 32) @@ -380,13 +400,13 @@ class MobiWriter(object): record0.write(b'\0\0\0\x01') # 0xb8 - 0xbb : FCIS record number - record0.write(pack(b'>I', 0xffffffff)) + record0.write(pack(b'>I', fcis_number)) # 0xbc - 0xbf : Unknown (FCIS record count?) - record0.write(pack(b'>I', 0xffffffff)) + record0.write(pack(b'>I', 1)) # 0xc0 - 0xc3 : FLIS record number - record0.write(pack(b'>I', 0xffffffff)) + record0.write(pack(b'>I', flis_number)) # 0xc4 - 0xc7 : Unknown (FLIS record count?) record0.write(pack(b'>I', 1)) @@ -478,8 +498,7 @@ class MobiWriter(object): nrecs += 1 # Write cdetype - if (self.primary_index_record_idx is None or not - self.indexer.is_periodical): + if self.is_periodical: data = b'EBOK' exth.write(pack(b'>II', 501, len(data)+8)) exth.write(data) From 766545324283daa20ceae668c7b9e1ad590df3b9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Jul 2011 21:41:37 -0600 Subject: [PATCH 42/51] ... 
--- src/calibre/ebooks/mobi/writer2/main.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index 476b53cd46..e13afa2ba7 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -511,12 +511,20 @@ class MobiWriter(object): datestr = str(oeb.metadata['timestamp'][0]) if datestr is not None: + datestr = bytes(datestr) + datestr = datestr.replace(b'+00:00', b'Z') exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8)) exth.write(datestr) nrecs += 1 else: raise NotImplementedError("missing date or timestamp needed for mobi_periodical") + # Write the same creator info as kindlegen 1.2 + for code, val in [(204, 202), (205, 1), (206, 2), (207, 33307)]: + exth.write(pack(b'>II', code, 12)) + exth.write(pack(b'>I', val)) + nrecs += 1 + if (oeb.metadata.cover and unicode(oeb.metadata.cover[0]) in oeb.manifest.ids): id = unicode(oeb.metadata.cover[0]) From 3453746d906b5a36fbca9435a065a25217a599e0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Jul 2011 22:17:50 -0600 Subject: [PATCH 43/51] oops --- src/calibre/ebooks/mobi/debug.py | 2 ++ src/calibre/ebooks/mobi/utils.py | 2 +- src/calibre/ebooks/mobi/writer2/indexer.py | 10 +++++----- src/calibre/ebooks/mobi/writer2/main.py | 8 ++++---- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 12bdb41f4b..1279ba7793 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -844,6 +844,7 @@ class TextRecord(object): # {{{ def __init__(self, idx, record, extra_data_flags, decompress): self.trailing_data, self.raw = get_trailing_data(record.raw, extra_data_flags) + raw_trailing_bytes = record.raw[len(self.raw):] self.raw = decompress(self.raw) if 0 in self.trailing_data: self.trailing_data['multibyte_overlap'] = self.trailing_data.pop(0) @@ -851,6 +852,7 @@ class 
TextRecord(object): # {{{ self.trailing_data['indexing'] = self.trailing_data.pop(1) if 2 in self.trailing_data: self.trailing_data['uncrossable_breaks'] = self.trailing_data.pop(2) + self.trailing_data['raw_bytes'] = raw_trailing_bytes self.idx = idx diff --git a/src/calibre/ebooks/mobi/utils.py b/src/calibre/ebooks/mobi/utils.py index 839374af70..6df9db3b3b 100644 --- a/src/calibre/ebooks/mobi/utils.py +++ b/src/calibre/ebooks/mobi/utils.py @@ -191,7 +191,7 @@ def encode_trailing_data(raw): where size is a backwards encoded vwi whose value is the length of the - entire return bytestring. + entire returned bytestring. data is the bytestring passed in as raw. This is the encoding used for trailing data entries at the end of text records. See get_trailing_data() for details. diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index 14c5328622..f121e29835 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -14,7 +14,7 @@ from collections import OrderedDict, defaultdict from calibre.ebooks.mobi.writer2 import RECORD_SIZE from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex, - encode_trailing_data, encode_tbs, align_block, utf8_text) + encode_tbs, align_block, utf8_text) class CNCX(object): # {{{ @@ -198,7 +198,7 @@ class TBS(object): # {{{ # This can happen if a record contains only text between # the periodical start and the first section byts = self.type_011 - self.bytestring = encode_trailing_data(byts) + self.bytestring = byts else: depth_map = defaultdict(list) for x in ('starts', 'ends', 'completes'): @@ -209,7 +209,7 @@ class TBS(object): # {{{ self.periodical_tbs(data, first, depth_map) else: if not data: - self.bytestring = encode_trailing_data(b'') + self.bytestring = b'' else: self.book_tbs(data, first) @@ -302,10 +302,10 @@ class TBS(object): # {{{ buf.write(encode_tbs(spanner.index - parent_section_index, {0b0001: 0})) - self.bytestring 
= encode_trailing_data(buf.getvalue()) + self.bytestring = buf.getvalue() def book_tbs(self, data, first): - self.bytestring = encode_trailing_data(b'') + self.bytestring = b'' # }}} class Indexer(object): # {{{ diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index e13afa2ba7..44c471d3d4 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -99,7 +99,7 @@ class MobiWriter(object): for i in xrange(len(self.records)): if i == 0: continue tbs = self.indexer.get_trailing_byte_sequence(i) - self.records[i] += tbs + self.records[i] += encode_trailing_data(tbs) self.records.extend(self.indexer.records) @property @@ -212,15 +212,15 @@ class MobiWriter(object): if self.compression == PALMDOC: data = compress_doc(data) record = StringIO() - record.write(data) - self.records.append(record.getvalue()) nrecords += 1 data, overlap = self.read_text_record(text) + record.write(data) - # Write information about the mutibyte character overlap, if any + # Write information about the multibyte character overlap, if any record.write(overlap) record.write(pack(b'>B', len(overlap))) + self.records.append(record.getvalue()) self.last_text_record_idx = nrecords From 543cabb4184f0f909056e406af6f217b3f604479 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Jul 2011 23:05:11 -0600 Subject: [PATCH 44/51] ... 
--- src/calibre/ebooks/mobi/utils.py | 24 ++++++++++++++++++++ src/calibre/ebooks/mobi/writer2/indexer.py | 26 ++-------------------- src/calibre/ebooks/mobi/writer2/main.py | 18 +++++---------- 3 files changed, 32 insertions(+), 36 deletions(-) diff --git a/src/calibre/ebooks/mobi/utils.py b/src/calibre/ebooks/mobi/utils.py index 6df9db3b3b..80214b04d3 100644 --- a/src/calibre/ebooks/mobi/utils.py +++ b/src/calibre/ebooks/mobi/utils.py @@ -302,5 +302,29 @@ def align_block(raw, multiple=4, pad=b'\0'): return raw + pad*(multiple - extra) +def detect_periodical(toc, log): + ''' + Detect if the TOC object toc contains a periodical that conforms to the + structure required by kindlegen to generate a periodical. + ''' + for node in toc.iterdescendants(): + if node.depth() == 1 and node.klass != 'article': + log.debug( + 'Not a periodical: Deepest node does not have ' + 'class="article"') + return False + if node.depth() == 2 and node.klass != 'section': + log.debug( + 'Not a periodical: Second deepest node does not have' + ' class="section"') + return False + if node.depth() == 3 and node.klass != 'periodical': + log.debug('Not a periodical: Third deepest node' + ' does not have class="periodical"') + return False + if node.depth() > 3: + log.debug('Not a periodical: Has nodes of depth > 3') + return False + return True diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index f121e29835..54bef57ae3 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -14,7 +14,7 @@ from collections import OrderedDict, defaultdict from calibre.ebooks.mobi.writer2 import RECORD_SIZE from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex, - encode_tbs, align_block, utf8_text) + encode_tbs, align_block, utf8_text, detect_periodical) class CNCX(object): # {{{ @@ -320,7 +320,7 @@ class Indexer(object): # {{{ self.log = oeb.log self.opts = opts - self.is_periodical = 
self.detect_periodical() + self.is_periodical = detect_periodical(self.oeb.toc, self.log) self.log('Generating MOBI index for a %s'%('periodical' if self.is_periodical else 'book')) self.is_flat_periodical = False @@ -344,28 +344,6 @@ class Indexer(object): # {{{ self.calculate_trailing_byte_sequences() - def detect_periodical(self): # {{{ - for node in self.oeb.toc.iterdescendants(): - if node.depth() == 1 and node.klass != 'article': - self.log.debug( - 'Not a periodical: Deepest node does not have ' - 'class="article"') - return False - if node.depth() == 2 and node.klass != 'section': - self.log.debug( - 'Not a periodical: Second deepest node does not have' - ' class="section"') - return False - if node.depth() == 3 and node.klass != 'periodical': - self.log.debug('Not a periodical: Third deepest node' - ' does not have class="periodical"') - return False - if node.depth() > 3: - self.log.debug('Not a periodical: Has nodes of depth > 3') - return False - return True - # }}} - def create_index_record(self): # {{{ header_length = 192 buf = StringIO() diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index 44c471d3d4..e3f4081670 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -198,7 +198,6 @@ class MobiWriter(object): self.serializer = Serializer(self.oeb, self.images, write_page_breaks_after_item=self.write_page_breaks_after_item) text = self.serializer() - self.content_length = len(text) self.text_length = len(text) text = StringIO(text) nrecords = 0 @@ -206,21 +205,16 @@ class MobiWriter(object): if self.compression != UNCOMPRESSED: self.oeb.logger.info(' Compressing markup content...') - data, overlap = self.read_text_record(text) - - while len(data) > 0: + while text.tell() < self.text_length: + data, overlap = self.read_text_record(text) if self.compression == PALMDOC: data = compress_doc(data) - record = StringIO() + data += overlap + data += pack(b'>B', 
len(overlap)) + + self.records.append(data) nrecords += 1 - data, overlap = self.read_text_record(text) - record.write(data) - - # Write information about the multibyte character overlap, if any - record.write(overlap) - record.write(pack(b'>B', len(overlap))) - self.records.append(record.getvalue()) self.last_text_record_idx = nrecords From 25ef6ef13ade559661423d47f4bf6b6b00a8de21 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Jul 2011 23:28:31 -0600 Subject: [PATCH 45/51] ... --- src/calibre/ebooks/mobi/writer2/indexer.py | 40 +++++++++++++--------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index 54bef57ae3..917c7f1e4c 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -172,11 +172,12 @@ class TBS(object): # {{{ trailing byte sequence for the record. ''' - def __init__(self, data, is_periodical, first=False, all_sections=[], + def __init__(self, data, is_periodical, first=False, section_map={}, after_first=False): - self.section_map = OrderedDict((i.index, i) for i in - sorted(all_sections, key=lambda x:x.offset)) - + self.section_map = section_map + import pprint + pprint.pprint(data) + print() if is_periodical: # The starting bytes. 
# The value is zero which I think indicates the periodical @@ -216,21 +217,22 @@ class TBS(object): # {{{ def periodical_tbs(self, data, first, depth_map): buf = StringIO() - has_section_start = (depth_map[1] and depth_map[1][0] in - data['starts']) + has_section_start = (depth_map[1] and + set(depth_map[1]).intersection(set(data['starts']))) spanner = data['spans'] - first_node = None - for nodes in depth_map.values(): - for node in nodes: - if (first_node is None or (node.offset, node.depth) < - (first_node.offset, first_node.depth)): - first_node = node - parent_section_index = -1 + if depth_map[0]: # We have a terminal record + first_node = None + for nodes in (depth_map[1], depth_map[2]): + for node in nodes: + if (first_node is None or (node.offset, node.depth) < + (first_node.offset, first_node.depth)): + first_node = node + typ = (self.type_110 if has_section_start else self.type_010) - if first_node.depth > 0: + if first_node is not None and first_node.depth > 0: parent_section_index = (first_node.index if first_node.depth == 1 else first_node.parent_index) else: @@ -257,7 +259,8 @@ class TBS(object): # {{{ if typ not in (self.type_110, self.type_111) and parent_section_index > 0: # Write starting section information if spanner is None: - num_articles = len(depth_map[1]) + num_articles = len([a for a in depth_map[1] if a.parent_index + == parent_section_index]) extra = {} if num_articles > 1: extra = {0b0100: num_articles} @@ -662,6 +665,9 @@ class Indexer(object): # {{{ self.tbs_map = {} found_node = False sections = [i for i in self.indices if i.depth == 1] + section_map = OrderedDict((i.index, i) for i in + sorted(sections, key=lambda x:x.offset)) + deepest = max(i.depth for i in self.indices) for i in xrange(self.number_of_text_records): @@ -698,11 +704,11 @@ class Indexer(object): # {{{ if (data['ends'] or data['completes'] or data['starts'] or data['spans'] is not None): self.tbs_map[i+1] = TBS(data, self.is_periodical, first=not - found_node, 
all_sections=sections) + found_node, section_map=section_map) found_node = True else: self.tbs_map[i+1] = TBS({}, self.is_periodical, first=False, - after_first=found_node) + after_first=found_node, section_map=section_map) def get_trailing_byte_sequence(self, num): return self.tbs_map[num].bytestring From 4b7d3035600d92119829e5f06d70f5052418b6cd Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Jul 2011 23:39:45 -0600 Subject: [PATCH 46/51] ... --- src/calibre/ebooks/mobi/writer2/indexer.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index 917c7f1e4c..f454412187 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -224,6 +224,8 @@ class TBS(object): # {{{ if depth_map[0]: # We have a terminal record + + # Find the first non periodical node first_node = None for nodes in (depth_map[1], depth_map[2]): for node in nodes: @@ -232,10 +234,17 @@ class TBS(object): # {{{ first_node = node typ = (self.type_110 if has_section_start else self.type_010) + + # parent_section_index is needed for the last record if first_node is not None and first_node.depth > 0: parent_section_index = (first_node.index if first_node.depth == 1 else first_node.parent_index) + else: + parent_section_index = max(self.section_map.iterkeys()) + else: + # Non terminal record + if spanner is not None: # record is spanned by a single article parent_section_index = spanner.parent_index From a4721656b0f93d69a485668fb7d141e854959750 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 27 Jul 2011 00:01:18 -0600 Subject: [PATCH 47/51] ... 
--- src/calibre/ebooks/mobi/utils.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/mobi/utils.py b/src/calibre/ebooks/mobi/utils.py index 80214b04d3..4298276bc1 100644 --- a/src/calibre/ebooks/mobi/utils.py +++ b/src/calibre/ebooks/mobi/utils.py @@ -302,28 +302,32 @@ def align_block(raw, multiple=4, pad=b'\0'): return raw + pad*(multiple - extra) -def detect_periodical(toc, log): +def detect_periodical(toc, log=None): ''' Detect if the TOC object toc contains a periodical that conforms to the structure required by kindlegen to generate a periodical. ''' for node in toc.iterdescendants(): if node.depth() == 1 and node.klass != 'article': - log.debug( + if log is not None: + log.debug( 'Not a periodical: Deepest node does not have ' 'class="article"') return False if node.depth() == 2 and node.klass != 'section': - log.debug( + if log is not None: + log.debug( 'Not a periodical: Second deepest node does not have' ' class="section"') return False if node.depth() == 3 and node.klass != 'periodical': - log.debug('Not a periodical: Third deepest node' + if log is not None: + log.debug('Not a periodical: Third deepest node' ' does not have class="periodical"') return False if node.depth() > 3: - log.debug('Not a periodical: Has nodes of depth > 3') + if log is not None: + log.debug('Not a periodical: Has nodes of depth > 3') return False return True From 4683b3b30f71b8f5cb5570a22cd561cb40061c5e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 27 Jul 2011 00:04:21 -0600 Subject: [PATCH 48/51] ... 
--- src/calibre/ebooks/mobi/writer2/indexer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index f454412187..d5226f68bd 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -175,9 +175,9 @@ class TBS(object): # {{{ def __init__(self, data, is_periodical, first=False, section_map={}, after_first=False): self.section_map = section_map - import pprint - pprint.pprint(data) - print() + #import pprint + #pprint.pprint(data) + #print() if is_periodical: # The starting bytes. # The value is zero which I think indicates the periodical From b461b58e8cfe5aa18a22cf14b247c3b689a9274f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 27 Jul 2011 00:23:31 -0600 Subject: [PATCH 49/51] Fix #816094 ([Enhancement] Add COBY MP977 Support) --- src/calibre/customize/builtins.py | 4 ++-- src/calibre/devices/misc.py | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 9a01633cfe..620254b1f5 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -570,7 +570,7 @@ from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS, from calibre.devices.sne.driver import SNE from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL, GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR, - TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK) + TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK, COBY) from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG from calibre.devices.kobo.driver import KOBO from calibre.devices.bambook.driver import BAMBOOK @@ -705,7 +705,7 @@ plugins += [ EEEREADER, NEXTBOOK, ADAM, - MOOVYBOOK, + MOOVYBOOK, COBY, ITUNES, BOEYE_BEX, BOEYE_BDX, diff --git a/src/calibre/devices/misc.py b/src/calibre/devices/misc.py 
index 6c5706f039..92fce68f11 100644 --- a/src/calibre/devices/misc.py +++ b/src/calibre/devices/misc.py @@ -351,3 +351,29 @@ class MOOVYBOOK(USBMS): def get_main_ebook_dir(self, for_upload=False): return 'Books' if for_upload else self.EBOOK_DIR_MAIN +class COBY(USBMS): + + name = 'COBY MP977 device interface' + gui_name = 'COBY' + description = _('Communicate with the COBY') + author = 'Kovid Goyal' + supported_platforms = ['windows', 'osx', 'linux'] + + # Ordered list of supported formats + FORMATS = ['epub', 'pdf'] + + VENDOR_ID = [0x1e74] + PRODUCT_ID = [0x7121] + BCD = [0x02] + VENDOR_NAME = 'USB_2.0' + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'MP977_DRIVER' + + EBOOK_DIR_MAIN = '' + + SUPPORTS_SUB_DIRS = False + + def get_carda_ebook_dir(self, for_upload=False): + if for_upload: + return 'eBooks' + return self.EBOOK_DIR_CARD_A + From d66fd24888834c5d6bd33c63a7aa8492797150e5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 27 Jul 2011 00:37:20 -0600 Subject: [PATCH 50/51] Prevent metadata download from returning published dates earlier than 101 A.D. 
--- src/calibre/ebooks/metadata/sources/identify.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py index 97b6d15bc8..a7bcbc5a89 100644 --- a/src/calibre/ebooks/metadata/sources/identify.py +++ b/src/calibre/ebooks/metadata/sources/identify.py @@ -22,6 +22,7 @@ from calibre.ebooks.metadata.book.base import Metadata from calibre.utils.date import utc_tz, as_utc from calibre.utils.html2text import html2text from calibre.utils.icu import lower +from calibre.utils.date import UNDEFINED_DATE # Download worker {{{ class Worker(Thread): @@ -490,6 +491,8 @@ def identify(log, abort, # {{{ max_tags = msprefs['max_tags'] for r in results: r.tags = r.tags[:max_tags] + if getattr(r.pubdate, 'year', 2000) <= UNDEFINED_DATE.year: + r.pubdate = None if msprefs['swap_author_names']: for r in results: From 9618a0ac4d523df1d7fbec9003c2208eb08997be Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 27 Jul 2011 01:00:50 -0600 Subject: [PATCH 51/51] Fix #814722 (Option to save .opf metadata as in epub.) --- src/calibre/ebooks/metadata/opf2.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 7ad741848e..35fd724ddd 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -1030,8 +1030,10 @@ class OPF(object): # {{{ attrib = attrib or {} attrib['name'] = 'calibre:' + name name = '{%s}%s' % (self.NAMESPACES['opf'], 'meta') + nsmap = dict(self.NAMESPACES) + del nsmap['opf'] elem = etree.SubElement(self.metadata, name, attrib=attrib, - nsmap=self.NAMESPACES) + nsmap=nsmap) elem.tail = '\n' return elem