diff --git a/format_docs/pdb/apnx.txt b/format_docs/pdb/apnx.txt new file mode 100644 index 0000000000..f9feed1da1 --- /dev/null +++ b/format_docs/pdb/apnx.txt @@ -0,0 +1,69 @@ +APNX +---- + +apnx files are used by the Amazon Kindle (firmware revision 3.1+) to +map pages from a print book to the Kindle version. Integers within +the file are big-endian. + + +Layout +------ + +bytes content comments + +4 00010001 Format identifier. Value of 65537 big-endian. +4 start of next The offset after ending location of the first header. + Starts a new sequence of header info +4 length Length of first header +N first header String containing content header +Starts next sequence +2 unknown Always 1 +2 length Length of second header +2 page count Total number of bytes after second header that + represent pages. This total includes bytes that + are ignored by the pageMap. +2 unknown Always 32 +N second header String containing the page mapping header +4*N padding The first number given in the page mapping header indicates the number of 0 bytes. +4*N page list + + +Content Header +-------------- + +The content header is a string enclosed in {} containing key, value pairs. + +content comments + +contentGuid Guid. +asin Amazon identifier for the Kindle version of the book. +cdeType MOBI cdeType. Should always be EBOK for ebooks. +fileRevisionId Revision of this file. + +Example: +{"contentGuid":"d8c14b0","asin":"B000JML5VM","cdeType":"EBOK","fileRevisionId":"1296874359405"} + + +Page Mapping Header +------------------- + +The page mapping header is a string enclosed in {} containing key, value pairs. + +content comments + +asin The ISBN 10 for the paper book the pages correspond to +pageMap Three value tuple. Looks like: "(N,N,N)" + 1) Number of bytes after header that starts the page numbering sequence + 2) unknown + 3) unknown + +Example: +{"asin":"1906694184","pageMap":"(4,a,1)"} + + +Page List +--------- + +The page list is a sequence of offsets in the uncompressed HTML. 
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2011, John Schember '
__docformat__ = 'restructuredtext en'

'''
Generates and writes an APNX page mapping file.
'''

import struct
import uuid


class APNXBuilder(object):
    '''
    Builds APNX page mapping files for MOBI books.

    Currently uses the Adobe 1024 byte count equal one page formula.
    '''

    def write_apnx(self, mobi_file_path, apnx_path):
        '''
        Generate the page map for a MOBI file and write it to disk.

        :param mobi_file_path: Path of the MOBI book to read.
        :param apnx_path: Path the APNX data is written to (opened with
            mode ``'wb'``, so an existing file is overwritten).

        Errors from opening either file, from the PDB header reader or
        from :meth:`generate_apnx` are not caught here.
        '''
        # Imported lazily so that the pure byte-building methods below can
        # be used without pulling in the PDB reader.
        from calibre.ebooks.pdb.header import PdbHeaderReader

        with open(mobi_file_path, 'rb') as mf:
            phead = PdbHeaderReader(mf)
            r0 = phead.section_data(0)
            # Big-endian unsigned 32 bit value at bytes 4..8 of record 0.
            # NOTE(review): presumably the uncompressed text length of the
            # book; confirm against the MOBI header layout.
            text_length = struct.unpack('>I', r0[4:8])[0]

        pages = self.get_pages(text_length)
        apnx = self.generate_apnx(pages)

        with open(apnx_path, 'wb') as apnxf:
            apnxf.write(apnx)

    def generate_apnx(self, pages):
        '''
        Serialize a list of page offsets into APNX file content.

        :param pages: Sequence of non-negative integer offsets, one per
            page. Each must fit in an unsigned 32 bit integer.
        :return: The complete file content as a byte string.
        :raises struct.error: If there are more than 65535 pages (the page
            count is stored in an unsigned 16 bit field) or an offset does
            not fit in 32 bits.
        '''
        content_vals = {
            # First 8 hex digits of a random UUID.
            'guid': str(uuid.uuid4()).replace('-', '')[:8],
            'isbn': '',
        }

        # Both headers are plain ASCII; encode them so that every piece
        # joined below is a byte string.
        content_header = (
            '{"contentGuid":"%(guid)s","asin":"%(isbn)s","cdeType":"EBOK","fileRevisionId":"1"}'
            % content_vals).encode('ascii')
        page_header = (
            '{"asin":"%(isbn)s","pageMap":"(1,a,1)"}'
            % content_vals).encode('ascii')

        parts = [
            # Format identifier, offset just past the content header (the
            # three 4 byte fields written here take 12 bytes), and the
            # content header length.
            struct.pack('>III', 65537, 12 + len(content_header),
                        len(content_header)),
            content_header,
            # Second sequence: constant 1, page header length, number of
            # page entries, constant 32.
            struct.pack('>HHHH', 1, len(page_header), len(pages), 32),
            page_header,
            # One big-endian unsigned 32 bit offset per page.
            struct.pack('>%dL' % len(pages), *pages),
        ]
        return b''.join(parts)

    def get_pages(self, text_length, page_size=1024):
        '''
        Return the start offset of every page in a text of the given length.

        :param text_length: Length of the text. Values <= 0 give no pages.
        :param page_size: Distance between page starts. Defaults to 1024.
        :return: List ``[0, page_size, 2 * page_size, ...]`` holding every
            multiple of ``page_size`` that is smaller than ``text_length``.
        :raises ValueError: If ``page_size`` is 0.
        '''
        return list(range(0, text_length, page_size))
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__docformat__ = 'restructuredtext en'

import os
from cStringIO import StringIO
from struct import unpack

class Bookmark(): # {{{
    '''
    A simple class fetching bookmark data
    Kindle-specific

    Constructing an instance parses the bookmark file at ``path`` and the
    book next to it, and fills in ``timestamp``, ``last_read``,
    ``last_read_location``, ``user_notes``, ``book_length`` and
    ``percent_read``.
    '''
    def __init__(self, path, id, book_format, bookmark_extension):
        '''
        :param path: Path of the bookmark file on the device.
        :param id: Identifier stored in every entry of ``user_notes``.
        :param book_format: Extension (without dot) of the book file; the
            book path is derived from ``path`` by swapping extensions.
        :param bookmark_extension: Extension (without dot) of the bookmark
            file; ``'mbp'``, ``'tan'`` and ``'pdr'`` are handled.
        '''
        self.book_format = book_format
        self.bookmark_extension = bookmark_extension
        self.book_length = 0
        self.id = id
        self.last_read = 0
        self.last_read_location = 0
        self.path = path
        self.timestamp = 0
        self.user_notes = None

        self.get_bookmark_data()
        self.get_book_length()
        try:
            self.percent_read = min(float(100*self.last_read / self.book_length),100)
        except Exception:
            # Typically a book_length of 0 (book could not be read).
            self.percent_read = 0

    def record(self, n):
        '''
        Return a StreamSlicer over record ``n`` of the book.

        Relies on ``self.stream``, ``self.data`` and ``self.nrecs``, which
        are set by the ``'mbp'`` branch of :meth:`get_book_length`.

        :raises ValueError: If ``n`` is not smaller than ``self.nrecs``.
        '''
        from calibre.ebooks.metadata.mobi import StreamSlicer
        if n >= self.nrecs:
            raise ValueError('non-existent record %r' % n)
        # Record table entries are read as 8 bytes each, starting at 78.
        offoff = 78 + (8 * n)
        start, = unpack('>I', self.data[offoff + 0:offoff + 4])
        stop = None
        if n < (self.nrecs - 1):
            # A record ends where the next one starts; the last record is
            # left open ended.
            stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
        return StreamSlicer(self.stream, start, stop)

    def get_bookmark_data(self):
        '''
        Parse the bookmark file and store the results on the instance.

        Sets ``timestamp``, ``last_read``, ``last_read_location`` and
        ``user_notes`` (a dict mapping a location to a dict with the keys
        ``id``, ``displayed_location``, ``type`` and ``text``). For
        ``'pdr'`` files ``pdf_page_offset`` is set as well. Nothing is
        returned. An unsupported extension prints a message and leaves
        ``user_notes`` empty.
        '''
        from calibre.ebooks.metadata.mobi import StreamSlicer
        user_notes = {}
        if self.bookmark_extension == 'mbp':
            MAGIC_MOBI_CONSTANT = 150
            with open(self.path,'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.timestamp, = unpack('>I', data[0x24:0x28])
                bpar_offset, = unpack('>I', data[0x4e:0x52])
                lrlo = bpar_offset + 0x0c
                self.last_read = int(unpack('>I', data[lrlo:lrlo+4])[0])
                # Integer division, as '/' gives for ints on Python 2.
                self.last_read_location = self.last_read // MAGIC_MOBI_CONSTANT + 1
                # Read but not used below: the walk is driven by the
                # record signatures instead.
                entries, = unpack('>I', data[0x4a:0x4e])

                # Store the annotations/locations
                bpl = bpar_offset + 4
                bpar_len, = unpack('>I', data[bpl:bpl+4])
                bpar_len += 8
                eo = bpar_offset + bpar_len

                # Walk bookmark entries
                sig = data[eo:eo+4]
                previous_block = None

                while sig == 'DATA':
                    text = None
                    entry_type = None
                    rec_len, = unpack('>I', data[eo+4:eo+8])
                    if rec_len == 0:
                        current_block = "empty_data"
                    elif data[eo+8:eo+12] == "EBAR":
                        current_block = "data_header"
                        location, = unpack('>I', data[eo+0x34:eo+0x38])
                    else:
                        # A text block is a Note when it follows an empty
                        # DATA record and a Highlight when it follows a
                        # data header. It is filed under the location of
                        # the most recent data header.
                        current_block = "text_block"
                        if previous_block == 'empty_data':
                            entry_type = 'Note'
                        elif previous_block == 'data_header':
                            entry_type = 'Highlight'
                        text = data[eo+8:eo+8+rec_len].decode('utf-16-be')

                    if entry_type:
                        displayed_location = location // MAGIC_MOBI_CONSTANT + 1
                        user_notes[location] = dict(id=self.id,
                                                    displayed_location=displayed_location,
                                                    type=entry_type,
                                                    text=text)

                    eo += rec_len + 8
                    previous_block = current_block
                    sig = data[eo:eo+4]

                while sig == 'BKMK':
                    # Fix start location for Highlights using BKMK data
                    end_loc, = unpack('>I', data[eo+0x10:eo+0x14])

                    if end_loc in user_notes and \
                            user_notes[end_loc]['type'] in ('Highlight', 'Note'):
                        # Switch location to start (0x08:0x0c)
                        start, = unpack('>I', data[eo+8:eo+12])
                        user_notes[start] = user_notes[end_loc]
                        user_notes[start]['displayed_location'] = start // MAGIC_MOBI_CONSTANT + 1
                        user_notes.pop(end_loc)
                    else:
                        # If a bookmark coincides with a user annotation, the locs could
                        # be the same - cheat by nudging -1
                        # Skip bookmark for last_read_location
                        if end_loc != self.last_read:
                            displayed_location = end_loc // MAGIC_MOBI_CONSTANT + 1
                            user_notes[end_loc - 1] = dict(id=self.id,
                                                           displayed_location=displayed_location,
                                                           type='Bookmark',
                                                           text=None)
                    rec_len, = unpack('>I', data[eo+4:eo+8])
                    eo += rec_len + 8
                    sig = data[eo:eo+4]

        elif self.bookmark_extension == 'tan':
            from calibre.ebooks.metadata.topaz import get_metadata as get_topaz_metadata

            def get_topaz_highlight(displayed_location):
                # Parse My Clippings.txt for a matching highlight
                # Search looks for book title match, highlight match, and location match
                # Author is not matched
                # This will find the first instance of a clipping only
                book_fs = self.path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format)
                with open(book_fs,'rb') as f2:
                    stream = StringIO(f2.read())
                    mi = get_topaz_metadata(stream)
                my_clippings = self.path
                split = my_clippings.find('documents') + len('documents/')
                my_clippings = my_clippings[:split] + "My Clippings.txt"
                try:
                    with open(my_clippings, 'r') as f2:
                        # 0: looking for the title line, 1: looking for
                        # the highlight line, 2: collecting text.
                        marker_found = 0
                        text = ''
                        search_str1 = '%s' % (mi.title)
                        search_str2 = '- Highlight Loc. %d' % (displayed_location)
                        for line in f2:
                            if marker_found == 0:
                                if line.startswith(search_str1):
                                    marker_found = 1
                            elif marker_found == 1:
                                if line.startswith(search_str2):
                                    marker_found = 2
                            elif marker_found == 2:
                                if line.startswith('=========='):
                                    break
                                text += line.strip()
                        else:
                            # File exhausted without reaching the
                            # terminator of a matching clipping.
                            raise Exception('error')
                except Exception:
                    text = '(Unable to extract highlight text from My Clippings.txt)'
                return text

            MAGIC_TOPAZ_CONSTANT = 33.33
            self.timestamp = os.path.getmtime(self.path)
            with open(self.path,'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.last_read = int(unpack('>I', data[5:9])[0])
                self.last_read_location = self.last_read/MAGIC_TOPAZ_CONSTANT + 1
                entries, = unpack('>I', data[9:13])
                current_entry = 0
                e_base = 0x0d
                while current_entry < entries:
                    location, = unpack('>I', data[e_base+2:e_base+6])
                    text = None
                    text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
                    e_type, = unpack('>B', data[e_base+1])
                    if e_type == 0:
                        e_type = 'Bookmark'
                    elif e_type == 1:
                        e_type = 'Highlight'
                        text = get_topaz_highlight(location/MAGIC_TOPAZ_CONSTANT + 1)
                    elif e_type == 2:
                        e_type = 'Note'
                        text = data[e_base+0x10:e_base+0x10+text_len]
                    else:
                        e_type = 'Unknown annotation type'

                    displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
                    user_notes[location] = dict(id=self.id,
                                                displayed_location=displayed_location,
                                                type=e_type,
                                                text=text)
                    # A text length of 0xFFFFFFFF marks an entry that
                    # carries no text.
                    if text_len == 0xFFFFFFFF:
                        e_base = e_base + 14
                    else:
                        e_base = e_base + 14 + 2 + text_len
                    current_entry += 1
                # Drop the entry stored at the last read position, if any.
                user_notes.pop(self.last_read, None)

        elif self.bookmark_extension == 'pdr':
            self.timestamp = os.path.getmtime(self.path)
            with open(self.path,'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.last_read = int(unpack('>I', data[5:9])[0])
                entries, = unpack('>I', data[9:13])
                current_entry = 0
                e_base = 0x0d
                self.pdf_page_offset = 0
                while current_entry < entries:
                    # Use label as page number
                    pdf_location, = unpack('>I', data[e_base+1:e_base+5])
                    label_len, = unpack('>H', data[e_base+5:e_base+7])
                    location = int(data[e_base+7:e_base+7+label_len])
                    displayed_location = location
                    e_type = 'Bookmark'
                    text = None
                    user_notes[location] = dict(id=self.id,
                                                displayed_location=displayed_location,
                                                type=e_type,
                                                text=text)
                    # Keeps the difference seen for the last entry.
                    self.pdf_page_offset = pdf_location - location
                    e_base += (7 + label_len)
                    current_entry += 1

                self.last_read_location = self.last_read - self.pdf_page_offset

        else:
            print("unsupported bookmark_extension: %s" % self.bookmark_extension)
        self.user_notes = user_notes

    def get_book_length(self):
        '''
        Determine the length of the book and store it in ``book_length``.

        The book path is derived from the bookmark path by replacing the
        bookmark extension with the book format. Reading is best effort:
        on any failure ``book_length`` stays 0. For ``'mbp'`` bookmarks
        this also sets ``stream``, ``data`` and ``nrecs``, which
        :meth:`record` needs.
        '''
        from calibre.ebooks.metadata.mobi import StreamSlicer
        book_fs = self.path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format)

        self.book_length = 0
        if self.bookmark_extension == 'mbp':
            # Read the book len from the header
            try:
                with open(book_fs,'rb') as f:
                    self.stream = StringIO(f.read())
                    self.data = StreamSlicer(self.stream)
                    self.nrecs, = unpack('>H', self.data[76:78])
                    record0 = self.record(0)
                    self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
            except Exception:
                # Best effort: keep book_length at 0.
                pass
        elif self.bookmark_extension == 'tan':
            # Read bookLength from metadata
            from calibre.ebooks.metadata.topaz import MetadataUpdater
            try:
                with open(book_fs,'rb') as f:
                    mu = MetadataUpdater(f)
                    self.book_length = mu.book_length
            except Exception:
                # Best effort: keep book_length at 0.
                pass
        elif self.bookmark_extension == 'pdr':
            from calibre import plugins
            try:
                # Binary mode and a context manager: the PDF must not go
                # through text-mode translation, and the handle is closed
                # even if reading fails.
                with open(book_fs, 'rb') as f:
                    raw = f.read()
                self.book_length = plugins['pdfreflow'][0].get_numpages(raw)
            except Exception:
                # Best effort: keep book_length at 0.
                pass

        else:
            print("unsupported bookmark_extension: %s" % self.bookmark_extension)

# }}}
def upload_cover(self, path, filename, metadata, filepath):
    '''
    Hijacking this function to write the apnx file.

    For a book whose ``filepath`` ends in ``.mobi`` (case-insensitive) an
    APNX file is generated at ``os.path.join(path, filename) + '.apnx'``.
    Other formats are ignored. ``metadata`` is not used.

    Generation is best effort: a failure is reported on stdout together
    with its traceback and is not propagated to the caller.
    '''
    if not filepath.lower().endswith('.mobi'):
        return

    apnx_path = '%s.apnx' % os.path.join(path, filename)
    apnx_builder = APNXBuilder()
    try:
        apnx_builder.write_apnx(filepath, apnx_path)
    except Exception:
        # Not a bare except: KeyboardInterrupt and SystemExit must still
        # be able to stop the transfer.
        print('Failed to generate APNX')
        import traceback
        traceback.print_exc()
data[0x4a:0x4e]) - - # Store the annotations/locations - bpl = bpar_offset + 4 - bpar_len, = unpack('>I', data[bpl:bpl+4]) - bpar_len += 8 - #print "bpar_len: 0x%x" % bpar_len - eo = bpar_offset + bpar_len - - # Walk bookmark entries - #print " --- %s --- " % self.path - current_entry = 1 - sig = data[eo:eo+4] - previous_block = None - - while sig == 'DATA': - text = None - entry_type = None - rec_len, = unpack('>I', data[eo+4:eo+8]) - if rec_len == 0: - current_block = "empty_data" - elif data[eo+8:eo+12] == "EBAR": - current_block = "data_header" - #entry_type = "data_header" - location, = unpack('>I', data[eo+0x34:eo+0x38]) - #print "data_header location: %d" % location - else: - current_block = "text_block" - if previous_block == 'empty_data': - entry_type = 'Note' - elif previous_block == 'data_header': - entry_type = 'Highlight' - text = data[eo+8:eo+8+rec_len].decode('utf-16-be') - - if entry_type: - displayed_location = location/MAGIC_MOBI_CONSTANT + 1 - user_notes[location] = dict(id=self.id, - displayed_location=displayed_location, - type=entry_type, - text=text) - - eo += rec_len + 8 - current_entry += 1 - previous_block = current_block - sig = data[eo:eo+4] - - while sig == 'BKMK': - # Fix start location for Highlights using BKMK data - end_loc, = unpack('>I', data[eo+0x10:eo+0x14]) - - if end_loc in user_notes and \ - (user_notes[end_loc]['type'] == 'Highlight' or \ - user_notes[end_loc]['type'] == 'Note'): - # Switch location to start (0x08:0x0c) - start, = unpack('>I', data[eo+8:eo+12]) - user_notes[start] = user_notes[end_loc] - ''' - print " %s: swapping 0x%x (%d) to 0x%x (%d)" % (user_notes[end_loc]['type'], - end_loc, - end_loc/MAGIC_MOBI_CONSTANT + 1, - start, - start//MAGIC_MOBI_CONSTANT + 1) - ''' - user_notes[start]['displayed_location'] = start/MAGIC_MOBI_CONSTANT + 1 - user_notes.pop(end_loc) - else: - # If a bookmark coincides with a user annotation, the locs could - # be the same - cheat by nudging -1 - # Skip bookmark for 
last_read_location - if end_loc != self.last_read: - # print " adding Bookmark at 0x%x (%d)" % (end_loc, end_loc/MAGIC_MOBI_CONSTANT + 1) - displayed_location = end_loc/MAGIC_MOBI_CONSTANT + 1 - user_notes[end_loc - 1] = dict(id=self.id, - displayed_location=displayed_location, - type='Bookmark', - text=None) - rec_len, = unpack('>I', data[eo+4:eo+8]) - eo += rec_len + 8 - sig = data[eo:eo+4] - - elif self.bookmark_extension == 'tan': - from calibre.ebooks.metadata.topaz import get_metadata as get_topaz_metadata - - def get_topaz_highlight(displayed_location): - # Parse My Clippings.txt for a matching highlight - # Search looks for book title match, highlight match, and location match - # Author is not matched - # This will find the first instance of a clipping only - book_fs = self.path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format) - with open(book_fs,'rb') as f2: - stream = StringIO(f2.read()) - mi = get_topaz_metadata(stream) - my_clippings = self.path - split = my_clippings.find('documents') + len('documents/') - my_clippings = my_clippings[:split] + "My Clippings.txt" - try: - with open(my_clippings, 'r') as f2: - marker_found = 0 - text = '' - search_str1 = '%s' % (mi.title) - search_str2 = '- Highlight Loc. 
%d' % (displayed_location) - for line in f2: - if marker_found == 0: - if line.startswith(search_str1): - marker_found = 1 - elif marker_found == 1: - if line.startswith(search_str2): - marker_found = 2 - elif marker_found == 2: - if line.startswith('=========='): - break - text += line.strip() - else: - raise Exception('error') - except: - text = '(Unable to extract highlight text from My Clippings.txt)' - return text - - MAGIC_TOPAZ_CONSTANT = 33.33 - self.timestamp = os.path.getmtime(self.path) - with open(self.path,'rb') as f: - stream = StringIO(f.read()) - data = StreamSlicer(stream) - self.last_read = int(unpack('>I', data[5:9])[0]) - self.last_read_location = self.last_read/MAGIC_TOPAZ_CONSTANT + 1 - entries, = unpack('>I', data[9:13]) - current_entry = 0 - e_base = 0x0d - while current_entry < entries: - location, = unpack('>I', data[e_base+2:e_base+6]) - text = None - text_len, = unpack('>I', data[e_base+0xA:e_base+0xE]) - e_type, = unpack('>B', data[e_base+1]) - if e_type == 0: - e_type = 'Bookmark' - elif e_type == 1: - e_type = 'Highlight' - text = get_topaz_highlight(location/MAGIC_TOPAZ_CONSTANT + 1) - elif e_type == 2: - e_type = 'Note' - text = data[e_base+0x10:e_base+0x10+text_len] - else: - e_type = 'Unknown annotation type' - - displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1 - user_notes[location] = dict(id=self.id, - displayed_location=displayed_location, - type=e_type, - text=text) - if text_len == 0xFFFFFFFF: - e_base = e_base + 14 - else: - e_base = e_base + 14 + 2 + text_len - current_entry += 1 - for location in user_notes: - if location == self.last_read: - user_notes.pop(location) - break - - elif self.bookmark_extension == 'pdr': - self.timestamp = os.path.getmtime(self.path) - with open(self.path,'rb') as f: - stream = StringIO(f.read()) - data = StreamSlicer(stream) - self.last_read = int(unpack('>I', data[5:9])[0]) - entries, = unpack('>I', data[9:13]) - current_entry = 0 - e_base = 0x0d - self.pdf_page_offset = 0 - while 
current_entry < entries: - ''' - location, = unpack('>I', data[e_base+2:e_base+6]) - text = None - text_len, = unpack('>I', data[e_base+0xA:e_base+0xE]) - e_type, = unpack('>B', data[e_base+1]) - if e_type == 0: - e_type = 'Bookmark' - elif e_type == 1: - e_type = 'Highlight' - text = get_topaz_highlight(location/MAGIC_TOPAZ_CONSTANT + 1) - elif e_type == 2: - e_type = 'Note' - text = data[e_base+0x10:e_base+0x10+text_len] - else: - e_type = 'Unknown annotation type' - - if self.book_format in ['tpz','azw1']: - displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1 - elif self.book_format == 'pdf': - # *** This needs implementation - displayed_location = location - user_notes[location] = dict(id=self.id, - displayed_location=displayed_location, - type=e_type, - text=text) - if text_len == 0xFFFFFFFF: - e_base = e_base + 14 - else: - e_base = e_base + 14 + 2 + text_len - current_entry += 1 - ''' - # Use label as page number - pdf_location, = unpack('>I', data[e_base+1:e_base+5]) - label_len, = unpack('>H', data[e_base+5:e_base+7]) - location = int(data[e_base+7:e_base+7+label_len]) - displayed_location = location - e_type = 'Bookmark' - text = None - user_notes[location] = dict(id=self.id, - displayed_location=displayed_location, - type=e_type, - text=text) - self.pdf_page_offset = pdf_location - location - e_base += (7 + label_len) - current_entry += 1 - - self.last_read_location = self.last_read - self.pdf_page_offset - - else: - print "unsupported bookmark_extension: %s" % self.bookmark_extension - self.user_notes = user_notes - - def get_book_length(self): - from calibre.ebooks.metadata.mobi import StreamSlicer - book_fs = self.path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format) - - self.book_length = 0 - if self.bookmark_extension == 'mbp': - # Read the book len from the header - try: - with open(book_fs,'rb') as f: - self.stream = StringIO(f.read()) - self.data = StreamSlicer(self.stream) - self.nrecs, = unpack('>H', self.data[76:78]) - 
record0 = self.record(0) - self.book_length = int(unpack('>I', record0[0x04:0x08])[0]) - except: - pass - elif self.bookmark_extension == 'tan': - # Read bookLength from metadata - from calibre.ebooks.metadata.topaz import MetadataUpdater - try: - with open(book_fs,'rb') as f: - mu = MetadataUpdater(f) - self.book_length = mu.book_length - except: - pass - elif self.bookmark_extension == 'pdr': - from calibre import plugins - try: - self.book_length = plugins['pdfreflow'][0].get_numpages(open(book_fs).read()) - except: - pass - - else: - print "unsupported bookmark_extension: %s" % self.bookmark_extension - -# }}} -