mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Kindle driver: When uploading MOBI files to the device, upload page information as well (used by the Kindle 3.1 firmware)
This commit is contained in:
commit
ece6bd536b
69
format_docs/pdb/apnx.txt
Normal file
69
format_docs/pdb/apnx.txt
Normal file
@ -0,0 +1,69 @@
|
||||
APNX
|
||||
----
|
||||
|
||||
apnx files are used by the Amazon Kindle (firmware revision 3.1+) to
|
||||
map pages from a print book to the Kindle version. Integers within
|
||||
the file are big-endian.
|
||||
|
||||
|
||||
Layout
|
||||
------
|
||||
|
||||
bytes content comments
|
||||
|
||||
4 00010001 Format identifier. Value of 65537 big-endian.
|
||||
4 start of next The offset of the first byte after the end of the first header,
|
||||
where the next sequence of header info starts.
|
||||
4 length Length of first header
|
||||
N first header String containing content header
|
||||
Starts next sequence
|
||||
2 unknown Always 1
|
||||
2 length Length of second header
|
||||
2 page count Total number of bytes after second header that
|
||||
represent pages. This total includes bytes that
|
||||
are ignored by the pageMap.
|
||||
2 unknown Always 32
|
||||
N second header String containing the page mapping header
|
||||
4*N padding The first number given in the page mapping header indicates the number of 0 bytes.
|
||||
4*N page list
|
||||
|
||||
|
||||
Content Header
|
||||
--------------
|
||||
|
||||
The content header is a string enclosed in {} containing key, value pairs.
|
||||
|
||||
content comments
|
||||
|
||||
contentGuid Guid.
|
||||
asin Amazon identifier for the Kindle version of the book.
|
||||
cdeType MOBI cdeType. Should always be EBOK for ebooks.
|
||||
fileRevisionId Revision of this file.
|
||||
|
||||
Example:
|
||||
{"contentGuid":"d8c14b0","asin":"B000JML5VM","cdeType":"EBOK","fileRevisionId":"1296874359405"}
|
||||
|
||||
|
||||
Page Mapping Header
|
||||
-------------------
|
||||
|
||||
The page mapping header is a string enclosed in {} containing key, value pairs.
|
||||
|
||||
content comments
|
||||
|
||||
asin The ISBN 10 for the paper book the pages correspond to
|
||||
pageMap Three value tuple. Looks like: "(N,N,N)"
|
||||
1) Number of bytes after header that starts the page numbering sequence
|
||||
2) unknown
|
||||
3) unknown
|
||||
|
||||
Example:
|
||||
{"asin":"1906694184","pageMap":"(4,a,1)"}
|
||||
|
||||
|
||||
Page List
|
||||
---------
|
||||
|
||||
The page list is a sequence of offsets in the uncompressed HTML. Each
|
||||
value is the beginning of a new page. Each entry is a 4 byte big endian
|
||||
int. The list is ordered lowest to highest.
|
68
src/calibre/devices/kindle/apnx.py
Normal file
68
src/calibre/devices/kindle/apnx.py
Normal file
@ -0,0 +1,68 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, John Schember <john at nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Generates and writes an APNX page mapping file.
|
||||
'''
|
||||
|
||||
import struct
|
||||
import uuid
|
||||
|
||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||
|
||||
class APNXBuilder(object):
    '''
    Builds APNX page mapping files for MOBI books.

    Page boundaries are currently approximated with a fixed-size formula
    (described by the original author as the Adobe one): a new page starts
    every ``PAGE_SIZE`` bytes of uncompressed text.
    '''

    # Number of uncompressed text bytes that make up one page.
    PAGE_SIZE = 1024

    def write_apnx(self, mobi_file_path, apnx_path):
        '''
        Generate the APNX data for the MOBI file at ``mobi_file_path`` and
        write it to ``apnx_path``.

        The text length is read as a big-endian unsigned 32-bit integer
        from bytes 4-8 of section 0 of the PDB container.
        '''
        with open(mobi_file_path, 'rb') as mf:
            phead = PdbHeaderReader(mf)
            r0 = phead.section_data(0)
            text_length = struct.unpack('>I', r0[4:8])[0]

        pages = self.get_pages(text_length)
        apnx = self.generate_apnx(pages)

        with open(apnx_path, 'wb') as apnxf:
            apnxf.write(apnx)

    def generate_apnx(self, pages):
        '''
        Serialize ``pages`` (a sequence of integer text offsets, one per
        page start) into APNX data and return it as a byte string.

        Raises ``struct.error`` if there are more pages than the unsigned
        16-bit page count field can hold, or if an offset does not fit in
        an unsigned 32-bit integer.
        '''
        content_vals = {
            # First 8 hex digits of a random UUID.
            'guid': uuid.uuid4().hex[:8],
            'isbn': '',
        }

        # The headers are pure ASCII; encoding them keeps every piece that
        # is joined below a byte string.
        content_header = ('{"contentGuid":"%(guid)s","asin":"%(isbn)s","cdeType":"EBOK","fileRevisionId":"1"}' % content_vals).encode('ascii')
        page_header = ('{"asin":"%(isbn)s","pageMap":"(1,a,1)"}' % content_vals).encode('ascii')

        parts = [
            struct.pack('>I', 65537),
            # Offset of the first byte after the content header: three
            # 4-byte fields precede it.
            struct.pack('>I', 12 + len(content_header)),
            struct.pack('>I', len(content_header)),
            content_header,
            struct.pack('>H', 1),
            struct.pack('>H', len(page_header)),
            struct.pack('>H', len(pages)),
            struct.pack('>H', 32),
            page_header,
        ]

        # write page values to apnx
        parts.extend(struct.pack('>L', page) for page in pages)

        # Join once instead of growing a string piece by piece.
        return b''.join(parts)

    def get_pages(self, text_length):
        '''
        Return the list of page start offsets for a text of
        ``text_length`` bytes: 0, PAGE_SIZE, 2*PAGE_SIZE, ... for every
        offset strictly below ``text_length``. Empty when ``text_length``
        is 0.
        '''
        return list(range(0, text_length, self.PAGE_SIZE))
|
315
src/calibre/devices/kindle/bookmark.py
Normal file
315
src/calibre/devices/kindle/bookmark.py
Normal file
@ -0,0 +1,315 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
from cStringIO import StringIO
|
||||
from struct import unpack
|
||||
|
||||
class Bookmark(): # {{{
|
||||
'''
|
||||
A simple class fetching bookmark data
|
||||
Kindle-specific
|
||||
'''
|
||||
def __init__(self, path, id, book_format, bookmark_extension):
    '''
    Load the annotation data stored at ``path`` and compute how much of
    the book has been read.

    :param path: path of the bookmark/annotation file on the device.
    :param id: identifier stored in every entry of ``self.user_notes``.
    :param book_format: extension of the book file (without the dot);
        substituted for ``bookmark_extension`` in ``path`` to locate the
        book.
    :param bookmark_extension: extension of the annotation file
        (without the dot); selects the parser.
    '''
    self.book_format = book_format
    self.bookmark_extension = bookmark_extension
    self.book_length = 0
    self.id = id
    self.last_read = 0
    self.last_read_location = 0
    self.path = path
    self.timestamp = 0
    self.user_notes = None

    self.get_bookmark_data()
    self.get_book_length()
    try:
        # Multiply by a float first: with two integers Python 2's ``/``
        # truncates before the result is converted to float.
        self.percent_read = min(100.0 * self.last_read / self.book_length, 100)
    except (ZeroDivisionError, TypeError, ValueError):
        # book_length stays 0 when the book could not be read.
        self.percent_read = 0
|
||||
|
||||
def record(self, n):
    '''
    Return a StreamSlicer over record ``n`` of ``self.stream``.

    The record's start offset is read from the 8-byte table entry at
    ``78 + 8*n`` in ``self.data``; the end is the next entry's offset, or
    open-ended (``None``) for the last record.

    Raises ValueError when ``n`` is not below ``self.nrecs``.
    '''
    from calibre.ebooks.metadata.mobi import StreamSlicer
    if n >= self.nrecs:
        raise ValueError('non-existent record %r' % n)

    entry = 78 + (8 * n)
    begin = unpack('>I', self.data[entry + 0:entry + 4])[0]
    if n < (self.nrecs - 1):
        # The following table entry's offset marks where this record ends.
        end = unpack('>I', self.data[entry + 8:entry + 12])[0]
    else:
        end = None
    return StreamSlicer(self.stream, begin, end)
|
||||
|
||||
def get_bookmark_data(self):
    '''
    Parse the annotation file at ``self.path`` and store the results on
    the instance. Nothing is returned.

    Sets ``self.timestamp``, ``self.last_read``,
    ``self.last_read_location`` and ``self.user_notes``, a dict mapping a
    location to ``dict(id, displayed_location, type, text)``. The ``pdr``
    parser additionally sets ``self.pdf_page_offset``. For an unsupported
    ``self.bookmark_extension`` a message is printed and
    ``self.user_notes`` is set to an empty dict.
    '''
    extension = self.bookmark_extension
    if extension == 'mbp':
        user_notes = self._read_mbp_notes()
    elif extension == 'tan':
        user_notes = self._read_tan_notes()
    elif extension == 'pdr':
        user_notes = self._read_pdr_notes()
    else:
        # A parenthesised single argument prints the same text whether
        # ``print`` is a statement or a function.
        print("unsupported bookmark_extension: %s" % self.bookmark_extension)
        user_notes = {}
    self.user_notes = user_notes

def _load_annotation_data(self):
    ''' Read ``self.path`` fully into memory and wrap it in a StreamSlicer. '''
    from calibre.ebooks.metadata.mobi import StreamSlicer
    with open(self.path, 'rb') as f:
        return StreamSlicer(StringIO(f.read()))

def _read_mbp_notes(self):
    ''' Parse an ``mbp`` annotation file; return the user_notes dict. '''
    MAGIC_MOBI_CONSTANT = 150
    user_notes = {}
    data = self._load_annotation_data()

    self.timestamp, = unpack('>I', data[0x24:0x28])
    bpar_offset, = unpack('>I', data[0x4e:0x52])
    lrlo = bpar_offset + 0x0c
    self.last_read = int(unpack('>I', data[lrlo:lrlo+4])[0])
    # ``//`` keeps displayed locations integral on every Python version.
    self.last_read_location = self.last_read // MAGIC_MOBI_CONSTANT + 1

    # Store the annotations/locations
    bpl = bpar_offset + 4
    bpar_len, = unpack('>I', data[bpl:bpl+4])
    bpar_len += 8
    eo = bpar_offset + bpar_len

    # Walk bookmark entries
    sig = data[eo:eo+4]
    previous_block = None
    # Location of the most recent data header; None until one is seen.
    location = None

    while sig == 'DATA':
        text = None
        entry_type = None
        rec_len, = unpack('>I', data[eo+4:eo+8])
        if rec_len == 0:
            current_block = "empty_data"
        elif data[eo+8:eo+12] == "EBAR":
            current_block = "data_header"
            location, = unpack('>I', data[eo+0x34:eo+0x38])
        else:
            current_block = "text_block"
            if previous_block == 'empty_data':
                entry_type = 'Note'
            elif previous_block == 'data_header':
                entry_type = 'Highlight'
            text = data[eo+8:eo+8+rec_len].decode('utf-16-be')

        # A text block seen before any data header has no location to be
        # filed under, so it is skipped instead of raising a NameError.
        if entry_type and location is not None:
            displayed_location = location // MAGIC_MOBI_CONSTANT + 1
            user_notes[location] = dict(id=self.id,
                                        displayed_location=displayed_location,
                                        type=entry_type,
                                        text=text)

        eo += rec_len + 8
        previous_block = current_block
        sig = data[eo:eo+4]

    while sig == 'BKMK':
        # Fix start location for Highlights using BKMK data
        end_loc, = unpack('>I', data[eo+0x10:eo+0x14])

        note = user_notes.get(end_loc)
        if note is not None and note['type'] in ('Highlight', 'Note'):
            # Switch location to start (0x08:0x0c)
            start, = unpack('>I', data[eo+8:eo+12])
            # Remove the old key before re-inserting so the entry
            # survives when start == end_loc.
            user_notes.pop(end_loc)
            note['displayed_location'] = start // MAGIC_MOBI_CONSTANT + 1
            user_notes[start] = note
        elif end_loc != self.last_read:
            # If a bookmark coincides with a user annotation, the locs could
            # be the same - cheat by nudging -1
            # Skip bookmark for last_read_location
            displayed_location = end_loc // MAGIC_MOBI_CONSTANT + 1
            user_notes[end_loc - 1] = dict(id=self.id,
                                           displayed_location=displayed_location,
                                           type='Bookmark',
                                           text=None)
        rec_len, = unpack('>I', data[eo+4:eo+8])
        eo += rec_len + 8
        sig = data[eo:eo+4]

    return user_notes

def _get_topaz_highlight(self, displayed_location):
    '''
    Return the text of the highlight at ``displayed_location`` by parsing
    My Clippings.txt, or a placeholder message when it cannot be found.

    Search looks for book title match, highlight match, and location match.
    Author is not matched. This will find the first instance of a clipping
    only.
    '''
    from calibre.ebooks.metadata.topaz import get_metadata as get_topaz_metadata
    book_fs = self.path.replace('.%s' % self.bookmark_extension, '.%s' % self.book_format)
    with open(book_fs, 'rb') as f2:
        stream = StringIO(f2.read())
    mi = get_topaz_metadata(stream)

    my_clippings = self.path
    split = my_clippings.find('documents') + len('documents/')
    my_clippings = my_clippings[:split] + "My Clippings.txt"

    search_str1 = '%s' % (mi.title)
    search_str2 = '- Highlight Loc. %d' % (displayed_location)
    try:
        with open(my_clippings, 'r') as f2:
            # 0: looking for the title line, 1: looking for the location
            # line, 2: collecting text up to the separator line.
            marker_found = 0
            pieces = []
            for line in f2:
                if marker_found == 0:
                    if line.startswith(search_str1):
                        marker_found = 1
                elif marker_found == 1:
                    if line.startswith(search_str2):
                        marker_found = 2
                elif marker_found == 2:
                    if line.startswith('=========='):
                        return ''.join(pieces)
                    pieces.append(line.strip())
    except Exception:
        # Best effort: an unreadable clippings file yields the placeholder.
        pass
    # Also reached when the file ends before the closing separator.
    return '(Unable to extract highlight text from My Clippings.txt)'

def _read_tan_notes(self):
    ''' Parse a ``tan`` annotation file; return the user_notes dict. '''
    MAGIC_TOPAZ_CONSTANT = 33.33
    user_notes = {}

    self.timestamp = os.path.getmtime(self.path)
    data = self._load_annotation_data()
    self.last_read = int(unpack('>I', data[5:9])[0])
    self.last_read_location = self.last_read/MAGIC_TOPAZ_CONSTANT + 1
    entries, = unpack('>I', data[9:13])
    current_entry = 0
    e_base = 0x0d
    while current_entry < entries:
        location, = unpack('>I', data[e_base+2:e_base+6])
        text = None
        text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
        e_type, = unpack('>B', data[e_base+1])
        if e_type == 0:
            e_type = 'Bookmark'
        elif e_type == 1:
            e_type = 'Highlight'
            text = self._get_topaz_highlight(location/MAGIC_TOPAZ_CONSTANT + 1)
        elif e_type == 2:
            e_type = 'Note'
            text = data[e_base+0x10:e_base+0x10+text_len]
        else:
            e_type = 'Unknown annotation type'

        displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
        user_notes[location] = dict(id=self.id,
                                    displayed_location=displayed_location,
                                    type=e_type,
                                    text=text)
        # A text length of 0xFFFFFFFF marks an entry without a text payload.
        if text_len == 0xFFFFFFFF:
            e_base = e_base + 14
        else:
            e_base = e_base + 14 + 2 + text_len
        current_entry += 1

    # Drop the entry that sits exactly at the last read position.
    user_notes.pop(self.last_read, None)
    return user_notes

def _read_pdr_notes(self):
    '''
    Parse a ``pdr`` annotation file; return the user_notes dict.

    Every entry is recorded as a Bookmark keyed by its label, which is
    used as the page number. Raises ValueError if a label is not a decimal
    number.
    '''
    user_notes = {}

    self.timestamp = os.path.getmtime(self.path)
    data = self._load_annotation_data()
    self.last_read = int(unpack('>I', data[5:9])[0])
    entries, = unpack('>I', data[9:13])
    current_entry = 0
    e_base = 0x0d
    self.pdf_page_offset = 0
    while current_entry < entries:
        # Use label as page number
        pdf_location, = unpack('>I', data[e_base+1:e_base+5])
        label_len, = unpack('>H', data[e_base+5:e_base+7])
        location = int(data[e_base+7:e_base+7+label_len])
        user_notes[location] = dict(id=self.id,
                                    displayed_location=location,
                                    type='Bookmark',
                                    text=None)
        # The offset from the last entry parsed is the one kept.
        self.pdf_page_offset = pdf_location - location
        e_base += (7 + label_len)
        current_entry += 1

    self.last_read_location = self.last_read - self.pdf_page_offset
    return user_notes
|
||||
|
||||
def get_book_length(self):
    '''
    Determine the length of the book the annotations belong to and store
    it in ``self.book_length`` (0 when it cannot be determined).

    The book path is derived from ``self.path`` by substituting
    ``self.book_format`` for ``self.bookmark_extension``. Reading is best
    effort: any error while reading the book leaves the length at 0. For
    an unsupported ``self.bookmark_extension`` a message is printed.
    '''
    book_fs = self.path.replace('.%s' % self.bookmark_extension, '.%s' % self.book_format)

    self.book_length = 0
    if self.bookmark_extension == 'mbp':
        from calibre.ebooks.metadata.mobi import StreamSlicer
        # Read the book len from the header
        try:
            with open(book_fs, 'rb') as f:
                self.stream = StringIO(f.read())
            self.data = StreamSlicer(self.stream)
            self.nrecs, = unpack('>H', self.data[76:78])
            record0 = self.record(0)
            self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
        except Exception:
            pass
    elif self.bookmark_extension == 'tan':
        # Read bookLength from metadata
        from calibre.ebooks.metadata.topaz import MetadataUpdater
        try:
            with open(book_fs, 'rb') as f:
                mu = MetadataUpdater(f)
                self.book_length = mu.book_length
        except Exception:
            pass
    elif self.bookmark_extension == 'pdr':
        from calibre import plugins
        try:
            # PDFs are binary: read in 'rb' mode, and close the handle
            # deterministically instead of leaking it.
            with open(book_fs, 'rb') as f:
                raw = f.read()
            self.book_length = plugins['pdfreflow'][0].get_numpages(raw)
        except Exception:
            pass
    else:
        # A parenthesised single argument prints the same text whether
        # ``print`` is a statement or a function.
        print("unsupported bookmark_extension: %s" % self.bookmark_extension)
|
||||
|
||||
# }}}
|
@ -7,10 +7,11 @@ __docformat__ = 'restructuredtext en'
|
||||
'''
|
||||
Device driver for Amazon's Kindle
|
||||
'''
|
||||
import datetime, os, re, sys, json, hashlib
|
||||
from cStringIO import StringIO
|
||||
from struct import unpack
|
||||
|
||||
import datetime, os, re, sys, json, hashlib
|
||||
|
||||
from calibre.devices.kindle.apnx import APNXBuilder
|
||||
from calibre.devices.kindle.bookmark import Bookmark
|
||||
from calibre.devices.usbms.driver import USBMS
|
||||
|
||||
'''
|
||||
@ -170,6 +171,8 @@ class KINDLE2(KINDLE):
|
||||
description = _('Communicate with the Kindle 2/3 eBook reader.')
|
||||
|
||||
FORMATS = KINDLE.FORMATS + ['pdf']
|
||||
DELETE_EXTS = KINDLE.DELETE_EXTS + ['.apnx']
|
||||
|
||||
PRODUCT_ID = [0x0002, 0x0004]
|
||||
BCD = [0x0100]
|
||||
|
||||
@ -205,6 +208,23 @@ class KINDLE2(KINDLE):
|
||||
if h in path_map:
|
||||
book.device_collections = list(sorted(path_map[h]))
|
||||
|
||||
def upload_cover(self, path, filename, metadata, filepath):
    '''
    Hijacking this function to write the apnx file.

    For a book whose ``filepath`` ends in ``.mobi`` (case-insensitive) an
    APNX page mapping file is generated from it and written to
    ``<path>/<filename>.apnx``. Other books are ignored. Generation is
    best effort: a failure is reported on stdout with a traceback and
    never propagated to the caller.
    '''
    if not filepath.lower().endswith('.mobi'):
        return

    apnx_path = '%s.apnx' % os.path.join(path, filename)
    try:
        APNXBuilder().write_apnx(filepath, apnx_path)
    except Exception:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt
        # and SystemExit are not swallowed.
        print('Failed to generate APNX')
        import traceback
        traceback.print_exc()
|
||||
|
||||
|
||||
class KINDLE_DX(KINDLE2):
|
||||
|
||||
name = 'Kindle DX Device Interface'
|
||||
@ -214,310 +234,3 @@ class KINDLE_DX(KINDLE2):
|
||||
PRODUCT_ID = [0x0003]
|
||||
BCD = [0x0100]
|
||||
|
||||
class Bookmark(): # {{{
|
||||
'''
|
||||
A simple class fetching bookmark data
|
||||
Kindle-specific
|
||||
'''
|
||||
def __init__(self, path, id, book_format, bookmark_extension):
|
||||
self.book_format = book_format
|
||||
self.bookmark_extension = bookmark_extension
|
||||
self.book_length = 0
|
||||
self.id = id
|
||||
self.last_read = 0
|
||||
self.last_read_location = 0
|
||||
self.path = path
|
||||
self.timestamp = 0
|
||||
self.user_notes = None
|
||||
|
||||
self.get_bookmark_data()
|
||||
self.get_book_length()
|
||||
try:
|
||||
self.percent_read = min(float(100*self.last_read / self.book_length),100)
|
||||
except:
|
||||
self.percent_read = 0
|
||||
|
||||
def record(self, n):
|
||||
from calibre.ebooks.metadata.mobi import StreamSlicer
|
||||
if n >= self.nrecs:
|
||||
raise ValueError('non-existent record %r' % n)
|
||||
offoff = 78 + (8 * n)
|
||||
start, = unpack('>I', self.data[offoff + 0:offoff + 4])
|
||||
stop = None
|
||||
if n < (self.nrecs - 1):
|
||||
stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
|
||||
return StreamSlicer(self.stream, start, stop)
|
||||
|
||||
def get_bookmark_data(self):
|
||||
''' Return the timestamp and last_read_location '''
|
||||
from calibre.ebooks.metadata.mobi import StreamSlicer
|
||||
user_notes = {}
|
||||
if self.bookmark_extension == 'mbp':
|
||||
MAGIC_MOBI_CONSTANT = 150
|
||||
with open(self.path,'rb') as f:
|
||||
stream = StringIO(f.read())
|
||||
data = StreamSlicer(stream)
|
||||
self.timestamp, = unpack('>I', data[0x24:0x28])
|
||||
bpar_offset, = unpack('>I', data[0x4e:0x52])
|
||||
lrlo = bpar_offset + 0x0c
|
||||
self.last_read = int(unpack('>I', data[lrlo:lrlo+4])[0])
|
||||
self.last_read_location = self.last_read/MAGIC_MOBI_CONSTANT + 1
|
||||
entries, = unpack('>I', data[0x4a:0x4e])
|
||||
|
||||
# Store the annotations/locations
|
||||
bpl = bpar_offset + 4
|
||||
bpar_len, = unpack('>I', data[bpl:bpl+4])
|
||||
bpar_len += 8
|
||||
#print "bpar_len: 0x%x" % bpar_len
|
||||
eo = bpar_offset + bpar_len
|
||||
|
||||
# Walk bookmark entries
|
||||
#print " --- %s --- " % self.path
|
||||
current_entry = 1
|
||||
sig = data[eo:eo+4]
|
||||
previous_block = None
|
||||
|
||||
while sig == 'DATA':
|
||||
text = None
|
||||
entry_type = None
|
||||
rec_len, = unpack('>I', data[eo+4:eo+8])
|
||||
if rec_len == 0:
|
||||
current_block = "empty_data"
|
||||
elif data[eo+8:eo+12] == "EBAR":
|
||||
current_block = "data_header"
|
||||
#entry_type = "data_header"
|
||||
location, = unpack('>I', data[eo+0x34:eo+0x38])
|
||||
#print "data_header location: %d" % location
|
||||
else:
|
||||
current_block = "text_block"
|
||||
if previous_block == 'empty_data':
|
||||
entry_type = 'Note'
|
||||
elif previous_block == 'data_header':
|
||||
entry_type = 'Highlight'
|
||||
text = data[eo+8:eo+8+rec_len].decode('utf-16-be')
|
||||
|
||||
if entry_type:
|
||||
displayed_location = location/MAGIC_MOBI_CONSTANT + 1
|
||||
user_notes[location] = dict(id=self.id,
|
||||
displayed_location=displayed_location,
|
||||
type=entry_type,
|
||||
text=text)
|
||||
|
||||
eo += rec_len + 8
|
||||
current_entry += 1
|
||||
previous_block = current_block
|
||||
sig = data[eo:eo+4]
|
||||
|
||||
while sig == 'BKMK':
|
||||
# Fix start location for Highlights using BKMK data
|
||||
end_loc, = unpack('>I', data[eo+0x10:eo+0x14])
|
||||
|
||||
if end_loc in user_notes and \
|
||||
(user_notes[end_loc]['type'] == 'Highlight' or \
|
||||
user_notes[end_loc]['type'] == 'Note'):
|
||||
# Switch location to start (0x08:0x0c)
|
||||
start, = unpack('>I', data[eo+8:eo+12])
|
||||
user_notes[start] = user_notes[end_loc]
|
||||
'''
|
||||
print " %s: swapping 0x%x (%d) to 0x%x (%d)" % (user_notes[end_loc]['type'],
|
||||
end_loc,
|
||||
end_loc/MAGIC_MOBI_CONSTANT + 1,
|
||||
start,
|
||||
start//MAGIC_MOBI_CONSTANT + 1)
|
||||
'''
|
||||
user_notes[start]['displayed_location'] = start/MAGIC_MOBI_CONSTANT + 1
|
||||
user_notes.pop(end_loc)
|
||||
else:
|
||||
# If a bookmark coincides with a user annotation, the locs could
|
||||
# be the same - cheat by nudging -1
|
||||
# Skip bookmark for last_read_location
|
||||
if end_loc != self.last_read:
|
||||
# print " adding Bookmark at 0x%x (%d)" % (end_loc, end_loc/MAGIC_MOBI_CONSTANT + 1)
|
||||
displayed_location = end_loc/MAGIC_MOBI_CONSTANT + 1
|
||||
user_notes[end_loc - 1] = dict(id=self.id,
|
||||
displayed_location=displayed_location,
|
||||
type='Bookmark',
|
||||
text=None)
|
||||
rec_len, = unpack('>I', data[eo+4:eo+8])
|
||||
eo += rec_len + 8
|
||||
sig = data[eo:eo+4]
|
||||
|
||||
elif self.bookmark_extension == 'tan':
|
||||
from calibre.ebooks.metadata.topaz import get_metadata as get_topaz_metadata
|
||||
|
||||
def get_topaz_highlight(displayed_location):
|
||||
# Parse My Clippings.txt for a matching highlight
|
||||
# Search looks for book title match, highlight match, and location match
|
||||
# Author is not matched
|
||||
# This will find the first instance of a clipping only
|
||||
book_fs = self.path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format)
|
||||
with open(book_fs,'rb') as f2:
|
||||
stream = StringIO(f2.read())
|
||||
mi = get_topaz_metadata(stream)
|
||||
my_clippings = self.path
|
||||
split = my_clippings.find('documents') + len('documents/')
|
||||
my_clippings = my_clippings[:split] + "My Clippings.txt"
|
||||
try:
|
||||
with open(my_clippings, 'r') as f2:
|
||||
marker_found = 0
|
||||
text = ''
|
||||
search_str1 = '%s' % (mi.title)
|
||||
search_str2 = '- Highlight Loc. %d' % (displayed_location)
|
||||
for line in f2:
|
||||
if marker_found == 0:
|
||||
if line.startswith(search_str1):
|
||||
marker_found = 1
|
||||
elif marker_found == 1:
|
||||
if line.startswith(search_str2):
|
||||
marker_found = 2
|
||||
elif marker_found == 2:
|
||||
if line.startswith('=========='):
|
||||
break
|
||||
text += line.strip()
|
||||
else:
|
||||
raise Exception('error')
|
||||
except:
|
||||
text = '(Unable to extract highlight text from My Clippings.txt)'
|
||||
return text
|
||||
|
||||
MAGIC_TOPAZ_CONSTANT = 33.33
|
||||
self.timestamp = os.path.getmtime(self.path)
|
||||
with open(self.path,'rb') as f:
|
||||
stream = StringIO(f.read())
|
||||
data = StreamSlicer(stream)
|
||||
self.last_read = int(unpack('>I', data[5:9])[0])
|
||||
self.last_read_location = self.last_read/MAGIC_TOPAZ_CONSTANT + 1
|
||||
entries, = unpack('>I', data[9:13])
|
||||
current_entry = 0
|
||||
e_base = 0x0d
|
||||
while current_entry < entries:
|
||||
location, = unpack('>I', data[e_base+2:e_base+6])
|
||||
text = None
|
||||
text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
|
||||
e_type, = unpack('>B', data[e_base+1])
|
||||
if e_type == 0:
|
||||
e_type = 'Bookmark'
|
||||
elif e_type == 1:
|
||||
e_type = 'Highlight'
|
||||
text = get_topaz_highlight(location/MAGIC_TOPAZ_CONSTANT + 1)
|
||||
elif e_type == 2:
|
||||
e_type = 'Note'
|
||||
text = data[e_base+0x10:e_base+0x10+text_len]
|
||||
else:
|
||||
e_type = 'Unknown annotation type'
|
||||
|
||||
displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
|
||||
user_notes[location] = dict(id=self.id,
|
||||
displayed_location=displayed_location,
|
||||
type=e_type,
|
||||
text=text)
|
||||
if text_len == 0xFFFFFFFF:
|
||||
e_base = e_base + 14
|
||||
else:
|
||||
e_base = e_base + 14 + 2 + text_len
|
||||
current_entry += 1
|
||||
for location in user_notes:
|
||||
if location == self.last_read:
|
||||
user_notes.pop(location)
|
||||
break
|
||||
|
||||
elif self.bookmark_extension == 'pdr':
|
||||
self.timestamp = os.path.getmtime(self.path)
|
||||
with open(self.path,'rb') as f:
|
||||
stream = StringIO(f.read())
|
||||
data = StreamSlicer(stream)
|
||||
self.last_read = int(unpack('>I', data[5:9])[0])
|
||||
entries, = unpack('>I', data[9:13])
|
||||
current_entry = 0
|
||||
e_base = 0x0d
|
||||
self.pdf_page_offset = 0
|
||||
while current_entry < entries:
|
||||
'''
|
||||
location, = unpack('>I', data[e_base+2:e_base+6])
|
||||
text = None
|
||||
text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
|
||||
e_type, = unpack('>B', data[e_base+1])
|
||||
if e_type == 0:
|
||||
e_type = 'Bookmark'
|
||||
elif e_type == 1:
|
||||
e_type = 'Highlight'
|
||||
text = get_topaz_highlight(location/MAGIC_TOPAZ_CONSTANT + 1)
|
||||
elif e_type == 2:
|
||||
e_type = 'Note'
|
||||
text = data[e_base+0x10:e_base+0x10+text_len]
|
||||
else:
|
||||
e_type = 'Unknown annotation type'
|
||||
|
||||
if self.book_format in ['tpz','azw1']:
|
||||
displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
|
||||
elif self.book_format == 'pdf':
|
||||
# *** This needs implementation
|
||||
displayed_location = location
|
||||
user_notes[location] = dict(id=self.id,
|
||||
displayed_location=displayed_location,
|
||||
type=e_type,
|
||||
text=text)
|
||||
if text_len == 0xFFFFFFFF:
|
||||
e_base = e_base + 14
|
||||
else:
|
||||
e_base = e_base + 14 + 2 + text_len
|
||||
current_entry += 1
|
||||
'''
|
||||
# Use label as page number
|
||||
pdf_location, = unpack('>I', data[e_base+1:e_base+5])
|
||||
label_len, = unpack('>H', data[e_base+5:e_base+7])
|
||||
location = int(data[e_base+7:e_base+7+label_len])
|
||||
displayed_location = location
|
||||
e_type = 'Bookmark'
|
||||
text = None
|
||||
user_notes[location] = dict(id=self.id,
|
||||
displayed_location=displayed_location,
|
||||
type=e_type,
|
||||
text=text)
|
||||
self.pdf_page_offset = pdf_location - location
|
||||
e_base += (7 + label_len)
|
||||
current_entry += 1
|
||||
|
||||
self.last_read_location = self.last_read - self.pdf_page_offset
|
||||
|
||||
else:
|
||||
print "unsupported bookmark_extension: %s" % self.bookmark_extension
|
||||
self.user_notes = user_notes
|
||||
|
||||
def get_book_length(self):
|
||||
from calibre.ebooks.metadata.mobi import StreamSlicer
|
||||
book_fs = self.path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format)
|
||||
|
||||
self.book_length = 0
|
||||
if self.bookmark_extension == 'mbp':
|
||||
# Read the book len from the header
|
||||
try:
|
||||
with open(book_fs,'rb') as f:
|
||||
self.stream = StringIO(f.read())
|
||||
self.data = StreamSlicer(self.stream)
|
||||
self.nrecs, = unpack('>H', self.data[76:78])
|
||||
record0 = self.record(0)
|
||||
self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
|
||||
except:
|
||||
pass
|
||||
elif self.bookmark_extension == 'tan':
|
||||
# Read bookLength from metadata
|
||||
from calibre.ebooks.metadata.topaz import MetadataUpdater
|
||||
try:
|
||||
with open(book_fs,'rb') as f:
|
||||
mu = MetadataUpdater(f)
|
||||
self.book_length = mu.book_length
|
||||
except:
|
||||
pass
|
||||
elif self.bookmark_extension == 'pdr':
|
||||
from calibre import plugins
|
||||
try:
|
||||
self.book_length = plugins['pdfreflow'][0].get_numpages(open(book_fs).read())
|
||||
except:
|
||||
pass
|
||||
|
||||
else:
|
||||
print "unsupported bookmark_extension: %s" % self.bookmark_extension
|
||||
|
||||
# }}}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user