diff --git a/resources/recipes/johm.recipe b/resources/recipes/johm.recipe index d488d0d3f0..6178af9d30 100644 --- a/resources/recipes/johm.recipe +++ b/resources/recipes/johm.recipe @@ -84,4 +84,4 @@ class JournalofHospitalMedicine(BasicNewsRecipe): for img in soup.findAll('img', src=True): img['src'] = img['src'].replace('tfig', 'nfig') return soup - \ No newline at end of file + diff --git a/src/calibre/devices/kindle/driver.py b/src/calibre/devices/kindle/driver.py index 8c16016fee..23a954bd63 100644 --- a/src/calibre/devices/kindle/driver.py +++ b/src/calibre/devices/kindle/driver.py @@ -61,6 +61,16 @@ class KINDLE(USBMS): return mi def get_annotations(self, path_map): + MBP_FORMATS = [u'azw', u'mobi', u'prc', u'txt'] + TAN_FORMATS = [u'tpz', u'azw1'] + + mbp_formats = set() + for fmt in MBP_FORMATS: + mbp_formats.add(fmt) + tan_formats = set() + for fmt in TAN_FORMATS: + tan_formats.add(fmt) + def get_storage(): storage = [] if self._main_prefix: @@ -71,36 +81,48 @@ class KINDLE(USBMS): storage.append(os.path.join(self._card_b_prefix, self.EBOOK_DIR_CARD_B)) return storage - def resolve_mbp_paths(storage, path_map): + def resolve_bookmark_paths(storage, path_map): pop_list = [] + book_ext = {} for id in path_map: - for vol in storage: - #print "path_map[id]: %s" % path_map[id] - mbp_path = path_map[id].replace(os.path.abspath('/'),vol) - #print "looking for mbp_path: %s" % mbp_path - if os.path.exists(mbp_path): - #print "mbp_path found" - path_map[id] = mbp_path - break + file_fmts = set() + for fmt in path_map[id]['fmts']: + file_fmts.add(fmt) + + bookmark_extension = None + if file_fmts.intersection(mbp_formats): + book_extension = list(file_fmts.intersection(mbp_formats))[0] + bookmark_extension = 'mbp' + elif file_fmts.intersection(tan_formats): + book_extension = list(file_fmts.intersection(tan_formats))[0] + bookmark_extension = 'tan' + + if bookmark_extension: + for vol in storage: + bkmk_path = path_map[id]['path'].replace(os.path.abspath('/'),vol) + bkmk_path = bkmk_path.replace('bookmark',bookmark_extension) + if os.path.exists(bkmk_path): + path_map[id] = bkmk_path + book_ext[id] = book_extension + break + else: + pop_list.append(id) else: - #print "mbp_path not found" pop_list.append(id) - # Remove non-existent mbp files + # Remove non-existent bookmark templates for id in pop_list: path_map.pop(id) - return path_map + return path_map, book_ext storage = get_storage() - path_map = resolve_mbp_paths(storage, path_map) + path_map, book_ext = resolve_bookmark_paths(storage, path_map) - # path_map is now a mapping of valid mbp files - # Not yet implemented - Topaz annotations bookmarked_books = {} - MBP_FORMATS = ['azw', 'mobi', 'prc', 'txt'] for id in path_map: - myBookmark = Bookmark(path_map[id], MBP_FORMATS, id) - bookmarked_books[id] = self.UserAnnotation(type='mobi', bookmark=myBookmark) + bookmark_ext = path_map[id].rpartition('.')[2] + myBookmark = Bookmark(path_map[id], id, book_ext[id], bookmark_ext) + bookmarked_books[id] = self.UserAnnotation(type='kindle', bookmark=myBookmark) # This returns as job.result in gui2.ui.annotations_fetched(self,job) return bookmarked_books @@ -130,18 +152,20 @@ class Bookmark(): A simple class fetching bookmark data Kindle-specific ''' - def __init__(self, path, formats, id): - self.book_format = None + def __init__(self, path, id, book_format, bookmark_extension): + self.book_format = book_format + self.bookmark_extension = bookmark_extension self.book_length = 0 self.id = id + self.last_read = 0 self.last_read_location = 0 self.timestamp = 0 self.user_notes = None self.get_bookmark_data(path) - self.get_book_length(path, formats) + self.get_book_length(path) try: - self.percent_read = float(100*self.last_read_location / self.book_length) + self.percent_read = float(100*self.last_read / self.book_length) except: self.percent_read = 0 @@ -156,20 +180,22 @@ class Bookmark(): stop, = unpack('>I', self.data[offoff + 8:offoff + 12]) return StreamSlicer(self.stream, start, stop) - def get_bookmark_data(self, path, fetchUserNotes=True): + def get_bookmark_data(self, path): ''' Return the timestamp and last_read_location ''' from calibre.ebooks.metadata.mobi import StreamSlicer - with open(path,'rb') as f: - stream = StringIO(f.read()) - data = StreamSlicer(stream) - self.timestamp, = unpack('>I', data[0x24:0x28]) - bpar_offset, = unpack('>I', data[0x4e:0x52]) - lrlo = bpar_offset + 0x0c - self.last_read_location = int(unpack('>I', data[lrlo:lrlo+4])[0]) - entries, = unpack('>I', data[0x4a:0x4e]) + user_notes = {} + if self.bookmark_extension == 'mbp': + with open(path,'rb') as f: + stream = StringIO(f.read()) + data = StreamSlicer(stream) + self.timestamp, = unpack('>I', data[0x24:0x28]) + bpar_offset, = unpack('>I', data[0x4e:0x52]) + lrlo = bpar_offset + 0x0c + self.last_read = int(unpack('>I', data[lrlo:lrlo+4])[0]) + self.last_read_location = self.last_read/150 + 1 + entries, = unpack('>I', data[0x4a:0x4e]) - # Store the annotations/locations - if fetchUserNotes: + # Store the annotations/locations bpl = bpar_offset + 4 bpar_len, = unpack('>I', data[bpl:bpl+4]) bpar_len += 8 @@ -182,7 +208,6 @@ class Bookmark(): current_entry = 1 sig = data[eo:eo+4] previous_block = None - user_notes = {} while sig == 'DATA': text = None @@ -204,7 +229,10 @@ class Bookmark(): text = data[eo+8:eo+8+rec_len].decode('utf-16-be') if entry_type: - user_notes[location] = dict(type=entry_type, id=self.id, + displayed_location = location/150 + 1 + user_notes[location] = dict(id=self.id, + displayed_location=displayed_location, + type=entry_type, text=text) #print " %2d: %s %s" % (current_entry, entry_type,'at %d' % location if location else '') #if current_block == 'text_block': @@ -227,39 +255,104 @@ class Bookmark(): # If a bookmark coincides with a user annotation, the locs could # be the same - cheat by nudging -1 # Skip bookmark for last_read_location - if end_loc != self.last_read_location: - user_notes[end_loc - 1] = dict(type='Bookmark',id=self.id,text=None) + if end_loc != self.last_read: + displayed_location = end_loc/150 + 1 + user_notes[end_loc - 1] = dict(id=self.id, + displayed_location=displayed_location, + type='Bookmark', + text=None) rec_len, = unpack('>I', data[eo+4:eo+8]) eo += rec_len + 8 sig = data[eo:eo+4] + elif self.bookmark_extension == 'tan': + # TAN bookmarks + self.timestamp = os.path.getmtime(path) + with open(path,'rb') as f: + stream = StringIO(f.read()) + data = StreamSlicer(stream) + self.last_read = int(unpack('>I', data[5:9])[0]) + self.last_read_location = self.last_read/33 + entries, = unpack('>I', data[9:13]) + current_entry = 0 + e_base = 0x0d + while current_entry < entries: + location, = unpack('>I', data[e_base+2:e_base+6]) + text = None + text_len, = unpack('>I', data[e_base+0xA:e_base+0xE]) + e_type, = unpack('>B', data[e_base+1]) + if e_type == 0: + e_type = 'Bookmark' + elif e_type == 1: + e_type = 'Highlight' + text = "(Topaz highlights not yet supported)" + elif e_type == 2: + e_type = 'Note' + text = data[e_base+0x10:e_base+0x10+text_len] + else: + e_type = 'Unknown annotation type' + + if self.book_format in ['tpz','azw1']: + # *** This needs fine-tuning + displayed_location = location/33 + elif self.book_format == 'pdf': + # *** This needs testing + displayed_location = location + user_notes[location] = dict(id=self.id, + displayed_location=displayed_location, + type=e_type, + text=text) + if text_len == 0xFFFFFFFF: + e_base = e_base + 14 + else: + e_base = e_base + 14 + 2 + text_len + current_entry += 1 + for location in user_notes: + if location == self.last_read: + user_notes.pop(location) + break + else: + print "unsupported bookmark_extension: %s" % self.bookmark_extension + self.user_notes = user_notes + ''' for location in sorted(user_notes): - print ' Location %d: %s\n%s' % self.magicKindleLocationCalculator(location), + print ' Location %d: %s\n%s' % (user_notes[location]['displayed_location'], user_notes[location]['type'], '\n'.join(self.textdump(user_notes[location]['text']))) ''' - self.user_notes = user_notes - def get_book_length(self, path, formats): + def get_book_length(self, path): from calibre.ebooks.metadata.mobi import StreamSlicer - # This assumes only one of the possible formats exists on the Kindle - book_fs = None - for format in formats: - fmt = format.rpartition('.')[2] - book_fs = path.replace('.mbp','.%s' % fmt) - if os.path.exists(book_fs): - self.book_format = fmt - break - else: - #print "no files matching library formats exist on device" - self.book_length = 0 - return + book_fs = path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format) - # Read the book len from the header - with open(book_fs,'rb') as f: - self.stream = StringIO(f.read()) - self.data = StreamSlicer(self.stream) - self.nrecs, = unpack('>H', self.data[76:78]) - record0 = self.record(0) - self.book_length = int(unpack('>I', record0[0x04:0x08])[0]) + self.book_length = 0 + if self.bookmark_extension == 'mbp': + # Read the book len from the header + with open(book_fs,'rb') as f: + self.stream = StringIO(f.read()) + self.data = StreamSlicer(self.stream) + self.nrecs, = unpack('>H', self.data[76:78]) + record0 = self.record(0) + self.book_length = int(unpack('>I', record0[0x04:0x08])[0]) + elif self.bookmark_extension == 'tan': + # Read bookLength from metadata + with open(book_fs,'rb') as f: + stream = StringIO(f.read()) + raw = stream.read(8*1024) + if not raw.startswith('TPZ'): + raise ValueError('Not a Topaz file') + first = raw.find('metadata') + if first < 0: + raise ValueError('Invalid Topaz file') + second = raw.find('metadata', first+10) + if second < 0: + raise ValueError('Invalid Topaz file') + raw = raw[second:second+1000] + idx = raw.find('bookLength') + if idx > -1: + length = ord(raw[idx+len('bookLength')]) + self.book_length = int(raw[idx+len('bookLength')+1:idx+len('bookLength')+1+length]) + + else: + print "unsupported bookmark_extension: %s" % self.bookmark_extension diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index 140d652f72..c60ce0d76e 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -926,9 +926,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): ######################### Fetch annotations ################################ def fetch_annotations(self, *args): - # Figure out a list of ids using the same logic as the catalog generation - # FUnction. Use the currently connected device to map ids to paths - + # Generate a path_map from selected ids def get_ids_from_selected_rows(): rows = self.library_view.selectionModel().selectedRows() if not rows or len(rows) < 2: @@ -936,15 +934,22 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): ids = map(self.library_view.model().id, rows) return ids + def get_formats(id): + formats = db.formats(id, index_is_id=True) + fmts = [] + if formats: + for format in formats.split(','): + fmts.append(format.lower()) + return fmts + def generate_annotation_paths(ids, db, device): - # Generate a dict {1:'documents/documents/Asimov, Isaac/Foundation - Isaac Asimov.epub'} - # These are the not the absolute paths - individual storage mount points will need to be - # prepended during the search + # Generate path templates + # Individual storage mount points scanned/resolved in driver.get_annotations() path_map = {} for id in ids: mi = db.get_metadata(id, index_is_id=True) - a_path = device.create_upload_path(os.path.abspath('/'), mi, 'x.mbp', create_dirs=False) - path_map[id] = a_path + a_path = device.create_upload_path(os.path.abspath('/'), mi, 'x.bookmark', create_dirs=False) + path_map[id] = dict(path=a_path, fmts=get_formats(id)) return path_map device = self.device_manager.device @@ -1009,7 +1014,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): spanTag['style'] = 'font-weight:bold' spanTag.insert(0,NavigableString("%s
Last Page Read: Location %d (%d%%)" % \ (strftime(u'%x', timestamp.timetuple()), - last_read_location/150 + 1, percent_read))) + last_read_location, percent_read))) divTag.insert(dtc, spanTag) dtc += 1 @@ -1025,14 +1030,15 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): for location in sorted(user_notes): if user_notes[location]['text']: annotations.append('Location %d • %s
%s
' % \ - (location/150 + 1, user_notes[location]['type'], + (user_notes[location]['displayed_location'], + user_notes[location]['type'], user_notes[location]['text'] if \ - user_notes[location]['type'] == 'Note' else \ + user_notes[location]['type'] == 'Note' else \ '%s' % user_notes[location]['text'])) else: annotations.append('Location %d • %s
' % \ - (location/150 + 1, - user_notes[location]['type'])) + (user_notes[location]['displayed_location'], + user_notes[location]['type'])) for annotation in annotations: divTag.insert(dtc, annotation) @@ -1050,20 +1056,22 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): user_notes_soup = self.generate_annotation_html(bm.bookmark) mi = self.db.get_metadata(id, index_is_id=True) - a_offset = mi.comments.find('
') - ad_offset = mi.comments.find('
') - - if a_offset >= 0: - mi.comments = mi.comments[:a_offset] - if ad_offset >= 0: - mi.comments = mi.comments[:ad_offset] if mi.comments: - hrTag = Tag(user_notes_soup,'hr') - hrTag['class'] = 'annotations_divider' - user_notes_soup.insert(0,hrTag) + a_offset = mi.comments.find('
') + ad_offset = mi.comments.find('
') - mi.comments += user_notes_soup.prettify() + if a_offset >= 0: + mi.comments = mi.comments[:a_offset] + if ad_offset >= 0: + mi.comments = mi.comments[:ad_offset] + if mi.comments: + hrTag = Tag(user_notes_soup,'hr') + hrTag['class'] = 'annotations_divider' + user_notes_soup.insert(0,hrTag) + mi.comments += user_notes_soup.prettify() + else: + mi.comments = unicode(user_notes_soup.prettify()) # Update library comments self.db.set_comment(id, mi.comments) self.update_progress.emit(i) diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 38fa8cf118..64523a87d5 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -1179,6 +1179,15 @@ class EPUB_MOBI(CatalogPlugin): from calibre.devices.kindle.driver import Bookmark from calibre.ebooks.metadata import MetaInformation + MBP_FORMATS = [u'azw', u'mobi', u'prc', u'txt'] + TAN_FORMATS = [u'tpz', u'azw1'] + mbp_formats = set() + for fmt in MBP_FORMATS: + mbp_formats.add(fmt) + tan_formats = set() + for fmt in TAN_FORMATS: + tan_formats.add(fmt) + class BookmarkDevice(Device): def initialize(self, save_template): self._save_template = save_template @@ -1186,6 +1195,40 @@ class EPUB_MOBI(CatalogPlugin): def save_template(self): return self._save_template + def resolve_bookmark_paths(storage, path_map): + pop_list = [] + book_ext = {} + for id in path_map: + file_fmts = set() + for fmt in path_map[id]['fmts']: + file_fmts.add(fmt) + + bookmark_extension = None + if file_fmts.intersection(mbp_formats): + book_extension = list(file_fmts.intersection(mbp_formats))[0] + bookmark_extension = 'mbp' + elif file_fmts.intersection(tan_formats): + book_extension = list(file_fmts.intersection(tan_formats))[0] + bookmark_extension = 'tan' + + if bookmark_extension: + for vol in storage: + bkmk_path = path_map[id]['path'].replace(os.path.abspath('/'),vol) + bkmk_path = bkmk_path.replace('bookmark',bookmark_extension) + print "looking for %s" % bkmk_path + if os.path.exists(bkmk_path): + path_map[id] = bkmk_path + book_ext[id] = book_extension + break + else: + pop_list.append(id) + else: + pop_list.append(id) + # Remove non-existent bookmark templates + for id in pop_list: + path_map.pop(id) + return path_map, book_ext + if self.generateRecentlyRead: self.opts.log.info(" Collecting Kindle bookmarks matching catalog entries") @@ -1194,26 +1237,32 @@ class EPUB_MOBI(CatalogPlugin): bookmarks = {} for book in self.booksByTitle: - original_title = book['title'][book['title'].find(':') + 2:] if book['series'] \ - else book['title'] - myMeta = MetaInformation(original_title, - authors=book['authors']) - myMeta.author_sort = book['author_sort'] - bm_found = False - for vol in self.opts.connected_device['storage']: - bm_path = d.create_upload_path(vol, myMeta, 'x.mbp', create_dirs=False) - if os.path.exists(bm_path): - myBookmark = Bookmark(bm_path, book['formats'], book['id']) + if 'formats' in book: + path_map = {} + id = book['id'] + original_title = book['title'][book['title'].find(':') + 2:] if book['series'] \ + else book['title'] + myMeta = MetaInformation(original_title, + authors=book['authors']) + myMeta.author_sort = book['author_sort'] + a_path = d.create_upload_path('/', myMeta, 'x.bookmark', create_dirs=False) + path_map[id] = dict(path=a_path, fmts=[x.rpartition('.')[2] for x in book['formats']]) + + path_map, book_ext = resolve_bookmark_paths(self.opts.connected_device['storage'], path_map) + if path_map: + bookmark_ext = path_map[id].rpartition('.')[2] + myBookmark = Bookmark(path_map[id], id, book_ext[id], bookmark_ext) + print "book: %s\nlast_read_location: %d\nlength: %d" % (book['title'], + myBookmark.last_read_location, + myBookmark.book_length) if myBookmark.book_length: book['percent_read'] = float(100*myBookmark.last_read_location / myBookmark.book_length) dots = int((book['percent_read'] + 5)/10) dot_string = self.READ_PROGRESS_SYMBOL * dots empty_dots = self.UNREAD_PROGRESS_SYMBOL * (10 - dots) book['reading_progress'] = '%s%s' % (dot_string,empty_dots) - bookmarks[book['id']] = ((myBookmark,book)) - bm_found = True - if bm_found: - break + bookmarks[id] = ((myBookmark,book)) + self.bookmarked_books = bookmarks else: self.bookmarked_books = {}