Experimental support for fetching annotations from the Kindle

2025-06-23 15:30:45 -04:00 · 2010-03-04 11:03:57 -07:00 · 2010-03-04 11:03:57 -07:00 · f6f2995f0a
commit f6f2995f0a
parent bf91ca5e93 a14fdbc543
10 changed files with 391 additions and 148 deletions
--- a/resources/catalog/stylesheet.css
+++ b/resources/catalog/stylesheet.css
@ -93,3 +93,10 @@ hr.series_divider {
 	margin-top:0em;
 	margin-bottom:0em;
 	}
+
+hr.annotations_divider {
+	width:50%;
+	margin-left:1em;
+	margin-top:0em;
+	margin-bottom:0em;
+	}
--- a/src/calibre/devices/interface.py
+++ b/src/calibre/devices/interface.py
@ -6,6 +6,7 @@ the GUI. A device backend must subclass the L{Device} class. See prs500.py for
 a backend that implement the Device interface for the SONY PRS500 Reader.
 """
 import os
+from collections import namedtuple

 from calibre.customize import Plugin
 from calibre.constants import iswindows
@ -43,6 +44,9 @@ class DevicePlugin(Plugin):
    #: Icon for this device
    icon = I('reader.svg')

+    # Used by gui2.ui:annotations_fetched() and devices.kindle.driver:get_annotations()
+    UserAnnotation = namedtuple('Annotation','type, bookmark')
+
    @classmethod
    def get_gui_name(cls):
        if hasattr(cls, 'gui_name'):
--- a/src/calibre/devices/kindle/driver.py
+++ b/src/calibre/devices/kindle/driver.py
@ -7,10 +7,9 @@ __docformat__ = 'restructuredtext en'
 '''
 Device driver for Amazon's Kindle
 '''
-
-import os
-import re
-import sys
+import os, re, sys
+from cStringIO import StringIO
+from struct import unpack

 from calibre.devices.usbms.driver import USBMS

@ -62,7 +61,49 @@ class KINDLE(USBMS):
        return mi

    def get_annotations(self, path_map):
-        return {}
+        '''
+        Collect Kindle bookmark data for the books named in path_map.
+
+        :param path_map: mapping of book id to the expected path of its .mbp
+                         file, written relative to the placeholder root
+                         os.path.abspath('/<storage>').  The mapping is not
+                         modified.
+        :return: dict mapping each id whose .mbp file exists on the device to
+                 UserAnnotation(type='mobi', bookmark=<Bookmark>).  Ids with
+                 no .mbp file on any storage area are left out.
+        '''
+        def get_storage():
+            # Book directory of every storage area whose prefix is set
+            storage = []
+            if self._main_prefix:
+                storage.append(os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN))
+            if self._card_a_prefix:
+                storage.append(os.path.join(self._card_a_prefix, self.EBOOK_DIR_CARD_A))
+            if self._card_b_prefix:
+                storage.append(os.path.join(self._card_b_prefix, self.EBOOK_DIR_CARD_B))
+            return storage
+
+        def resolve_mbp_paths(storage, path_map):
+            # Build a fresh dict instead of rewriting and popping entries of
+            # the caller's mapping; ids without an .mbp file are simply skipped
+            placeholder = os.path.abspath('/<storage>')
+            resolved = {}
+            for book_id, template in path_map.items():
+                for vol in storage:
+                    mbp_path = template.replace(placeholder, vol)
+                    if os.path.exists(mbp_path):
+                        # First storage area holding the file wins
+                        resolved[book_id] = mbp_path
+                        break
+            return resolved
+
+        mbp_paths = resolve_mbp_paths(get_storage(), path_map)
+
+        # mbp_paths is a mapping of valid mbp files
+        # Not yet implemented - Topaz annotations
+        bookmarked_books = {}
+        MBP_FORMATS = ['azw', 'mobi', 'prc', 'txt']
+        for book_id, mbp_path in mbp_paths.items():
+            myBookmark = Bookmark(mbp_path, MBP_FORMATS, book_id)
+            bookmarked_books[book_id] = self.UserAnnotation(type='mobi', bookmark=myBookmark)
+
+        # This returns as job.result in gui2.ui.annotations_fetched(self,job)
+        return bookmarked_books


 class KINDLE2(KINDLE):
@ -83,3 +124,142 @@ class KINDLE_DX(KINDLE2):

    PRODUCT_ID = [0x0003]
    BCD        = [0x0100]
+
+class Bookmark():
+    '''
+    Fetch the reading position and user annotations stored for one book.
+
+    Parses the .mbp file at ``path`` for the timestamp, last-read location
+    and (optionally) notes, highlights and bookmarks, then reads the length
+    of the companion book file to derive ``percent_read``.
+    Kindle-specific
+    '''
+    def __init__(self, path, formats, id):
+        '''
+        :param path: path of the .mbp file on the device
+        :param formats: extensions tried, in order, when looking for the book
+                        file stored next to the .mbp file
+        :param id: identifier recorded on this object and on every user note
+        '''
+        self.book_format = None
+        self.book_length = 0
+        self.id = id
+        self.last_read_location = 0
+        self.timestamp = 0
+        self.user_notes = None
+
+        self.get_bookmark_data(path)
+        self.get_book_length(path, formats)
+        try:
+            self.percent_read = float(100*self.last_read_location / self.book_length)
+        except ZeroDivisionError:
+            # book_length stays 0 when no matching book file was found
+            self.percent_read = 0
+
+    def record(self, n):
+        '''
+        Return a StreamSlicer over record n of the book file loaded by
+        get_book_length().
+
+        :raises ValueError: if n is not smaller than self.nrecs
+        '''
+        from calibre.ebooks.metadata.mobi import StreamSlicer
+        if n >= self.nrecs:
+            raise ValueError('non-existent record %r' % n)
+        # Record table entries are 8 bytes each, starting at byte 78
+        offoff = 78 + (8 * n)
+        start, = unpack('>I', self.data[offoff + 0:offoff + 4])
+        # The next entry's offset bounds this record; the last record is
+        # left open-ended (stop=None)
+        stop = None
+        if n < (self.nrecs - 1):
+            stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
+        return StreamSlicer(self.stream, start, stop)
+
+    def get_bookmark_data(self, path, fetchUserNotes=True):
+        '''
+        Parse the .mbp file at path.
+
+        Sets self.timestamp and self.last_read_location.  self.user_notes is
+        set to a dict {location: dict(type=..., id=..., text=...)} holding
+        the notes, highlights and bookmarks found; it is empty when
+        fetchUserNotes is False.  Nothing is returned.
+        '''
+        from calibre.ebooks.metadata.mobi import StreamSlicer
+        with open(path,'rb') as f:
+            stream = StringIO(f.read())
+        # The whole file is buffered, so parsing does not need the open handle
+        data = StreamSlicer(stream)
+        self.timestamp, = unpack('>I', data[0x24:0x28])
+        bpar_offset, = unpack('>I', data[0x4e:0x52])
+        lrlo = bpar_offset + 0x0c
+        self.last_read_location = int(unpack('>I', data[lrlo:lrlo+4])[0])
+
+        # Bound unconditionally so the final assignment cannot raise
+        # NameError when fetchUserNotes is False
+        user_notes = {}
+
+        # Store the annotations/locations
+        if fetchUserNotes:
+            bpl = bpar_offset + 4
+            bpar_len, = unpack('>I', data[bpl:bpl+4])
+            bpar_len += 8
+            eo = bpar_offset + bpar_len
+
+            # Walk the DATA records that follow the block at bpar_offset
+            sig = data[eo:eo+4]
+            previous_block = None
+            # Location read from the most recent EBAR header, None until one
+            # has been seen
+            location = None
+
+            while sig == 'DATA':
+                text = None
+                entry_type = None
+                rec_len, = unpack('>I', data[eo+4:eo+8])
+                if rec_len == 0:
+                    current_block = "empty_data"
+                elif data[eo+8:eo+12] == "EBAR":
+                    current_block = "data_header"
+                    location, = unpack('>I', data[eo+0x34:eo+0x38])
+                else:
+                    current_block = "text_block"
+                    # What preceded the text decides how it is classified
+                    if previous_block == 'empty_data':
+                        entry_type = 'Note'
+                    elif previous_block == 'data_header':
+                        entry_type = 'Highlight'
+                    text = data[eo+8:eo+8+rec_len].decode('utf-16-be')
+
+                # Text seen before any header has no location to be filed under
+                if entry_type and location is not None:
+                    user_notes[location] = dict(type=entry_type, id=self.id,
+                                                text=text)
+
+                eo += rec_len + 8
+                previous_block = current_block
+                sig = data[eo:eo+4]
+
+            while sig == 'BKMK':
+                # Fix start location for Highlights using BKMK data
+                end_loc, = unpack('>I', data[eo+0x10:eo+0x14])
+                if end_loc in user_notes and user_notes[end_loc]['type'] != 'Note':
+                    start, = unpack('>I', data[eo+8:eo+12])
+                    # Pop before assigning so the entry survives when
+                    # start == end_loc
+                    user_notes[start] = user_notes.pop(end_loc)
+                else:
+                    # If a bookmark coincides with a user annotation, the locs could
+                    # be the same - cheat by nudging -1
+                    # Skip bookmark for last_read_location
+                    if end_loc != self.last_read_location:
+                        user_notes[end_loc - 1] = dict(type='Bookmark',id=self.id,text=None)
+                rec_len, = unpack('>I', data[eo+4:eo+8])
+                eo += rec_len + 8
+                sig = data[eo:eo+4]
+
+        self.user_notes = user_notes
+
+    def get_book_length(self, path, formats):
+        '''
+        Find the book file stored next to the .mbp file and read its length.
+
+        Each entry of formats is tried as the extension replacing that of
+        path.  The first file that exists sets self.book_format, and
+        self.book_length is read from the first record of that file.  When
+        no file exists self.book_length is set to 0.
+        '''
+        from calibre.ebooks.metadata.mobi import StreamSlicer
+        # This assumes only one of the possible formats exists on the Kindle
+        base = os.path.splitext(path)[0]
+        book_fs = None
+        for candidate in formats:
+            fmt = candidate.rpartition('.')[2]
+            # Swap only the trailing extension; str.replace would also rewrite
+            # '.mbp' occurring in a directory name
+            book_fs = '%s.%s' % (base, fmt)
+            if os.path.exists(book_fs):
+                self.book_format = fmt
+                break
+        else:
+            # No file matching any of the formats exists on the device
+            self.book_length = 0
+            return
+
+        # Read the book len from the header
+        with open(book_fs,'rb') as f:
+            self.stream = StringIO(f.read())
+        # record() reads self.stream, self.data and self.nrecs
+        self.data = StreamSlicer(self.stream)
+        self.nrecs, = unpack('>H', self.data[76:78])
+        record0 = self.record(0)
+        self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@ -17,6 +17,7 @@ import time
 import re
 import sys
 import glob
+
 from itertools import repeat

 from calibre.devices.interface import DevicePlugin
@ -793,6 +794,12 @@ class Device(DeviceConfig, DevicePlugin):
        '''
        return components

+    def get_annotations(self, path_map):
+        '''
+        Resolve path_map to annotation_map of files found on the device.
+
+        This implementation looks nothing up: path_map is ignored and an
+        empty dict is returned.
+
+        :param path_map: presumably a mapping of ids to paths on the device
+                         (TODO confirm against the caller)
+        :return: an empty dict
+        '''
+        return {}
+
    def create_upload_path(self, path, mdata, fname, create_dirs=True):
        path = os.path.abspath(path)
        extra_components = []
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@ -123,7 +123,7 @@ class USBMS(CLI, Device):
        '''
        :path: the full path were the associated book is located.
        :filename: the name of the book file without the extension.
-        :metatdata: metadata belonging to the book. Use metadata.thumbnail
+        :metadata: metadata belonging to the book. Use metadata.thumbnail
        for cover
        '''
        pass
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@ -221,7 +221,7 @@ class DeviceManager(Thread):
    def _annotations(self, path_map):
        return self.device.get_annotations(path_map)

-    def annotations(self, path_map, done):
+    def annotations(self, done, path_map):
        '''Return mapping of ids to annotations. Each annotation is of the
        form (type, location_info, content). path_map is a mapping of
        ids to paths on the device.'''
--- a/src/calibre/gui2/dialogs/book_info.py
+++ b/src/calibre/gui2/dialogs/book_info.py
@ -97,8 +97,7 @@ class BookInfo(QDialog, Ui_BookInfo):
        info = self.view.model().get_book_info(row)
        self.setWindowTitle(info[_('Title')])
        self.title.setText('<b>'+info.pop(_('Title')))
-        self.comments.setText(info.pop(_('Comments'), ''))
-
+        self.comments.setText('<div>%s</div>' % info.pop(_('Comments'), ''))
        cdata = info.pop('cover', '')
        self.cover_pixmap = QPixmap.fromImage(cdata)
        self.resize_cover()
--- a/src/calibre/gui2/dialogs/progress.py
+++ b/src/calibre/gui2/dialogs/progress.py
@ -40,7 +40,6 @@ class ProgressDialog(QDialog, Ui_Dialog):
        return property(fget=fget, fset=fset)


-
    def set_min(self, min):
        self.bar.setMinimum(min)

--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'

 '''The main GUI'''

-import os, shutil, sys, textwrap, collections, time
+import collections, datetime, os, shutil, sys, textwrap, time
 from xml.parsers.expat import ExpatError
 from Queue import Queue, Empty
 from threading import Thread
@ -18,10 +18,11 @@ from PyQt4.Qt import Qt, SIGNAL, QObject, QCoreApplication, QUrl, QTimer, \
                     QModelIndex, QPixmap, QColor, QPainter, QMenu, QIcon, \
                     QToolButton, QDialog, QDesktopServices, QFileDialog, \
                     QSystemTrayIcon, QApplication, QKeySequence, QAction, \
-                     QMessageBox, QStackedLayout, QHelpEvent, QInputDialog
+                     QMessageBox, QStackedLayout, QHelpEvent, QInputDialog,\
+                     QThread, pyqtSignal
 from PyQt4.QtSvg import QSvgRenderer

-from calibre import  prints, patheq
+from calibre import  prints, patheq, strftime
 from calibre.constants import __version__, __appname__, isfrozen, islinux, \
                    iswindows, isosx, filesystem_encoding
 from calibre.utils.filenames import ascii_filename
@ -54,6 +55,7 @@ from calibre.gui2.dialogs.search import SearchDialog
 from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
 from calibre.gui2.dialogs.book_info import BookInfo
 from calibre.ebooks import BOOK_EXTENSIONS
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
 from calibre.library.database2 import LibraryDatabase2, CoverCache
 from calibre.gui2.dialogs.confirm_delete import confirm

@ -924,21 +926,162 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
    ######################### Fetch annotations ################################

    def fetch_annotations(self, *args):
-        #current_device = self.device_manager.device
+        # Figure out a list of ids using the same logic as the catalog generation
+        # function.  Use the currently connected device to map ids to paths
+
+        def get_ids_from_selected_rows():
+			rows = self.library_view.selectionModel().selectedRows()
+			if not rows or len(rows) < 2:
+				rows = xrange(self.library_view.model().rowCount(QModelIndex()))
+			ids = map(self.library_view.model().id, rows)
+			return ids
+
+		def generate_annotation_paths(ids, db, device):
+			# Generate a dict {1:'documents/documents/Asimov, Isaac/Foundation - Isaac Asimov.epub'}
+			# These are not the absolute paths - individual storage mount points will need to be
+			# prepended during the search
 			path_map = {}
-        # code to calculate path_map
+			for id in ids:
+				mi = db.get_metadata(id, index_is_id=True)
+				a_path = device.create_upload_path(os.path.abspath('/<storage>'), mi, 'x.mbp', create_dirs=False)
+				path_map[id] = a_path
+			return path_map
+
+		device = self.device_manager.device
+
+        if self.current_view() is not self.library_view:
+            return error_dialog(self, _('Use library only'),
+                    _('User annotations generated from main library only'),
+                    show=True)
+        db = self.library_view.model().db
+
+		# Get the list of ids
+        ids = get_ids_from_selected_rows()
+        if not ids:
+            return error_dialog(self, _('No books selected'),
+                    _('No books selected to fetch annotations from'),
+                    show=True)
+
+		# Map ids to paths
+		path_map = generate_annotation_paths(ids, db, device)
+
+		# Dispatch to devices.kindle.driver.get_annotations()
        self.device_manager.annotations(Dispatcher(self.annotations_fetched),
                path_map)

-    def annotations_fetched(self, annotation_map):
-        if not annotation_map: return
+    def annotations_fetched(self, job):
+		from calibre.devices.usbms.device import Device
 		from calibre.gui2.dialogs.progress import ProgressDialog
-        pd = ProgressDialog(_('Adding annotations'),
-                _('Annotations will be saved in the comments field'),
-                min=0, max=0, parent=self)
-        # code to add annotations to database should run in a separate
-        # thread as it could potentially take a long time
-        pd.exec_()
+
+		class Updater(QThread):
+
+            update_progress = pyqtSignal(int)
+            update_done     = pyqtSignal()
+
+			def __init__(self, parent, db, annotation_map, done_callback):
+				QThread.__init__(self, parent)
+				self.db = db
+				self.pd = ProgressDialog(_('Merging user annotations into database'), '',
+                        0, len(job.result), parent=parent)
+
+				self.am = annotation_map
+                self.done_callback = done_callback
+				self.connect(self.pd, SIGNAL('canceled()'), self.canceled)
+				self.pd.setModal(True)
+				self.pd.show()
+                self.update_progress.connect(self.pd.set_value,
+                        type=Qt.QueuedConnection)
+                self.update_done.connect(self.pd.hide, type=Qt.QueuedConnection)
+
+			def generate_annotation_html(self, bookmark):
+				# Returns <div class="user_annotations"> ... </div>
+				last_read_location = bookmark.last_read_location
+				timestamp = datetime.datetime.utcfromtimestamp(bookmark.timestamp)
+				percent_read = bookmark.percent_read
+
+				ka_soup = BeautifulSoup()
+				dtc = 0
+				divTag = Tag(ka_soup,'div')
+				divTag['class'] = 'user_annotations'
+
+				# Add the last-read location
+				spanTag = Tag(ka_soup, 'span')
+				spanTag['style'] = 'font-weight:bold'
+				spanTag.insert(0,NavigableString("%s<br />Last Page Read: Location %d (%d%%)" % \
+								(strftime(u'%x', timestamp.timetuple()),
+                                last_read_location/150 + 1, percent_read)))
+
+				divTag.insert(dtc, spanTag)
+				dtc += 1
+				divTag.insert(dtc, Tag(ka_soup,'br'))
+				dtc += 1
+
+				if bookmark.user_notes:
+					user_notes = bookmark.user_notes
+					annotations = []
+
+					# Add the annotations sorted by location
+					# Italicize highlighted text
+					for location in sorted(user_notes):
+						if user_notes[location]['text']:
+							annotations.append('<b>Location %d &bull; %s</b><br />%s<br />' % \
+												(location/150 + 1, user_notes[location]['type'],
+                                                    user_notes[location]['text'] if \
+													user_notes[location]['type'] == 'Note' else \
+													'<i>%s</i>' % user_notes[location]['text']))
+						else:
+							annotations.append('<b>Location %d &bull; %s</b><br />' % \
+												(location/150 + 1,
+                                                    user_notes[location]['type']))
+
+					for annotation in annotations:
+						divTag.insert(dtc, annotation)
+						dtc += 1
+
+				ka_soup.insert(0,divTag)
+				return ka_soup
+
+			def canceled(self):
+				self.pd.hide()
+
+			def run(self):
+				for (i, id) in enumerate(self.am):
+					bm = Device.UserAnnotation(self.am[id][0],self.am[id][1])
+					user_notes_soup = self.generate_annotation_html(bm.bookmark)
+
+					mi = self.db.get_metadata(id, index_is_id=True)
+					a_offset = mi.comments.find('<div class="user_annotations">')
+					ad_offset = mi.comments.find('<hr class="annotations_divider" />')
+
+					if a_offset >= 0:
+						mi.comments = mi.comments[:a_offset]
+					if ad_offset >= 0:
+						mi.comments = mi.comments[:ad_offset]
+					if mi.comments:
+						hrTag = Tag(user_notes_soup,'hr')
+						hrTag['class'] = 'annotations_divider'
+						user_notes_soup.insert(0,hrTag)
+
+					mi.comments += user_notes_soup.prettify()
+
+					# Update library comments
+					self.db.set_comment(id, mi.comments)
+                    self.update_progress.emit(i)
+                self.update_done.emit()
+                self.done_callback(self.am.keys())
+
+        if not job.result: return
+
+        if self.current_view() is not self.library_view:
+            return error_dialog(self, _('Use library only'),
+                    _('User annotations generated from main library only'),
+                    show=True)
+        db = self.library_view.model().db
+
+        self.__annotation_updater = Updater(self, db, job.result,
+                Dispatcher(self.library_view.model().refresh_ids))
+        self.__annotation_updater.start()
+

    ############################################################################

--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -1,3 +1,6 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Greg Riker <griker at hotmail.com>'
+
 import datetime, htmlentitydefs, os, re, shutil

 from collections import namedtuple
@ -1020,6 +1023,8 @@ class EPUB_MOBI(CatalogPlugin):
            for record in data:
                this_title = {}

+                this_title['id'] = record['id']
+
                this_title['title'] = self.convertHTMLEntities(record['title'])
                if record['series']:
                    this_title['series'] = record['series']
@ -1037,19 +1042,11 @@ class EPUB_MOBI(CatalogPlugin):
                    else:
                        this_title['author'] = 'Unknown'

-                '''
-                this_title['author_sort_original'] =  record['author_sort']
-                author_sort =  record['author_sort'] if len(record['author_sort'].strip()) \
-                     else self.author_to_author_sort(this_title['author'])
-                author_sort = author_sort[0].upper() + author_sort[1:]
-                this_title['author_sort'] = author_sort
-                '''
                if 'author_sort' in record and record['author_sort'].strip():
                    this_title['author_sort'] = record['author_sort']
                else:
                    this_title['author_sort'] = self.author_to_author_sort(this_title['author'])

-                this_title['id'] = record['id']
                if record['publisher']:
                    this_title['publisher'] = re.sub('&', '&amp;', record['publisher'])

@ -1178,12 +1175,9 @@ class EPUB_MOBI(CatalogPlugin):
            Preferences|Add/Save|Sending to device, not a customized one specified in
            the Kindle plugin
            '''
-            from cStringIO import StringIO
-            from struct import unpack
-
            from calibre.devices.usbms.device import Device
+            from calibre.devices.kindle.driver import Bookmark
            from calibre.ebooks.metadata import MetaInformation
-            from calibre.ebooks.metadata.mobi import StreamSlicer

            class BookmarkDevice(Device):
                def initialize(self, save_template):
@ -1192,115 +1186,12 @@ class EPUB_MOBI(CatalogPlugin):
                def save_template(self):
                    return self._save_template

-            class Bookmark():
-                '''
-                A simple class storing bookmark data
-                Kindle-specific
-                '''
-                def __init__(self,path, formats, id):
-                    self.book_format = None
-                    self.book_length = 0
-                    self.id = id
-                    self.last_read_location = 0
-                    self.timestamp = 0
-                    self.get_bookmark_data(path)
-                    self.get_book_length(path, formats)
-
-
-                def get_bookmark_data(self, path):
-                    ''' Return the timestamp and last_read_location '''
-                    with open(path,'rb') as f:
-                        stream = StringIO(f.read())
-                        data = StreamSlicer(stream)
-                        self.timestamp, = unpack('>I', data[0x24:0x28])
-                        bpar_offset, = unpack('>I', data[0x4e:0x52])
-                        #print "bpar_offset: 0x%x" % bpar_offset
-                        lrlo = bpar_offset + 0x0c
-                        self.last_read_location = int(unpack('>I', data[lrlo:lrlo+4])[0])
-                        '''
-                        iolr = bpar_offset + 0x14
-                        index_of_last_read, = unpack('>I', data[iolr:iolr+4])
-                        #print "index_of_last_read: 0x%x" % index_of_last_read
-                        bpar_len, = unpack('>I', data[bpl:bpl+4])
-                        bpar_len += 8
-                        #print "bpar_len: 0x%x" % bpar_len
-                        dro = bpar_offset + bpar_len
-                        #print "dro: 0x%x" % dro
-
-                        # Walk to index_of_last_read to find last_read_location
-                        # If BKMK - offset 8
-                        # If DATA - offset 0x18 + 0x1c
-                        current_entry = 1
-                        while current_entry < index_of_last_read:
-                            rec_len, = unpack('>I', data[dro+4:dro+8])
-                            rec_len += 8
-                            dro += rec_len
-                            current_entry += 1
-
-                        # Looking at the record with last_read_location
-                        if data[dro:dro+4] == 'DATA':
-                            lrlo = dro + 0x18 + 0x1c
-                        elif data[dro:dro+4] == 'BKMK':
-                            lrlo = dro + 8
-                        else:
-                            print "Unrecognized bookmark block type"
-
-                        #print "lrlo: 0x%x" % lrlo
-                        self.last_read_location = float(unpack('>I', data[lrlo:lrlo+4])[0])
-                        #print "last_read_location: 0x%x" % self.last_read_location
-                        '''
-
-                def get_book_length(self, path, formats):
-                    # This assumes only one of the possible formats exists on the Kindle
-                    book_fs = None
-                    for format in formats:
-                        fmt = format.rpartition('.')[2]
-                        if fmt in ['mobi','prc','azw']:
-                            book_fs = path.replace('.mbp','.%s' % fmt)
-                            if os.path.exists(book_fs):
-                                self.book_format = fmt
-                                #print "%s exists on device" % book_fs
-                                break
-                    else:
-                        #print "no files matching library formats exist on device"
-                        self.book_length = 0
-                        return
-                    # Read the book len from the header
-                    with open(book_fs,'rb') as f:
-                        self.stream = StringIO(f.read())
-                        self.data = StreamSlicer(self.stream)
-                        self.nrecs, = unpack('>H', self.data[76:78])
-                        record0 = self.record(0)
-                        #self.hexdump(record0)
-                        self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
-
-                def record(self, n):
-                    if n >= self.nrecs:
-                        raise ValueError('non-existent record %r' % n)
-                    offoff = 78 + (8 * n)
-                    start, = unpack('>I', self.data[offoff + 0:offoff + 4])
-                    stop = None
-                    if n < (self.nrecs - 1):
-                        stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
-                    return StreamSlicer(self.stream, start, stop)
-
-                def hexdump(self, src, length=16):
-                    # Diagnostic
-                    FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)])
-                    N=0; result=''
-                    while src:
-                       s,src = src[:length],src[length:]
-                       hexa = ' '.join(["%02X"%ord(x) for x in s])
-                       s = s.translate(FILTER)
-                       result += "%04X   %-*s   %s\n" % (N, length*3, hexa, s)
-                       N+=length
-                    print result
-
            if self.generateRecentlyRead:
                self.opts.log.info("     Collecting Kindle bookmarks matching catalog entries")

                d = BookmarkDevice(None)
                d.initialize(self.opts.connected_device['save_template'])
+
                bookmarks = {}
                for book in self.booksByTitle:
                    original_title = book['title'][book['title'].find(':') + 2:] if book['series'] \
@ -1323,7 +1214,6 @@ class EPUB_MOBI(CatalogPlugin):
                                bm_found = True
                        if bm_found:
                            break
-
                self.bookmarked_books = bookmarks
            else:
                self.bookmarked_books = {}
@ -1378,10 +1268,9 @@ class EPUB_MOBI(CatalogPlugin):

                # This will include the reading progress dots even if we're not generating Recently Read
                if self.opts.connected_kindle and title['id'] in self.bookmarked_books:
-                    authorTag.insert(0, NavigableString(title['reading_progress'] + " by "))
-                    authorTag.insert(1, aTag)
+                    authorTag.insert(0, NavigableString(self.READING_SYMBOL + " by "))
                else:
-                    # Insert READ_SYMBOL
+                    # Insert READ/NOT_READ SYMBOL
                    if title['read']:
                        authorTag.insert(0, NavigableString(self.READ_SYMBOL + "by "))
                    else:
@ -3914,6 +3803,14 @@ class EPUB_MOBI(CatalogPlugin):
                                            '%s%s\n\n%s' % (lost_cr.group(1),
                                                            lost_cr.group(2),
                                                            lost_cr.group(3)))
+            # Extract pre-built elements - annotations, etc.
+            soup = BeautifulSoup(comments)
+            elems = soup.findAll('div')
+            for elem in elems:
+                elem.extract()
+
+            # Reconstruct comments w/o <div>s
+            comments = soup.renderContents()

            # Convert \n\n to <p>s
            if re.search('\n\n', comments):
@ -3933,7 +3830,6 @@ class EPUB_MOBI(CatalogPlugin):
            # Convert two hypens to emdash
            comments = re.sub('--','&mdash;',comments)
            soup = BeautifulSoup(comments)
-
            result = BeautifulSoup()
            rtc = 0
            open_pTag = False
@ -3948,7 +3844,7 @@ class EPUB_MOBI(CatalogPlugin):
                    pTag.insert(ptc,prepare_string_for_xml(token))
                    ptc += 1

-                elif token.name in ['br','b','i']:
+                elif token.name in ['br','b','i','em']:
                    if not open_pTag:
                        pTag = Tag(result,'p')
                        open_pTag = True
@ -3977,8 +3873,16 @@ class EPUB_MOBI(CatalogPlugin):
            for p in paras:
                p['class'] = 'description'

+            # Add back <div> elems initially removed
+            for elem in elems:
+                result.insert(rtc,elem)
+                rtc += 1
+
            return result.renderContents(encoding=None)

+        def magicKindleLocationCalculator(self,offset):
+            '''
+            Convert an offset into a 1-based location number, counting one
+            location per 150 units of offset.
+
+            NOTE(review): for an int offset this relies on Python 2 integer
+            division; confirm callers never pass a float.
+            '''
+            return offset/150 + 1
+
        def processSpecialTags(self, tags, this_title, opts):
            tag_list = []
            for tag in tags: