GwR user annotations changes

2026-06-06 14:05:21 -04:00 · 2010-03-02 03:50:56 -08:00
parent f6c8da4ada
commit 8f32765dac
3 changed files with 241 additions and 85 deletions
@@ -93,3 +93,10 @@ hr.series_divider {
 	margin-top:0em;
 	margin-bottom:0em;
 	}
+
+/* Styles <hr class="annotations_divider">: a half-width, left-indented rule with no vertical margin */
+hr.annotations_divider {
+	width:50%;
+	margin-left:1em;
+	margin-top:0em;
+	margin-bottom:0em;
+	}
@@ -97,8 +97,7 @@ class BookInfo(QDialog, Ui_BookInfo):
        info = self.view.model().get_book_info(row)
        self.setWindowTitle(info[_('Title')])
        self.title.setText('<b>'+info.pop(_('Title')))
-        self.comments.setText(info.pop(_('Comments'), ''))
-
+        self.comments.setText('<div>%s</div>' % info.pop(_('Comments'), ''))
        cdata = info.pop('cover', '')
        self.cover_pixmap = QPixmap.fromImage(cdata)
        self.resize_cover()
@@ -1,3 +1,6 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Greg Riker <griker at hotmail.com>'
+
 import datetime, htmlentitydefs, os, re, shutil

 from collections import namedtuple
@@ -922,6 +925,11 @@ class EPUB_MOBI(CatalogPlugin):
                    return False
            self.fetchBooksByAuthor()
            self.fetchBookmarks()
+
+            updateLibraryComments = True
+            if updateLibraryComments:
+                self.updateLibraryComments()
+
            self.generateHTMLDescriptions()
            self.generateHTMLByAuthor()
            if self.opts.generate_titles:
@@ -1020,6 +1028,8 @@ class EPUB_MOBI(CatalogPlugin):
            for record in data:
                this_title = {}

+                this_title['id'] = record['id']
+
                this_title['title'] = self.convertHTMLEntities(record['title'])
                if record['series']:
                    this_title['series'] = record['series']
@@ -1037,19 +1047,11 @@ class EPUB_MOBI(CatalogPlugin):
                    else:
                        this_title['author'] = 'Unknown'

-                '''
-                this_title['author_sort_original'] =  record['author_sort']
-                author_sort =  record['author_sort'] if len(record['author_sort'].strip()) \
-                     else self.author_to_author_sort(this_title['author'])
-                author_sort = author_sort[0].upper() + author_sort[1:]
-                this_title['author_sort'] = author_sort
-                '''
                if 'author_sort' in record and record['author_sort'].strip():
                    this_title['author_sort'] = record['author_sort']
                else:
                    this_title['author_sort'] = self.author_to_author_sort(this_title['author'])

-                this_title['id'] = record['id']
                if record['publisher']:
                    this_title['publisher'] = re.sub('&', '&amp;', record['publisher'])

@@ -1197,83 +1199,17 @@ class EPUB_MOBI(CatalogPlugin):
                A simple class storing bookmark data
                Kindle-specific
                '''
-                def __init__(self,path, formats, id):
+                def __init__(self, path, formats, id):
+                    '''
+                    Initialise the bookmark fields, then populate them from disk.
+
+                    path    -- bookmark file handed to get_bookmark_data() and
+                               get_book_length()
+                    formats -- format list handed to get_book_length()
+                    id      -- stored unchanged as self.id
+                    '''
                     self.book_format = None
                     self.book_length = 0
                     self.id = id
                     self.last_read_location = 0
                     self.timestamp = 0
+                    self.user_notes = None
+
                     self.get_bookmark_data(path)
                     self.get_book_length(path, formats)

-
-                def get_bookmark_data(self, path):
-                    ''' Return the timestamp and last_read_location '''
-                    with open(path,'rb') as f:
-                        stream = StringIO(f.read())
-                        data = StreamSlicer(stream)
-                        self.timestamp, = unpack('>I', data[0x24:0x28])
-                        bpar_offset, = unpack('>I', data[0x4e:0x52])
-                        #print "bpar_offset: 0x%x" % bpar_offset
-                        lrlo = bpar_offset + 0x0c
-                        self.last_read_location = int(unpack('>I', data[lrlo:lrlo+4])[0])
-                        '''
-                        iolr = bpar_offset + 0x14
-                        index_of_last_read, = unpack('>I', data[iolr:iolr+4])
-                        #print "index_of_last_read: 0x%x" % index_of_last_read
-                        bpar_len, = unpack('>I', data[bpl:bpl+4])
-                        bpar_len += 8
-                        #print "bpar_len: 0x%x" % bpar_len
-                        dro = bpar_offset + bpar_len
-                        #print "dro: 0x%x" % dro
-
-                        # Walk to index_of_last_read to find last_read_location
-                        # If BKMK - offset 8
-                        # If DATA - offset 0x18 + 0x1c
-                        current_entry = 1
-                        while current_entry < index_of_last_read:
-                            rec_len, = unpack('>I', data[dro+4:dro+8])
-                            rec_len += 8
-                            dro += rec_len
-                            current_entry += 1
-
-                        # Looking at the record with last_read_location
-                        if data[dro:dro+4] == 'DATA':
-                            lrlo = dro + 0x18 + 0x1c
-                        elif data[dro:dro+4] == 'BKMK':
-                            lrlo = dro + 8
-                        else:
-                            print "Unrecognized bookmark block type"
-
-                        #print "lrlo: 0x%x" % lrlo
-                        self.last_read_location = float(unpack('>I', data[lrlo:lrlo+4])[0])
-                        #print "last_read_location: 0x%x" % self.last_read_location
-                        '''
-
-                def get_book_length(self, path, formats):
-                    # This assumes only one of the possible formats exists on the Kindle
-                    book_fs = None
-                    for format in formats:
-                        fmt = format.rpartition('.')[2]
-                        if fmt in ['mobi','prc','azw']:
-                            book_fs = path.replace('.mbp','.%s' % fmt)
-                            if os.path.exists(book_fs):
-                                self.book_format = fmt
-                                #print "%s exists on device" % book_fs
-                                break
-                    else:
-                        #print "no files matching library formats exist on device"
-                        self.book_length = 0
-                        return
-                    # Read the book len from the header
-                    with open(book_fs,'rb') as f:
-                        self.stream = StringIO(f.read())
-                        self.data = StreamSlicer(self.stream)
-                        self.nrecs, = unpack('>H', self.data[76:78])
-                        record0 = self.record(0)
-                        #self.hexdump(record0)
-                        self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
-
                def record(self, n):
                    if n >= self.nrecs:
                        raise ValueError('non-existent record %r' % n)
@@ -1296,6 +1232,122 @@ class EPUB_MOBI(CatalogPlugin):
                       N+=length
                    print result

+                def textdump(self, src, width=80, indent=5):
+                    '''
+                    Word-wrap src into a list of lines for debug output.
+
+                    src is split on single spaces.  Every returned line starts with
+                    indent spaces and ends with a trailing space; a line is closed
+                    only once its length exceeds width, so it may overshoot width
+                    by up to one word.  Whitespace-only input yields an empty list.
+                    '''
+                    margin = ' ' * indent
+                    results = []
+                    result = margin
+                    for token in src.split(' '):
+                        # Only byte strings need decoding; calling decode() on text
+                        # that is already unicode forces an implicit ASCII encode
+                        # and fails on any non-ASCII character
+                        if isinstance(token, bytes):
+                            token = token.decode('mac-roman')
+                        result += token + ' '
+                        if len(result) > width:
+                            results.append(result)
+                            result = margin
+                    if result.strip():
+                        results.append(result)
+                    return results
+
+                def get_bookmark_data(self, path, fetchUserNotes=True):
+                    '''
+                    Parse the bookmark file at path.
+
+                    Sets self.timestamp and self.last_read_location from the file
+                    and, when fetchUserNotes is true, collects the user's
+                    annotations.  self.user_notes is always left as a dict
+                    (possibly empty) mapping a location offset to
+                    dict(type='Note' or 'Highlight', id=self.id, text=<decoded text>).
+                    Nothing is returned.
+                    '''
+                    # Bound up front so the final assignment cannot raise a
+                    # NameError when fetchUserNotes is False
+                    user_notes = {}
+
+                    with open(path,'rb') as f:
+                        stream = StringIO(f.read())
+                        data = StreamSlicer(stream)
+                        self.timestamp, = unpack('>I', data[0x24:0x28])
+                        bpar_offset, = unpack('>I', data[0x4e:0x52])
+                        lrlo = bpar_offset + 0x0c
+                        self.last_read_location = int(unpack('>I', data[lrlo:lrlo+4])[0])
+
+                        # Store the annotations/locations
+                        if fetchUserNotes:
+                            bpl = bpar_offset + 4
+                            bpar_len, = unpack('>I', data[bpl:bpl+4])
+                            bpar_len += 8
+                            # Entries start right after the block at bpar_offset
+                            eo = bpar_offset + bpar_len
+
+                            # Walk bookmark entries
+                            sig = data[eo:eo+4]
+                            previous_block = None
+                            # Location from the most recent data_header; None until one is seen
+                            location = None
+
+                            while sig == 'DATA':
+                                text = None
+                                entry_type = None
+                                rec_len, = unpack('>I', data[eo+4:eo+8])
+                                if rec_len == 0:
+                                    current_block = "empty_data"
+                                elif data[eo+8:eo+12] == "EBAR":
+                                    current_block = "data_header"
+                                    location, = unpack('>I', data[eo+0x34:eo+0x38])
+                                else:
+                                    current_block = "text_block"
+                                    # A text block is classified by the block preceding it
+                                    if previous_block == 'empty_data':
+                                        entry_type = 'Note'
+                                    elif previous_block == 'data_header':
+                                        entry_type = 'Highlight'
+                                    text = data[eo+8:eo+8+rec_len].decode('utf-16-be')
+
+                                # Text seen before any data_header has no location to
+                                # file it under, so it is skipped rather than crashing
+                                if entry_type and location is not None:
+                                    user_notes[location] = dict(type=entry_type, id=self.id,
+                                                                text=text)
+
+                                eo += rec_len + 8
+                                previous_block = current_block
+                                sig = data[eo:eo+4]
+
+                            while sig == 'BKMK':
+                                # Fix start location for Highlights using BKMK data
+                                end_loc, = unpack('>I', data[eo+0x10:eo+0x14])
+                                if end_loc in user_notes and user_notes[end_loc]['type'] != 'Note':
+                                    start, = unpack('>I', data[eo+8:eo+12])
+                                    # pop() before re-keying: assigning first and popping
+                                    # afterwards would delete the entry when start == end_loc
+                                    user_notes[start] = user_notes.pop(end_loc)
+                                rec_len, = unpack('>I', data[eo+4:eo+8])
+                                eo += rec_len + 8
+                                sig = data[eo:eo+4]
+
+                    self.user_notes = user_notes
+
+                def get_book_length(self, path, formats):
+                    '''
+                    Set self.book_length from the header of the book file that
+                    matches the bookmark file at path.
+
+                    The book file name is built by swapping '.mbp' in path for the
+                    extension of each mobi/prc/azw entry in formats; the first one
+                    that exists on disk is used and recorded in self.book_format.
+                    When none exists self.book_length is set to 0.
+                    '''
+                    # This assumes only one of the possible formats exists on the Kindle
+                    book_fs = None
+                    located = False
+                    for candidate in formats:
+                        extension = candidate.rpartition('.')[2]
+                        if extension not in ['mobi','prc','azw']:
+                            continue
+                        book_fs = path.replace('.mbp','.%s' % extension)
+                        if os.path.exists(book_fs):
+                            self.book_format = extension
+                            located = True
+                            break
+
+                    if not located:
+                        # no files matching library formats exist on device
+                        self.book_length = 0
+                        return
+
+                    # Read the book len from the header
+                    with open(book_fs,'rb') as f:
+                        self.stream = StringIO(f.read())
+                        self.data = StreamSlicer(self.stream)
+                        self.nrecs, = unpack('>H', self.data[76:78])
+                        header = self.record(0)
+                        self.book_length = int(unpack('>I', header[0x04:0x08])[0])
+
            if self.generateRecentlyRead:
                self.opts.log.info("     Collecting Kindle bookmarks matching catalog entries")

@@ -1378,15 +1430,14 @@ class EPUB_MOBI(CatalogPlugin):

                # This will include the reading progress dots even if we're not generating Recently Read
                if self.opts.connected_kindle and title['id'] in self.bookmarked_books:
-                    authorTag.insert(0, NavigableString(title['reading_progress'] + " by "))
-                    authorTag.insert(1, aTag)
+                    authorTag.insert(0, NavigableString(self.READING_SYMBOL + " by "))
                else:
-                    # Insert READ_SYMBOL
+                    # Insert READ/NOT_READ SYMBOL
                    if title['read']:
                        authorTag.insert(0, NavigableString(self.READ_SYMBOL + "by "))
                    else:
                        authorTag.insert(0, NavigableString(self.NOT_READ_SYMBOL + "by "))
-                    authorTag.insert(1, aTag)
+                authorTag.insert(1, aTag)

                '''
                # Insert Series info or remove.
@@ -3309,6 +3360,90 @@ class EPUB_MOBI(CatalogPlugin):

            self.ncxSoup = ncx_soup

+        def updateLibraryComments(self):
+            '''
+            Push reading progress and user annotations back to the library.
+
+            For every id in self.bookmarked_books this builds a
+            <div class="kindle_annotations"> block holding the last-read
+            location plus any notes/highlights sorted by location, cuts a
+            previously generated block (and its divider) off the end of the
+            book's comments, appends the new block, saves the comments with
+            self.db.set_comment() and copies them into the 'description' of
+            the matching self.booksByTitle entry.  Returns nothing.
+            '''
+            # Push user notes back to library
+            from calibre.library.cli import send_message as calibre_send_message
+
+            if not self.bookmarked_books:
+                return
+
+            for book_id in self.bookmarked_books:
+                entry = self.bookmarked_books[book_id]
+                bookmark = entry[0]
+                progress = entry[1]
+                timestamp = datetime.datetime.utcfromtimestamp(bookmark.timestamp)
+
+                ka_soup = BeautifulSoup()
+                dtc = 0
+                divTag = Tag(ka_soup,'div')
+                divTag['class'] = 'kindle_annotations'
+
+                # Add the last-read location
+                spanTag = Tag(ka_soup, 'span')
+                spanTag['style'] = 'font-weight:bold'
+                spanTag.insert(0,NavigableString("%s %s<br />Last Page Read: Location %d (%d%%)" % \
+                                (strftime(u'%x', timestamp.timetuple()),
+                                 progress['reading_progress'],
+                                 self.magicKindleLocationCalculator(bookmark.last_read_location),
+                                 progress['percent_read'])))
+
+                divTag.insert(dtc, spanTag)
+                dtc += 1
+                divTag.insert(dtc, Tag(ka_soup,'br'))
+                dtc += 1
+
+                user_notes = bookmark.user_notes
+                if user_notes:
+                    # Add the annotations sorted by location
+                    for location in sorted(user_notes):
+                        note = user_notes[location]
+                        # The three format arguments must form one tuple; without the
+                        # parentheses '%' binds to the first value only and raises
+                        # TypeError.  The note text is escaped because it is free-form
+                        # user text dropped into markup.
+                        annotation = '<b>Location %d &bull; %s</b><br />%s<br />' % \
+                                        (self.magicKindleLocationCalculator(location),
+                                         note['type'],
+                                         prepare_string_for_xml(note['text']))
+                        divTag.insert(dtc, annotation)
+                        dtc += 1
+
+                ka_soup.insert(0,divTag)
+
+                mi = self.db.get_metadata(book_id, index_is_id=True)
+                # Guard against a missing comments value (None) so that find()
+                # and the concatenation below cannot fail
+                comments = mi.comments or ''
+
+                # Cut off any previously generated annotations and their divider
+                for marker in ('<div class="kindle_annotations">',
+                               '<hr class="annotations_divider" />'):
+                    offset = comments.find(marker)
+                    if offset >= 0:
+                        comments = comments[:offset]
+
+                # Only separate the annotations from existing comment text
+                if comments:
+                    hrTag = Tag(ka_soup,'hr')
+                    hrTag['class'] = 'annotations_divider'
+                    ka_soup.insert(0,hrTag)
+
+                # NOTE(review): prettify() may return an encoded byte string;
+                # confirm the concatenation is safe for non-ASCII note text
+                mi.comments = comments + ka_soup.prettify()
+
+                # Update library comments
+                self.db.set_comment(book_id, mi.comments)
+                calibre_send_message()
+
+                # Update catalog description prior to build
+                # This might be better to do during fetchBooksByTitle?
+                # Try self.bookmarked_books[id][1]['description']
+                for title in self.booksByTitle:
+                    if title['id'] == book_id:
+                        title['description'] = mi.comments
+                        break
+
        def writeNCX(self):
            self.updateProgressFullStep("Saving NCX")

@@ -3914,6 +4049,14 @@ class EPUB_MOBI(CatalogPlugin):
                                            '%s%s\n\n%s' % (lost_cr.group(1),
                                                            lost_cr.group(2),
                                                            lost_cr.group(3)))
+            # Extract pre-built elements - annotations, etc.
+            soup = BeautifulSoup(comments)
+            elems = soup.findAll('div')
+            for elem in elems:
+                elem.extract()
+
+            # Reconstruct comments w/o <div>s
+            comments = soup.renderContents()

            # Convert \n\n to <p>s
            if re.search('\n\n', comments):
@@ -3933,7 +4076,6 @@ class EPUB_MOBI(CatalogPlugin):
            # Convert two hypens to emdash
            comments = re.sub('--','&mdash;',comments)
            soup = BeautifulSoup(comments)
-
            result = BeautifulSoup()
            rtc = 0
            open_pTag = False
@@ -3948,7 +4090,7 @@ class EPUB_MOBI(CatalogPlugin):
                    pTag.insert(ptc,prepare_string_for_xml(token))
                    ptc += 1

-                elif token.name in ['br','b','i']:
+                elif token.name in ['br','b','i','em']:
                    if not open_pTag:
                        pTag = Tag(result,'p')
                        open_pTag = True
@@ -3977,8 +4119,16 @@ class EPUB_MOBI(CatalogPlugin):
            for p in paras:
                p['class'] = 'description'

+            # Add back <div> elems initially removed
+            for elem in elems:
+                result.insert(rtc,elem)
+                rtc += 1
+
            return result.renderContents(encoding=None)

+        def magicKindleLocationCalculator(self,offset):
+            '''
+            Convert a raw bookmark offset into a 1-based location number.
+
+            Every 150 offset units make up one location, so offsets 0-149
+            map to location 1, 150-299 to location 2, and so on.
+            '''
+            # Floor division keeps the result an integer whatever division
+            # semantics are in effect ('/' yields a float under true division)
+            return offset//150 + 1
+
        def processSpecialTags(self, tags, this_title, opts):
            tag_list = []
            for tag in tags: