Fix some annotation fetching issues for Kobo devices

There have been some long-term problems with missed annotations for
kepubs. In addition, a bug in the latest firmware causes problems with
epubs that don't have a ToC entry for every file.
This commit is contained in:
David 2019-09-12 22:51:35 +10:00
parent 2a7b9e30ac
commit 8266ccd8c5
2 changed files with 119 additions and 52 deletions

View File

@ -7,6 +7,9 @@ __docformat__ = 'restructuredtext en'
import os import os
from calibre.constants import ispy3
from polyglot.builtins import unicode_type
from calibre.devices.usbms.driver import debug_print
class Bookmark(): # {{{ class Bookmark(): # {{{
''' '''
@ -14,7 +17,7 @@ class Bookmark(): # {{{
kobo-specific kobo-specific
''' '''
def __init__(self, db_connection, contentid, path, id, book_format, bookmark_extension): def __init__(self, db_connection, contentId, path, id, book_format, bookmark_extension):
self.book_format = book_format self.book_format = book_format
self.bookmark_extension = bookmark_extension self.bookmark_extension = bookmark_extension
self.book_length = 0 # Not Used self.book_length = 0 # Not Used
@ -25,8 +28,9 @@ class Bookmark(): # {{{
self.timestamp = 0 self.timestamp = 0
self.user_notes = None self.user_notes = None
self.db_connection = db_connection self.db_connection = db_connection
self.contentid = contentid self.contentId = contentId
self.percent_read = 0 self.percent_read = 0
self.kepub = (self.contentId.endswith('.kepub.epub') or not os.path.splitext(self.contentId)[1])
self.get_bookmark_data() self.get_bookmark_data()
self.get_book_length() # Not Used self.get_book_length() # Not Used
@ -36,53 +40,87 @@ class Bookmark(): # {{{
self.timestamp = os.path.getmtime(self.path) self.timestamp = os.path.getmtime(self.path)
cursor = self.db_connection.cursor() cursor = self.db_connection.cursor()
t = (self.contentid,) book_query_values = (self.contentId,)
kepub_chapter_query = ( kepub_chapter_query = (
'SELECT Title, volumeIndex ' 'SELECT c.ContentID, c.BookTitle, c.Title, c.VolumeIndex, '
'FROM content ' 'c.___NumPages, c.MimeType '
'WHERE ContentID LIKE ? ' 'FROM content c '
) 'WHERE ContentType = 899 '
bookmark_query = ('SELECT bm.bookmarkid, bm.ContentID, bm.text, bm.annotation, ' 'AND c.BookID = ? '
'ORDER BY c.VolumeIndex'
)
bookmark_query = (
'SELECT bm.BookmarkID, bm.ContentID, bm.Text, bm.Annotation, '
'bm.ChapterProgress, bm.StartContainerChildIndex, bm.StartOffset, ' 'bm.ChapterProgress, bm.StartContainerChildIndex, bm.StartOffset, '
'c.BookTitle, c.TITLE, c.volumeIndex, c.MimeType ' 'c.BookTitle, c.Title, c.volumeIndex, c.MimeType '
'FROM Bookmark bm LEFT OUTER JOIN Content c ON ' 'FROM Bookmark bm LEFT OUTER JOIN Content c ON '
'c.ContentID = bm.ContentID ' 'c.ContentID = bm.ContentID '
'WHERE bm.Hidden = "false" ' 'WHERE bm.Hidden = "false" '
'AND bm.volumeid = ? ' 'AND bm.volumeid = ? '
'ORDER BY bm.ContentID, bm.chapterprogress') 'ORDER BY bm.ContentID, bm.chapterprogress'
cursor.execute(bookmark_query, t) )
debug_print("Kobo::Bookmark::get_bookmark_data - getting kepub chapters: contentId={0}".format(self.contentId))
cursor.execute(kepub_chapter_query, book_query_values)
kepub_chapters = {}
if self.kepub:
try:
for chapter_row in cursor:
chapter_contentID = chapter_row['ContentID']
chapter_contentID = chapter_contentID[:chapter_contentID.rfind('-')]
kepub_chapters[chapter_contentID] = {
'chapter_title': chapter_row['Title'],
'chapter_index': chapter_row['VolumeIndex']
}
debug_print("Kobo::Bookmark::get_bookmark_data - getting kepub chapter: kepub chapters={0}".format(kepub_chapters))
except:
debug_print("Kobo::Bookmark::get_bookmark_data - No chapters found")
cursor.execute(bookmark_query, book_query_values)
previous_chapter = 0 previous_chapter = 0
bm_count = 0 bm_count = 0
for row in cursor: for row in cursor:
current_chapter = row[9] current_chapter = row['VolumeIndex'] if row['VolumeIndex'] is not None else 0
chapter_title = row[8] chapter_title = row['Title']
# For kepubs on newer firmware, the title needs to come from an 899 row. # For kepubs on newer firmware, the title needs to come from an 899 row.
if not row[10] or row[10] == 'application/xhtml+xml' or row[10] == 'application/x-kobo-epub+zip': if self.kepub:
cursor2 = self.db_connection.cursor() chapter_contentID = row['ContentID']
kepub_chapter_data = ('{0}-%'.format(row[1]), ) debug_print("Kobo::Bookmark::get_bookmark_data - getting kepub: chapter chapter_contentID='{0}'".format(chapter_contentID))
cursor2.execute(kepub_chapter_query, kepub_chapter_data) filename_index = chapter_contentID.find('!')
try: book_contentID_part = chapter_contentID[:filename_index]
kepub_chapter = next(cursor2) debug_print("Kobo::Bookmark::get_bookmark_data - getting kepub: chapter book_contentID_part='{0}'".format(book_contentID_part))
chapter_title = kepub_chapter[0] file_contentID_part = chapter_contentID[filename_index + 1:]
current_chapter = kepub_chapter[1] filename_index = file_contentID_part.find('!')
except StopIteration: opf_reference = file_contentID_part[:filename_index]
pass debug_print("Kobo::Bookmark::get_bookmark_data - getting kepub: chapter opf_reference='{0}'".format(opf_reference))
finally: file_contentID_part = file_contentID_part[filename_index + 1:]
cursor2.close debug_print("Kobo::Bookmark::get_bookmark_data - getting kepub: chapter file_contentID_part='{0}'".format(file_contentID_part))
# from urllib import quote
# file_contentID_part = quote(file_contentID_part)
chapter_contentID = book_contentID_part + "!" + opf_reference + "!" + file_contentID_part
debug_print("Kobo::Bookmark::get_bookmark_data - getting kepub chapter chapter_contentID='{0}'".format(chapter_contentID))
kepub_chapter = kepub_chapters.get(chapter_contentID, None)
if kepub_chapter is not None:
chapter_title = kepub_chapter['chapter_title']
current_chapter = kepub_chapter['chapter_index']
else:
chapter_title = ''
current_chapter = 0
if previous_chapter == current_chapter: if previous_chapter == current_chapter:
bm_count = bm_count + 1 bm_count = bm_count + 1
else: else:
bm_count = 0 bm_count = 0
text = row[2] text = row['Text']
annotation = row[3] annotation = row['Annotation']
# A dog ear (bent upper right corner) is a bookmark # A dog ear (bent upper right corner) is a bookmark
if row[5] == row[6] == 0: # StartContainerChildIndex = StartOffset = 0 if row['StartContainerChildIndex'] == row['StartOffset'] == 0: # StartContainerChildIndex = StartOffset = 0
e_type = 'Bookmark' e_type = 'Bookmark'
text = row[8] text = row['Title']
# highlight is text with no annotation # highlight is text with no annotation
elif text is not None and (annotation is None or annotation == ""): elif text is not None and (annotation is None or annotation == ""):
e_type = 'Highlight' e_type = 'Highlight'
@ -94,7 +132,7 @@ class Bookmark(): # {{{
note_id = current_chapter * 1000 + bm_count note_id = current_chapter * 1000 + bm_count
# book_title = row[8] # book_title = row[8]
chapter_progress = min(round(float(100*row[4]),2),100) chapter_progress = min(round(float(100*row['ChapterProgress']),2),100)
user_notes[note_id] = dict(id=self.id, user_notes[note_id] = dict(id=self.id,
displayed_location=note_id, displayed_location=note_id,
type=e_type, type=e_type,
@ -108,15 +146,16 @@ class Bookmark(): # {{{
# 'annotation: ', annotation, 'chapter_title: ', chapter_title, # 'annotation: ', annotation, 'chapter_title: ', chapter_title,
# 'chapter_progress: ', chapter_progress, 'date: ') # 'chapter_progress: ', chapter_progress, 'date: ')
cursor.execute('SELECT datelastread, ___PercentRead ' cursor.execute('SELECT DateLastRead, ___PercentRead, ReadStatus '
'FROM content ' 'FROM content '
'WHERE bookid IS NULL ' 'WHERE bookid IS NULL '
'AND ReadStatus > 0 ' 'AND ReadStatus > 0 '
'AND contentid = ?', 'AND ContentID = ? '
t) 'ORDER BY DateLastRead, ReadStatus',
book_query_values)
for row in cursor: for row in cursor:
self.last_read = row[0] self.last_read = row['DateLastRead']
self.percent_read = row[1] self.percent_read = 100 if (row['ReadStatus'] == 2) else row['___PercentRead']
# print row[1] # print row[1]
cursor.close() cursor.close()
@ -128,4 +167,32 @@ class Bookmark(): # {{{
# TL self.book_length = int(unpack('>I', record0[0x04:0x08])[0]) # TL self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
pass pass
def __unicode__(self):
    '''
    Build a multi-line, human-readable summary of this bookmark for
    console output.

    The first line is a fixed header. Each following line holds a
    left-aligned label padded to 20 characters, a colon and the value.
    An attribute is listed only when its value is truthy, so empty or
    zero fields are left out. The result always ends with a newline.
    '''
    # Label/value pairs in the order they are reported.
    labelled_values = (
        ('ContentID', self.contentId),
        ('Last Read', self.last_read),
        ('Timestamp', self.timestamp),
        ('Percent Read', self.percent_read),
        ('User Notes', self.user_notes),
    )
    lines = [u"Kobo bookmark:"]
    for label, value in labelled_values:
        if value:
            lines.append(u'%-20s: %s'%(unicode_type(label), unicode_type(value)))
    return u'\n'.join(lines) + u"\n"
if ispy3:
__str__ = __unicode__
# }}} # }}}

View File

@ -1148,9 +1148,9 @@ class KOBO(USBMS):
def resolve_bookmark_paths(storage, path_map): def resolve_bookmark_paths(storage, path_map):
pop_list = [] pop_list = []
book_ext = {} book_ext = {}
for id in path_map: for book_id in path_map:
file_fmts = set() file_fmts = set()
for fmt in path_map[id]['fmts']: for fmt in path_map[book_id]['fmts']:
file_fmts.add(fmt) file_fmts.add(fmt)
bookmark_extension = None bookmark_extension = None
if file_fmts.intersection(epub_formats): if file_fmts.intersection(epub_formats):
@ -1159,37 +1159,37 @@ class KOBO(USBMS):
if bookmark_extension: if bookmark_extension:
for vol in storage: for vol in storage:
bkmk_path = path_map[id]['path'] bkmk_path = path_map[book_id]['path']
bkmk_path = bkmk_path bkmk_path = bkmk_path
if os.path.exists(bkmk_path): if os.path.exists(bkmk_path):
path_map[id] = bkmk_path path_map[book_id] = bkmk_path
book_ext[id] = book_extension book_ext[book_id] = book_extension
break break
else: else:
pop_list.append(id) pop_list.append(book_id)
else: else:
pop_list.append(id) pop_list.append(book_id)
# Remove non-existent bookmark templates # Remove non-existent bookmark templates
for id in pop_list: for book_id in pop_list:
path_map.pop(id) path_map.pop(book_id)
return path_map, book_ext return path_map, book_ext
storage = get_storage() storage = get_storage()
path_map, book_ext = resolve_bookmark_paths(storage, path_map) path_map, book_ext = resolve_bookmark_paths(storage, path_map)
bookmarked_books = {} bookmarked_books = {}
with closing(self.device_database_connection()) as connection: with closing(self.device_database_connection(use_row_factory=True)) as connection:
for id in path_map: for book_id in path_map:
extension = os.path.splitext(path_map[id])[1] extension = os.path.splitext(path_map[book_id])[1]
ContentType = self.get_content_type_from_extension(extension) if extension else self.get_content_type_from_path(path_map[id]) ContentType = self.get_content_type_from_extension(extension) if extension else self.get_content_type_from_path(path_map[book_id])
ContentID = self.contentid_from_path(path_map[id], ContentType) ContentID = self.contentid_from_path(path_map[book_id], ContentType)
debug_print("get_annotations - ContentID: ", ContentID, "ContentType: ", ContentType) debug_print("get_annotations - ContentID: ", ContentID, "ContentType: ", ContentType)
bookmark_ext = extension bookmark_ext = extension
myBookmark = Bookmark(connection, ContentID, path_map[id], id, book_ext[id], bookmark_ext) myBookmark = Bookmark(connection, ContentID, path_map[book_id], book_id, book_ext[book_id], bookmark_ext)
bookmarked_books[id] = self.UserAnnotation(type='kobo_bookmark', value=myBookmark) bookmarked_books[book_id] = self.UserAnnotation(type='kobo_bookmark', value=myBookmark)
# This returns as job.result in gui2.ui.annotations_fetched(self,job) # This returns as job.result in gui2.ui.annotations_fetched(self,job)
return bookmarked_books return bookmarked_books
@ -2431,7 +2431,7 @@ class KOBOTOUCH(KOBO):
category_added = False category_added = False
if book.contentID is None: if book.contentID is None:
debug_print(' Do not know ContentID - Title="%s, Authors=%s"'%(book.title, book.author)) debug_print(' Do not know ContentID - Title="%s", Authors="%s", path="%s"'%(book.title, book.author, book.path))
extension = os.path.splitext(book.path)[1] extension = os.path.splitext(book.path)[1]
ContentType = self.get_content_type_from_extension(extension) if extension else self.get_content_type_from_path(book.path) ContentType = self.get_content_type_from_extension(extension) if extension else self.get_content_type_from_path(book.path)
book.contentID = self.contentid_from_path(book.path, ContentType) book.contentID = self.contentid_from_path(book.path, ContentType)