diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 391b7d22e6..38a6cbad67 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -378,6 +378,17 @@ class RTFMetadataWriter(MetadataWriterPlugin): from calibre.ebooks.metadata.rtf import set_metadata set_metadata(stream, mi) +class TOPAZMetadataWriter(MetadataWriterPlugin): + + name = 'Set TOPAZ metadata' + file_types = set(['tpz', 'azw1']) + description = _('Set metadata in %s files')%'TOPAZ' + author = 'Greg Riker' + + def set_metadata(self, stream, mi, type): + from calibre.ebooks.metadata.topaz import set_metadata + set_metadata(stream, mi) + from calibre.ebooks.comic.input import ComicInput from calibre.ebooks.epub.input import EPUBInput diff --git a/src/calibre/devices/kindle/driver.py b/src/calibre/devices/kindle/driver.py index 1482c304b4..123e13e85e 100644 --- a/src/calibre/devices/kindle/driver.py +++ b/src/calibre/devices/kindle/driver.py @@ -41,7 +41,7 @@ class KINDLE(USBMS): EBOOK_DIR_MAIN = 'documents' EBOOK_DIR_CARD_A = 'documents' - DELETE_EXTS = ['.mbp'] + DELETE_EXTS = ['.mbp','.tan','.pdr'] SUPPORTS_SUB_DIRS = True SUPPORTS_ANNOTATIONS = True @@ -63,6 +63,7 @@ class KINDLE(USBMS): def get_annotations(self, path_map): MBP_FORMATS = [u'azw', u'mobi', u'prc', u'txt'] TAN_FORMATS = [u'tpz', u'azw1'] + PDR_FORMATS = [u'pdf'] mbp_formats = set() for fmt in MBP_FORMATS: @@ -70,6 +71,9 @@ class KINDLE(USBMS): tan_formats = set() for fmt in TAN_FORMATS: tan_formats.add(fmt) + pdr_formats = set() + for fmt in PDR_FORMATS: + pdr_formats.add(fmt) def get_storage(): storage = [] @@ -88,7 +92,6 @@ class KINDLE(USBMS): file_fmts = set() for fmt in path_map[id]['fmts']: file_fmts.add(fmt) - bookmark_extension = None if file_fmts.intersection(mbp_formats): book_extension = list(file_fmts.intersection(mbp_formats))[0] @@ -96,6 +99,9 @@ class KINDLE(USBMS): elif file_fmts.intersection(tan_formats): book_extension = list(file_fmts.intersection(tan_formats))[0] bookmark_extension = 'tan' + elif file_fmts.intersection(pdr_formats): + book_extension = list(file_fmts.intersection(pdr_formats))[0] + bookmark_extension = 'pdr' if bookmark_extension: for vol in storage: @@ -165,10 +171,13 @@ class Bookmark(): self.get_bookmark_data() self.get_book_length() - try: - self.percent_read = float(100*self.last_read / self.book_length) - except: - self.percent_read = 0 + if self.book_length >= 0: + try: + self.percent_read = float(100*self.last_read / self.book_length) + except: + self.percent_read = 0 + else: + self.percent_read = -1 def record(self, n): from calibre.ebooks.metadata.mobi import StreamSlicer @@ -280,6 +289,9 @@ class Bookmark(): def get_topaz_highlight(displayed_location): # Parse My Clippings.txt for a matching highlight + # Search looks for book title match, highlight match, and location match + # Author is not matched + # This will find the first instance of a clipping only book_fs = self.path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format) with open(book_fs,'rb') as f2: stream = StringIO(f2.read()) @@ -291,7 +303,7 @@ class Bookmark(): with open(my_clippings, 'r') as f2: marker_found = 0 text = '' - search_str1 = '%s (%s)' % (mi.title, str(mi.author[0])) + search_str1 = '%s' % (mi.title) search_str2 = '- Highlight Loc. %d' % (displayed_location) for line in f2: if marker_found == 0: @@ -336,6 +348,47 @@ class Bookmark(): else: e_type = 'Unknown annotation type' + displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1 + user_notes[location] = dict(id=self.id, + displayed_location=displayed_location, + type=e_type, + text=text) + if text_len == 0xFFFFFFFF: + e_base = e_base + 14 + else: + e_base = e_base + 14 + 2 + text_len + current_entry += 1 + for location in user_notes: + if location == self.last_read: + user_notes.pop(location) + break + + elif self.bookmark_extension == 'pdr': + self.timestamp = os.path.getmtime(self.path) + with open(self.path,'rb') as f: + stream = StringIO(f.read()) + data = StreamSlicer(stream) + self.last_read = int(unpack('>I', data[5:9])[0]) + entries, = unpack('>I', data[9:13]) + current_entry = 0 + e_base = 0x0d + while current_entry < entries: + ''' + location, = unpack('>I', data[e_base+2:e_base+6]) + text = None + text_len, = unpack('>I', data[e_base+0xA:e_base+0xE]) + e_type, = unpack('>B', data[e_base+1]) + if e_type == 0: + e_type = 'Bookmark' + elif e_type == 1: + e_type = 'Highlight' + text = get_topaz_highlight(location/MAGIC_TOPAZ_CONSTANT + 1) + elif e_type == 2: + e_type = 'Note' + text = data[e_base+0x10:e_base+0x10+text_len] + else: + e_type = 'Unknown annotation type' + if self.book_format in ['tpz','azw1']: displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1 elif self.book_format == 'pdf': @@ -350,10 +403,24 @@ class Bookmark(): else: e_base = e_base + 14 + 2 + text_len current_entry += 1 - for location in user_notes: - if location == self.last_read: - user_notes.pop(location) - break + ''' + # Use label as page number + pdf_location, = unpack('>I', data[e_base+1:e_base+5]) + label_len, = unpack('>H', data[e_base+5:e_base+7]) + location = int(data[e_base+7:e_base+7+label_len]) + displayed_location = location + e_type = 'Bookmark' + text = None + user_notes[location] = dict(id=self.id, + displayed_location=displayed_location, + type=e_type, + text=text) + self.pdf_page_offset = pdf_location - location + e_base += (7 + label_len) + current_entry += 1 + + self.last_read_location = self.last_read - self.pdf_page_offset + else: print "unsupported bookmark_extension: %s" % self.bookmark_extension self.user_notes = user_notes @@ -390,5 +457,9 @@ class Bookmark(): length = ord(raw[idx+len('bookLength')]) self.book_length = int(raw[idx+len('bookLength')+1:idx+len('bookLength')+1+length]) + elif self.bookmark_extension == 'pdr': + # Book length not yet implemented for PDF files + self.book_length = -1 + else: print "unsupported bookmark_extension: %s" % self.bookmark_extension diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index d630cfbf63..6596e9b1a2 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -23,7 +23,7 @@ class DRMError(ValueError): pass BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm', - 'html', 'xhtml', 'pdf', 'pdb', 'prc', 'mobi', 'azw', 'doc', + 'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc', 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan'] diff --git a/src/calibre/ebooks/metadata/topaz.py b/src/calibre/ebooks/metadata/topaz.py index 55eb9d6e69..bed982011d 100644 --- a/src/calibre/ebooks/metadata/topaz.py +++ b/src/calibre/ebooks/metadata/topaz.py @@ -1,9 +1,13 @@ from __future__ import with_statement __license__ = 'GPL 3' -__copyright__ = '2009, Kovid Goyal ' +__copyright__ = '2010, Greg Riker ' __docformat__ = 'restructuredtext en' -''' Read metadata from Amazon's topaz format ''' +''' Read/write metadata from Amazon's topaz format ''' +import copy, StringIO +from struct import pack, unpack + +from calibre.ebooks.metadata import MetaInformation def read_record(raw, name): idx = raw.find(name) @@ -32,9 +36,336 @@ def get_metadata(stream): title = title.decode('utf-8', 'replace') else: raise ValueError('No metadata in file') - from calibre.ebooks.metadata import MetaInformation + #from calibre.ebooks.metadata import MetaInformation return MetaInformation(title, authors) +class StreamSlicer(object): + + def __init__(self, stream, start=0, stop=None): + self._stream = stream + self.start = start + if stop is None: + stream.seek(0, 2) + stop = stream.tell() + self.stop = stop + self._len = stop - start + + def __len__(self): + return self._len + + def __getitem__(self, key): + stream = self._stream + base = self.start + if isinstance(key, (int, long)): + stream.seek(base + key) + return stream.read(1) + if isinstance(key, slice): + start, stop, stride = key.indices(self._len) + if stride < 0: + start, stop = stop, start + size = stop - start + if size <= 0: + return "" + stream.seek(base + start) + data = stream.read(size) + if stride != 1: + data = data[::stride] + return data + raise TypeError("stream indices must be integers") + + def __setitem__(self, key, value): + stream = self._stream + base = self.start + if isinstance(key, (int, long)): + if len(value) != 1: + raise ValueError("key and value lengths must match") + stream.seek(base + key) + return stream.write(value) + if isinstance(key, slice): + start, stop, stride = key.indices(self._len) + if stride < 0: + start, stop = stop, start + size = stop - start + if stride != 1: + value = value[::stride] + if len(value) != size: + raise ValueError("key and value lengths must match") + stream.seek(base + start) + return stream.write(value) + raise TypeError("stream indices must be integers") + + def update(self, data_blocks): + # Rewrite the stream + stream = self._stream + base = self.start + stream.seek(base) + self._stream.truncate(base) + for block in data_blocks: + stream.write(block) + + def truncate(self, value): + self._stream.truncate(value) + +class MetadataUpdater(object): + def __init__(self, stream): + self.stream = stream + raw = stream.read(8*1024) + if not raw.startswith('TPZ'): + raise ValueError('Not a Topaz file') + first = raw.find('metadata') + if first < 0: + raise ValueError('Invalid Topaz file') + second = raw.find('metadata', first+10) + if second < 0: + raise ValueError('Invalid Topaz file') + self.md_start = second-1 + self.data = StreamSlicer(stream) + self.header_records, = unpack('>B',self.data[4]) + offset = self.get_md_header(self.md_start) + self.metadata = {} + self.md_end = self.get_original_metadata(offset) + self.orig_md_len = self.md_end - self.md_start + + def decode_vwi(self,bytes): + pos, val = 0, 0 + done = False + while pos < len(bytes) and not done: + b = ord(bytes[pos]) + pos += 1 + if (b & 0x80) == 0: + done = True + b &= 0x7F + val <<= 7 + val |= b + if done: break + return val, pos + + def encode_vwi(self,value): + bytes = [] + multi_byte = (value > 0x7f) + while value: + b = value & 0x7f + value >>= 7 + if value == 0: + if multi_byte: + bytes.append(b|0x80) + if len(bytes) == 4: + return pack('>BBBB',bytes[3],bytes[2],bytes[1],bytes[0]).decode('iso-8859-1') + elif len(bytes) == 3: + return pack('>BBB',bytes[2],bytes[1],bytes[0]).decode('iso-8859-1') + elif len(bytes) == 2: + return pack('>BB',bytes[1],bytes[0]).decode('iso-8859-1') + else: + return pack('>B', b).decode('iso-8859-1') + else: + if len(bytes): + bytes.append(b|0x80) + else: + bytes.append(b) + + # If value == 0, return 0 + return pack('>B', 0x0).decode('iso-8859-1') + + def fixup_topaz_headers(self, size_delta): + # Rewrite Topaz Header. Any offset > md_hdr_offset needs to be adjusted + ths = StringIO.StringIO() + md_header_offset = self.md_header_offset + # Copy the first 5 bytes + ths.write(self.data[:5]) + md_record = False + for th in self.topaz_headers: + ths.write('c') + ths.write(self.encode_vwi(len(self.topaz_headers[th]['tag']))) + ths.write(self.topaz_headers[th]['tag']) + ths.write(self.encode_vwi(len(self.topaz_headers[th]['blocks']))) + for block in self.topaz_headers[th]['blocks']: + b = self.topaz_headers[th]['blocks'][block] + if b['hdr_offset'] > md_header_offset: + vwi = self.encode_vwi(b['hdr_offset'] + size_delta) + else: + vwi = self.encode_vwi(b['hdr_offset']) + ths.write(vwi) + if self.topaz_headers[th]['tag'] == 'metadata': + ths.write(self.encode_vwi(b['len_uncomp'] + size_delta)) + else: + ths.write(self.encode_vwi(b['len_uncomp'])) + ths.write(self.encode_vwi(b['len_comp'])) + + return ths.getvalue().encode('iso-8859-1') + + def generate_dkey(self): + for x in self.topaz_headers: + if self.topaz_headers[x]['tag'] == 'dkey': + offset = self.base + self.topaz_headers[x]['blocks'][0]['hdr_offset'] + len_uncomp = self.topaz_headers[x]['blocks'][0]['len_uncomp'] + break + dkey = self.topaz_headers[x] + dks = StringIO.StringIO() + dks.write('d@') + dks.write(self.encode_vwi(len(dkey['tag']))) + offset += 1 + dks.write(dkey['tag']) + offset += len('dkey') + dks.write(chr(0)) + offset += 1 + dks.write(self.data[offset:offset + len_uncomp].decode('iso-8859-1')) + return dks.getvalue().encode('iso-8859-1') + + def get_topaz_headers(self): + offset = 5 + md_header_offset = 0 + dkey_len = 0 + # Find the offset of the metadata header record + for hr in range(self.header_records): + marker = self.data[offset] + offset += 1 + taglen, consumed = self.decode_vwi(self.data[offset:offset+4]) + offset += consumed + tag = self.data[offset:offset+taglen] + offset += taglen + if not tag == 'metadata': + num_vals, consumed = self.decode_vwi(self.data[offset:offset+4]) + offset += consumed + for val in range(num_vals): + foo, consumed = self.decode_vwi(self.data[offset:offset+4]) + offset += consumed + foo, consumed = self.decode_vwi(self.data[offset:offset+4]) + offset += consumed + foo, consumed = self.decode_vwi(self.data[offset:offset+4]) + offset += consumed + continue + num_vals, consumed = self.decode_vwi(self.data[offset:offset+4]) + offset += consumed + md_header_offset, consumed = self.decode_vwi(self.data[offset:offset+4]) + break + self.md_header_offset = md_header_offset + + offset = 5 + topaz_headers = {} + for x in range(self.header_records): + marker = self.data[offset] + offset += 1 + taglen, consumed = self.decode_vwi(self.data[offset:offset+4]) + offset += consumed + tag = self.data[offset:offset+taglen] + offset += taglen + num_vals, consumed = self.decode_vwi(self.data[offset:offset+4]) + offset += consumed + blocks = {} + for val in range(num_vals): + hdr_offset, consumed = self.decode_vwi(self.data[offset:offset+4]) + offset += consumed + len_uncomp, consumed = self.decode_vwi(self.data[offset:offset+4]) + offset += consumed + len_comp, consumed = self.decode_vwi(self.data[offset:offset+4]) + offset += consumed + blocks[val] = dict(hdr_offset=hdr_offset,len_uncomp=len_uncomp,len_comp=len_comp) + topaz_headers[x] = dict(tag=tag,blocks=blocks) + self.topaz_headers = topaz_headers + + eod = self.data[offset] + offset += 1 + self.base = offset + + return md_header_offset, topaz_headers + + def generate_metadata_stream(self): + ms = StringIO.StringIO() + # Generate the header + ms.write(self.encode_vwi(len(self.md_header['tag'])).encode('iso-8859-1')) + ms.write(self.md_header['tag']) + ms.write(chr(self.md_header['flags'])) + ms.write(chr(len(self.metadata))) + + # Add the metadata fields. + for item in self.metadata: + ms.write(self.encode_vwi(len(self.metadata[item]['tag'])).encode('iso-8859-1')) + ms.write(self.metadata[item]['tag']) + ms.write(self.encode_vwi(len(self.metadata[item]['metadata'])).encode('iso-8859-1')) + ms.write(self.metadata[item]['metadata']) + + return ms.getvalue() + + def get_md_header(self,offset): + md_header = {} + taglen, consumed = self.decode_vwi(self.data[offset:offset+4]) + offset += consumed + md_header['tag'] = self.data[offset:offset+taglen] + offset += taglen + md_header['flags'] = ord(self.data[offset]) + offset += 1 + md_header['records'] = ord(self.data[offset]) + offset += 1 + self.md_header = md_header + return offset + + def get_original_metadata(self,offset): + for x in range(self.md_header['records']): + md_record = {} + taglen, consumed = self.decode_vwi(self.data[offset:offset+4]) + offset += consumed + md_record['tag'] = self.data[offset:offset+taglen] + offset += taglen + md_len, consumed = self.decode_vwi(self.data[offset:offset+4]) + offset += consumed + md_record['metadata'] = self.data[offset:offset + md_len] + offset += md_len + self.metadata[x] = md_record + return offset + + def hexdump(self, src, length=16): + # Diagnostic + FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)]) + N=0; result='' + while src: + s,src = src[:length],src[length:] + hexa = ' '.join(["%02X"%ord(x) for x in s]) + s = s.translate(FILTER) + result += "%04X %-*s %s\n" % (N, length*3, hexa, s) + N+=length + print result + + def update(self,mi): + def update_metadata(tag,value): + for item in self.metadata: + if self.metadata[item]['tag'] == tag: + self.metadata[item]['metadata'] = value + return + + self.get_topaz_headers() + + try: + from calibre.ebooks.conversion.config import load_defaults + prefs = load_defaults('mobi_output') + pas = prefs.get('prefer_author_sort', False) + except: + pas = False + + if mi.author_sort and pas: + authors = mi.author_sort + update_metadata('Authors',authors.encode('utf-8')) + elif mi.authors: + authors = '; '.join(mi.authors) + update_metadata('Authors',authors) + update_metadata('Title',mi.title.encode('utf-8')) + + updated_metadata = self.generate_metadata_stream() + head = self.fixup_topaz_headers(len(updated_metadata) - self.orig_md_len) + dkey = self.generate_dkey() + tail = copy.copy(self.data[self.md_end:]) + + self.stream.seek(0) + self.stream.truncate(0) + self.stream.write(head) + self.stream.write(dkey) + self.stream.write(updated_metadata) + self.stream.write(tail) + +def set_metadata(stream, mi): + mu = MetadataUpdater(stream) + mu.update(mi) + return + if __name__ == '__main__': - import sys - print get_metadata(open(sys.argv[1], 'rb')) \ No newline at end of file + import cStringIO, sys + print get_metadata(open(sys.argv[1], 'rb')) diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index d420515326..870414bc66 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -326,12 +326,17 @@ class FileIconProvider(QFileIconProvider): 'lrf' : 'lrf', 'lrx' : 'lrx', 'pdf' : 'pdf', + 'pdr' : 'zero', 'rar' : 'rar', 'zip' : 'zip', 'txt' : 'txt', 'prc' : 'mobi', 'azw' : 'mobi', 'mobi' : 'mobi', + 'mbp' : 'zero', + 'azw1' : 'mobi', + 'tpz' : 'mobi', + 'tan' : 'zero', 'epub' : 'epub', 'fb2' : 'fb2', } diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index b72ac7d1c4..1377453b0e 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -1012,9 +1012,15 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): # Add the last-read location spanTag = Tag(ka_soup, 'span') spanTag['style'] = 'font-weight:bold' - spanTag.insert(0,NavigableString("%s
Last Page Read: Location %d (%d%%)" % \ - (strftime(u'%x', timestamp.timetuple()), - last_read_location, percent_read))) + if bookmark.book_format == 'pdf': + spanTag.insert(0,NavigableString("%s
Last Page Read: %d" % \ + (strftime(u'%x', timestamp.timetuple()), + last_read_location))) + else: + spanTag.insert(0,NavigableString("%s
Last Page Read: Location %d (%d%%)" % \ + (strftime(u'%x', timestamp.timetuple()), + last_read_location, + percent_read))) divTag.insert(dtc, spanTag) dtc += 1 @@ -1036,9 +1042,14 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): user_notes[location]['type'] == 'Note' else \ '%s' % user_notes[location]['text'])) else: - annotations.append('Location %d • %s
' % \ - (user_notes[location]['displayed_location'], - user_notes[location]['type'])) + if bookmark.book_format == 'pdf': + annotations.append('Page %d • %s
' % \ + (user_notes[location]['displayed_location'], + user_notes[location]['type'])) + else: + annotations.append('Location %d • %s
' % \ + (user_notes[location]['displayed_location'], + user_notes[location]['type'])) for annotation in annotations: divTag.insert(dtc, annotation) @@ -1074,11 +1085,9 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): mi.comments = unicode(user_notes_soup.prettify()) # Update library comments self.db.set_comment(id, mi.comments) - ''' # Add bookmark file to id self.db.add_format_with_hooks(id, bm.bookmark.bookmark_extension, bm.bookmark.path, index_is_id=True) - ''' self.update_progress.emit(i) self.update_done.emit() self.done_callback(self.am.keys()) @@ -1522,7 +1531,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): if single_format is not None: opts.formats = single_format # Special case for Kindle annotation files - if single_format.lower() == 'mbp' or single_format == 'tan': + if single_format.lower() in ['mbp','pdr','tan']: opts.to_lowercase = False opts.save_cover = False opts.write_opf = False