From acfa27d4c5046b40479b289a98d4aa2d04466fdf Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Mon, 28 Jun 2010 15:52:02 +0100 Subject: [PATCH 1/7] Performance enhancements for sony driver and book matching --- src/calibre/devices/prs505/sony_cache.py | 133 ++++++++++++++--------- src/calibre/devices/usbms/books.py | 16 ++- src/calibre/gui2/device.py | 11 -- 3 files changed, 95 insertions(+), 65 deletions(-) diff --git a/src/calibre/devices/prs505/sony_cache.py b/src/calibre/devices/prs505/sony_cache.py index 9bd005d71c..8a11839902 100644 --- a/src/calibre/devices/prs505/sony_cache.py +++ b/src/calibre/devices/prs505/sony_cache.py @@ -144,52 +144,67 @@ class XMLCache(object): if title+str(i) not in seen: title = title+str(i) playlist.set('title', title) + seen.add(title) break else: seen.add(title) - def build_playlist_id_map(self): - debug_print('Start build_playlist_id_map') - ans = {} - self.ensure_unique_playlist_titles() - debug_print('after ensure_unique_playlist_titles') - self.prune_empty_playlists() - for i, root in self.record_roots.items(): - debug_print('build_playlist_id_map loop', i) - id_map = self.build_id_map(root) - ans[i] = [] - for playlist in root.xpath('//*[local-name()="playlist"]'): - items = [] - for item in playlist: - id_ = item.get('id', None) - record = id_map.get(id_, None) - if record is not None: - items.append(record) - ans[i].append((playlist.get('title'), items)) - debug_print('end build_playlist_id_map') - return ans - def build_id_playlist_map(self, bl_index): + ''' + Return a map of the collections in books: {lpaths: [collection names]} + ''' debug_print('Start build_id_playlist_map') - pmap = self.build_playlist_id_map()[bl_index] + self.ensure_unique_playlist_titles() + self.prune_empty_playlists() + debug_print('after cleaning playlists') + root = self.record_roots[bl_index] + if root is None: + return + id_map = self.build_id_map(root) playlist_map = {} - for title, records in pmap: - for record in records: - path = record.get('path', None) - if path: - if path not in playlist_map: - playlist_map[path] = [] - playlist_map[path].append(title) + # foreach playlist, get the lpaths for the ids in it, then add to dict + for playlist in root.xpath('//*[local-name()="playlist"]'): + name = playlist.get('title') + if name is None: + debug_print('build_id_playlist_map: unnamed playlist!') + continue + for item in playlist: + # translate each id into its lpath + id_ = item.get('id', None) + if id_ is None: + debug_print('build_id_playlist_map: id_ is None!') + continue + bk = id_map.get(id_, None) + if bk is None: + debug_print('build_id_playlist_map: book is None!', id_) + continue + lpath = bk.get('path', None) + if lpath is None: + debug_print('build_id_playlist_map: lpath is None!', id_) + continue + if lpath not in playlist_map: + playlist_map[lpath] = [] + playlist_map[lpath].append(name) debug_print('Finish build_id_playlist_map. Found', len(playlist_map)) return playlist_map + def reset_existing_playlists_map(self): + self._playlist_to_playlist_id_map = {} + def get_or_create_playlist(self, bl_idx, title): root = self.record_roots[bl_idx] - for playlist in root.xpath('//*[local-name()="playlist"]'): - if playlist.get('title', None) == title: - return playlist - if DEBUG: - debug_print('Creating playlist:', title) + # maintain a private map of playlists to their ids. Don't check if it + # exists, because reset_existing_playlist_map must be called before it + # is used to ensure that deleted playlists are taken into account + if bl_idx not in self._playlist_to_playlist_id_map: + self._playlist_to_playlist_id_map[bl_idx] = {} + for playlist in root.xpath('//*[local-name()="playlist"]'): + pl_title = playlist.get('title', None) + if pl_title is not None: + self._playlist_to_playlist_id_map[bl_idx][pl_title] = playlist + if title in self._playlist_to_playlist_id_map[bl_idx]: + return self._playlist_to_playlist_id_map[bl_idx][title] + debug_print('Creating playlist:', title) ans = root.makeelement('{%s}playlist'%self.namespaces[bl_idx], nsmap=root.nsmap, attrib={ 'uuid' : uuid(), @@ -198,6 +213,7 @@ class XMLCache(object): 'sourceid': '1' }) root.append(ans) + self._playlist_to_playlist_id_map[bl_idx][title] = ans return ans # }}} @@ -260,7 +276,9 @@ class XMLCache(object): ensure_media_xml_base_ids(root) idmap = ensure_numeric_ids(root) - remap_playlist_references(root, idmap) + if len(idmap) > 0: + debug_print('fix_ids: found some non-numeric ids') + remap_playlist_references(root, idmap) if i == 0: sourceid, playlist_sid = 1, 0 base = 0 @@ -326,7 +344,9 @@ class XMLCache(object): record = lpath_map.get(book.lpath, None) if record is None: record = self.create_text_record(root, i, book.lpath) - self.update_text_record(record, book, path, i) + date = self.check_timestamp(record, book, path) + if date is not None: + self.update_text_record(record, book, date, path, i) # Ensure the collections in the XML database are recorded for # this book if book.device_collections is None: @@ -352,8 +372,10 @@ class XMLCache(object): def update_playlists(self, bl_index, root, booklist, collections_attributes): debug_print('Starting update_playlists', collections_attributes, bl_index) + self.reset_existing_playlists_map() collections = booklist.get_collections(collections_attributes) lpath_map = self.build_lpath_map(root) + debug_print('update_playlists: finished building maps') for category, books in collections.items(): records = [lpath_map.get(b.lpath, None) for b in books] # Remove any books that were not found, although this @@ -362,24 +384,23 @@ class XMLCache(object): debug_print('WARNING: Some elements in the JSON cache were not' ' found in the XML cache') records = [x for x in records if x is not None] + ids = set() for rec in records: - if rec.get('id', None) is None: + id = rec.get('id', None) + if id is None: rec.set('id', str(self.max_id(root)+1)) - ids = [x.get('id', None) for x in records] - if None in ids: - debug_print('WARNING: Some elements do not have ids') - ids = [x for x in ids if x is not None] + id = rec.get('id', None) + ids.add(id) + # ids cannot contain None, so no reason to check + playlist = self.get_or_create_playlist(bl_index, category) - playlist_ids = [] + # Reduce ids to books not already in the playlist for item in playlist: id_ = item.get('id', None) if id_ is not None: - playlist_ids.append(id_) - for item in list(playlist): - playlist.remove(item) - - extra_ids = [x for x in playlist_ids if x not in ids] - for id_ in ids + extra_ids: + ids.discard(id_) + # Add the books in ids that were not already in the playlist + for id_ in ids: item = playlist.makeelement( '{%s}item'%self.namespaces[bl_index], nsmap=playlist.nsmap, attrib={'id':id_}) @@ -416,11 +437,23 @@ class XMLCache(object): root.append(ans) return ans - def update_text_record(self, record, book, path, bl_index): + def check_timestamp(self, record, book, path): + ''' + Checks the timestamp in the Sony DB against the file. If different, + return the file timestamp. Otherwise return None. + ''' timestamp = os.path.getmtime(path) date = strftime(timestamp) if date != record.get('date', None): - record.set('date', date) + return date + return None + + def update_text_record(self, record, book, date, path, bl_index): + ''' + Update the Sony database from the book. This is done if the timestamp in + the db differs from the timestamp on the file. + ''' + record.set('date', date) record.set('size', str(os.stat(path).st_size)) title = book.title if book.title else _('Unknown') record.set('title', title) diff --git a/src/calibre/devices/usbms/books.py b/src/calibre/devices/usbms/books.py index 0f5c848c1a..6204ee3ccb 100644 --- a/src/calibre/devices/usbms/books.py +++ b/src/calibre/devices/usbms/books.py @@ -134,9 +134,16 @@ class CollectionsBookList(BookList): def get_collections(self, collection_attributes): collections = {} series_categories = set([]) + # This map of sets is used to avoid linear searches when testing for + # book equality + collections_lpaths = {} for book in self: - # The default: leave the book in all existing collections. Do not - # add any new ones. + # Make sure we can identify this book via the lpath + lpath = getattr(book, 'lpath', None) + if lpath is None: + continue + # Decide how we will build the collections. The default: leave the + # book in all existing collections. Do not add any new ones. attrs = ['device_collections'] if getattr(book, '_new_book', False): if prefs['preserve_user_collections']: @@ -163,11 +170,12 @@ class CollectionsBookList(BookList): continue if category not in collections: collections[category] = [] - if book not in collections[category]: + collections_lpaths[category] = set() + if lpath not in collections_lpaths[category]: + collections_lpaths[category].add(lpath) collections[category].append(book) if attr == 'series': series_categories.add(category) - # Sort collections for category, books in collections.items(): def tgetter(x): diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 6d33f95184..584796231a 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -1416,7 +1416,6 @@ class DeviceMixin(object): # {{{ # the application_id, which is really the db key, but as this can # accidentally match across libraries we also verify the title. The # db_id exists on Sony devices. Fallback is title and author match - resend_metadata = False for booklist in booklists: for book in booklist: if getattr(book, 'uuid', None) in self.db_book_uuid_cache: @@ -1433,12 +1432,10 @@ class DeviceMixin(object): # {{{ if getattr(book, 'application_id', None) in d['db_ids']: book.in_library = True book.smart_update(d['db_ids'][book.application_id]) - resend_metadata = True continue if book.db_id in d['db_ids']: book.in_library = True book.smart_update(d['db_ids'][book.db_id]) - resend_metadata = True continue if book.authors: # Compare against both author and author sort, because @@ -1448,21 +1445,13 @@ class DeviceMixin(object): # {{{ if book_authors in d['authors']: book.in_library = True book.smart_update(d['authors'][book_authors]) - resend_metadata = True elif book_authors in d['author_sort']: book.in_library = True book.smart_update(d['author_sort'][book_authors]) - resend_metadata = True # Set author_sort if it isn't already asort = getattr(book, 'author_sort', None) if not asort and book.authors: book.author_sort = self.library_view.model().db.author_sort_from_authors(book.authors) - resend_metadata = True - - if resend_metadata: - # Correct the metadata cache on device. - if self.device_manager.is_device_connected: - self.device_manager.sync_booklists(None, booklists) # }}} From aafeb64d762dc6322064672826b07bc5be1f3464 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Mon, 28 Jun 2010 16:07:23 +0100 Subject: [PATCH 2/7] Clean up some comments --- src/calibre/devices/prs505/sony_cache.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/calibre/devices/prs505/sony_cache.py b/src/calibre/devices/prs505/sony_cache.py index 8a11839902..03ae23db1c 100644 --- a/src/calibre/devices/prs505/sony_cache.py +++ b/src/calibre/devices/prs505/sony_cache.py @@ -189,13 +189,19 @@ class XMLCache(object): return playlist_map def reset_existing_playlists_map(self): - self._playlist_to_playlist_id_map = {} + ''' + Call this method before calling get_or_create_playlist in the context of + a given job. Call it again after deleting any playlists. The current + implementation adds all new playlists before deleting any, so that + constraint is respected. + ''' + self._playlist_to_playlist_id_map = {} def get_or_create_playlist(self, bl_idx, title): - root = self.record_roots[bl_idx] # maintain a private map of playlists to their ids. Don't check if it # exists, because reset_existing_playlist_map must be called before it # is used to ensure that deleted playlists are taken into account + root = self.record_roots[bl_idx] if bl_idx not in self._playlist_to_playlist_id_map: self._playlist_to_playlist_id_map[bl_idx] = {} for playlist in root.xpath('//*[local-name()="playlist"]'): From 8525faf60c91956f9a311656bac6cd349348eacb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 28 Jun 2010 11:22:53 -0600 Subject: [PATCH 3/7] Fix #6018 (Updated recipe for The Old New Thing blog) and fix regression in metadata download that could cause errors with some books --- resources/recipes/oldnewthing.recipe | 4 ++-- src/calibre/ebooks/metadata/fetch.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/resources/recipes/oldnewthing.recipe b/resources/recipes/oldnewthing.recipe index fc9749d403..8280451e17 100644 --- a/resources/recipes/oldnewthing.recipe +++ b/resources/recipes/oldnewthing.recipe @@ -28,7 +28,7 @@ class OldNewThing(BasicNewsRecipe): } remove_attributes = ['width','height'] - keep_only_tags = [dict(attrs={'class':['postsub','comment']})] - + keep_only_tags = [dict(attrs={'class':'full-post'})] + remove_tags = [dict(attrs={'class':['post-attributes','post-tags','post-actions']})] feeds = [(u'Posts', u'http://blogs.msdn.com/oldnewthing/rss.xml')] diff --git a/src/calibre/ebooks/metadata/fetch.py b/src/calibre/ebooks/metadata/fetch.py index 8b82d3c972..cb75d93f59 100644 --- a/src/calibre/ebooks/metadata/fetch.py +++ b/src/calibre/ebooks/metadata/fetch.py @@ -313,6 +313,8 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None, def sort_func(x, y): def cleanup_title(s): + if s is None: + s = _('Unknown') s = s.strip().lower() s = prefix_pat.sub(' ', s) s = trailing_paren_pat.sub('', s) From 1ef607f544739334119a231986322fbd7b36da97 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 28 Jun 2010 15:13:09 -0600 Subject: [PATCH 4/7] MOBI Output: Fix a memory leak and a crash in the palmdoc compression routine --- src/calibre/ebooks/compression/palmdoc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/compression/palmdoc.c b/src/calibre/ebooks/compression/palmdoc.c index b85a404eb6..4d913dfd2b 100644 --- a/src/calibre/ebooks/compression/palmdoc.c +++ b/src/calibre/ebooks/compression/palmdoc.c @@ -151,6 +151,7 @@ cpalmdoc_do_compress(buffer *b, char *output) { for (j=0; j < temp.len; j++) *(output++) = (char)temp.data[j]; } } + PyMem_Free(temp.data); return output - head; } @@ -168,7 +169,9 @@ cpalmdoc_compress(PyObject *self, PyObject *args) { for (j = 0; j < input_len; j++) b.data[j] = (_input[j] < 0) ? _input[j]+256 : _input[j]; b.len = input_len; - output = (char *)PyMem_Malloc(sizeof(char) * b.len); + // Make the output buffer larger than the input as sometimes + // compression results in a larger block + output = (char *)PyMem_Malloc(sizeof(char) * (int)(1.25*b.len)); if (output == NULL) return PyErr_NoMemory(); j = cpalmdoc_do_compress(&b, output); if ( j == 0) return PyErr_NoMemory(); From f861a14a082206f4a82976cfc686d8163fba1e7f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 28 Jun 2010 16:07:48 -0600 Subject: [PATCH 5/7] Windows installer: Reduce time taken to calculate disk space requirements --- setup/installer/windows/wix-template.xml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/setup/installer/windows/wix-template.xml b/setup/installer/windows/wix-template.xml index 35560f5162..17976783aa 100644 --- a/setup/installer/windows/wix-template.xml +++ b/setup/installer/windows/wix-template.xml @@ -153,6 +153,18 @@ + + + + + + + + + + + + From b688d399346ac5de2fe75ba4af6b301165c02eb1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 28 Jun 2010 19:29:05 -0600 Subject: [PATCH 6/7] EPUB Output: Ensure that language setting is conformant to the specs --- src/calibre/devices/apple/driver.py | 2 +- src/calibre/ebooks/chm/input.py | 2 +- src/calibre/ebooks/html/input.py | 2 +- src/calibre/ebooks/metadata/opf2.py | 6 ++++-- src/calibre/ebooks/oeb/reader.py | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py index 845423247c..59bef5b334 100644 --- a/src/calibre/devices/apple/driver.py +++ b/src/calibre/devices/apple/driver.py @@ -2347,7 +2347,7 @@ class ITUNES(DriverBase): self.log.info(" add timestamp: %s" % metadata.timestamp) # Force the language declaration for iBooks 1.1 - metadata.language = get_lang() + metadata.language = get_lang().replace('_', '-') if DEBUG: self.log.info(" rewriting language: %s" % metadata.language) diff --git a/src/calibre/ebooks/chm/input.py b/src/calibre/ebooks/chm/input.py index a3a49296d2..820178408c 100644 --- a/src/calibre/ebooks/chm/input.py +++ b/src/calibre/ebooks/chm/input.py @@ -92,7 +92,7 @@ class CHMInput(InputFormatPlugin): metadata.add('identifier', mi.isbn, attrib={'scheme':'ISBN'}) if not metadata.language: oeb.logger.warn(u'Language not specified') - metadata.add('language', get_lang()) + metadata.add('language', get_lang().replace('_', '-')) if not metadata.creator: oeb.logger.warn('Creator not specified') metadata.add('creator', _('Unknown')) diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index 73fd020d7b..73bc22be66 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -329,7 +329,7 @@ class HTMLInput(InputFormatPlugin): metadata.add('identifier', mi.isbn, attrib={'scheme':'ISBN'}) if not metadata.language: oeb.logger.warn(u'Language not specified') - metadata.add('language', get_lang()) + metadata.add('language', get_lang().replace('_', '-')) if not metadata.creator: oeb.logger.warn('Creator not specified') metadata.add('creator', self.oeb.translate(__('Unknown'))) diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 579398d3b0..0bb0c570ed 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -1069,7 +1069,8 @@ class OPFCreator(MetaInformation): dc_attrs={'id':__appname__+'_id'})) if getattr(self, 'pubdate', None) is not None: a(DC_ELEM('date', self.pubdate.isoformat())) - a(DC_ELEM('language', self.language if self.language else get_lang())) + a(DC_ELEM('language', self.language if self.language else + get_lang().replace('_', '-'))) if self.comments: a(DC_ELEM('description', self.comments)) if self.publisher: @@ -1194,7 +1195,8 @@ def metadata_to_opf(mi, as_string=True): factory(DC('identifier'), mi.isbn, scheme='ISBN') if mi.rights: factory(DC('rights'), mi.rights) - factory(DC('language'), mi.language if mi.language and mi.language.lower() != 'und' else get_lang()) + factory(DC('language'), mi.language if mi.language and mi.language.lower() + != 'und' else get_lang().replace('_', '-')) if mi.tags: for tag in mi.tags: factory(DC('subject'), tag) diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index ebe6e78d08..d7d7bbf725 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -131,7 +131,7 @@ class OEBReader(object): stream = cStringIO.StringIO(etree.tostring(opf)) mi = MetaInformation(OPF(stream)) if not mi.language: - mi.language = get_lang() + mi.language = get_lang().replace('_', '-') self.oeb.metadata.add('language', mi.language) if not mi.title: mi.title = self.oeb.translate(__('Unknown')) From 0a8048345427f1e21d512f91c908701c49489001 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 28 Jun 2010 19:46:36 -0600 Subject: [PATCH 7/7] Fix #6020 (Error loading some images) --- src/calibre/gui2/dialogs/metadata_single.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index 8be3774203..5a4eaec9d7 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -103,7 +103,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): if _file: _file = os.path.abspath(_file) if not os.access(_file, os.R_OK): - d = error_dialog(self.window, _('Cannot read'), + d = error_dialog(self, _('Cannot read'), _('You do not have permission to read the file: ') + _file) d.exec_() return @@ -112,14 +112,14 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): cf = open(_file, "rb") cover = cf.read() except IOError, e: - d = error_dialog(self.window, _('Error reading file'), + d = error_dialog(self, _('Error reading file'), _("

There was an error reading from file:
") + _file + "


"+str(e)) d.exec_() if cover: pix = QPixmap() pix.loadFromData(cover) if pix.isNull(): - d = error_dialog(self.window, + d = error_dialog(self, _("Not a valid picture"), _file + _(" is not a valid picture")) d.exec_() @@ -162,7 +162,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): self.formats_changed = True added = True if bad_perms: - error_dialog(self.window, _('No permission'), + error_dialog(self, _('No permission'), _('You do not have ' 'permission to read the following files:'), det_msg='\n'.join(bad_perms), show=True)