From 0027f82e1d918eeaa7f5332fd7b7d9b45322c06d Mon Sep 17 00:00:00 2001 From: GRiker Date: Sun, 31 Jan 2010 15:54:24 -0700 Subject: [PATCH] Reworked tag/genre parsing code --- src/calibre/library/catalog.py | 210 +++++++++++++++++---------------- 1 file changed, 109 insertions(+), 101 deletions(-) diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 0defc419c8..44ab82ac19 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -765,6 +765,8 @@ class EPUB_MOBI(CatalogPlugin): # Methods def buildSources(self): self.fetchBooksByTitle() + if not self.booksByTitle: + return False self.fetchBooksByAuthor() self.generateHTMLDescriptions() self.generateHTMLByAuthor() @@ -784,6 +786,7 @@ class EPUB_MOBI(CatalogPlugin): self.generateNCXByDateAdded("Recently Added") self.generateNCXByGenre("Genres") self.writeNCX() + return True def cleanUp(self): pass @@ -1448,107 +1451,109 @@ class EPUB_MOBI(CatalogPlugin): def generateHTMLByTags(self): # Generate individual HTML files for each tag, e.g. Fiction, Nonfiction ... # Note that special tags - ~+*[] - have already been filtered from books[] + # There may be synonomous tags self.updateProgressFullStep("'Genres'") - # filtered_tags = {friendly:normalized, } self.genre_tags_dict = self.filterDbTags(self.db.all_tags()) # Extract books matching filtered_tags genre_list = [] - for friendly_tag in self.genre_tags_dict: + for friendly_tag in sorted(self.genre_tags_dict): #print "\ngenerateHTMLByTags(): looking for books with friendly_tag '%s'" % friendly_tag # tag_list => {'tag': '', 'books':[{}, {}, {}]} + # tag_list => { normalized_genre_tag : [{book},{},{}], + # normalized_genre_tag : [{book},{},{}] } + tag_list = {} - tag_list['tag'] = self.genre_tags_dict[friendly_tag] - tag_list['books'] = [] for book in self.booksByAuthor: # Scan each book for tag matching friendly_tag - #if 'tags' in book: print " evaluating %s with tags: %s" % (book['title'], book['tags']) if 'tags' in book and friendly_tag in book['tags']: - #print " adding '%s'" % (book['title']) this_book = {} this_book['author'] = book['author'] this_book['title'] = book['title'] this_book['author_sort'] = book['author_sort'] this_book['read'] = book['read'] this_book['id'] = book['id'] - tag_list['books'].append(this_book) - - if len(tag_list['books']): - genre_exists = False - book_not_in_genre = True - if not genre_list: - #print " genre_list empty, adding '%s'" % tag_list['tag'] - genre_list.append(tag_list) - else: - # Check for existing_genre - for genre in genre_list: - if genre['tag'] == tag_list['tag']: - genre_exists = True - # Check to see if the book is already in this list - for existing_book in genre['books']: - if this_book['title'] == existing_book['title']: - #print "%s already in %s" % (this_book['title'], genre) - book_not_in_genre = False - break - break - - if genre_exists: - if book_not_in_genre: - #print " adding %s to existing genre '%s'" % (this_book['title'],genre['tag']) - genre['books'].append(this_book) + normalized_tag = self.genre_tags_dict[friendly_tag] + genre_tag_list = [key for genre in genre_list for key in genre] + if normalized_tag in genre_tag_list: + for existing_genre in genre_list: + for key in existing_genre: + new_book = None + if key == normalized_tag: + for book in existing_genre[key]: + if book['title'] == this_book['title']: + new_book = False + break + else: + new_book = True + if new_book: + existing_genre[key].append(this_book) else: - #print " appending genre '%s'" % tag_list['tag'] + tag_list[normalized_tag] = [this_book] genre_list.append(tag_list) if self.opts.verbose: - self.opts.log.info(" Genre summary: %d active genres" % len(genre_list)) + self.opts.log.info(" Genre summary: %d active genre tags used in generating catalog with %d titles" % + (len(genre_list), len(self.booksByTitle))) + for genre in genre_list: - self.opts.log.info(" %s: %d titles" % (genre['tag'], len(genre['books']))) + for key in genre: + self.opts.log.info(" %s: %d titles" % (key, len(genre[key]))) # Write the results - # genre_list = [ [tag_list], [tag_list] ...] + # genre_list = [ {friendly_tag:[{book},{book}]}, {friendly_tag:[{book},{book}]}, ...] master_genre_list = [] - for (index, genre) in enumerate(genre_list): - # Create sorted_authors[0] = friendly, [1] = author_sort for NCX creation - authors = [] - for book in genre['books']: - authors.append((book['author'],book['author_sort'])) + for genre_tag_set in genre_list: + for (index, genre) in enumerate(genre_tag_set): + #print "genre: %s \t genre_tag_set[genre]: %s" % (genre, genre_tag_set[genre]) - # authors[] contains a list of all book authors, with multiple entries for multiple books by author - # Create unique_authors with a count of books per author as the third tuple element - books_by_current_author = 1 - current_author = authors[0] - unique_authors = [] - for (i,author) in enumerate(authors): - if author != current_author and i: - unique_authors.append((current_author[0], current_author[1], books_by_current_author)) - current_author = author - books_by_current_author = 1 - elif i==0 and len(authors) == 1: - # Allow for single-book lists - unique_authors.append((current_author[0], current_author[1], books_by_current_author)) - else: - books_by_current_author += 1 - ''' - # Extract the unique entries - unique_authors = [] - for author in authors: - if not author in unique_authors: - unique_authors.append(author) - ''' - # Write the genre book list as an article - titles_spanned = self.generateHTMLByGenre(genre['tag'], True if index==0 else False, genre['books'], - "%s/Genre_%s.html" % (self.contentDir, genre['tag'])) + # Create sorted_authors[0] = friendly, [1] = author_sort for NCX creation + authors = [] + for book in genre_tag_set[genre]: + authors.append((book['author'],book['author_sort'])) - tag_file = "content/Genre_%s.html" % genre['tag'] - master_genre_list.append({'tag':genre['tag'], - 'file':tag_file, - 'authors':unique_authors, - 'books':genre['books'], - 'titles_spanned':titles_spanned}) + # authors[] contains a list of all book authors, with multiple entries for multiple books by author + # Create unique_authors with a count of books per author as the third tuple element + books_by_current_author = 1 + current_author = authors[0] + unique_authors = [] + for (i,author) in enumerate(authors): + if author != current_author and i: + unique_authors.append((current_author[0], current_author[1], books_by_current_author)) + current_author = author + books_by_current_author = 1 + elif i==0 and len(authors) == 1: + # Allow for single-book lists + unique_authors.append((current_author[0], current_author[1], books_by_current_author)) + else: + books_by_current_author += 1 + ''' + # Extract the unique entries + unique_authors = [] + for author in authors: + if not author in unique_authors: + unique_authors.append(author) + ''' + # Write the genre book list as an article + titles_spanned = self.generateHTMLByGenre(genre, True if index==0 else False, + genre_tag_set[genre], + "%s/Genre_%s.html" % (self.contentDir, + genre)) + tag_file = "content/Genre_%s.html" % genre + master_genre_list.append({'tag':genre, + 'file':tag_file, + 'authors':unique_authors, + 'books':genre_tag_set[genre], + 'titles_spanned':titles_spanned}) + + if False and self.opts.verbose: + for genre in master_genre_list: + print "genre['tag']: %s" % genre['tag'] + for book in genre['books']: + print book['title'] self.genres = master_genre_list def generateThumbnails(self): @@ -2351,7 +2356,7 @@ class EPUB_MOBI(CatalogPlugin): else: yield tag - self.opts.log.info(u' %d total genre tags in database (exclude_genre: %s):' % \ + self.opts.log.info(u' %d available genre tags in database (exclude_genre: %s):' % \ (len(genre_tags_dict), self.opts.exclude_genre)) # Display friendly/normalized genres @@ -2395,19 +2400,15 @@ class EPUB_MOBI(CatalogPlugin): # Create an anchor from the tag aTag = Tag(soup, 'a') - #aTag['name'] = "Genre%s" % re.sub("\W","", genre) aTag['name'] = "Genre_%s" % genre body.insert(btc,aTag) btc += 1 - # Insert the genre title using the friendly name + # Find the first instance of friendly_tag matching genre # GwR *** optimize - for genre_tag in self.genre_tags_dict: - if self.genre_tags_dict[genre_tag] == genre: - friendly_tag = genre_tag + for friendly_tag in self.genre_tags_dict: + if self.genre_tags_dict[friendly_tag] == genre: break - - titleTag = body.find(attrs={'class':'title'}) titleTag.insert(0,NavigableString('%s' % escape(friendly_tag))) @@ -2748,8 +2749,8 @@ class EPUB_MOBI(CatalogPlugin): if opts.verbose: opts_dict = vars(opts) - log("%s(): Generating %s for %s in %s environment" % - (self.name,self.fmt,opts.output_profile, + log("%s(): Generating %s %sin %s environment" % + (self.name,self.fmt,'for %s ' % opts.output_profile if opts.output_profile else '', 'CLI' if opts.cli_environment else 'GUI')) if opts_dict['ids']: log(" Book count: %d" % len(opts_dict['ids'])) @@ -2765,32 +2766,39 @@ class EPUB_MOBI(CatalogPlugin): # Launch the Catalog builder if opts.verbose: - log.info("Begin generating catalog source") + log.info("Begin catalog source generation") catalog = self.CatalogBuilder(db, opts, self, report_progress=notification) catalog.createDirectoryStructure() catalog.copyResources() - catalog.buildSources() + catalog_source_built = catalog.buildSources() if opts.verbose: - log.info("Finished generating catalog source\n") + if catalog_source_built: + log.info("Finished catalog source generation\n") + else: + log.warn("No database hits with supplied criteria") - recommendations = [] + if catalog_source_built: + recommendations = [] - dp = getattr(opts, 'debug_pipeline', None) - if dp is not None: - recommendations.append(('debug_pipeline', dp, - OptionRecommendation.HIGH)) + dp = getattr(opts, 'debug_pipeline', None) + if dp is not None: + recommendations.append(('debug_pipeline', dp, + OptionRecommendation.HIGH)) - if opts.fmt == 'mobi' and opts.output_profile and opts.output_profile.startswith("kindle"): - recommendations.append(('output_profile', opts.output_profile, - OptionRecommendation.HIGH)) - recommendations.append(('no_inline_toc', True, - OptionRecommendation.HIGH)) + if opts.fmt == 'mobi' and opts.output_profile and opts.output_profile.startswith("kindle"): + recommendations.append(('output_profile', opts.output_profile, + OptionRecommendation.HIGH)) + recommendations.append(('no_inline_toc', True, + OptionRecommendation.HIGH)) - # Run ebook-convert - from calibre.ebooks.conversion.plumber import Plumber - plumber = Plumber(os.path.join(catalog.catalogPath, - opts.basename + '.opf'), path_to_output, log, report_progress=notification, - abort_after_input_dump=False) - plumber.merge_ui_recommendations(recommendations) + # Run ebook-convert + from calibre.ebooks.conversion.plumber import Plumber + plumber = Plumber(os.path.join(catalog.catalogPath, + opts.basename + '.opf'), path_to_output, log, report_progress=notification, + abort_after_input_dump=False) + plumber.merge_ui_recommendations(recommendations) - plumber.run() + plumber.run() + return 0 + else: + return 1