Reworked tag/genre parsing code

2025-12-11 23:55:44 -05:00 · 2010-01-31 15:54:24 -07:00 · 2010-01-31 15:54:24 -07:00 · 0027f82e1d
commit 0027f82e1d
parent e067ad567a
1 changed files with 109 additions and 101 deletions
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -765,6 +765,8 @@ class EPUB_MOBI(CatalogPlugin):
        # Methods
        def buildSources(self):
            self.fetchBooksByTitle()
            if not self.booksByTitle:
                return False
            self.fetchBooksByAuthor()
            self.generateHTMLDescriptions()
            self.generateHTMLByAuthor()
@ -784,6 +786,7 @@ class EPUB_MOBI(CatalogPlugin):
            self.generateNCXByDateAdded("Recently Added")
            self.generateNCXByGenre("Genres")
            self.writeNCX()
            return True
        def cleanUp(self):
            pass
@ -1448,107 +1451,109 @@ class EPUB_MOBI(CatalogPlugin):
        def generateHTMLByTags(self):
            # Generate individual HTML files for each tag, e.g. Fiction, Nonfiction ...
            # Note that special tags - ~+*[] -  have already been filtered from books[]
            # There may be synonomous tags
            self.updateProgressFullStep("'Genres'")
            # filtered_tags = {friendly:normalized, }
            self.genre_tags_dict = self.filterDbTags(self.db.all_tags())
            # Extract books matching filtered_tags
            genre_list = []
-            for friendly_tag in self.genre_tags_dict:
+            for friendly_tag in sorted(self.genre_tags_dict):
                #print "\ngenerateHTMLByTags(): looking for books with friendly_tag '%s'" % friendly_tag
                # tag_list => {'tag': '<normalized_genre_tag>', 'books':[{}, {}, {}]}
                # tag_list => { normalized_genre_tag : [{book},{},{}],
                #               normalized_genre_tag : [{book},{},{}] }
                tag_list = {}
                tag_list['tag'] = self.genre_tags_dict[friendly_tag]
                tag_list['books'] = []
                for book in self.booksByAuthor:
                    # Scan each book for tag matching friendly_tag
                    #if 'tags' in book: print "  evaluating %s with tags: %s" % (book['title'], book['tags'])
                    if 'tags' in book and friendly_tag in book['tags']:
                        #print "   adding '%s'" % (book['title'])
                        this_book = {}
                        this_book['author'] = book['author']
                        this_book['title'] = book['title']
                        this_book['author_sort'] = book['author_sort']
                        this_book['read'] = book['read']
                        this_book['id'] = book['id']
-                        tag_list['books'].append(this_book)
+                        normalized_tag = self.genre_tags_dict[friendly_tag]
-
+                        genre_tag_list = [key for genre in genre_list for key in genre]
-                if len(tag_list['books']):
+                        if normalized_tag in genre_tag_list:
-                    genre_exists = False
+                            for existing_genre in genre_list:
-                    book_not_in_genre = True
+                                for key in existing_genre:
-                    if not genre_list:
+                                    new_book = None
-                        #print "   genre_list empty, adding '%s'" % tag_list['tag']
+                                    if key == normalized_tag:
-                        genre_list.append(tag_list)
+                                        for book in existing_genre[key]:
-                    else:
+                                            if book['title'] == this_book['title']:
-                        # Check for existing_genre
+                                                new_book = False
-                        for genre in genre_list:
+                                                break
-                            if genre['tag'] == tag_list['tag']:
+                                        else:
-                                genre_exists = True
+                                            new_book = True
-                                # Check to see if the book is already in this list
+                                    if new_book:
-                                for existing_book in genre['books']:
+                                        existing_genre[key].append(this_book)
                                    if this_book['title'] == existing_book['title']:
                                        #print "%s already in %s" % (this_book['title'], genre)
                                        book_not_in_genre = False
                                        break
                                break
                        if genre_exists:
                            if book_not_in_genre:
                                #print "    adding %s to existing genre '%s'" % (this_book['title'],genre['tag'])
                                genre['books'].append(this_book)
                        else:
-                            #print "   appending genre '%s'" % tag_list['tag']
+                            tag_list[normalized_tag] = [this_book]
                            genre_list.append(tag_list)
            if self.opts.verbose:
-                self.opts.log.info("     Genre summary: %d active genres" % len(genre_list))
+                self.opts.log.info("     Genre summary: %d active genre tags used in generating catalog with %d titles" %
                                    (len(genre_list), len(self.booksByTitle)))
                for genre in genre_list:
-                    self.opts.log.info("      %s: %d titles" % (genre['tag'], len(genre['books'])))
+                    for key in genre:
                        self.opts.log.info("      %s: %d titles" % (key, len(genre[key])))
            # Write the results
-            # genre_list = [ [tag_list], [tag_list] ...]
+            # genre_list = [ {friendly_tag:[{book},{book}]}, {friendly_tag:[{book},{book}]}, ...]
            master_genre_list = []
-            for (index, genre) in enumerate(genre_list):
+            for genre_tag_set in genre_list:
-                # Create sorted_authors[0] = friendly, [1] = author_sort for NCX creation
+                for (index, genre) in enumerate(genre_tag_set):
-                authors = []
+                    #print "genre: %s  \t  genre_tag_set[genre]: %s" % (genre, genre_tag_set[genre])
                for book in genre['books']:
                    authors.append((book['author'],book['author_sort']))
-                # authors[] contains a list of all book authors, with multiple entries for multiple books by author
+                    # Create sorted_authors[0] = friendly, [1] = author_sort for NCX creation
-                # Create unique_authors with a count of books per author as the third tuple element
+                    authors = []
-                books_by_current_author = 1
+                    for book in genre_tag_set[genre]:
-                current_author = authors[0]
+                        authors.append((book['author'],book['author_sort']))
                unique_authors = []
                for (i,author) in enumerate(authors):
                    if author != current_author and i:
                        unique_authors.append((current_author[0], current_author[1], books_by_current_author))
                        current_author = author
                        books_by_current_author = 1
                    elif i==0 and len(authors) == 1:
                        # Allow for single-book lists
                        unique_authors.append((current_author[0], current_author[1], books_by_current_author))
                    else:
                        books_by_current_author += 1
                '''
                # Extract the unique entries
                unique_authors = []
                for author in authors:
                    if not author in unique_authors:
                        unique_authors.append(author)
                '''
                # Write the genre book list as an article
                titles_spanned = self.generateHTMLByGenre(genre['tag'], True if index==0 else False, genre['books'],
                                    "%s/Genre_%s.html" % (self.contentDir, genre['tag']))
-                tag_file = "content/Genre_%s.html" % genre['tag']
+                    # authors[] contains a list of all book authors, with multiple entries for multiple books by author
-                master_genre_list.append({'tag':genre['tag'],
+                    # Create unique_authors with a count of books per author as the third tuple element
-                                          'file':tag_file,
+                    books_by_current_author = 1
-                                          'authors':unique_authors,
+                    current_author = authors[0]
-                                          'books':genre['books'],
+                    unique_authors = []
-                                          'titles_spanned':titles_spanned})
+                    for (i,author) in enumerate(authors):
                        if author != current_author and i:
                            unique_authors.append((current_author[0], current_author[1], books_by_current_author))
                            current_author = author
                            books_by_current_author = 1
                        elif i==0 and len(authors) == 1:
                            # Allow for single-book lists
                            unique_authors.append((current_author[0], current_author[1], books_by_current_author))
                        else:
                            books_by_current_author += 1
                    '''
                    # Extract the unique entries
                    unique_authors = []
                    for author in authors:
                        if not author in unique_authors:
                            unique_authors.append(author)
                    '''
                    # Write the genre book list as an article
                    titles_spanned = self.generateHTMLByGenre(genre, True if index==0 else False,
                                          genre_tag_set[genre],
                                          "%s/Genre_%s.html" % (self.contentDir,
                                                                genre))
                    tag_file = "content/Genre_%s.html" % genre
                    master_genre_list.append({'tag':genre,
                                              'file':tag_file,
                                              'authors':unique_authors,
                                              'books':genre_tag_set[genre],
                                              'titles_spanned':titles_spanned})
            if False and self.opts.verbose:
                for genre in master_genre_list:
                    print "genre['tag']: %s" % genre['tag']
                    for book in genre['books']:
                        print book['title']
            self.genres = master_genre_list
        def generateThumbnails(self):
@ -2351,7 +2356,7 @@ class EPUB_MOBI(CatalogPlugin):
                        else:
                            yield tag
-                self.opts.log.info(u'     %d total genre tags in database (exclude_genre: %s):' % \
+                self.opts.log.info(u'     %d available genre tags in database (exclude_genre: %s):' % \
                                     (len(genre_tags_dict), self.opts.exclude_genre))
                # Display friendly/normalized genres
@ -2395,19 +2400,15 @@ class EPUB_MOBI(CatalogPlugin):
            # Create an anchor from the tag
            aTag = Tag(soup, 'a')
            #aTag['name'] = "Genre%s" % re.sub("\W","", genre)
            aTag['name'] = "Genre_%s" % genre
            body.insert(btc,aTag)
            btc += 1
-            # Insert the genre title using the friendly name
+            # Find the first instance of friendly_tag matching genre
            # GwR *** optimize
-            for genre_tag in self.genre_tags_dict:
+            for friendly_tag in self.genre_tags_dict:
-                if self.genre_tags_dict[genre_tag] == genre:
+                if self.genre_tags_dict[friendly_tag] == genre:
                    friendly_tag = genre_tag
                    break
            titleTag = body.find(attrs={'class':'title'})
            titleTag.insert(0,NavigableString('<b><i>%s</i></b>' % escape(friendly_tag)))
@ -2748,8 +2749,8 @@ class EPUB_MOBI(CatalogPlugin):
        if opts.verbose:
            opts_dict = vars(opts)
-            log("%s(): Generating %s for %s in %s environment" %
+            log("%s(): Generating %s %sin %s environment" %
-                (self.name,self.fmt,opts.output_profile,
+                (self.name,self.fmt,'for %s ' % opts.output_profile if opts.output_profile else '',
                 'CLI' if opts.cli_environment else 'GUI'))
            if opts_dict['ids']:
                log(" Book count: %d" % len(opts_dict['ids']))
@ -2765,32 +2766,39 @@ class EPUB_MOBI(CatalogPlugin):
        # Launch the Catalog builder
        if opts.verbose:
-            log.info("Begin generating catalog source")
+            log.info("Begin catalog source generation")
        catalog = self.CatalogBuilder(db, opts, self, report_progress=notification)
        catalog.createDirectoryStructure()
        catalog.copyResources()
-        catalog.buildSources()
+        catalog_source_built = catalog.buildSources()
        if opts.verbose:
-            log.info("Finished generating catalog source\n")
+            if catalog_source_built:
                log.info("Finished catalog source generation\n")
            else:
                log.warn("No database hits with supplied criteria")
-        recommendations = []
+        if catalog_source_built:
            recommendations = []
-        dp = getattr(opts, 'debug_pipeline', None)
+            dp = getattr(opts, 'debug_pipeline', None)
-        if dp is not None:
+            if dp is not None:
-            recommendations.append(('debug_pipeline', dp,
+                recommendations.append(('debug_pipeline', dp,
-                OptionRecommendation.HIGH))
+                    OptionRecommendation.HIGH))
-        if opts.fmt == 'mobi' and opts.output_profile and opts.output_profile.startswith("kindle"):
+            if opts.fmt == 'mobi' and opts.output_profile and opts.output_profile.startswith("kindle"):
-            recommendations.append(('output_profile', opts.output_profile,
+                recommendations.append(('output_profile', opts.output_profile,
-                OptionRecommendation.HIGH))
+                    OptionRecommendation.HIGH))
-            recommendations.append(('no_inline_toc', True,
+                recommendations.append(('no_inline_toc', True,
-                OptionRecommendation.HIGH))
+                    OptionRecommendation.HIGH))
-        # Run ebook-convert
+            # Run ebook-convert
-        from calibre.ebooks.conversion.plumber import Plumber
+            from calibre.ebooks.conversion.plumber import Plumber
-        plumber = Plumber(os.path.join(catalog.catalogPath,
+            plumber = Plumber(os.path.join(catalog.catalogPath,
-                        opts.basename + '.opf'), path_to_output, log, report_progress=notification,
+                            opts.basename + '.opf'), path_to_output, log, report_progress=notification,
-                        abort_after_input_dump=False)
+                            abort_after_input_dump=False)
-        plumber.merge_ui_recommendations(recommendations)
+            plumber.merge_ui_recommendations(recommendations)
-        plumber.run()
+            plumber.run()
            return 0
        else:
            return 1