Revised debug catalog generation to use initialize_container; refactored the initial database fetch of titles and the code that subsequently accesses the fetched list.

2025-07-09 03:04:10 -04:00 · 2012-09-06 09:49:55 -06:00 · 2012-09-06 09:49:55 -06:00 · bbce378f13
commit bbce378f13
parent 1b6ee88d8f
2 changed files with 89 additions and 94 deletions
--- a/src/calibre/library/catalogs/epub_mobi.py
+++ b/src/calibre/library/catalogs/epub_mobi.py
@ -412,10 +412,15 @@ class EPUB_MOBI(CatalogPlugin):
                pass
            if GENERATE_DEBUG_EPUB:
                from calibre.ebooks.epub import initialize_container
                from calibre.ebooks.tweak import zip_rebuilder
                from calibre.utils.zipfile import ZipFile
                input_path = os.path.join(catalog_debug_path,'input')
-                shutil.copy(P('catalog/mimetype'),input_path)
+                epub_shell = os.path.join(catalog_debug_path,'epub_shell.zip')
-                shutil.copytree(P('catalog/META-INF'),os.path.join(input_path,'META-INF'))
+                initialize_container(epub_shell, opf_name='content.opf')
                with ZipFile(epub_shell, 'r') as zf:
                    zf.extractall(path=input_path)
                os.remove(epub_shell)
                zip_rebuilder(input_path, os.path.join(catalog_debug_path,'input.epub'))
        # returns to gui2.actions.catalog:catalog_generated()
--- a/src/calibre/library/catalogs/epub_mobi_builder.py
+++ b/src/calibre/library/catalogs/epub_mobi_builder.py
@ -5,6 +5,7 @@ __copyright__ = '2010, Greg Riker'
 import datetime, htmlentitydefs, os, re, shutil, unicodedata, zlib
 from copy import deepcopy
 from operator import itemgetter
 from xml.sax.saxutils import escape
 from calibre import (prepare_string_for_xml, strftime, force_unicode)
@ -56,15 +57,6 @@ class CatalogBuilder(object):
    """ property decorators for attributes """
    if True:
        ''' directory to store cached thumbs '''
        @property
        def cache_dir(self):
@ -102,10 +94,6 @@ class CatalogBuilder(object):
        def generate_recently_read(self):
            return self.__generate_recently_read
        ''' additional field to include before/after comments '''
        @property
        def merge_comments_rule(self):
@ -128,9 +116,6 @@ class CatalogBuilder(object):
        def plugin(self):
            return self.__plugin
        ''' Progress Reporter for Jobs '''
        @property
        def reporter(self):
@ -199,6 +184,7 @@ class CatalogBuilder(object):
        self.__stylesheet = stylesheet
        self.__cache_dir = os.path.join(config_dir, 'caches', 'catalog')
        self.__catalog_path = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='')
        self.__excluded_tags = self.get_excluded_tags()
        self.__generate_for_kindle = True if (_opts.fmt == 'mobi' and
                                              _opts.output_profile and
                                              _opts.output_profile.startswith("kindle")) else False
@ -221,12 +207,13 @@ class CatalogBuilder(object):
        self.books_by_title = None
        ''' list of books in series, without series prefix '''
        self.books_by_title_no_series_prefix = None
        ''' Initial list of books to catalog from which all sections are built '''
        self.books_to_catalog = None
        self.__content_dir = os.path.join(self.catalog_path, "content")
        ''' track Job progress '''
        self.current_step = 0.0
        ''' cumulative error messages to report at conclusion  '''
        self.error = []
        self.__excluded_tags = self.get_excluded_tags()
        self.__generate_recently_read = True if (_opts.generate_recently_added and
                                                 _opts.connected_kindle and
                                                 self.generate_for_kindle) else False
@ -262,6 +249,7 @@ class CatalogBuilder(object):
        self.total_steps = 6.0
        self.__use_series_prefix_in_titles_section = False
        self.books_to_catalog = self.fetch_books_to_catalog()
        self.compute_total_steps()
        self.calculate_thumbnail_dimensions()
        self.confirm_thumbs_archive()
@ -343,6 +331,15 @@ class CatalogBuilder(object):
                            series_index)
        return key
    def _kf_books_by_series_sorter(self, book):
        index = book['series_index']
        integer = int(index)
        fraction = index-integer
        series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0'))
        key = '%s %s' % (self.generate_sort_title(book['series']),
                         series_index)
        return key
    """ Methods """
    def build_sources(self):
@ -614,7 +611,7 @@ class CatalogBuilder(object):
        annoyance for EPUB.
        Inputs:
-         self.books_by_title (list): list of books to catalog
+         self.books_to_catalog (list): list of books to catalog
        Output:
         self.books_by_author (list): sorted by author
@ -623,7 +620,7 @@ class CatalogBuilder(object):
         AuthorSortMismatchException: author_sort mismatch detected
        """
-        self.books_by_author = sorted(list(self.books_by_title), key=self._kf_books_by_author_sorter_author)
+        self.books_by_author = sorted(list(self.books_to_catalog), key=self._kf_books_by_author_sorter_author)
        authors = [(record['author'], record['author_sort']) for record in self.books_by_author]
        current_author = authors[0]
        for (i,author) in enumerate(authors):
@ -671,7 +668,7 @@ class CatalogBuilder(object):
         None: no match
        """
        def _log_prefix_rule_match_info(rule, record):
-            self.opts.log.info("     %s '%s' by %s (Prefix rule '%s')" %
+            self.opts.log.info("  %s '%s' by %s (Prefix rule '%s')" %
                               (rule['prefix'],record['title'],
                                record['authors'][0], rule['name']))
@ -770,7 +767,7 @@ class CatalogBuilder(object):
        to self.authors.
        Inputs:
-         self.books_by_title (list): database, sorted by title
+         self.books_to_catalog (list): database, sorted by title
        Outputs:
         books_by_author: database, sorted by author
@ -790,7 +787,7 @@ class CatalogBuilder(object):
        # Determine the longest author_sort length before sorting
        asl = [i['author_sort'] for i in self.books_by_author]
        las = max(asl, key=len)
-        self.books_by_author = sorted(self.books_by_author,
+        self.books_by_author = sorted(self.books_to_catalog,
            key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las))))
        if self.DEBUG and self.opts.verbose:
@ -843,9 +840,42 @@ class CatalogBuilder(object):
        return True
    def fetch_books_by_title(self):
-        """ Populate self.books_by_title from database
+        """ Generate a list of books sorted by title.
-        Create self.books_by_title from filtered database.
+        Sort the database by title.
        Inputs:
         self.books_to_catalog (list): database
        Outputs:
         books_by_title: database, sorted by title
        Return:
         True: no errors
         False: author_sort mismatch detected while building MOBI
        """
        self.update_progress_full_step(_("Sorting titles"))
        # Re-sort based on title_sort
        if len(self.books_to_catalog):
            self.books_by_title = sorted(self.books_to_catalog, key=lambda x: sort_key(x['title_sort'].upper()))
            if self.DEBUG and self.opts.verbose:
                self.opts.log.info("fetch_books_by_title(): %d books" % len(self.books_by_title))
                self.opts.log.info(" %-40s %-40s" % ('title', 'title_sort'))
                for title in self.books_by_title:
                    self.opts.log.info((u" %-40s %-40s" % (title['title'][0:40],
                                                            title['title_sort'][0:40])).encode('utf-8'))
        else:
            error_msg = _("No books to catalog.\nCheck 'Excluded books' rules in E-book options.\n")
            self.opts.log.error('*** ' + error_msg + ' ***')
            self.error.append(_('No books available to include in catalog'))
            self.error.append(error_msg)
            raise EmptyCatalogException, error_msg
    def fetch_books_to_catalog(self):
        """ Populate self.books_to_catalog from database
        Create self.books_to_catalog from filtered database.
        Keys:
         authors            massaged
         author_sort        record['author_sort'] or computed
@ -871,7 +901,7 @@ class CatalogBuilder(object):
         data (list): filtered list of book metadata dicts
        Outputs:
-         (list) books_by_title
+         (list) books_to_catalog
        Returns:
         True: Successful
@ -980,7 +1010,6 @@ class CatalogBuilder(object):
            return this_title
        # Entry point
        self.update_progress_full_step(_("Fetching database"))
        self.opts.sort_by = 'title'
        search_phrase = ''
@ -1003,28 +1032,15 @@ class CatalogBuilder(object):
        data = self.plugin.search_sort_db(self.db, self.opts)
        data = self.process_exclusions(data)
        if self.opts.verbose and self.prefix_rules:
            self.opts.log.info(" Added prefixes:")
        # Populate this_title{} from data[{},{}]
        titles = []
        for record in data:
            this_title = _populate_title(record)
            titles.append(this_title)
-
+        return titles
        # Re-sort based on title_sort
        if len(titles):
            self.books_by_title = sorted(titles, key=lambda x: sort_key(x['title_sort'].upper()))
            if self.DEBUG and self.opts.verbose:
                self.opts.log.info("fetch_books_by_title(): %d books" % len(self.books_by_title))
                self.opts.log.info(" %-40s %-40s" % ('title', 'title_sort'))
                for title in self.books_by_title:
                    self.opts.log.info((u" %-40s %-40s" % (title['title'][0:40],
                                                            title['title_sort'][0:40])).encode('utf-8'))
        else:
            error_msg = _("No books to catalog.\nCheck 'Excluded books' rules in E-book options.\n")
            self.opts.log.error('*** ' + error_msg + ' ***')
            self.error.append(_('No books available to include in catalog'))
            self.error.append(error_msg)
            raise EmptyCatalogException, error_msg
    def fetch_bookmarks(self):
        """ Interrogate connected Kindle for bookmarks.
@ -1104,7 +1120,7 @@ class CatalogBuilder(object):
            d.initialize(self.opts.connected_device['save_template'])
            bookmarks = {}
-            for book in self.books_by_title:
+            for book in self.books_to_catalog:
                if 'formats' in book:
                    path_map = {}
                    id = book['id']
@ -1148,7 +1164,7 @@ class CatalogBuilder(object):
         genre_tags_dict (dict): dict of filtered, normalized tags in data set
        """
-        def _format_tag_list(tags, indent=5, line_break=70, header='Tag list'):
+        def _format_tag_list(tags, indent=2, line_break=70, header='Tag list'):
            def _next_tag(sorted_tags):
                for (i, tag) in enumerate(sorted_tags):
                    if i < len(tags) - 1:
@ -1541,7 +1557,7 @@ class CatalogBuilder(object):
    def generate_html_by_date_added(self):
        """ Generate content/ByDateAdded.html.
-        Loop through self.books_by_title sorted by reverse date, generate HTML.
+        Loop through self.books_to_catalog sorted by reverse date, generate HTML.
        Input:
         books_by_title (list): books, sorted by title
@ -1735,10 +1751,10 @@ class CatalogBuilder(object):
        # >>> Books by date range <<<
        if self.use_series_prefix_in_titles_section:
-            self.books_by_date_range = sorted(self.books_by_title,
+            self.books_by_date_range = sorted(self.books_to_catalog,
                                key=lambda x:(x['timestamp'], x['timestamp']),reverse=True)
        else:
-            nspt = deepcopy(self.books_by_title)
+            nspt = deepcopy(self.books_to_catalog)
            self.books_by_date_range = sorted(nspt, key=lambda x:(x['timestamp'], x['timestamp']),reverse=True)
        date_range_list = []
@ -1763,7 +1779,7 @@ class CatalogBuilder(object):
        # >>>> Books by month <<<<
        # Sort titles case-insensitive for by month using series prefix
-        self.books_by_month = sorted(self.books_by_title,
+        self.books_by_month = sorted(self.books_to_catalog,
                                key=lambda x:(x['timestamp'], x['timestamp']),reverse=True)
        # Loop through books by date
@ -2026,12 +2042,12 @@ class CatalogBuilder(object):
        if self.opts.verbose:
            if len(genre_list):
-                self.opts.log.info("     Genre summary: %d active genre tags used in generating catalog with %d titles" %
+                self.opts.log.info("  Genre summary: %d active genre tags used in generating catalog with %d titles" %
-                                (len(genre_list), len(self.books_by_title)))
+                                (len(genre_list), len(self.books_to_catalog)))
                for genre in genre_list:
                    for key in genre:
-                        self.opts.log.info("      %s: %d %s" % (self.get_friendly_genre_tag(key),
+                        self.opts.log.info("   %s: %d %s" % (self.get_friendly_genre_tag(key),
                                            len(genre[key]),
                                            'titles' if len(genre[key]) > 1 else 'title'))
@ -2226,48 +2242,28 @@ class CatalogBuilder(object):
        Output:
         content/BySeries.html (file)
        To do:
         self.books_by_series = [i for i in self.books_by_title if i['series']]
        """
        friendly_name = _("Series")
        self.update_progress_full_step("%s HTML" % friendly_name)
        self.opts.sort_by = 'series'
-        # Merge self.excluded_tags with opts.search_text
+        # *** Convert the existing database, resort by series/index ***
-        # Updated to use exact match syntax
+        self.books_by_series = [i for i in self.books_to_catalog if i['series']]
-
+        self.books_by_series = sorted(self.books_by_series, key=lambda x: sort_key(self._kf_books_by_series_sorter(x)))
        search_phrase = 'series:true '
        if self.excluded_tags:
            search_terms = []
            for tag in self.excluded_tags:
                search_terms.append("tag:=%s" % tag)
            search_phrase += "not (%s)" % " or ".join(search_terms)
        # If a list of ids are provided, don't use search_text
        if self.opts.ids:
            self.opts.search_text = search_phrase
        else:
            if self.opts.search_text:
                self.opts.search_text += " " + search_phrase
            else:
                self.opts.search_text = search_phrase
        # Fetch the database as a dictionary
        data = self.plugin.search_sort_db(self.db, self.opts)
        # Remove exclusions
        self.books_by_series = self.process_exclusions(data, log_exclusion=False)
        if not self.books_by_series:
            self.opts.generate_series = False
-            self.opts.log(" no series found in selected books, cancelling series generation")
+            self.opts.log("  no series found in selected books, skipping Series section")
            return
        # Generate series_sort
        for book in self.books_by_series:
            book['series_sort'] = self.generate_sort_title(book['series'])
        # Establish initial letter equivalencies
        sort_equivalents = self.establish_equivalencies(self.books_by_series, key='series_sort')
        soup = self.generate_html_empty_header(friendly_name)
        body = soup.find('body')
@ -2277,9 +2273,6 @@ class CatalogBuilder(object):
        current_letter = ""
        current_series = None
        # Establish initial letter equivalencies
        sort_equivalents = self.establish_equivalencies(self.books_by_series, key='series_sort')
        # Loop through books_by_series
        series_count = 0
        for idx, book in enumerate(self.books_by_series):
@ -2335,11 +2328,6 @@ class CatalogBuilder(object):
            # Use series, series index if avail else just title
            #aTag.insert(0,'%d. %s &middot; %s' % (book['series_index'],escape(book['title']), ' & '.join(book['authors'])))
            if is_date_undefined(book['pubdate']):
                book['date'] = None
            else:
                book['date'] = strftime(u'%B %Y', book['pubdate'].timetuple())
            args = self.generate_format_args(book)
            formatted_title = self.by_series_title_template.format(**args).rstrip()
            aTag.insert(0,NavigableString(escape(formatted_title)))
@ -2438,7 +2426,7 @@ class CatalogBuilder(object):
        # Re-sort title list without leading series/series_index
        # Incoming title <series> <series_index>: <title>
        if not self.use_series_prefix_in_titles_section:
-            nspt = deepcopy(self.books_by_title)
+            nspt = deepcopy(self.books_to_catalog)
            nspt = sorted(nspt, key=lambda x: sort_key(x['title_sort'].upper()))
            self.books_by_title_no_series_prefix = nspt
@ -4339,7 +4327,7 @@ class CatalogBuilder(object):
        # Report excluded books
        if self.opts.verbose and excluded_tags:
-            self.opts.log.info(" Excluded books by Tags:")
+            self.opts.log.info(" Excluded books:")
            data = self.db.get_data_as_dict(ids=self.opts.ids)
            for record in data:
                matched = list(set(record['tags']) & set(excluded_tags))
@ -4632,7 +4620,7 @@ class CatalogBuilder(object):
                    normalized += c
        return normalized
-    def process_exclusions(self, data_set, log_exclusion=True):
+    def process_exclusions(self, data_set):
        """ Filter data_set based on exclusion_rules.
        Compare each book in data_set to each exclusion_rule. Remove
@ -4666,16 +4654,18 @@ class CatalogBuilder(object):
                        matched = re.search(pat, unicode(field_contents),
                                re.IGNORECASE)
                        if matched is not None:
-                            if self.opts.verbose and log_exclusion:
+                            if self.opts.verbose:
                                field_md = self.db.metadata_for_field(field)
                                for rule in self.opts.exclusion_rules:
                                    if rule[1] == '#%s' % field_md['label']:
-                                        self.opts.log.info("     - '%s' by %s (Exclusion rule '%s')" %
+                                        self.opts.log.info("  - '%s' by %s (Exclusion rule '%s')" %
                                            (record['title'], record['authors'][0], rule[0]))
                            exclusion_set.append(record)
                            if record in filtered_data_set:
                                filtered_data_set.remove(record)
                            break
                        else:
                            filtered_data_set.append(record)
                    else:
                        if (record not in filtered_data_set and
                            record not in exclusion_set):