Revised debug catalog generation to use initialize_container; refactored the initial database fetch of titles and all subsequent access to it.

This commit is contained in:
GRiker 2012-09-06 09:49:55 -06:00
parent 1b6ee88d8f
commit bbce378f13
2 changed files with 89 additions and 94 deletions

View File

@ -412,10 +412,15 @@ class EPUB_MOBI(CatalogPlugin):
pass pass
if GENERATE_DEBUG_EPUB: if GENERATE_DEBUG_EPUB:
from calibre.ebooks.epub import initialize_container
from calibre.ebooks.tweak import zip_rebuilder from calibre.ebooks.tweak import zip_rebuilder
from calibre.utils.zipfile import ZipFile
input_path = os.path.join(catalog_debug_path,'input') input_path = os.path.join(catalog_debug_path,'input')
shutil.copy(P('catalog/mimetype'),input_path) epub_shell = os.path.join(catalog_debug_path,'epub_shell.zip')
shutil.copytree(P('catalog/META-INF'),os.path.join(input_path,'META-INF')) initialize_container(epub_shell, opf_name='content.opf')
with ZipFile(epub_shell, 'r') as zf:
zf.extractall(path=input_path)
os.remove(epub_shell)
zip_rebuilder(input_path, os.path.join(catalog_debug_path,'input.epub')) zip_rebuilder(input_path, os.path.join(catalog_debug_path,'input.epub'))
# returns to gui2.actions.catalog:catalog_generated() # returns to gui2.actions.catalog:catalog_generated()

View File

@ -5,6 +5,7 @@ __copyright__ = '2010, Greg Riker'
import datetime, htmlentitydefs, os, re, shutil, unicodedata, zlib import datetime, htmlentitydefs, os, re, shutil, unicodedata, zlib
from copy import deepcopy from copy import deepcopy
from operator import itemgetter
from xml.sax.saxutils import escape from xml.sax.saxutils import escape
from calibre import (prepare_string_for_xml, strftime, force_unicode) from calibre import (prepare_string_for_xml, strftime, force_unicode)
@ -56,15 +57,6 @@ class CatalogBuilder(object):
""" property decorators for attributes """ """ property decorators for attributes """
if True: if True:
''' directory to store cached thumbs ''' ''' directory to store cached thumbs '''
@property @property
def cache_dir(self): def cache_dir(self):
@ -102,10 +94,6 @@ class CatalogBuilder(object):
def generate_recently_read(self): def generate_recently_read(self):
return self.__generate_recently_read return self.__generate_recently_read
''' additional field to include before/after comments ''' ''' additional field to include before/after comments '''
@property @property
def merge_comments_rule(self): def merge_comments_rule(self):
@ -128,9 +116,6 @@ class CatalogBuilder(object):
def plugin(self): def plugin(self):
return self.__plugin return self.__plugin
''' Progress Reporter for Jobs ''' ''' Progress Reporter for Jobs '''
@property @property
def reporter(self): def reporter(self):
@ -199,6 +184,7 @@ class CatalogBuilder(object):
self.__stylesheet = stylesheet self.__stylesheet = stylesheet
self.__cache_dir = os.path.join(config_dir, 'caches', 'catalog') self.__cache_dir = os.path.join(config_dir, 'caches', 'catalog')
self.__catalog_path = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='') self.__catalog_path = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='')
self.__excluded_tags = self.get_excluded_tags()
self.__generate_for_kindle = True if (_opts.fmt == 'mobi' and self.__generate_for_kindle = True if (_opts.fmt == 'mobi' and
_opts.output_profile and _opts.output_profile and
_opts.output_profile.startswith("kindle")) else False _opts.output_profile.startswith("kindle")) else False
@ -221,12 +207,13 @@ class CatalogBuilder(object):
self.books_by_title = None self.books_by_title = None
''' list of books in series, without series prefix ''' ''' list of books in series, without series prefix '''
self.books_by_title_no_series_prefix = None self.books_by_title_no_series_prefix = None
''' Initial list of books to catalog from which all sections are built '''
self.books_to_catalog = None
self.__content_dir = os.path.join(self.catalog_path, "content") self.__content_dir = os.path.join(self.catalog_path, "content")
''' track Job progress ''' ''' track Job progress '''
self.current_step = 0.0 self.current_step = 0.0
''' cumulative error messages to report at conclusion ''' ''' cumulative error messages to report at conclusion '''
self.error = [] self.error = []
self.__excluded_tags = self.get_excluded_tags()
self.__generate_recently_read = True if (_opts.generate_recently_added and self.__generate_recently_read = True if (_opts.generate_recently_added and
_opts.connected_kindle and _opts.connected_kindle and
self.generate_for_kindle) else False self.generate_for_kindle) else False
@ -262,6 +249,7 @@ class CatalogBuilder(object):
self.total_steps = 6.0 self.total_steps = 6.0
self.__use_series_prefix_in_titles_section = False self.__use_series_prefix_in_titles_section = False
self.books_to_catalog = self.fetch_books_to_catalog()
self.compute_total_steps() self.compute_total_steps()
self.calculate_thumbnail_dimensions() self.calculate_thumbnail_dimensions()
self.confirm_thumbs_archive() self.confirm_thumbs_archive()
@ -343,6 +331,15 @@ class CatalogBuilder(object):
series_index) series_index)
return key return key
def _kf_books_by_series_sorter(self, book):
    """ Key function for sorting books by series, then series index.

    Builds '<series sort title> <series_index>', where series_index is
    rendered zero-padded as NNNN.FFFF (four integer digits, four decimal
    places) so that string comparison orders non-negative indexes
    below 10000 numerically.

    Args:
     book (dict): must provide 'series' and a numeric 'series_index'

    Return:
     (str): sort key, e.g. '<series sort title> 0002.5000'
    """
    # Format the whole index in a single pass. Formatting the integer and
    # fractional parts separately produced a malformed key whenever the
    # fraction rounded up to 1.0000 (1.99996 -> '00011.0000' rather than
    # '0002.0000'). Width 9 = 4 integer digits + '.' + 4 decimals.
    series_index = '%09.4f' % book['series_index']
    return '%s %s' % (self.generate_sort_title(book['series']), series_index)
""" Methods """ """ Methods """
def build_sources(self): def build_sources(self):
@ -614,7 +611,7 @@ class CatalogBuilder(object):
annoyance for EPUB. annoyance for EPUB.
Inputs: Inputs:
self.books_by_title (list): list of books to catalog self.books_to_catalog (list): list of books to catalog
Output: Output:
self.books_by_author (list): sorted by author self.books_by_author (list): sorted by author
@ -623,7 +620,7 @@ class CatalogBuilder(object):
AuthorSortMismatchException: author_sort mismatch detected AuthorSortMismatchException: author_sort mismatch detected
""" """
self.books_by_author = sorted(list(self.books_by_title), key=self._kf_books_by_author_sorter_author) self.books_by_author = sorted(list(self.books_to_catalog), key=self._kf_books_by_author_sorter_author)
authors = [(record['author'], record['author_sort']) for record in self.books_by_author] authors = [(record['author'], record['author_sort']) for record in self.books_by_author]
current_author = authors[0] current_author = authors[0]
for (i,author) in enumerate(authors): for (i,author) in enumerate(authors):
@ -671,7 +668,7 @@ class CatalogBuilder(object):
None: no match None: no match
""" """
def _log_prefix_rule_match_info(rule, record): def _log_prefix_rule_match_info(rule, record):
self.opts.log.info(" %s '%s' by %s (Prefix rule '%s')" % self.opts.log.info(" %s '%s' by %s (Prefix rule '%s')" %
(rule['prefix'],record['title'], (rule['prefix'],record['title'],
record['authors'][0], rule['name'])) record['authors'][0], rule['name']))
@ -770,7 +767,7 @@ class CatalogBuilder(object):
to self.authors. to self.authors.
Inputs: Inputs:
self.books_by_title (list): database, sorted by title self.books_to_catalog (list): database, sorted by title
Outputs: Outputs:
books_by_author: database, sorted by author books_by_author: database, sorted by author
@ -790,7 +787,7 @@ class CatalogBuilder(object):
# Determine the longest author_sort length before sorting # Determine the longest author_sort length before sorting
asl = [i['author_sort'] for i in self.books_by_author] asl = [i['author_sort'] for i in self.books_by_author]
las = max(asl, key=len) las = max(asl, key=len)
self.books_by_author = sorted(self.books_by_author, self.books_by_author = sorted(self.books_to_catalog,
key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las)))) key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las))))
if self.DEBUG and self.opts.verbose: if self.DEBUG and self.opts.verbose:
@ -843,9 +840,42 @@ class CatalogBuilder(object):
return True return True
def fetch_books_by_title(self):
    """ Generate a list of books sorted by title.

    Sort self.books_to_catalog on each book's upper-cased 'title_sort'
    (passed through sort_key()) and store the result in
    self.books_by_title.

    Inputs:
     self.books_to_catalog (list): book dicts providing 'title' and
                                   'title_sort'

    Outputs:
     self.books_by_title (list): the same books, sorted by title

    Return:
     None

    Raises:
     EmptyCatalogException: self.books_to_catalog is empty (or None).
                            The message is logged and appended to
                            self.error before raising.
    """
    self.update_progress_full_step(_("Sorting titles"))

    # Guard clause: with nothing to sort there is nothing to catalog.
    if not self.books_to_catalog:
        error_msg = _("No books to catalog.\nCheck 'Excluded books' rules in E-book options.\n")
        self.opts.log.error('*** ' + error_msg + ' ***')
        self.error.append(_('No books available to include in catalog'))
        self.error.append(error_msg)
        # Call form of raise: same exception as `raise X, msg`, but valid
        # syntax on both Python 2 and Python 3.
        raise EmptyCatalogException(error_msg)

    # Re-sort based on title_sort
    self.books_by_title = sorted(self.books_to_catalog,
                                 key=lambda x: sort_key(x['title_sort'].upper()))

    if self.DEBUG and self.opts.verbose:
        self.opts.log.info("fetch_books_by_title(): %d books" % len(self.books_by_title))
        self.opts.log.info(" %-40s %-40s" % ('title', 'title_sort'))
        for title in self.books_by_title:
            self.opts.log.info((u" %-40s %-40s" % (title['title'][0:40],
                                                   title['title_sort'][0:40])).encode('utf-8'))
def fetch_books_to_catalog(self):
""" Populate self.books_to_catalog from database
Create self.books_to_catalog from filtered database.
Keys: Keys:
authors massaged authors massaged
author_sort record['author_sort'] or computed author_sort record['author_sort'] or computed
@ -871,7 +901,7 @@ class CatalogBuilder(object):
data (list): filtered list of book metadata dicts data (list): filtered list of book metadata dicts
Outputs: Outputs:
(list) books_by_title (list) books_to_catalog
Returns: Returns:
True: Successful True: Successful
@ -980,7 +1010,6 @@ class CatalogBuilder(object):
return this_title return this_title
# Entry point # Entry point
self.update_progress_full_step(_("Fetching database"))
self.opts.sort_by = 'title' self.opts.sort_by = 'title'
search_phrase = '' search_phrase = ''
@ -1003,28 +1032,15 @@ class CatalogBuilder(object):
data = self.plugin.search_sort_db(self.db, self.opts) data = self.plugin.search_sort_db(self.db, self.opts)
data = self.process_exclusions(data) data = self.process_exclusions(data)
if self.opts.verbose and self.prefix_rules:
self.opts.log.info(" Added prefixes:")
# Populate this_title{} from data[{},{}] # Populate this_title{} from data[{},{}]
titles = [] titles = []
for record in data: for record in data:
this_title = _populate_title(record) this_title = _populate_title(record)
titles.append(this_title) titles.append(this_title)
return titles
# Re-sort based on title_sort
if len(titles):
self.books_by_title = sorted(titles, key=lambda x: sort_key(x['title_sort'].upper()))
if self.DEBUG and self.opts.verbose:
self.opts.log.info("fetch_books_by_title(): %d books" % len(self.books_by_title))
self.opts.log.info(" %-40s %-40s" % ('title', 'title_sort'))
for title in self.books_by_title:
self.opts.log.info((u" %-40s %-40s" % (title['title'][0:40],
title['title_sort'][0:40])).encode('utf-8'))
else:
error_msg = _("No books to catalog.\nCheck 'Excluded books' rules in E-book options.\n")
self.opts.log.error('*** ' + error_msg + ' ***')
self.error.append(_('No books available to include in catalog'))
self.error.append(error_msg)
raise EmptyCatalogException, error_msg
def fetch_bookmarks(self): def fetch_bookmarks(self):
""" Interrogate connected Kindle for bookmarks. """ Interrogate connected Kindle for bookmarks.
@ -1104,7 +1120,7 @@ class CatalogBuilder(object):
d.initialize(self.opts.connected_device['save_template']) d.initialize(self.opts.connected_device['save_template'])
bookmarks = {} bookmarks = {}
for book in self.books_by_title: for book in self.books_to_catalog:
if 'formats' in book: if 'formats' in book:
path_map = {} path_map = {}
id = book['id'] id = book['id']
@ -1148,7 +1164,7 @@ class CatalogBuilder(object):
genre_tags_dict (dict): dict of filtered, normalized tags in data set genre_tags_dict (dict): dict of filtered, normalized tags in data set
""" """
def _format_tag_list(tags, indent=5, line_break=70, header='Tag list'): def _format_tag_list(tags, indent=2, line_break=70, header='Tag list'):
def _next_tag(sorted_tags): def _next_tag(sorted_tags):
for (i, tag) in enumerate(sorted_tags): for (i, tag) in enumerate(sorted_tags):
if i < len(tags) - 1: if i < len(tags) - 1:
@ -1541,7 +1557,7 @@ class CatalogBuilder(object):
def generate_html_by_date_added(self): def generate_html_by_date_added(self):
""" Generate content/ByDateAdded.html. """ Generate content/ByDateAdded.html.
Loop through self.books_by_title sorted by reverse date, generate HTML. Loop through self.books_to_catalog sorted by reverse date, generate HTML.
Input: Input:
books_by_title (list): books, sorted by title books_by_title (list): books, sorted by title
@ -1735,10 +1751,10 @@ class CatalogBuilder(object):
# >>> Books by date range <<< # >>> Books by date range <<<
if self.use_series_prefix_in_titles_section: if self.use_series_prefix_in_titles_section:
self.books_by_date_range = sorted(self.books_by_title, self.books_by_date_range = sorted(self.books_to_catalog,
key=lambda x:(x['timestamp'], x['timestamp']),reverse=True) key=lambda x:(x['timestamp'], x['timestamp']),reverse=True)
else: else:
nspt = deepcopy(self.books_by_title) nspt = deepcopy(self.books_to_catalog)
self.books_by_date_range = sorted(nspt, key=lambda x:(x['timestamp'], x['timestamp']),reverse=True) self.books_by_date_range = sorted(nspt, key=lambda x:(x['timestamp'], x['timestamp']),reverse=True)
date_range_list = [] date_range_list = []
@ -1763,7 +1779,7 @@ class CatalogBuilder(object):
# >>>> Books by month <<<< # >>>> Books by month <<<<
# Sort titles case-insensitive for by month using series prefix # Sort titles case-insensitive for by month using series prefix
self.books_by_month = sorted(self.books_by_title, self.books_by_month = sorted(self.books_to_catalog,
key=lambda x:(x['timestamp'], x['timestamp']),reverse=True) key=lambda x:(x['timestamp'], x['timestamp']),reverse=True)
# Loop through books by date # Loop through books by date
@ -2026,12 +2042,12 @@ class CatalogBuilder(object):
if self.opts.verbose: if self.opts.verbose:
if len(genre_list): if len(genre_list):
self.opts.log.info(" Genre summary: %d active genre tags used in generating catalog with %d titles" % self.opts.log.info(" Genre summary: %d active genre tags used in generating catalog with %d titles" %
(len(genre_list), len(self.books_by_title))) (len(genre_list), len(self.books_to_catalog)))
for genre in genre_list: for genre in genre_list:
for key in genre: for key in genre:
self.opts.log.info(" %s: %d %s" % (self.get_friendly_genre_tag(key), self.opts.log.info(" %s: %d %s" % (self.get_friendly_genre_tag(key),
len(genre[key]), len(genre[key]),
'titles' if len(genre[key]) > 1 else 'title')) 'titles' if len(genre[key]) > 1 else 'title'))
@ -2226,48 +2242,28 @@ class CatalogBuilder(object):
Output: Output:
content/BySeries.html (file) content/BySeries.html (file)
To do:
self.books_by_series = [i for i in self.books_by_title if i['series']]
""" """
friendly_name = _("Series") friendly_name = _("Series")
self.update_progress_full_step("%s HTML" % friendly_name) self.update_progress_full_step("%s HTML" % friendly_name)
self.opts.sort_by = 'series' self.opts.sort_by = 'series'
# Merge self.excluded_tags with opts.search_text # *** Convert the existing database, resort by series/index ***
# Updated to use exact match syntax self.books_by_series = [i for i in self.books_to_catalog if i['series']]
self.books_by_series = sorted(self.books_by_series, key=lambda x: sort_key(self._kf_books_by_series_sorter(x)))
search_phrase = 'series:true '
if self.excluded_tags:
search_terms = []
for tag in self.excluded_tags:
search_terms.append("tag:=%s" % tag)
search_phrase += "not (%s)" % " or ".join(search_terms)
# If a list of ids are provided, don't use search_text
if self.opts.ids:
self.opts.search_text = search_phrase
else:
if self.opts.search_text:
self.opts.search_text += " " + search_phrase
else:
self.opts.search_text = search_phrase
# Fetch the database as a dictionary
data = self.plugin.search_sort_db(self.db, self.opts)
# Remove exclusions
self.books_by_series = self.process_exclusions(data, log_exclusion=False)
if not self.books_by_series: if not self.books_by_series:
self.opts.generate_series = False self.opts.generate_series = False
self.opts.log(" no series found in selected books, cancelling series generation") self.opts.log(" no series found in selected books, skipping Series section")
return return
# Generate series_sort # Generate series_sort
for book in self.books_by_series: for book in self.books_by_series:
book['series_sort'] = self.generate_sort_title(book['series']) book['series_sort'] = self.generate_sort_title(book['series'])
# Establish initial letter equivalencies
sort_equivalents = self.establish_equivalencies(self.books_by_series, key='series_sort')
soup = self.generate_html_empty_header(friendly_name) soup = self.generate_html_empty_header(friendly_name)
body = soup.find('body') body = soup.find('body')
@ -2277,9 +2273,6 @@ class CatalogBuilder(object):
current_letter = "" current_letter = ""
current_series = None current_series = None
# Establish initial letter equivalencies
sort_equivalents = self.establish_equivalencies(self.books_by_series, key='series_sort')
# Loop through books_by_series # Loop through books_by_series
series_count = 0 series_count = 0
for idx, book in enumerate(self.books_by_series): for idx, book in enumerate(self.books_by_series):
@ -2335,11 +2328,6 @@ class CatalogBuilder(object):
# Use series, series index if avail else just title # Use series, series index if avail else just title
#aTag.insert(0,'%d. %s &middot; %s' % (book['series_index'],escape(book['title']), ' & '.join(book['authors']))) #aTag.insert(0,'%d. %s &middot; %s' % (book['series_index'],escape(book['title']), ' & '.join(book['authors'])))
if is_date_undefined(book['pubdate']):
book['date'] = None
else:
book['date'] = strftime(u'%B %Y', book['pubdate'].timetuple())
args = self.generate_format_args(book) args = self.generate_format_args(book)
formatted_title = self.by_series_title_template.format(**args).rstrip() formatted_title = self.by_series_title_template.format(**args).rstrip()
aTag.insert(0,NavigableString(escape(formatted_title))) aTag.insert(0,NavigableString(escape(formatted_title)))
@ -2438,7 +2426,7 @@ class CatalogBuilder(object):
# Re-sort title list without leading series/series_index # Re-sort title list without leading series/series_index
# Incoming title <series> <series_index>: <title> # Incoming title <series> <series_index>: <title>
if not self.use_series_prefix_in_titles_section: if not self.use_series_prefix_in_titles_section:
nspt = deepcopy(self.books_by_title) nspt = deepcopy(self.books_to_catalog)
nspt = sorted(nspt, key=lambda x: sort_key(x['title_sort'].upper())) nspt = sorted(nspt, key=lambda x: sort_key(x['title_sort'].upper()))
self.books_by_title_no_series_prefix = nspt self.books_by_title_no_series_prefix = nspt
@ -4339,7 +4327,7 @@ class CatalogBuilder(object):
# Report excluded books # Report excluded books
if self.opts.verbose and excluded_tags: if self.opts.verbose and excluded_tags:
self.opts.log.info(" Excluded books by Tags:") self.opts.log.info(" Excluded books:")
data = self.db.get_data_as_dict(ids=self.opts.ids) data = self.db.get_data_as_dict(ids=self.opts.ids)
for record in data: for record in data:
matched = list(set(record['tags']) & set(excluded_tags)) matched = list(set(record['tags']) & set(excluded_tags))
@ -4632,7 +4620,7 @@ class CatalogBuilder(object):
normalized += c normalized += c
return normalized return normalized
def process_exclusions(self, data_set, log_exclusion=True): def process_exclusions(self, data_set):
""" Filter data_set based on exclusion_rules. """ Filter data_set based on exclusion_rules.
Compare each book in data_set to each exclusion_rule. Remove Compare each book in data_set to each exclusion_rule. Remove
@ -4666,16 +4654,18 @@ class CatalogBuilder(object):
matched = re.search(pat, unicode(field_contents), matched = re.search(pat, unicode(field_contents),
re.IGNORECASE) re.IGNORECASE)
if matched is not None: if matched is not None:
if self.opts.verbose and log_exclusion: if self.opts.verbose:
field_md = self.db.metadata_for_field(field) field_md = self.db.metadata_for_field(field)
for rule in self.opts.exclusion_rules: for rule in self.opts.exclusion_rules:
if rule[1] == '#%s' % field_md['label']: if rule[1] == '#%s' % field_md['label']:
self.opts.log.info(" - '%s' by %s (Exclusion rule '%s')" % self.opts.log.info(" - '%s' by %s (Exclusion rule '%s')" %
(record['title'], record['authors'][0], rule[0])) (record['title'], record['authors'][0], rule[0]))
exclusion_set.append(record) exclusion_set.append(record)
if record in filtered_data_set: if record in filtered_data_set:
filtered_data_set.remove(record) filtered_data_set.remove(record)
break break
else:
filtered_data_set.append(record)
else: else:
if (record not in filtered_data_set and if (record not in filtered_data_set and
record not in exclusion_set): record not in exclusion_set):