mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Revised debug catalog generation to use initialize_container, refactored initial database fetch of titles and subsequent access.
This commit is contained in:
parent
1b6ee88d8f
commit
bbce378f13
@ -412,10 +412,15 @@ class EPUB_MOBI(CatalogPlugin):
|
||||
pass
|
||||
|
||||
if GENERATE_DEBUG_EPUB:
|
||||
from calibre.ebooks.epub import initialize_container
|
||||
from calibre.ebooks.tweak import zip_rebuilder
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
input_path = os.path.join(catalog_debug_path,'input')
|
||||
shutil.copy(P('catalog/mimetype'),input_path)
|
||||
shutil.copytree(P('catalog/META-INF'),os.path.join(input_path,'META-INF'))
|
||||
epub_shell = os.path.join(catalog_debug_path,'epub_shell.zip')
|
||||
initialize_container(epub_shell, opf_name='content.opf')
|
||||
with ZipFile(epub_shell, 'r') as zf:
|
||||
zf.extractall(path=input_path)
|
||||
os.remove(epub_shell)
|
||||
zip_rebuilder(input_path, os.path.join(catalog_debug_path,'input.epub'))
|
||||
|
||||
# returns to gui2.actions.catalog:catalog_generated()
|
||||
|
@ -5,6 +5,7 @@ __copyright__ = '2010, Greg Riker'
|
||||
|
||||
import datetime, htmlentitydefs, os, re, shutil, unicodedata, zlib
|
||||
from copy import deepcopy
|
||||
from operator import itemgetter
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
from calibre import (prepare_string_for_xml, strftime, force_unicode)
|
||||
@ -56,15 +57,6 @@ class CatalogBuilder(object):
|
||||
|
||||
""" property decorators for attributes """
|
||||
if True:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
''' directory to store cached thumbs '''
|
||||
@property
|
||||
def cache_dir(self):
|
||||
@ -102,10 +94,6 @@ class CatalogBuilder(object):
|
||||
def generate_recently_read(self):
|
||||
return self.__generate_recently_read
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
''' additional field to include before/after comments '''
|
||||
@property
|
||||
def merge_comments_rule(self):
|
||||
@ -128,9 +116,6 @@ class CatalogBuilder(object):
|
||||
def plugin(self):
|
||||
return self.__plugin
|
||||
|
||||
|
||||
|
||||
|
||||
''' Progress Reporter for Jobs '''
|
||||
@property
|
||||
def reporter(self):
|
||||
@ -199,6 +184,7 @@ class CatalogBuilder(object):
|
||||
self.__stylesheet = stylesheet
|
||||
self.__cache_dir = os.path.join(config_dir, 'caches', 'catalog')
|
||||
self.__catalog_path = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='')
|
||||
self.__excluded_tags = self.get_excluded_tags()
|
||||
self.__generate_for_kindle = True if (_opts.fmt == 'mobi' and
|
||||
_opts.output_profile and
|
||||
_opts.output_profile.startswith("kindle")) else False
|
||||
@ -221,12 +207,13 @@ class CatalogBuilder(object):
|
||||
self.books_by_title = None
|
||||
''' list of books in series, without series prefix '''
|
||||
self.books_by_title_no_series_prefix = None
|
||||
''' Initial list of books to catalog from which all sections are built '''
|
||||
self.books_to_catalog = None
|
||||
self.__content_dir = os.path.join(self.catalog_path, "content")
|
||||
''' track Job progress '''
|
||||
self.current_step = 0.0
|
||||
''' cumulative error messages to report at conclusion '''
|
||||
self.error = []
|
||||
self.__excluded_tags = self.get_excluded_tags()
|
||||
self.__generate_recently_read = True if (_opts.generate_recently_added and
|
||||
_opts.connected_kindle and
|
||||
self.generate_for_kindle) else False
|
||||
@ -262,6 +249,7 @@ class CatalogBuilder(object):
|
||||
self.total_steps = 6.0
|
||||
self.__use_series_prefix_in_titles_section = False
|
||||
|
||||
self.books_to_catalog = self.fetch_books_to_catalog()
|
||||
self.compute_total_steps()
|
||||
self.calculate_thumbnail_dimensions()
|
||||
self.confirm_thumbs_archive()
|
||||
@ -343,6 +331,15 @@ class CatalogBuilder(object):
|
||||
series_index)
|
||||
return key
|
||||
|
||||
def _kf_books_by_series_sorter(self, book):
|
||||
index = book['series_index']
|
||||
integer = int(index)
|
||||
fraction = index-integer
|
||||
series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0'))
|
||||
key = '%s %s' % (self.generate_sort_title(book['series']),
|
||||
series_index)
|
||||
return key
|
||||
|
||||
""" Methods """
|
||||
|
||||
def build_sources(self):
|
||||
@ -614,7 +611,7 @@ class CatalogBuilder(object):
|
||||
annoyance for EPUB.
|
||||
|
||||
Inputs:
|
||||
self.books_by_title (list): list of books to catalog
|
||||
self.books_to_catalog (list): list of books to catalog
|
||||
|
||||
Output:
|
||||
self.books_by_author (list): sorted by author
|
||||
@ -623,7 +620,7 @@ class CatalogBuilder(object):
|
||||
AuthorSortMismatchException: author_sort mismatch detected
|
||||
"""
|
||||
|
||||
self.books_by_author = sorted(list(self.books_by_title), key=self._kf_books_by_author_sorter_author)
|
||||
self.books_by_author = sorted(list(self.books_to_catalog), key=self._kf_books_by_author_sorter_author)
|
||||
authors = [(record['author'], record['author_sort']) for record in self.books_by_author]
|
||||
current_author = authors[0]
|
||||
for (i,author) in enumerate(authors):
|
||||
@ -671,7 +668,7 @@ class CatalogBuilder(object):
|
||||
None: no match
|
||||
"""
|
||||
def _log_prefix_rule_match_info(rule, record):
|
||||
self.opts.log.info(" %s '%s' by %s (Prefix rule '%s')" %
|
||||
self.opts.log.info(" %s '%s' by %s (Prefix rule '%s')" %
|
||||
(rule['prefix'],record['title'],
|
||||
record['authors'][0], rule['name']))
|
||||
|
||||
@ -770,7 +767,7 @@ class CatalogBuilder(object):
|
||||
to self.authors.
|
||||
|
||||
Inputs:
|
||||
self.books_by_title (list): database, sorted by title
|
||||
self.books_to_catalog (list): database, sorted by title
|
||||
|
||||
Outputs:
|
||||
books_by_author: database, sorted by author
|
||||
@ -790,7 +787,7 @@ class CatalogBuilder(object):
|
||||
# Determine the longest author_sort length before sorting
|
||||
asl = [i['author_sort'] for i in self.books_by_author]
|
||||
las = max(asl, key=len)
|
||||
self.books_by_author = sorted(self.books_by_author,
|
||||
self.books_by_author = sorted(self.books_to_catalog,
|
||||
key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las))))
|
||||
|
||||
if self.DEBUG and self.opts.verbose:
|
||||
@ -843,9 +840,42 @@ class CatalogBuilder(object):
|
||||
return True
|
||||
|
||||
def fetch_books_by_title(self):
|
||||
""" Populate self.books_by_title from database
|
||||
""" Generate a list of books sorted by title.
|
||||
|
||||
Create self.books_by_title from filtered database.
|
||||
Sort the database by title.
|
||||
|
||||
Inputs:
|
||||
self.books_to_catalog (list): database
|
||||
|
||||
Outputs:
|
||||
books_by_title: database, sorted by title
|
||||
|
||||
Return:
|
||||
True: no errors
|
||||
False: author_sort mismatch detected while building MOBI
|
||||
"""
|
||||
self.update_progress_full_step(_("Sorting titles"))
|
||||
# Re-sort based on title_sort
|
||||
if len(self.books_to_catalog):
|
||||
self.books_by_title = sorted(self.books_to_catalog, key=lambda x: sort_key(x['title_sort'].upper()))
|
||||
|
||||
if self.DEBUG and self.opts.verbose:
|
||||
self.opts.log.info("fetch_books_by_title(): %d books" % len(self.books_by_title))
|
||||
self.opts.log.info(" %-40s %-40s" % ('title', 'title_sort'))
|
||||
for title in self.books_by_title:
|
||||
self.opts.log.info((u" %-40s %-40s" % (title['title'][0:40],
|
||||
title['title_sort'][0:40])).encode('utf-8'))
|
||||
else:
|
||||
error_msg = _("No books to catalog.\nCheck 'Excluded books' rules in E-book options.\n")
|
||||
self.opts.log.error('*** ' + error_msg + ' ***')
|
||||
self.error.append(_('No books available to include in catalog'))
|
||||
self.error.append(error_msg)
|
||||
raise EmptyCatalogException, error_msg
|
||||
|
||||
def fetch_books_to_catalog(self):
|
||||
""" Populate self.books_to_catalog from database
|
||||
|
||||
Create self.books_to_catalog from filtered database.
|
||||
Keys:
|
||||
authors massaged
|
||||
author_sort record['author_sort'] or computed
|
||||
@ -871,7 +901,7 @@ class CatalogBuilder(object):
|
||||
data (list): filtered list of book metadata dicts
|
||||
|
||||
Outputs:
|
||||
(list) books_by_title
|
||||
(list) books_to_catalog
|
||||
|
||||
Returns:
|
||||
True: Successful
|
||||
@ -980,7 +1010,6 @@ class CatalogBuilder(object):
|
||||
return this_title
|
||||
|
||||
# Entry point
|
||||
self.update_progress_full_step(_("Fetching database"))
|
||||
|
||||
self.opts.sort_by = 'title'
|
||||
search_phrase = ''
|
||||
@ -1003,28 +1032,15 @@ class CatalogBuilder(object):
|
||||
data = self.plugin.search_sort_db(self.db, self.opts)
|
||||
data = self.process_exclusions(data)
|
||||
|
||||
if self.opts.verbose and self.prefix_rules:
|
||||
self.opts.log.info(" Added prefixes:")
|
||||
|
||||
# Populate this_title{} from data[{},{}]
|
||||
titles = []
|
||||
for record in data:
|
||||
this_title = _populate_title(record)
|
||||
titles.append(this_title)
|
||||
|
||||
# Re-sort based on title_sort
|
||||
if len(titles):
|
||||
self.books_by_title = sorted(titles, key=lambda x: sort_key(x['title_sort'].upper()))
|
||||
|
||||
if self.DEBUG and self.opts.verbose:
|
||||
self.opts.log.info("fetch_books_by_title(): %d books" % len(self.books_by_title))
|
||||
self.opts.log.info(" %-40s %-40s" % ('title', 'title_sort'))
|
||||
for title in self.books_by_title:
|
||||
self.opts.log.info((u" %-40s %-40s" % (title['title'][0:40],
|
||||
title['title_sort'][0:40])).encode('utf-8'))
|
||||
else:
|
||||
error_msg = _("No books to catalog.\nCheck 'Excluded books' rules in E-book options.\n")
|
||||
self.opts.log.error('*** ' + error_msg + ' ***')
|
||||
self.error.append(_('No books available to include in catalog'))
|
||||
self.error.append(error_msg)
|
||||
raise EmptyCatalogException, error_msg
|
||||
return titles
|
||||
|
||||
def fetch_bookmarks(self):
|
||||
""" Interrogate connected Kindle for bookmarks.
|
||||
@ -1104,7 +1120,7 @@ class CatalogBuilder(object):
|
||||
d.initialize(self.opts.connected_device['save_template'])
|
||||
|
||||
bookmarks = {}
|
||||
for book in self.books_by_title:
|
||||
for book in self.books_to_catalog:
|
||||
if 'formats' in book:
|
||||
path_map = {}
|
||||
id = book['id']
|
||||
@ -1148,7 +1164,7 @@ class CatalogBuilder(object):
|
||||
genre_tags_dict (dict): dict of filtered, normalized tags in data set
|
||||
"""
|
||||
|
||||
def _format_tag_list(tags, indent=5, line_break=70, header='Tag list'):
|
||||
def _format_tag_list(tags, indent=2, line_break=70, header='Tag list'):
|
||||
def _next_tag(sorted_tags):
|
||||
for (i, tag) in enumerate(sorted_tags):
|
||||
if i < len(tags) - 1:
|
||||
@ -1541,7 +1557,7 @@ class CatalogBuilder(object):
|
||||
def generate_html_by_date_added(self):
|
||||
""" Generate content/ByDateAdded.html.
|
||||
|
||||
Loop through self.books_by_title sorted by reverse date, generate HTML.
|
||||
Loop through self.books_to_catalog sorted by reverse date, generate HTML.
|
||||
|
||||
Input:
|
||||
books_by_title (list): books, sorted by title
|
||||
@ -1735,10 +1751,10 @@ class CatalogBuilder(object):
|
||||
|
||||
# >>> Books by date range <<<
|
||||
if self.use_series_prefix_in_titles_section:
|
||||
self.books_by_date_range = sorted(self.books_by_title,
|
||||
self.books_by_date_range = sorted(self.books_to_catalog,
|
||||
key=lambda x:(x['timestamp'], x['timestamp']),reverse=True)
|
||||
else:
|
||||
nspt = deepcopy(self.books_by_title)
|
||||
nspt = deepcopy(self.books_to_catalog)
|
||||
self.books_by_date_range = sorted(nspt, key=lambda x:(x['timestamp'], x['timestamp']),reverse=True)
|
||||
|
||||
date_range_list = []
|
||||
@ -1763,7 +1779,7 @@ class CatalogBuilder(object):
|
||||
|
||||
# >>>> Books by month <<<<
|
||||
# Sort titles case-insensitive for by month using series prefix
|
||||
self.books_by_month = sorted(self.books_by_title,
|
||||
self.books_by_month = sorted(self.books_to_catalog,
|
||||
key=lambda x:(x['timestamp'], x['timestamp']),reverse=True)
|
||||
|
||||
# Loop through books by date
|
||||
@ -2026,12 +2042,12 @@ class CatalogBuilder(object):
|
||||
|
||||
if self.opts.verbose:
|
||||
if len(genre_list):
|
||||
self.opts.log.info(" Genre summary: %d active genre tags used in generating catalog with %d titles" %
|
||||
(len(genre_list), len(self.books_by_title)))
|
||||
self.opts.log.info(" Genre summary: %d active genre tags used in generating catalog with %d titles" %
|
||||
(len(genre_list), len(self.books_to_catalog)))
|
||||
|
||||
for genre in genre_list:
|
||||
for key in genre:
|
||||
self.opts.log.info(" %s: %d %s" % (self.get_friendly_genre_tag(key),
|
||||
self.opts.log.info(" %s: %d %s" % (self.get_friendly_genre_tag(key),
|
||||
len(genre[key]),
|
||||
'titles' if len(genre[key]) > 1 else 'title'))
|
||||
|
||||
@ -2226,48 +2242,28 @@ class CatalogBuilder(object):
|
||||
Output:
|
||||
content/BySeries.html (file)
|
||||
|
||||
To do:
|
||||
self.books_by_series = [i for i in self.books_by_title if i['series']]
|
||||
"""
|
||||
friendly_name = _("Series")
|
||||
self.update_progress_full_step("%s HTML" % friendly_name)
|
||||
|
||||
self.opts.sort_by = 'series'
|
||||
|
||||
# Merge self.excluded_tags with opts.search_text
|
||||
# Updated to use exact match syntax
|
||||
|
||||
search_phrase = 'series:true '
|
||||
if self.excluded_tags:
|
||||
search_terms = []
|
||||
for tag in self.excluded_tags:
|
||||
search_terms.append("tag:=%s" % tag)
|
||||
search_phrase += "not (%s)" % " or ".join(search_terms)
|
||||
|
||||
# If a list of ids are provided, don't use search_text
|
||||
if self.opts.ids:
|
||||
self.opts.search_text = search_phrase
|
||||
else:
|
||||
if self.opts.search_text:
|
||||
self.opts.search_text += " " + search_phrase
|
||||
else:
|
||||
self.opts.search_text = search_phrase
|
||||
|
||||
# Fetch the database as a dictionary
|
||||
data = self.plugin.search_sort_db(self.db, self.opts)
|
||||
|
||||
# Remove exclusions
|
||||
self.books_by_series = self.process_exclusions(data, log_exclusion=False)
|
||||
# *** Convert the existing database, resort by series/index ***
|
||||
self.books_by_series = [i for i in self.books_to_catalog if i['series']]
|
||||
self.books_by_series = sorted(self.books_by_series, key=lambda x: sort_key(self._kf_books_by_series_sorter(x)))
|
||||
|
||||
if not self.books_by_series:
|
||||
self.opts.generate_series = False
|
||||
self.opts.log(" no series found in selected books, cancelling series generation")
|
||||
self.opts.log(" no series found in selected books, skipping Series section")
|
||||
return
|
||||
|
||||
# Generate series_sort
|
||||
for book in self.books_by_series:
|
||||
book['series_sort'] = self.generate_sort_title(book['series'])
|
||||
|
||||
# Establish initial letter equivalencies
|
||||
sort_equivalents = self.establish_equivalencies(self.books_by_series, key='series_sort')
|
||||
|
||||
soup = self.generate_html_empty_header(friendly_name)
|
||||
body = soup.find('body')
|
||||
|
||||
@ -2277,9 +2273,6 @@ class CatalogBuilder(object):
|
||||
current_letter = ""
|
||||
current_series = None
|
||||
|
||||
# Establish initial letter equivalencies
|
||||
sort_equivalents = self.establish_equivalencies(self.books_by_series, key='series_sort')
|
||||
|
||||
# Loop through books_by_series
|
||||
series_count = 0
|
||||
for idx, book in enumerate(self.books_by_series):
|
||||
@ -2335,11 +2328,6 @@ class CatalogBuilder(object):
|
||||
# Use series, series index if avail else just title
|
||||
#aTag.insert(0,'%d. %s · %s' % (book['series_index'],escape(book['title']), ' & '.join(book['authors'])))
|
||||
|
||||
if is_date_undefined(book['pubdate']):
|
||||
book['date'] = None
|
||||
else:
|
||||
book['date'] = strftime(u'%B %Y', book['pubdate'].timetuple())
|
||||
|
||||
args = self.generate_format_args(book)
|
||||
formatted_title = self.by_series_title_template.format(**args).rstrip()
|
||||
aTag.insert(0,NavigableString(escape(formatted_title)))
|
||||
@ -2438,7 +2426,7 @@ class CatalogBuilder(object):
|
||||
# Re-sort title list without leading series/series_index
|
||||
# Incoming title <series> <series_index>: <title>
|
||||
if not self.use_series_prefix_in_titles_section:
|
||||
nspt = deepcopy(self.books_by_title)
|
||||
nspt = deepcopy(self.books_to_catalog)
|
||||
nspt = sorted(nspt, key=lambda x: sort_key(x['title_sort'].upper()))
|
||||
self.books_by_title_no_series_prefix = nspt
|
||||
|
||||
@ -4339,7 +4327,7 @@ class CatalogBuilder(object):
|
||||
|
||||
# Report excluded books
|
||||
if self.opts.verbose and excluded_tags:
|
||||
self.opts.log.info(" Excluded books by Tags:")
|
||||
self.opts.log.info(" Excluded books:")
|
||||
data = self.db.get_data_as_dict(ids=self.opts.ids)
|
||||
for record in data:
|
||||
matched = list(set(record['tags']) & set(excluded_tags))
|
||||
@ -4632,7 +4620,7 @@ class CatalogBuilder(object):
|
||||
normalized += c
|
||||
return normalized
|
||||
|
||||
def process_exclusions(self, data_set, log_exclusion=True):
|
||||
def process_exclusions(self, data_set):
|
||||
""" Filter data_set based on exclusion_rules.
|
||||
|
||||
Compare each book in data_set to each exclusion_rule. Remove
|
||||
@ -4666,16 +4654,18 @@ class CatalogBuilder(object):
|
||||
matched = re.search(pat, unicode(field_contents),
|
||||
re.IGNORECASE)
|
||||
if matched is not None:
|
||||
if self.opts.verbose and log_exclusion:
|
||||
if self.opts.verbose:
|
||||
field_md = self.db.metadata_for_field(field)
|
||||
for rule in self.opts.exclusion_rules:
|
||||
if rule[1] == '#%s' % field_md['label']:
|
||||
self.opts.log.info(" - '%s' by %s (Exclusion rule '%s')" %
|
||||
self.opts.log.info(" - '%s' by %s (Exclusion rule '%s')" %
|
||||
(record['title'], record['authors'][0], rule[0]))
|
||||
exclusion_set.append(record)
|
||||
if record in filtered_data_set:
|
||||
filtered_data_set.remove(record)
|
||||
break
|
||||
else:
|
||||
filtered_data_set.append(record)
|
||||
else:
|
||||
if (record not in filtered_data_set and
|
||||
record not in exclusion_set):
|
||||
|
Loading…
x
Reference in New Issue
Block a user