diff --git a/src/calibre/library/catalogs/epub_mobi.py b/src/calibre/library/catalogs/epub_mobi.py index f0a4d1cb78..5acb0cfc7a 100644 --- a/src/calibre/library/catalogs/epub_mobi.py +++ b/src/calibre/library/catalogs/epub_mobi.py @@ -412,10 +412,15 @@ class EPUB_MOBI(CatalogPlugin): pass if GENERATE_DEBUG_EPUB: + from calibre.ebooks.epub import initialize_container from calibre.ebooks.tweak import zip_rebuilder + from calibre.utils.zipfile import ZipFile input_path = os.path.join(catalog_debug_path,'input') - shutil.copy(P('catalog/mimetype'),input_path) - shutil.copytree(P('catalog/META-INF'),os.path.join(input_path,'META-INF')) + epub_shell = os.path.join(catalog_debug_path,'epub_shell.zip') + initialize_container(epub_shell, opf_name='content.opf') + with ZipFile(epub_shell, 'r') as zf: + zf.extractall(path=input_path) + os.remove(epub_shell) zip_rebuilder(input_path, os.path.join(catalog_debug_path,'input.epub')) # returns to gui2.actions.catalog:catalog_generated() diff --git a/src/calibre/library/catalogs/epub_mobi_builder.py b/src/calibre/library/catalogs/epub_mobi_builder.py index a2a22d2c74..6e912f3295 100644 --- a/src/calibre/library/catalogs/epub_mobi_builder.py +++ b/src/calibre/library/catalogs/epub_mobi_builder.py @@ -5,6 +5,7 @@ __copyright__ = '2010, Greg Riker' import datetime, htmlentitydefs, os, re, shutil, unicodedata, zlib from copy import deepcopy +from operator import itemgetter from xml.sax.saxutils import escape from calibre import (prepare_string_for_xml, strftime, force_unicode) @@ -56,15 +57,6 @@ class CatalogBuilder(object): """ property decorators for attributes """ if True: - - - - - - - - - ''' directory to store cached thumbs ''' @property def cache_dir(self): @@ -102,10 +94,6 @@ class CatalogBuilder(object): def generate_recently_read(self): return self.__generate_recently_read - - - - ''' additional field to include before/after comments ''' @property def merge_comments_rule(self): @@ -128,9 +116,6 @@ class 
CatalogBuilder(object): def plugin(self): return self.__plugin - - - ''' Progress Reporter for Jobs ''' @property def reporter(self): @@ -199,6 +184,7 @@ class CatalogBuilder(object): self.__stylesheet = stylesheet self.__cache_dir = os.path.join(config_dir, 'caches', 'catalog') self.__catalog_path = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='') + self.__excluded_tags = self.get_excluded_tags() self.__generate_for_kindle = True if (_opts.fmt == 'mobi' and _opts.output_profile and _opts.output_profile.startswith("kindle")) else False @@ -221,12 +207,13 @@ class CatalogBuilder(object): self.books_by_title = None ''' list of books in series, without series prefix ''' self.books_by_title_no_series_prefix = None + ''' Initial list of books to catalog from which all sections are built ''' + self.books_to_catalog = None self.__content_dir = os.path.join(self.catalog_path, "content") ''' track Job progress ''' self.current_step = 0.0 ''' cumulative error messages to report at conclusion ''' self.error = [] - self.__excluded_tags = self.get_excluded_tags() self.__generate_recently_read = True if (_opts.generate_recently_added and _opts.connected_kindle and self.generate_for_kindle) else False @@ -262,6 +249,7 @@ class CatalogBuilder(object): self.total_steps = 6.0 self.__use_series_prefix_in_titles_section = False + self.books_to_catalog = self.fetch_books_to_catalog() self.compute_total_steps() self.calculate_thumbnail_dimensions() self.confirm_thumbs_archive() @@ -343,6 +331,15 @@ class CatalogBuilder(object): series_index) return key + def _kf_books_by_series_sorter(self, book): + index = book['series_index'] + integer = int(index) + fraction = index-integer + series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0')) + key = '%s %s' % (self.generate_sort_title(book['series']), + series_index) + return key + """ Methods """ def build_sources(self): @@ -614,7 +611,7 @@ class CatalogBuilder(object): annoyance for EPUB. 
Inputs: - self.books_by_title (list): list of books to catalog + self.books_to_catalog (list): list of books to catalog Output: self.books_by_author (list): sorted by author @@ -623,7 +620,7 @@ class CatalogBuilder(object): AuthorSortMismatchException: author_sort mismatch detected """ - self.books_by_author = sorted(list(self.books_by_title), key=self._kf_books_by_author_sorter_author) + self.books_by_author = sorted(list(self.books_to_catalog), key=self._kf_books_by_author_sorter_author) authors = [(record['author'], record['author_sort']) for record in self.books_by_author] current_author = authors[0] for (i,author) in enumerate(authors): @@ -671,7 +668,7 @@ class CatalogBuilder(object): None: no match """ def _log_prefix_rule_match_info(rule, record): - self.opts.log.info(" %s '%s' by %s (Prefix rule '%s')" % + self.opts.log.info(" %s '%s' by %s (Prefix rule '%s')" % (rule['prefix'],record['title'], record['authors'][0], rule['name'])) @@ -770,7 +767,7 @@ class CatalogBuilder(object): to self.authors. Inputs: - self.books_by_title (list): database, sorted by title + self.books_to_catalog (list): database, sorted by title Outputs: books_by_author: database, sorted by author @@ -790,7 +787,7 @@ class CatalogBuilder(object): # Determine the longest author_sort length before sorting asl = [i['author_sort'] for i in self.books_by_author] las = max(asl, key=len) - self.books_by_author = sorted(self.books_by_author, + self.books_by_author = sorted(self.books_to_catalog, key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las)))) if self.DEBUG and self.opts.verbose: @@ -843,9 +840,42 @@ class CatalogBuilder(object): return True def fetch_books_by_title(self): - """ Populate self.books_by_title from database + """ Generate a list of books sorted by title. - Create self.books_by_title from filtered database. + Sort the database by title. 
+ + Inputs: + self.books_to_catalog (list): database + + Outputs: + books_by_title: database, sorted by title + + Return: + None: the sorted list is stored in self.books_by_title + Raises EmptyCatalogException when self.books_to_catalog is empty + """ + self.update_progress_full_step(_("Sorting titles")) + # Re-sort based on title_sort + if len(self.books_to_catalog): + self.books_by_title = sorted(self.books_to_catalog, key=lambda x: sort_key(x['title_sort'].upper())) + + if self.DEBUG and self.opts.verbose: + self.opts.log.info("fetch_books_by_title(): %d books" % len(self.books_by_title)) + self.opts.log.info(" %-40s %-40s" % ('title', 'title_sort')) + for title in self.books_by_title: + self.opts.log.info((u" %-40s %-40s" % (title['title'][0:40], + title['title_sort'][0:40])).encode('utf-8')) + else: + error_msg = _("No books to catalog.\nCheck 'Excluded books' rules in E-book options.\n") + self.opts.log.error('*** ' + error_msg + ' ***') + self.error.append(_('No books available to include in catalog')) + self.error.append(error_msg) + raise EmptyCatalogException, error_msg + + def fetch_books_to_catalog(self): + """ Populate self.books_to_catalog from database + + Create self.books_to_catalog from filtered database.
Keys: authors massaged author_sort record['author_sort'] or computed @@ -871,7 +901,7 @@ class CatalogBuilder(object): data (list): filtered list of book metadata dicts Outputs: - (list) books_by_title + (list) books_to_catalog Returns: True: Successful @@ -980,7 +1010,6 @@ class CatalogBuilder(object): return this_title # Entry point - self.update_progress_full_step(_("Fetching database")) self.opts.sort_by = 'title' search_phrase = '' @@ -1003,28 +1032,15 @@ class CatalogBuilder(object): data = self.plugin.search_sort_db(self.db, self.opts) data = self.process_exclusions(data) + if self.opts.verbose and self.prefix_rules: + self.opts.log.info(" Added prefixes:") + # Populate this_title{} from data[{},{}] titles = [] for record in data: this_title = _populate_title(record) titles.append(this_title) - - # Re-sort based on title_sort - if len(titles): - self.books_by_title = sorted(titles, key=lambda x: sort_key(x['title_sort'].upper())) - - if self.DEBUG and self.opts.verbose: - self.opts.log.info("fetch_books_by_title(): %d books" % len(self.books_by_title)) - self.opts.log.info(" %-40s %-40s" % ('title', 'title_sort')) - for title in self.books_by_title: - self.opts.log.info((u" %-40s %-40s" % (title['title'][0:40], - title['title_sort'][0:40])).encode('utf-8')) - else: - error_msg = _("No books to catalog.\nCheck 'Excluded books' rules in E-book options.\n") - self.opts.log.error('*** ' + error_msg + ' ***') - self.error.append(_('No books available to include in catalog')) - self.error.append(error_msg) - raise EmptyCatalogException, error_msg + return titles def fetch_bookmarks(self): """ Interrogate connected Kindle for bookmarks. 
@@ -1104,7 +1120,7 @@ class CatalogBuilder(object): d.initialize(self.opts.connected_device['save_template']) bookmarks = {} - for book in self.books_by_title: + for book in self.books_to_catalog: if 'formats' in book: path_map = {} id = book['id'] @@ -1148,7 +1164,7 @@ class CatalogBuilder(object): genre_tags_dict (dict): dict of filtered, normalized tags in data set """ - def _format_tag_list(tags, indent=5, line_break=70, header='Tag list'): + def _format_tag_list(tags, indent=2, line_break=70, header='Tag list'): def _next_tag(sorted_tags): for (i, tag) in enumerate(sorted_tags): if i < len(tags) - 1: @@ -1541,7 +1557,7 @@ class CatalogBuilder(object): def generate_html_by_date_added(self): """ Generate content/ByDateAdded.html. - Loop through self.books_by_title sorted by reverse date, generate HTML. + Loop through self.books_to_catalog sorted by reverse date, generate HTML. Input: books_by_title (list): books, sorted by title @@ -1735,10 +1751,10 @@ class CatalogBuilder(object): # >>> Books by date range <<< if self.use_series_prefix_in_titles_section: - self.books_by_date_range = sorted(self.books_by_title, + self.books_by_date_range = sorted(self.books_to_catalog, key=lambda x:(x['timestamp'], x['timestamp']),reverse=True) else: - nspt = deepcopy(self.books_by_title) + nspt = deepcopy(self.books_to_catalog) self.books_by_date_range = sorted(nspt, key=lambda x:(x['timestamp'], x['timestamp']),reverse=True) date_range_list = [] @@ -1763,7 +1779,7 @@ class CatalogBuilder(object): # >>>> Books by month <<<< # Sort titles case-insensitive for by month using series prefix - self.books_by_month = sorted(self.books_by_title, + self.books_by_month = sorted(self.books_to_catalog, key=lambda x:(x['timestamp'], x['timestamp']),reverse=True) # Loop through books by date @@ -2026,12 +2042,12 @@ class CatalogBuilder(object): if self.opts.verbose: if len(genre_list): - self.opts.log.info(" Genre summary: %d active genre tags used in generating catalog with %d titles" % - 
(len(genre_list), len(self.books_by_title))) + self.opts.log.info(" Genre summary: %d active genre tags used in generating catalog with %d titles" % + (len(genre_list), len(self.books_to_catalog))) for genre in genre_list: for key in genre: - self.opts.log.info(" %s: %d %s" % (self.get_friendly_genre_tag(key), + self.opts.log.info(" %s: %d %s" % (self.get_friendly_genre_tag(key), len(genre[key]), 'titles' if len(genre[key]) > 1 else 'title')) @@ -2226,48 +2242,28 @@ class CatalogBuilder(object): Output: content/BySeries.html (file) - To do: - self.books_by_series = [i for i in self.books_by_title if i['series']] """ friendly_name = _("Series") self.update_progress_full_step("%s HTML" % friendly_name) self.opts.sort_by = 'series' - # Merge self.excluded_tags with opts.search_text - # Updated to use exact match syntax - - search_phrase = 'series:true ' - if self.excluded_tags: - search_terms = [] - for tag in self.excluded_tags: - search_terms.append("tag:=%s" % tag) - search_phrase += "not (%s)" % " or ".join(search_terms) - - # If a list of ids are provided, don't use search_text - if self.opts.ids: - self.opts.search_text = search_phrase - else: - if self.opts.search_text: - self.opts.search_text += " " + search_phrase - else: - self.opts.search_text = search_phrase - - # Fetch the database as a dictionary - data = self.plugin.search_sort_db(self.db, self.opts) - - # Remove exclusions - self.books_by_series = self.process_exclusions(data, log_exclusion=False) + # *** Convert the existing database, resort by series/index *** + self.books_by_series = [i for i in self.books_to_catalog if i['series']] + self.books_by_series = sorted(self.books_by_series, key=lambda x: sort_key(self._kf_books_by_series_sorter(x))) if not self.books_by_series: self.opts.generate_series = False - self.opts.log(" no series found in selected books, cancelling series generation") + self.opts.log(" no series found in selected books, skipping Series section") return # Generate series_sort for 
book in self.books_by_series: book['series_sort'] = self.generate_sort_title(book['series']) + # Establish initial letter equivalencies + sort_equivalents = self.establish_equivalencies(self.books_by_series, key='series_sort') + soup = self.generate_html_empty_header(friendly_name) body = soup.find('body') @@ -2277,9 +2273,6 @@ class CatalogBuilder(object): current_letter = "" current_series = None - # Establish initial letter equivalencies - sort_equivalents = self.establish_equivalencies(self.books_by_series, key='series_sort') - # Loop through books_by_series series_count = 0 for idx, book in enumerate(self.books_by_series): @@ -2335,11 +2328,6 @@ class CatalogBuilder(object): # Use series, series index if avail else just title #aTag.insert(0,'%d. %s · %s' % (book['series_index'],escape(book['title']), ' & '.join(book['authors']))) - if is_date_undefined(book['pubdate']): - book['date'] = None - else: - book['date'] = strftime(u'%B %Y', book['pubdate'].timetuple()) - args = self.generate_format_args(book) formatted_title = self.by_series_title_template.format(**args).rstrip() aTag.insert(0,NavigableString(escape(formatted_title))) @@ -2438,7 +2426,7 @@ class CatalogBuilder(object): # Re-sort title list without leading series/series_index # Incoming title : if not self.use_series_prefix_in_titles_section: - nspt = deepcopy(self.books_by_title) + nspt = deepcopy(self.books_to_catalog) nspt = sorted(nspt, key=lambda x: sort_key(x['title_sort'].upper())) self.books_by_title_no_series_prefix = nspt @@ -4339,7 +4327,7 @@ class CatalogBuilder(object): # Report excluded books if self.opts.verbose and excluded_tags: - self.opts.log.info(" Excluded books by Tags:") + self.opts.log.info(" Excluded books:") data = self.db.get_data_as_dict(ids=self.opts.ids) for record in data: matched = list(set(record['tags']) & set(excluded_tags)) @@ -4632,7 +4620,7 @@ class CatalogBuilder(object): normalized += c return normalized - def process_exclusions(self, data_set, 
log_exclusion=True): + def process_exclusions(self, data_set): """ Filter data_set based on exclusion_rules. Compare each book in data_set to each exclusion_rule. Remove @@ -4666,16 +4654,18 @@ class CatalogBuilder(object): matched = re.search(pat, unicode(field_contents), re.IGNORECASE) if matched is not None: - if self.opts.verbose and log_exclusion: + if self.opts.verbose: field_md = self.db.metadata_for_field(field) for rule in self.opts.exclusion_rules: if rule[1] == '#%s' % field_md['label']: - self.opts.log.info(" - '%s' by %s (Exclusion rule '%s')" % + self.opts.log.info(" - '%s' by %s (Exclusion rule '%s')" % (record['title'], record['authors'][0], rule[0])) exclusion_set.append(record) if record in filtered_data_set: filtered_data_set.remove(record) break + else: + filtered_data_set.append(record) else: if (record not in filtered_data_set and record not in exclusion_set):