diff --git a/src/calibre/gui2/catalog/catalog_epub_mobi.py b/src/calibre/gui2/catalog/catalog_epub_mobi.py index 1ed11a55d7..04a5fe9527 100644 --- a/src/calibre/gui2/catalog/catalog_epub_mobi.py +++ b/src/calibre/gui2/catalog/catalog_epub_mobi.py @@ -239,10 +239,11 @@ class PluginWidget(QWidget,Ui_Form): def initialize(self, name, db): ''' - CheckBoxControls (c_type: check_box): - ['generate_titles','generate_series','generate_genres', - 'generate_recently_added','generate_descriptions','include_hr'] + ['cross_reference_authors', + 'generate_titles','generate_series','generate_genres', + 'generate_recently_added','generate_descriptions', + 'include_hr'] ComboBoxControls (c_type: combo_box): ['exclude_source_field','header_note_source_field', 'merge_source_field'] diff --git a/src/calibre/gui2/catalog/catalog_epub_mobi.ui b/src/calibre/gui2/catalog/catalog_epub_mobi.ui index b32e596f54..5c016ffdb5 100644 --- a/src/calibre/gui2/catalog/catalog_epub_mobi.ui +++ b/src/calibre/gui2/catalog/catalog_epub_mobi.ui @@ -305,7 +305,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book] Other options - + @@ -372,7 +372,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book] - + @@ -397,7 +397,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book] - + @@ -413,7 +413,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book] - + @@ -447,7 +447,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book] - + E&xtra Description note: @@ -460,7 +460,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book] - + @@ -561,6 +561,27 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book] + + + + Author cross-references: + + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + + + + + + + For books with multiple authors, list each author separately + + + + + diff --git a/src/calibre/library/catalogs/epub_mobi.py b/src/calibre/library/catalogs/epub_mobi.py index 251db5cf88..de56e27e6e 100644 --- a/src/calibre/library/catalogs/epub_mobi.py +++ b/src/calibre/library/catalogs/epub_mobi.py @@ -41,6 +41,13 @@ class EPUB_MOBI(CatalogPlugin): help = _('Title of generated catalog used as title in metadata.\n' "Default: '%default'\n" "Applies to: AZW3, ePub, MOBI output formats")), + Option('--cross-reference-authors', + default=False, + dest='cross_reference_authors', + action = 'store_true', + help=_("Create cross-references in Authors section for books with multiple authors.\n" + "Default: '%default'\n" + "Applies to: AZW3, ePub, MOBI output formats")), Option('--debug-pipeline', default=None, dest='debug_pipeline', @@ -58,7 +65,6 @@ class EPUB_MOBI(CatalogPlugin): help=_("Regex describing tags to exclude as genres.\n" "Default: '%default' excludes bracketed tags, e.g. '[Project Gutenberg]', and '+', the default tag for read books.\n" "Applies to: AZW3, ePub, MOBI output formats")), - Option('--exclusion-rules', default="(('Catalogs','Tags','Catalog'),)", dest='exclusion_rules', @@ -72,7 +78,6 @@ class EPUB_MOBI(CatalogPlugin): "When multiple rules are defined, all rules will be applied.\n" "Default: \n" + '"' + '%default' + '"' + "\n" "Applies to AZW3, ePub, MOBI output formats")), - Option('--generate-authors', default=False, dest='generate_authors', @@ -318,8 +323,8 @@ class EPUB_MOBI(CatalogPlugin): build_log.append(" opts:") for key in keys: if key in ['catalog_title','author_clip','connected_kindle','creator', - 'description_clip','exclude_book_marker','exclude_genre', - 'exclude_tags','exclusion_rules', 'fmt', + 'cross_reference_authors','description_clip','exclude_book_marker', + 'exclude_genre','exclude_tags','exclusion_rules', 'fmt', 'header_note_source_field','merge_comments_rule', 'output_profile','prefix_rules','read_book_marker', 'search_text','sort_by','sort_descriptions_by_author','sync', diff --git a/src/calibre/library/catalogs/epub_mobi_builder.py b/src/calibre/library/catalogs/epub_mobi_builder.py index a04e1bd868..dbc73925b6 100644 --- a/src/calibre/library/catalogs/epub_mobi_builder.py +++ b/src/calibre/library/catalogs/epub_mobi_builder.py @@ -14,11 +14,12 @@ from calibre.customize.conversion import DummyReporter from calibre.customize.ui import output_profiles from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString from calibre.ebooks.chardet import substitute_entites +from calibre.ebooks.metadata import author_to_author_sort from calibre.library.catalogs import AuthorSortMismatchException, EmptyCatalogException from calibre.ptempfile import PersistentTemporaryDirectory from calibre.utils.config import config_dir from calibre.utils.date import format_date, is_date_undefined, now as nowf -from calibre.utils.filenames import ascii_text +from calibre.utils.filenames import ascii_text, shorten_components_to from calibre.utils.icu import capitalize, collation_order, sort_key from calibre.utils.magick.draw import thumbnail from calibre.utils.zipfile import ZipFile @@ -109,6 +110,7 @@ class CatalogBuilder(object): self.stylesheet = stylesheet self.cache_dir = os.path.join(config_dir, 'caches', 'catalog') self.catalog_path = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='') + self.content_dir = os.path.join(self.catalog_path, "content") self.excluded_tags = self.get_excluded_tags() self.generate_for_kindle_azw3 = True if (_opts.fmt == 'azw3' and _opts.output_profile and @@ -127,12 +129,13 @@ class CatalogBuilder(object): self.books_by_title = None self.books_by_title_no_series_prefix = None self.books_to_catalog = None - self.content_dir = os.path.join(self.catalog_path, "content") self.current_step = 0.0 self.error = [] self.generate_recently_read = False self.genres = [] - self.genre_tags_dict = None + self.genre_tags_dict = \ + self.filter_db_tags(max_len = 245 - len("%s/Genre_.html" % self.content_dir)) \ + if self.opts.generate_genres else None self.html_filelist_1 = [] self.html_filelist_2 = [] self.merge_comments_rule = dict(zip(['field','position','hr'], @@ -505,7 +508,7 @@ class CatalogBuilder(object): if not os.path.isdir(images_path): os.makedirs(images_path) - def detect_author_sort_mismatches(self): + def detect_author_sort_mismatches(self, books_to_test): """ Detect author_sort mismatches. Sort by author, look for inconsistencies in author_sort among @@ -513,17 +516,18 @@ class CatalogBuilder(object): annoyance for EPUB. Inputs: - self.books_to_catalog (list): list of books to catalog + books_by_author (list): list of books to test, possibly unsorted Output: - self.books_by_author (list): sorted by author + (none) Exceptions: AuthorSortMismatchException: author_sort mismatch detected """ - self.books_by_author = sorted(list(self.books_to_catalog), key=self._kf_books_by_author_sorter_author) - authors = [(record['author'], record['author_sort']) for record in self.books_by_author] + books_by_author = sorted(list(books_to_test), key=self._kf_books_by_author_sorter_author) + + authors = [(record['author'], record['author_sort']) for record in books_by_author] current_author = authors[0] for (i,author) in enumerate(authors): if author != current_author and i: @@ -701,6 +705,7 @@ class CatalogBuilder(object): def fetch_books_by_author(self): """ Generate a list of books sorted by author. + For books with multiple authors, relist book with additional authors. Sort the database by author. Report author_sort inconsistencies as warning when building EPUB or MOBI, error when building MOBI. Collect a list of unique authors to self.authors. @@ -720,25 +725,30 @@ class CatalogBuilder(object): self.update_progress_full_step(_("Sorting database")) - self.detect_author_sort_mismatches() + books_by_author = list(self.books_to_catalog) + self.detect_author_sort_mismatches(books_by_author) + if self.opts.cross_reference_authors: + books_by_author = self.relist_multiple_authors(books_by_author) + + #books_by_author = sorted(list(books_by_author), key=self._kf_books_by_author_sorter_author) - # Sort authors using sort_key to normalize accented letters # Determine the longest author_sort length before sorting - asl = [i['author_sort'] for i in self.books_by_author] + asl = [i['author_sort'] for i in books_by_author] las = max(asl, key=len) - self.books_by_author = sorted(self.books_to_catalog, + + books_by_author = sorted(books_by_author, key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las)))) if self.DEBUG and self.opts.verbose: - tl = [i['title'] for i in self.books_by_author] + tl = [i['title'] for i in books_by_author] lt = max(tl, key=len) fs = '{:<6}{:<%d} {:<%d} {!s}' % (len(lt),len(las)) print(fs.format('','Title','Author','Series')) - for i in self.books_by_author: + for i in books_by_author: print(fs.format('', i['title'],i['author_sort'],i['series'])) # Build the unique_authors set from existing data - authors = [(record['author'], capitalize(record['author_sort'])) for record in self.books_by_author] + authors = [(record['author'], capitalize(record['author_sort'])) for record in books_by_author] # authors[] contains a list of all book authors, with multiple entries for multiple books by author # authors[]: (([0]:friendly [1]:sort)) @@ -776,6 +786,7 @@ class CatalogBuilder(object): author[2])).encode('utf-8')) self.authors = unique_authors + self.books_by_author = books_by_author return True def fetch_books_by_title(self): @@ -863,15 +874,15 @@ class CatalogBuilder(object): this_title['series_index'] = 0.0 this_title['title_sort'] = self.generate_sort_title(this_title['title']) - if 'authors' in record: - # from calibre.ebooks.metadata import authors_to_string - # return authors_to_string(self.authors) + if 'authors' in record: this_title['authors'] = record['authors'] + # Synthesize author attribution from authors list if record['authors']: this_title['author'] = " & ".join(record['authors']) else: - this_title['author'] = 'Unknown' + this_title['author'] = _('Unknown') + this_title['authors'] = [this_title['author']] if 'author_sort' in record and record['author_sort'].strip(): this_title['author_sort'] = record['author_sort'] @@ -1093,7 +1104,7 @@ class CatalogBuilder(object): self.bookmarked_books = bookmarks - def filter_db_tags(self): + def filter_db_tags(self, max_len): """ Remove excluded tags from data set, return normalized genre list. Filter all db tags, removing excluded tags supplied in opts. @@ -1101,13 +1112,13 @@ class CatalogBuilder(object): tags are flattened to alphanumeric ascii_text. Args: - (none) + max_len: maximum length of normalized tag to fit within OS constraints Return: genre_tags_dict (dict): dict of filtered, normalized tags in data set """ - def _format_tag_list(tags, indent=2, line_break=70, header='Tag list'): + def _format_tag_list(tags, indent=1, line_break=70, header='Tag list'): def _next_tag(sorted_tags): for (i, tag) in enumerate(sorted_tags): if i < len(tags) - 1: @@ -1126,6 +1137,31 @@ class CatalogBuilder(object): out_str = ' ' * (indent + 1) return ans + out_str + def _normalize_tag(tag, max_len): + """ Generate an XHTML-legal anchor string from tag. + + Parse tag for non-ascii, convert to unicode name. + + Args: + tags (str): tag name possible containing symbols + max_len (int): maximum length of tag + + Return: + normalized (str): unicode names substituted for non-ascii chars, + clipped to max_len + """ + + normalized = massaged = re.sub('\s','',ascii_text(tag).lower()) + if re.search('\W',normalized): + normalized = '' + for c in massaged: + if re.search('\W',c): + normalized += self.generate_unicode_name(c) + else: + normalized += c + shortened = shorten_components_to(max_len, [normalized])[0] + return shortened + # Entry point normalized_tags = [] friendly_tags = [] @@ -1144,7 +1180,7 @@ class CatalogBuilder(object): if tag == ' ': continue - normalized_tags.append(self.normalize_tag(tag)) + normalized_tags.append(_normalize_tag(tag, max_len)) friendly_tags.append(tag) genre_tags_dict = dict(zip(friendly_tags,normalized_tags)) @@ -1941,8 +1977,6 @@ class CatalogBuilder(object): self.update_progress_full_step(_("Genres HTML")) - self.genre_tags_dict = self.filter_db_tags() - # Extract books matching filtered_tags genre_list = [] for friendly_tag in sorted(self.genre_tags_dict, key=sort_key): @@ -2024,10 +2058,11 @@ class CatalogBuilder(object): books_by_current_author += 1 # Write the genre book list as an article - titles_spanned = self.generate_html_by_genre(genre, True if index==0 else False, - genre_tag_set[genre], - "%s/Genre_%s.html" % (self.content_dir, - genre)) + outfile = "%s/Genre_%s.html" % (self.content_dir, genre) + titles_spanned = self.generate_html_by_genre(genre, + True if index==0 else False, + genre_tag_set[genre], + outfile) tag_file = "content/Genre_%s.html" % genre master_genre_list.append({'tag':genre, @@ -2549,7 +2584,7 @@ class CatalogBuilder(object): for (i, tag) in enumerate(sorted(book.get('tags', []))): aTag = Tag(_soup,'a') if self.opts.generate_genres: - aTag['href'] = "Genre_%s.html" % self.normalize_tag(tag) + aTag['href'] = "Genre_%s.html" % self.genre_tags_dict[tag] aTag.insert(0,escape(NavigableString(tag))) genresTag.insert(gtc, aTag) gtc += 1 @@ -4603,28 +4638,6 @@ class CatalogBuilder(object): return merged - def normalize_tag(self, tag): - """ Generate an XHTML-legal anchor string from tag. - - Parse tag for non-ascii, convert to unicode name. - - Args: - tags (str): tag name possible containing symbols - - Return: - normalized (str): unicode names substituted for non-ascii chars - """ - - normalized = massaged = re.sub('\s','',ascii_text(tag).lower()) - if re.search('\W',normalized): - normalized = '' - for c in massaged: - if re.search('\W',c): - normalized += self.generate_unicode_name(c) - else: - normalized += c - return normalized - def process_exclusions(self, data_set): """ Filter data_set based on exclusion_rules. @@ -4697,6 +4710,43 @@ class CatalogBuilder(object): else: return data_set + def relist_multiple_authors(self, books_by_author): + """ Create multiple entries for books with multiple authors + + Given a list of books by author, scan list for books with multiple + authors. Add a cloned copy of the book per additional author. + + Args: + books_by_author (list): book list possibly containing books + with multiple authors + + Return: + (list): books_by_author with additional cloned entries for books with + multiple authors + """ + + multiple_author_books = [] + + # Find the multiple author books + for book in books_by_author: + if len(book['authors']) > 1: + multiple_author_books.append(book) + + for book in multiple_author_books: + cloned_authors = list(book['authors']) + for x, author in enumerate(book['authors']): + if x: + first_author = cloned_authors.pop(0) + cloned_authors.append(first_author) + new_book = deepcopy(book) + new_book['author'] = ' & '.join(cloned_authors) + new_book['authors'] = list(cloned_authors) + asl = [author_to_author_sort(auth) for auth in cloned_authors] + new_book['author_sort'] = ' & '.join(asl) + books_by_author.append(new_book) + + return books_by_author + def update_progress_full_step(self, description): """ Update calibre's job status UI.