diff --git a/src/calibre/gui2/catalog/catalog_epub_mobi.py b/src/calibre/gui2/catalog/catalog_epub_mobi.py
index 1ed11a55d7..04a5fe9527 100644
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.py
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.py
@@ -239,10 +239,11 @@ class PluginWidget(QWidget,Ui_Form):
def initialize(self, name, db):
'''
-
CheckBoxControls (c_type: check_box):
- ['generate_titles','generate_series','generate_genres',
- 'generate_recently_added','generate_descriptions','include_hr']
+ ['cross_reference_authors',
+ 'generate_titles','generate_series','generate_genres',
+ 'generate_recently_added','generate_descriptions',
+ 'include_hr']
ComboBoxControls (c_type: combo_box):
['exclude_source_field','header_note_source_field',
'merge_source_field']
diff --git a/src/calibre/gui2/catalog/catalog_epub_mobi.ui b/src/calibre/gui2/catalog/catalog_epub_mobi.ui
index b32e596f54..5c016ffdb5 100644
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.ui
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.ui
@@ -305,7 +305,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
Other options
- -
+
-
-
@@ -372,7 +372,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
- -
+
-
@@ -397,7 +397,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
- -
+
-
@@ -413,7 +413,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
- -
+
-
-
@@ -447,7 +447,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
- -
+
-
E&xtra Description note:
@@ -460,7 +460,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
- -
+
-
-
@@ -561,6 +561,27 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
+ -
+
+
+ Author cross-references:
+
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
+
+
+ -
+
+
-
+
+
+ For books with multiple authors, list each author separately
+
+
+
+
+
diff --git a/src/calibre/library/catalogs/epub_mobi.py b/src/calibre/library/catalogs/epub_mobi.py
index 251db5cf88..de56e27e6e 100644
--- a/src/calibre/library/catalogs/epub_mobi.py
+++ b/src/calibre/library/catalogs/epub_mobi.py
@@ -41,6 +41,13 @@ class EPUB_MOBI(CatalogPlugin):
help = _('Title of generated catalog used as title in metadata.\n'
"Default: '%default'\n"
"Applies to: AZW3, ePub, MOBI output formats")),
+ Option('--cross-reference-authors',
+ default=False,
+ dest='cross_reference_authors',
+ action = 'store_true',
+ help=_("Create cross-references in Authors section for books with multiple authors.\n"
+ "Default: '%default'\n"
+ "Applies to: AZW3, ePub, MOBI output formats")),
Option('--debug-pipeline',
default=None,
dest='debug_pipeline',
@@ -58,7 +65,6 @@ class EPUB_MOBI(CatalogPlugin):
help=_("Regex describing tags to exclude as genres.\n"
"Default: '%default' excludes bracketed tags, e.g. '[Project Gutenberg]', and '+', the default tag for read books.\n"
"Applies to: AZW3, ePub, MOBI output formats")),
-
Option('--exclusion-rules',
default="(('Catalogs','Tags','Catalog'),)",
dest='exclusion_rules',
@@ -72,7 +78,6 @@ class EPUB_MOBI(CatalogPlugin):
"When multiple rules are defined, all rules will be applied.\n"
"Default: \n" + '"' + '%default' + '"' + "\n"
"Applies to AZW3, ePub, MOBI output formats")),
-
Option('--generate-authors',
default=False,
dest='generate_authors',
@@ -318,8 +323,8 @@ class EPUB_MOBI(CatalogPlugin):
build_log.append(" opts:")
for key in keys:
if key in ['catalog_title','author_clip','connected_kindle','creator',
- 'description_clip','exclude_book_marker','exclude_genre',
- 'exclude_tags','exclusion_rules', 'fmt',
+ 'cross_reference_authors','description_clip','exclude_book_marker',
+ 'exclude_genre','exclude_tags','exclusion_rules', 'fmt',
'header_note_source_field','merge_comments_rule',
'output_profile','prefix_rules','read_book_marker',
'search_text','sort_by','sort_descriptions_by_author','sync',
diff --git a/src/calibre/library/catalogs/epub_mobi_builder.py b/src/calibre/library/catalogs/epub_mobi_builder.py
index a04e1bd868..dbc73925b6 100644
--- a/src/calibre/library/catalogs/epub_mobi_builder.py
+++ b/src/calibre/library/catalogs/epub_mobi_builder.py
@@ -14,11 +14,12 @@ from calibre.customize.conversion import DummyReporter
from calibre.customize.ui import output_profiles
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
from calibre.ebooks.chardet import substitute_entites
+from calibre.ebooks.metadata import author_to_author_sort
from calibre.library.catalogs import AuthorSortMismatchException, EmptyCatalogException
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.config import config_dir
from calibre.utils.date import format_date, is_date_undefined, now as nowf
-from calibre.utils.filenames import ascii_text
+from calibre.utils.filenames import ascii_text, shorten_components_to
from calibre.utils.icu import capitalize, collation_order, sort_key
from calibre.utils.magick.draw import thumbnail
from calibre.utils.zipfile import ZipFile
@@ -109,6 +110,7 @@ class CatalogBuilder(object):
self.stylesheet = stylesheet
self.cache_dir = os.path.join(config_dir, 'caches', 'catalog')
self.catalog_path = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='')
+ self.content_dir = os.path.join(self.catalog_path, "content")
self.excluded_tags = self.get_excluded_tags()
self.generate_for_kindle_azw3 = True if (_opts.fmt == 'azw3' and
_opts.output_profile and
@@ -127,12 +129,13 @@ class CatalogBuilder(object):
self.books_by_title = None
self.books_by_title_no_series_prefix = None
self.books_to_catalog = None
- self.content_dir = os.path.join(self.catalog_path, "content")
self.current_step = 0.0
self.error = []
self.generate_recently_read = False
self.genres = []
- self.genre_tags_dict = None
+ self.genre_tags_dict = \
+ self.filter_db_tags(max_len = 245 - len("%s/Genre_.html" % self.content_dir)) \
+ if self.opts.generate_genres else None
self.html_filelist_1 = []
self.html_filelist_2 = []
self.merge_comments_rule = dict(zip(['field','position','hr'],
@@ -505,7 +508,7 @@ class CatalogBuilder(object):
if not os.path.isdir(images_path):
os.makedirs(images_path)
- def detect_author_sort_mismatches(self):
+ def detect_author_sort_mismatches(self, books_to_test):
""" Detect author_sort mismatches.
Sort by author, look for inconsistencies in author_sort among
@@ -513,17 +516,18 @@ class CatalogBuilder(object):
annoyance for EPUB.
Inputs:
- self.books_to_catalog (list): list of books to catalog
+ books_to_test (list): list of books to test, possibly unsorted
Output:
- self.books_by_author (list): sorted by author
+ (none)
Exceptions:
AuthorSortMismatchException: author_sort mismatch detected
"""
- self.books_by_author = sorted(list(self.books_to_catalog), key=self._kf_books_by_author_sorter_author)
- authors = [(record['author'], record['author_sort']) for record in self.books_by_author]
+ books_by_author = sorted(list(books_to_test), key=self._kf_books_by_author_sorter_author)
+
+ authors = [(record['author'], record['author_sort']) for record in books_by_author]
current_author = authors[0]
for (i,author) in enumerate(authors):
if author != current_author and i:
@@ -701,6 +705,7 @@ class CatalogBuilder(object):
def fetch_books_by_author(self):
""" Generate a list of books sorted by author.
+ When opts.cross_reference_authors is set, relist each multi-author book once per additional author.
Sort the database by author. Report author_sort inconsistencies as warning when
building EPUB or MOBI, error when building MOBI. Collect a list of unique authors
to self.authors.
@@ -720,25 +725,30 @@ class CatalogBuilder(object):
self.update_progress_full_step(_("Sorting database"))
- self.detect_author_sort_mismatches()
+ books_by_author = list(self.books_to_catalog)
+ self.detect_author_sort_mismatches(books_by_author)
+ if self.opts.cross_reference_authors:
+ books_by_author = self.relist_multiple_authors(books_by_author)
+
+ #books_by_author = sorted(list(books_by_author), key=self._kf_books_by_author_sorter_author)
- # Sort authors using sort_key to normalize accented letters
# Determine the longest author_sort length before sorting
- asl = [i['author_sort'] for i in self.books_by_author]
+ asl = [i['author_sort'] for i in books_by_author]
las = max(asl, key=len)
- self.books_by_author = sorted(self.books_to_catalog,
+
+ books_by_author = sorted(books_by_author,
key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las))))
if self.DEBUG and self.opts.verbose:
- tl = [i['title'] for i in self.books_by_author]
+ tl = [i['title'] for i in books_by_author]
lt = max(tl, key=len)
fs = '{:<6}{:<%d} {:<%d} {!s}' % (len(lt),len(las))
print(fs.format('','Title','Author','Series'))
- for i in self.books_by_author:
+ for i in books_by_author:
print(fs.format('', i['title'],i['author_sort'],i['series']))
# Build the unique_authors set from existing data
- authors = [(record['author'], capitalize(record['author_sort'])) for record in self.books_by_author]
+ authors = [(record['author'], capitalize(record['author_sort'])) for record in books_by_author]
# authors[] contains a list of all book authors, with multiple entries for multiple books by author
# authors[]: (([0]:friendly [1]:sort))
@@ -776,6 +786,7 @@ class CatalogBuilder(object):
author[2])).encode('utf-8'))
self.authors = unique_authors
+ self.books_by_author = books_by_author
return True
def fetch_books_by_title(self):
@@ -863,15 +874,15 @@ class CatalogBuilder(object):
this_title['series_index'] = 0.0
this_title['title_sort'] = self.generate_sort_title(this_title['title'])
- if 'authors' in record:
- # from calibre.ebooks.metadata import authors_to_string
- # return authors_to_string(self.authors)
+ if 'authors' in record:
this_title['authors'] = record['authors']
+ # Synthesize author attribution from authors list
if record['authors']:
this_title['author'] = " & ".join(record['authors'])
else:
- this_title['author'] = 'Unknown'
+ this_title['author'] = _('Unknown')
+ this_title['authors'] = [this_title['author']]
if 'author_sort' in record and record['author_sort'].strip():
this_title['author_sort'] = record['author_sort']
@@ -1093,7 +1104,7 @@ class CatalogBuilder(object):
self.bookmarked_books = bookmarks
- def filter_db_tags(self):
+ def filter_db_tags(self, max_len):
""" Remove excluded tags from data set, return normalized genre list.
Filter all db tags, removing excluded tags supplied in opts.
@@ -1101,13 +1112,13 @@ class CatalogBuilder(object):
tags are flattened to alphanumeric ascii_text.
Args:
- (none)
+ max_len: maximum length of normalized tag to fit within OS constraints
Return:
genre_tags_dict (dict): dict of filtered, normalized tags in data set
"""
- def _format_tag_list(tags, indent=2, line_break=70, header='Tag list'):
+ def _format_tag_list(tags, indent=1, line_break=70, header='Tag list'):
def _next_tag(sorted_tags):
for (i, tag) in enumerate(sorted_tags):
if i < len(tags) - 1:
@@ -1126,6 +1137,31 @@ class CatalogBuilder(object):
out_str = ' ' * (indent + 1)
return ans + out_str
+ def _normalize_tag(tag, max_len):
+ """ Generate an XHTML-legal anchor string from tag.
+
+ Flatten tag to lowercase ascii without whitespace, then replace each remaining non-word (\W) character with its unicode name.
+
+ Args:
+ tag (str): tag name possibly containing symbols
+ max_len (int): maximum length of tag
+
+ Return:
+ normalized (str): unicode names substituted for non-word chars,
+ clipped to max_len
+ """
+
+ normalized = massaged = re.sub('\s','',ascii_text(tag).lower())
+ if re.search('\W',normalized):
+ normalized = ''
+ for c in massaged:
+ if re.search('\W',c):
+ normalized += self.generate_unicode_name(c)
+ else:
+ normalized += c
+ shortened = shorten_components_to(max_len, [normalized])[0]
+ return shortened
+
# Entry point
normalized_tags = []
friendly_tags = []
@@ -1144,7 +1180,7 @@ class CatalogBuilder(object):
if tag == ' ':
continue
- normalized_tags.append(self.normalize_tag(tag))
+ normalized_tags.append(_normalize_tag(tag, max_len))
friendly_tags.append(tag)
genre_tags_dict = dict(zip(friendly_tags,normalized_tags))
@@ -1941,8 +1977,6 @@ class CatalogBuilder(object):
self.update_progress_full_step(_("Genres HTML"))
- self.genre_tags_dict = self.filter_db_tags()
-
# Extract books matching filtered_tags
genre_list = []
for friendly_tag in sorted(self.genre_tags_dict, key=sort_key):
@@ -2024,10 +2058,11 @@ class CatalogBuilder(object):
books_by_current_author += 1
# Write the genre book list as an article
- titles_spanned = self.generate_html_by_genre(genre, True if index==0 else False,
- genre_tag_set[genre],
- "%s/Genre_%s.html" % (self.content_dir,
- genre))
+ outfile = "%s/Genre_%s.html" % (self.content_dir, genre)
+ titles_spanned = self.generate_html_by_genre(genre,
+ True if index==0 else False,
+ genre_tag_set[genre],
+ outfile)
tag_file = "content/Genre_%s.html" % genre
master_genre_list.append({'tag':genre,
@@ -2549,7 +2584,7 @@ class CatalogBuilder(object):
for (i, tag) in enumerate(sorted(book.get('tags', []))):
aTag = Tag(_soup,'a')
if self.opts.generate_genres:
- aTag['href'] = "Genre_%s.html" % self.normalize_tag(tag)
+ aTag['href'] = "Genre_%s.html" % self.genre_tags_dict[tag]
aTag.insert(0,escape(NavigableString(tag)))
genresTag.insert(gtc, aTag)
gtc += 1
@@ -4603,28 +4638,6 @@ class CatalogBuilder(object):
return merged
- def normalize_tag(self, tag):
- """ Generate an XHTML-legal anchor string from tag.
-
- Parse tag for non-ascii, convert to unicode name.
-
- Args:
- tags (str): tag name possible containing symbols
-
- Return:
- normalized (str): unicode names substituted for non-ascii chars
- """
-
- normalized = massaged = re.sub('\s','',ascii_text(tag).lower())
- if re.search('\W',normalized):
- normalized = ''
- for c in massaged:
- if re.search('\W',c):
- normalized += self.generate_unicode_name(c)
- else:
- normalized += c
- return normalized
-
def process_exclusions(self, data_set):
""" Filter data_set based on exclusion_rules.
@@ -4697,6 +4710,43 @@ class CatalogBuilder(object):
else:
return data_set
+ def relist_multiple_authors(self, books_by_author):
+ """ Create multiple entries for books with multiple authors
+
+ Given a list of books by author, scan list for books with multiple
+ authors. Add a cloned copy of the book per additional author.
+
+ Args:
+ books_by_author (list): book list possibly containing books
+ with multiple authors
+
+ Return:
+ (list): books_by_author with additional cloned entries for books with
+ multiple authors
+ """
+
+ multiple_author_books = []
+
+ # Find the multiple author books
+ for book in books_by_author:
+ if len(book['authors']) > 1:
+ multiple_author_books.append(book)
+
+ for book in multiple_author_books:
+ cloned_authors = list(book['authors'])
+ for x, author in enumerate(book['authors']):
+ if x:
+ first_author = cloned_authors.pop(0)
+ cloned_authors.append(first_author)
+ new_book = deepcopy(book)
+ new_book['author'] = ' & '.join(cloned_authors)
+ new_book['authors'] = list(cloned_authors)
+ asl = [author_to_author_sort(auth) for auth in cloned_authors]
+ new_book['author_sort'] = ' & '.join(asl)
+ books_by_author.append(new_book)
+
+ return books_by_author
+
def update_progress_full_step(self, description):
""" Update calibre's job status UI.