mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Normalized genre names truncated to available width.
This commit is contained in:
parent
b306cfb3ae
commit
5cdbd831af
@ -35,3 +35,4 @@ nbproject/
|
||||
.settings/
|
||||
*.DS_Store
|
||||
calibre_plugins/
|
||||
./src/calibre/gui2/catalog/catalog_csv_xml.ui.autosave
|
||||
|
@ -14,11 +14,12 @@ from calibre.customize.conversion import DummyReporter
|
||||
from calibre.customize.ui import output_profiles
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
|
||||
from calibre.ebooks.chardet import substitute_entites
|
||||
from calibre.ebooks.metadata import author_to_author_sort
|
||||
from calibre.library.catalogs import AuthorSortMismatchException, EmptyCatalogException
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from calibre.utils.config import config_dir
|
||||
from calibre.utils.date import format_date, is_date_undefined, now as nowf
|
||||
from calibre.utils.filenames import ascii_text
|
||||
from calibre.utils.filenames import ascii_text, shorten_components_to
|
||||
from calibre.utils.icu import capitalize, collation_order, sort_key
|
||||
from calibre.utils.magick.draw import thumbnail
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
@ -109,6 +110,7 @@ class CatalogBuilder(object):
|
||||
self.stylesheet = stylesheet
|
||||
self.cache_dir = os.path.join(config_dir, 'caches', 'catalog')
|
||||
self.catalog_path = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='')
|
||||
self.content_dir = os.path.join(self.catalog_path, "content")
|
||||
self.excluded_tags = self.get_excluded_tags()
|
||||
self.generate_for_kindle_azw3 = True if (_opts.fmt == 'azw3' and
|
||||
_opts.output_profile and
|
||||
@ -127,12 +129,13 @@ class CatalogBuilder(object):
|
||||
self.books_by_title = None
|
||||
self.books_by_title_no_series_prefix = None
|
||||
self.books_to_catalog = None
|
||||
self.content_dir = os.path.join(self.catalog_path, "content")
|
||||
self.current_step = 0.0
|
||||
self.error = []
|
||||
self.generate_recently_read = False
|
||||
self.genres = []
|
||||
self.genre_tags_dict = None
|
||||
self.genre_tags_dict = \
|
||||
self.filter_db_tags(max_len = 245 - len("%s/Genre_.html" % self.content_dir)) \
|
||||
if self.opts.generate_genres else None
|
||||
self.html_filelist_1 = []
|
||||
self.html_filelist_2 = []
|
||||
self.merge_comments_rule = dict(zip(['field','position','hr'],
|
||||
@ -505,7 +508,7 @@ class CatalogBuilder(object):
|
||||
if not os.path.isdir(images_path):
|
||||
os.makedirs(images_path)
|
||||
|
||||
def detect_author_sort_mismatches(self):
|
||||
def detect_author_sort_mismatches(self, books_to_test):
|
||||
""" Detect author_sort mismatches.
|
||||
|
||||
Sort by author, look for inconsistencies in author_sort among
|
||||
@ -513,17 +516,18 @@ class CatalogBuilder(object):
|
||||
annoyance for EPUB.
|
||||
|
||||
Inputs:
|
||||
self.books_to_catalog (list): list of books to catalog
|
||||
books_by_author (list): list of books to test, possibly unsorted
|
||||
|
||||
Output:
|
||||
self.books_by_author (list): sorted by author
|
||||
(none)
|
||||
|
||||
Exceptions:
|
||||
AuthorSortMismatchException: author_sort mismatch detected
|
||||
"""
|
||||
|
||||
self.books_by_author = sorted(list(self.books_to_catalog), key=self._kf_books_by_author_sorter_author)
|
||||
authors = [(record['author'], record['author_sort']) for record in self.books_by_author]
|
||||
books_by_author = sorted(list(books_to_test), key=self._kf_books_by_author_sorter_author)
|
||||
|
||||
authors = [(record['author'], record['author_sort']) for record in books_by_author]
|
||||
current_author = authors[0]
|
||||
for (i,author) in enumerate(authors):
|
||||
if author != current_author and i:
|
||||
@ -701,6 +705,7 @@ class CatalogBuilder(object):
|
||||
def fetch_books_by_author(self):
|
||||
""" Generate a list of books sorted by author.
|
||||
|
||||
For books with multiple authors, relist book with additional authors.
|
||||
Sort the database by author. Report author_sort inconsistencies as warning when
|
||||
building EPUB or MOBI, error when building MOBI. Collect a list of unique authors
|
||||
to self.authors.
|
||||
@ -720,25 +725,29 @@ class CatalogBuilder(object):
|
||||
|
||||
self.update_progress_full_step(_("Sorting database"))
|
||||
|
||||
self.detect_author_sort_mismatches()
|
||||
books_by_author = list(self.books_to_catalog)
|
||||
self.detect_author_sort_mismatches(books_by_author)
|
||||
books_by_author = self.relist_multiple_authors(books_by_author)
|
||||
|
||||
#books_by_author = sorted(list(books_by_author), key=self._kf_books_by_author_sorter_author)
|
||||
|
||||
# Sort authors using sort_key to normalize accented letters
|
||||
# Determine the longest author_sort length before sorting
|
||||
asl = [i['author_sort'] for i in self.books_by_author]
|
||||
asl = [i['author_sort'] for i in books_by_author]
|
||||
las = max(asl, key=len)
|
||||
self.books_by_author = sorted(self.books_to_catalog,
|
||||
|
||||
books_by_author = sorted(books_by_author,
|
||||
key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las))))
|
||||
|
||||
if self.DEBUG and self.opts.verbose:
|
||||
tl = [i['title'] for i in self.books_by_author]
|
||||
tl = [i['title'] for i in books_by_author]
|
||||
lt = max(tl, key=len)
|
||||
fs = '{:<6}{:<%d} {:<%d} {!s}' % (len(lt),len(las))
|
||||
print(fs.format('','Title','Author','Series'))
|
||||
for i in self.books_by_author:
|
||||
for i in books_by_author:
|
||||
print(fs.format('', i['title'],i['author_sort'],i['series']))
|
||||
|
||||
# Build the unique_authors set from existing data
|
||||
authors = [(record['author'], capitalize(record['author_sort'])) for record in self.books_by_author]
|
||||
authors = [(record['author'], capitalize(record['author_sort'])) for record in books_by_author]
|
||||
|
||||
# authors[] contains a list of all book authors, with multiple entries for multiple books by author
|
||||
# authors[]: (([0]:friendly [1]:sort))
|
||||
@ -776,6 +785,7 @@ class CatalogBuilder(object):
|
||||
author[2])).encode('utf-8'))
|
||||
|
||||
self.authors = unique_authors
|
||||
self.books_by_author = books_by_author
|
||||
return True
|
||||
|
||||
def fetch_books_by_title(self):
|
||||
@ -863,15 +873,15 @@ class CatalogBuilder(object):
|
||||
this_title['series_index'] = 0.0
|
||||
|
||||
this_title['title_sort'] = self.generate_sort_title(this_title['title'])
|
||||
if 'authors' in record:
|
||||
# from calibre.ebooks.metadata import authors_to_string
|
||||
# return authors_to_string(self.authors)
|
||||
|
||||
if 'authors' in record:
|
||||
this_title['authors'] = record['authors']
|
||||
# Synthesize author attribution from authors list
|
||||
if record['authors']:
|
||||
this_title['author'] = " & ".join(record['authors'])
|
||||
else:
|
||||
this_title['author'] = 'Unknown'
|
||||
this_title['author'] = _('Unknown')
|
||||
this_title['authors'] = [this_title['author']]
|
||||
|
||||
if 'author_sort' in record and record['author_sort'].strip():
|
||||
this_title['author_sort'] = record['author_sort']
|
||||
@ -1093,7 +1103,7 @@ class CatalogBuilder(object):
|
||||
|
||||
self.bookmarked_books = bookmarks
|
||||
|
||||
def filter_db_tags(self):
|
||||
def filter_db_tags(self, max_len):
|
||||
""" Remove excluded tags from data set, return normalized genre list.
|
||||
|
||||
Filter all db tags, removing excluded tags supplied in opts.
|
||||
@ -1101,13 +1111,13 @@ class CatalogBuilder(object):
|
||||
tags are flattened to alphanumeric ascii_text.
|
||||
|
||||
Args:
|
||||
(none)
|
||||
max_len: maximum length of normalized tag to fit within OS constraints
|
||||
|
||||
Return:
|
||||
genre_tags_dict (dict): dict of filtered, normalized tags in data set
|
||||
"""
|
||||
|
||||
def _format_tag_list(tags, indent=2, line_break=70, header='Tag list'):
|
||||
def _format_tag_list(tags, indent=1, line_break=70, header='Tag list'):
|
||||
def _next_tag(sorted_tags):
|
||||
for (i, tag) in enumerate(sorted_tags):
|
||||
if i < len(tags) - 1:
|
||||
@ -1126,6 +1136,31 @@ class CatalogBuilder(object):
|
||||
out_str = ' ' * (indent + 1)
|
||||
return ans + out_str
|
||||
|
||||
def _normalize_tag(tag, max_len):
|
||||
""" Generate an XHTML-legal anchor string from tag.
|
||||
|
||||
Parse tag for non-ascii, convert to unicode name.
|
||||
|
||||
Args:
|
||||
tags (str): tag name possible containing symbols
|
||||
max_len (int): maximum length of tag
|
||||
|
||||
Return:
|
||||
normalized (str): unicode names substituted for non-ascii chars,
|
||||
clipped to max_len
|
||||
"""
|
||||
|
||||
normalized = massaged = re.sub('\s','',ascii_text(tag).lower())
|
||||
if re.search('\W',normalized):
|
||||
normalized = ''
|
||||
for c in massaged:
|
||||
if re.search('\W',c):
|
||||
normalized += self.generate_unicode_name(c)
|
||||
else:
|
||||
normalized += c
|
||||
shortened = shorten_components_to(max_len, [normalized])[0]
|
||||
return shortened
|
||||
|
||||
# Entry point
|
||||
normalized_tags = []
|
||||
friendly_tags = []
|
||||
@ -1144,7 +1179,7 @@ class CatalogBuilder(object):
|
||||
if tag == ' ':
|
||||
continue
|
||||
|
||||
normalized_tags.append(self.normalize_tag(tag))
|
||||
normalized_tags.append(_normalize_tag(tag, max_len))
|
||||
friendly_tags.append(tag)
|
||||
|
||||
genre_tags_dict = dict(zip(friendly_tags,normalized_tags))
|
||||
@ -1941,8 +1976,6 @@ class CatalogBuilder(object):
|
||||
|
||||
self.update_progress_full_step(_("Genres HTML"))
|
||||
|
||||
self.genre_tags_dict = self.filter_db_tags()
|
||||
|
||||
# Extract books matching filtered_tags
|
||||
genre_list = []
|
||||
for friendly_tag in sorted(self.genre_tags_dict, key=sort_key):
|
||||
@ -2024,10 +2057,11 @@ class CatalogBuilder(object):
|
||||
books_by_current_author += 1
|
||||
|
||||
# Write the genre book list as an article
|
||||
titles_spanned = self.generate_html_by_genre(genre, True if index==0 else False,
|
||||
outfile = "%s/Genre_%s.html" % (self.content_dir, genre)
|
||||
titles_spanned = self.generate_html_by_genre(genre,
|
||||
True if index==0 else False,
|
||||
genre_tag_set[genre],
|
||||
"%s/Genre_%s.html" % (self.content_dir,
|
||||
genre))
|
||||
outfile)
|
||||
|
||||
tag_file = "content/Genre_%s.html" % genre
|
||||
master_genre_list.append({'tag':genre,
|
||||
@ -2549,7 +2583,7 @@ class CatalogBuilder(object):
|
||||
for (i, tag) in enumerate(sorted(book.get('tags', []))):
|
||||
aTag = Tag(_soup,'a')
|
||||
if self.opts.generate_genres:
|
||||
aTag['href'] = "Genre_%s.html" % self.normalize_tag(tag)
|
||||
aTag['href'] = "Genre_%s.html" % self.genre_tags_dict[tag]
|
||||
aTag.insert(0,escape(NavigableString(tag)))
|
||||
genresTag.insert(gtc, aTag)
|
||||
gtc += 1
|
||||
@ -4603,28 +4637,6 @@ class CatalogBuilder(object):
|
||||
|
||||
return merged
|
||||
|
||||
def normalize_tag(self, tag):
|
||||
""" Generate an XHTML-legal anchor string from tag.
|
||||
|
||||
Parse tag for non-ascii, convert to unicode name.
|
||||
|
||||
Args:
|
||||
tags (str): tag name possible containing symbols
|
||||
|
||||
Return:
|
||||
normalized (str): unicode names substituted for non-ascii chars
|
||||
"""
|
||||
|
||||
normalized = massaged = re.sub('\s','',ascii_text(tag).lower())
|
||||
if re.search('\W',normalized):
|
||||
normalized = ''
|
||||
for c in massaged:
|
||||
if re.search('\W',c):
|
||||
normalized += self.generate_unicode_name(c)
|
||||
else:
|
||||
normalized += c
|
||||
return normalized
|
||||
|
||||
def process_exclusions(self, data_set):
|
||||
""" Filter data_set based on exclusion_rules.
|
||||
|
||||
@ -4697,6 +4709,51 @@ class CatalogBuilder(object):
|
||||
else:
|
||||
return data_set
|
||||
|
||||
def relist_multiple_authors(self, books_by_author):
|
||||
""" Create multiple entries for books with multiple authors
|
||||
|
||||
Given a list of books by author, scan list for books with multiple
|
||||
authors. Add a cloned copy of the book per additional author.
|
||||
|
||||
Args:
|
||||
books_by_author (list): book list possibly containing books
|
||||
with multiple authors
|
||||
|
||||
Return:
|
||||
(list): books_by_author with additional entries for books with
|
||||
multiple authors
|
||||
"""
|
||||
|
||||
# Scan list looking for entries with len(authors) > 1
|
||||
# Clone multiples, swapping additional author into first place,
|
||||
# computing author_sort to match
|
||||
|
||||
# from calibre.ebooks.metadata import authors_to_string
|
||||
# return authors_to_string(self.authors)
|
||||
|
||||
multiple_author_books = []
|
||||
|
||||
# Find the multiple author books
|
||||
for book in books_by_author:
|
||||
if len(book['authors']) > 1:
|
||||
multiple_author_books.append(book)
|
||||
|
||||
for book in multiple_author_books:
|
||||
cloned_authors = list(book['authors'])
|
||||
for x, author in enumerate(book['authors']):
|
||||
if x:
|
||||
first_author = cloned_authors.pop(0)
|
||||
cloned_authors.append(first_author)
|
||||
new_book = deepcopy(book)
|
||||
new_book['author'] = ' & '.join(cloned_authors)
|
||||
new_book['authors'] = list(cloned_authors)
|
||||
asl = [author_to_author_sort(auth) for auth in cloned_authors]
|
||||
new_book['author_sort'] = ' & '.join(asl)
|
||||
#print("'%s' (%s) (%s)" % (new_book['title'], new_book['authors'], new_book['author_sort']))
|
||||
books_by_author.append(new_book)
|
||||
|
||||
return books_by_author
|
||||
|
||||
def update_progress_full_step(self, description):
|
||||
""" Update calibre's job status UI.
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user