Add a tweak that controls what words are treated as suffixes when generating an author sort string from an author name. Also fix #782551 (authorsort error on brackets)

This commit is contained in:
Kovid Goyal 2011-05-17 14:31:54 -06:00
parent 9908fc6632
commit 1f31873432
3 changed files with 54 additions and 14 deletions

View File

@ -41,14 +41,19 @@ authors_completer_append_separator = False
#: Author sort name algorithm #: Author sort name algorithm
# The algorithm used to copy author to author_sort # The algorithm used to copy author to author_sort
# Possible values are: # Possible values are:
# invert: use "fn ln" -> "ln, fn" (the default algorithm) # invert: use "fn ln" -> "ln, fn"
# copy : copy author to author_sort without modification # copy : copy author to author_sort without modification
# comma : use 'copy' if there is a ',' in the name, otherwise use 'invert' # comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
# nocomma : "fn ln" -> "ln fn" (without the comma) # nocomma : "fn ln" -> "ln fn" (without the comma)
# When this tweak is changed, the author_sort values stored with each author # When this tweak is changed, the author_sort values stored with each author
# must be recomputed by right-clicking on an author in the left-hand tags pane, # must be recomputed by right-clicking on an author in the left-hand tags pane,
# selecting 'manage authors', and pressing 'Recalculate all author sort values'. # selecting 'manage authors', and pressing 'Recalculate all author sort values'.
# The author name suffixes are words that are ignored when they occur at the
# end of an author name. The case of the suffix is ignored and trailing
# periods are automatically handled.
author_sort_copy_method = 'comma' author_sort_copy_method = 'comma'
author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
'MD', 'M.D', 'I', 'II', 'III', 'IV')
#: Use author sort in Tag Browser #: Use author sort in Tag Browser
# Set which author field to display in the tags pane (the list of authors, # Set which author field to display in the tags pane (the list of authors,

View File

@ -630,6 +630,24 @@ def human_readable(size):
size = size[:-2] size = size[:-2]
return size + " " + suffix return size + " " + suffix
def remove_bracketed_text(src, brackets=None):
    """
    Return ``src`` as a unicode string with all bracketed text removed.

    :param src: The text to process. It is passed through ``force_unicode``
                before being scanned character by character.
    :param brackets: Mapping of opening bracket -> closing bracket. When
                     ``None`` (the default), round, square and curly
                     brackets are used.
    :return: Unicode string containing only the characters that were seen
             while no bracket was open.

    Bracket characters themselves are never copied to the output. A closing
    bracket that has no matching open bracket is silently dropped, and an
    opening bracket that is never closed suppresses everything after it.
    """
    from collections import Counter

    # Build the default per call instead of sharing one mutable dict object
    # between all invocations via the function signature.
    if brackets is None:
        brackets = {u'(': u')', u'[': u']', u'{': u'}'}

    src = force_unicode(src)
    # Reverse lookup: closing bracket -> the opening bracket it matches.
    closers = dict((v, k) for k, v in brackets.items())

    counts = Counter()  # open-bracket counts, tracked per bracket type
    depth = 0           # running total of all currently open brackets
    buf = []
    for char in src:
        if char in brackets:
            counts[char] += 1
            depth += 1
        elif char in closers:
            opener = closers[char]
            # Only close a bracket of this type if one is actually open, so
            # that stray closing brackets cannot drive the counts negative.
            if counts[opener] > 0:
                counts[opener] -= 1
                depth -= 1
        elif depth < 1:
            # Ordinary character outside of any bracket: keep it.
            buf.append(char)
    return u''.join(buf)
if isosx: if isosx:
import glob, shutil import glob, shutil
fdir = os.path.expanduser('~/.fonts') fdir = os.path.expanduser('~/.fonts')

View File

@ -10,7 +10,7 @@ import os, sys, re
from urllib import unquote, quote from urllib import unquote, quote
from urlparse import urlparse from urlparse import urlparse
from calibre import relpath, guess_type from calibre import relpath, guess_type, remove_bracketed_text
from calibre.utils.config import tweaks from calibre.utils.config import tweaks
@ -27,20 +27,37 @@ def authors_to_string(authors):
else: else:
return '' return ''
_bracket_pat = re.compile(r'[\[({].*?[})\]]') def author_to_author_sort(author, method=None):
def author_to_author_sort(author):
if not author: if not author:
return '' return u''
method = tweaks['author_sort_copy_method'] sauthor = remove_bracketed_text(author).strip()
if method == 'copy' or (method == 'comma' and ',' in author): tokens = sauthor.split()
if len(tokens) < 2:
return author return author
author = _bracket_pat.sub('', author).strip() if method is None:
tokens = author.split() method = tweaks['author_sort_copy_method']
if tokens and tokens[-1] not in ('Inc.', 'Inc'): if method == u'copy':
tokens = tokens[-1:] + tokens[:-1] return author
if len(tokens) > 1 and method != 'nocomma': suffixes = set([x.lower() for x in tweaks['author_name_suffixes']])
tokens[0] += ',' suffixes |= set([x+u'.' for x in suffixes])
return ' '.join(tokens)
last = tokens[-1].lower()
suffix = None
if last in suffixes:
suffix = tokens[-1]
tokens = tokens[:-1]
if method == u'comma' and u',' in u''.join(tokens):
return author
atokens = tokens[-1:] + tokens[:-1]
if suffix:
atokens.append(suffix)
if method != u'nocomma' and len(atokens) > 1:
atokens[0] += u','
return u' '.join(atokens)
def authors_to_sort_string(authors): def authors_to_sort_string(authors):
return ' & '.join(map(author_to_author_sort, authors)) return ' & '.join(map(author_to_author_sort, authors))