Add a tweak that controls what words are treated as suffixes when generating an author sort string from an author name. Also fix #782551 (authorsort error on brackets)

This commit is contained in:
Kovid Goyal 2011-05-17 14:31:54 -06:00
parent 9908fc6632
commit 1f31873432
3 changed files with 54 additions and 14 deletions

View File

@ -41,14 +41,19 @@ authors_completer_append_separator = False
#: Author sort name algorithm
# The algorithm used to copy author to author_sort
# Possible values are:
# invert: use "fn ln" -> "ln, fn" (the default algorithm)
# invert: use "fn ln" -> "ln, fn"
# copy : copy author to author_sort without modification
# comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
# nocomma : "fn ln" -> "ln fn" (without the comma)
# When this tweak is changed, the author_sort values stored with each author
# must be recomputed by right-clicking on an author in the left-hand tags pane,
# selecting 'manage authors', and pressing 'Recalculate all author sort values'.
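# The author name suffixes are words that are not treated as the last name
# when they occur at the end of an author name; they are kept at the end of
# the sort string instead (e.g. "John Smith Jr" -> "Smith, John Jr"). The case
# of the suffix is ignored and a trailing period is handled automatically.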
author_sort_copy_method = 'comma'
author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
'MD', 'M.D', 'I', 'II', 'III', 'IV')
#: Use author sort in Tag Browser
# Set which author field to display in the tags pane (the list of authors,

View File

@ -630,6 +630,24 @@ def human_readable(size):
size = size[:-2]
return size + " " + suffix
def remove_bracketed_text(src,
        brackets={u'(':u')', u'[':u']', u'{':u'}'}):
    '''
    Return ``src`` (converted with force_unicode) with all bracketed text
    removed.

    ``brackets`` maps each opening bracket character to its closing
    character. A separate nesting count is kept per bracket type. Every
    bracket character is dropped, a closing bracket with no matching opener
    is simply discarded, and ordinary characters are kept only while no
    bracket of any type is open. Text following an opening bracket that is
    never closed is therefore dropped as well.
    '''
    from collections import Counter

    text = force_unicode(src)
    # closing bracket -> the opening bracket it pairs with
    opener_for = dict([(closer, opener)
        for opener, closer in brackets.iteritems()])
    depth = Counter()
    kept = []
    for ch in text:
        if ch in brackets:
            depth[ch] += 1
            continue
        if ch in opener_for:
            opener = opener_for[ch]
            # Never let a stray closing bracket push the count negative
            if depth[opener] > 0:
                depth[opener] -= 1
            continue
        if sum(depth.itervalues()) < 1:
            kept.append(ch)
    return u''.join(kept)
if isosx:
import glob, shutil
fdir = os.path.expanduser('~/.fonts')

View File

@ -10,7 +10,7 @@ import os, sys, re
from urllib import unquote, quote
from urlparse import urlparse
from calibre import relpath, guess_type
from calibre import relpath, guess_type, remove_bracketed_text
from calibre.utils.config import tweaks
@ -27,20 +27,37 @@ def authors_to_string(authors):
else:
return ''
# NOTE(review): this region appears to be a diff rendered without +/- markers
# or indentation, so the old and new bodies of author_to_author_sort are
# interleaved. The annotations below mark which lines seem to belong to which
# version -- confirm against the real file before relying on them.
#
# author_to_author_sort converts an author name into its sort form, governed
# by the 'author_sort_copy_method' tweak.
#
# Regex matching an opening bracket, then as little as possible, then the
# first closing bracket of any kind; it does not balance nested brackets.
# Only referenced by the `_bracket_pat.sub(...)` line below (presumably old).
_bracket_pat = re.compile(r'[\[({].*?[})\]]')
# Presumably the old signature: the method always comes from the tweak.
def author_to_author_sort(author):
# Presumably the new signature: method=None falls back to the tweak (see the
# `if method is None` check further down).
def author_to_author_sort(author, method=None):
# Falsy author (None or empty string): return an empty string.
if not author:
# (old-looking) byte-string form of the empty return
return ''
# (old-looking) method lookup and early exit; its body seems to be the
# `return author` line further down, not the `return u''` that follows here.
method = tweaks['author_sort_copy_method']
if method == 'copy' or (method == 'comma' and ',' in author):
# (new-looking) unicode form of the empty return for a falsy author
return u''
# (new-looking) drop bracketed text and surrounding whitespace, then split on
# whitespace; the original `author` is kept for the "return unchanged" paths.
sauthor = remove_bracketed_text(author).strip()
tokens = sauthor.split()
# Fewer than two tokens: nothing to reorder, return the name unchanged.
if len(tokens) < 2:
return author
# (old-looking) regex-based bracket removal and inversion; a trailing
# 'Inc'/'Inc.' suppresses the inversion.
author = _bracket_pat.sub('', author).strip()
tokens = author.split()
if tokens and tokens[-1] not in ('Inc.', 'Inc'):
tokens = tokens[-1:] + tokens[:-1]
if len(tokens) > 1 and method != 'nocomma':
tokens[0] += ','
return ' '.join(tokens)
# (new-looking from here on) resolve the method from the tweak only when the
# caller did not pass one.
if method is None:
method = tweaks['author_sort_copy_method']
# 'copy': the sort string is the author name unchanged.
if method == u'copy':
return author
# Lower-cased suffix words from the tweak, plus each one with a trailing
# period, so matching is case-insensitive and tolerant of a final '.'.
suffixes = set([x.lower() for x in tweaks['author_name_suffixes']])
suffixes |= set([x+u'.' for x in suffixes])
last = tokens[-1].lower()
suffix = None
# Set a trailing suffix aside (original casing kept) so it is not taken for
# the last name.
if last in suffixes:
suffix = tokens[-1]
tokens = tokens[:-1]
# 'comma': a name that still contains a comma once the suffix is removed is
# returned unchanged.
if method == u'comma' and u',' in u''.join(tokens):
return author
# Move the last token to the front: "fn ln" -> "ln fn".
atokens = tokens[-1:] + tokens[:-1]
# Re-append the suffix at the very end of the sort string.
if suffix:
atokens.append(suffix)
# Every method except 'nocomma' puts a comma after the leading token.
if method != u'nocomma' and len(atokens) > 1:
atokens[0] += u','
return u' '.join(atokens)
def authors_to_sort_string(authors):
    '''
    Return the sort strings of all ``authors`` joined with ' & '.

    Each author is converted with author_to_author_sort using its default
    arguments.
    '''
    sort_names = [author_to_author_sort(name) for name in authors]
    return ' & '.join(sort_names)