Add a tweak that controls what words are treated as suffixes when generating an author sort string from an author name. Also fix #782551 (authorsort error on brackets)

2025-07-09 03:04:10 -04:00 · 2011-05-17 14:31:54 -06:00 · 2011-05-17 14:31:54 -06:00 · 1f31873432
commit 1f31873432
parent 9908fc6632
3 changed files with 54 additions and 14 deletions
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@ -41,14 +41,19 @@ authors_completer_append_separator = False
 #: Author sort name algorithm
 # The algorithm used to copy author to author_sort
 # Possible values are:
-#  invert: use "fn ln" -> "ln, fn" (the default algorithm)
+#  invert: use "fn ln" -> "ln, fn"
 #  copy  : copy author to author_sort without modification
 #  comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
 #  nocomma : "fn ln" -> "ln fn" (without the comma)
 # When this tweak is changed, the author_sort values stored with each author
 # must be recomputed by right-clicking on an author in the left-hand tags pane,
 # selecting 'manage authors', and pressing 'Recalculate all author sort values'.
+# The author name suffixes are words that are not treated as the last name
+# when they occur at the end of an author name; they stay at the end of the
+# sort string. Suffix matching ignores case and an optional trailing period.
 author_sort_copy_method = 'comma'
+author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
+                        'MD', 'M.D', 'I', 'II', 'III', 'IV')

 #: Use author sort in Tag Browser
 # Set which author field to display in the tags pane (the list of authors,
--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -630,6 +630,24 @@ def human_readable(size):
        size = size[:-2]
    return size + " " + suffix

+def remove_bracketed_text(src,
+        brackets={u'(':u')', u'[':u']', u'{':u'}'}):
+    from collections import Counter
+    counts = Counter()
+    buf = []
+    src = force_unicode(src)
+    rmap = dict([(v, k) for k, v in brackets.iteritems()])
+    for char in src:
+        if char in brackets:
+            counts[char] += 1
+        elif char in rmap:
+            idx = rmap[char]
+            if counts[idx] > 0:
+                counts[idx] -= 1
+        elif sum(counts.itervalues()) < 1:
+            buf.append(char)
+    return u''.join(buf)
+
 if isosx:
    import glob, shutil
    fdir = os.path.expanduser('~/.fonts')
--- a/src/calibre/ebooks/metadata/init.py
+++ b/src/calibre/ebooks/metadata/init.py
@ -10,7 +10,7 @@ import os, sys, re
 from urllib import unquote, quote
 from urlparse import urlparse

-from calibre import relpath, guess_type
+from calibre import relpath, guess_type, remove_bracketed_text

 from calibre.utils.config import tweaks

@ -27,20 +27,37 @@ def authors_to_string(authors):
    else:
        return ''

-_bracket_pat = re.compile(r'[\[({].*?[})\]]')
-def author_to_author_sort(author):
+def author_to_author_sort(author, method=None):
    if not author:
-        return ''
-    method = tweaks['author_sort_copy_method']
-    if method == 'copy' or (method == 'comma' and ',' in author):
+        return u''
+    sauthor = remove_bracketed_text(author).strip()
+    tokens = sauthor.split()
+    if len(tokens) < 2:
        return author
-    author = _bracket_pat.sub('', author).strip()
-    tokens = author.split()
-    if tokens and tokens[-1] not in ('Inc.', 'Inc'):
-        tokens = tokens[-1:] + tokens[:-1]
-        if len(tokens) > 1 and method != 'nocomma':
-            tokens[0] += ','
-    return ' '.join(tokens)
+    if method is None:
+        method = tweaks['author_sort_copy_method']
+    if method == u'copy':
+        return author
+    suffixes = set([x.lower() for x in tweaks['author_name_suffixes']])
+    suffixes |= set([x+u'.' for x in suffixes])
+
+    last = tokens[-1].lower()
+    suffix = None
+    if last in suffixes:
+        suffix = tokens[-1]
+        tokens = tokens[:-1]
+
+    if method == u'comma' and u',' in u''.join(tokens):
+        return author
+
+    atokens = tokens[-1:] + tokens[:-1]
+    if suffix:
+        atokens.append(suffix)
+
+    if method != u'nocomma' and len(atokens) > 1:
+        atokens[0] += u','
+
+    return u' '.join(atokens)

 def authors_to_sort_string(authors):
    return ' & '.join(map(author_to_author_sort, authors))