When automatically generating the author sort value from an author name, ignore common prefixes such as Mr., Dr., etc. The list of prefixes is controllable via a tweak. Also add a tweak to control how a string is split into multiple authors. Fixes #795984 ([Request] Extra author sorting options)

2025-07-08 10:44:09 -04:00 · 2011-09-04 09:52:22 -06:00 · 2011-09-04 09:52:22 -06:00 · c77684a033
commit c77684a033
parent 91b769bd1f
2 changed files with 28 additions and 2 deletions
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@@ -70,9 +70,18 @@ author_sort_copy_method = 'comma'
 author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
                        'MD', 'M.D', 'I', 'II', 'III', 'IV',
                        'Junior', 'Senior')
+author_name_prefixes = ('Mr', 'Mrs', 'Ms', 'Dr', 'Prof')
 author_name_copywords = ('Corporation', 'Company', 'Co.', 'Agency', 'Council',
        'Committee', 'Inc.', 'Institute', 'Society', 'Club', 'Team')

+#: Splitting multiple author names
+# By default, calibre splits a string containing multiple author names on
+# ampersands and on the words "and" and "with" (matched case-insensitively,
+# along with any preceding comma). You can customize the splitting by
+# changing the regular expression below; strings are split wherever it matches.
+# Default: r'(?i),?\s+(and|with)\s+'
+authors_split_regex = r'(?i),?\s+(and|with)\s+'
+
 #: Use author sort in Tag Browser
 # Set which author field to display in the tags pane (the list of authors,
 # series, publishers etc on the left hand side). The choices are author and
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -10,11 +10,17 @@ import os, sys, re
 from urllib import unquote, quote
 from urlparse import urlparse

-from calibre import relpath, guess_type, remove_bracketed_text
+from calibre import relpath, guess_type, remove_bracketed_text, prints

 from calibre.utils.config import tweaks

-_author_pat = re.compile(',?\s+(and|with)\s+', re.IGNORECASE)
+try:
+    _author_pat = re.compile(tweaks['authors_split_regex'])
+except:
+    prints ('Author split regexp:', tweaks['authors_split_regex'],
+            'is invalid, using default')
+    _author_pat = re.compile(r'(?i),?\s+(and|with)\s+')
+
 def string_to_authors(raw):
    raw = raw.replace('&&', u'\uffff')
    raw = _author_pat.sub('&', raw)
@@ -45,6 +51,17 @@ def author_to_author_sort(author, method=None):
    if method == u'copy':
        return author

+    prefixes = set([x.lower() for x in tweaks['author_name_prefixes']])
+    prefixes |= set([x+u'.' for x in prefixes])
+    while True:
+        if not tokens:
+            return author
+        tok = tokens[0].lower()
+        if tok in prefixes:
+            tokens = tokens[1:]
+        else:
+            break
+
    suffixes = set([x.lower() for x in tweaks['author_name_suffixes']])
    suffixes |= set([x+u'.' for x in suffixes])