From c77684a03314a93b9f8343e9ce99e2b7129098e4 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 4 Sep 2011 09:52:22 -0600
Subject: [PATCH] When automatically generating author sort for author name,
 ignore common prefixes like Mr. Dr. etc. Controllable via tweak. Also add a
 tweak to allow control of how a string is split up into multiple authors.
 Fixes #795984 ([Request] Extra author sorting options)

---
 resources/default_tweaks.py             |  9 +++++++++
 src/calibre/ebooks/metadata/__init__.py | 21 +++++++++++++++++++--
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py
index b385511d56..ead9995eb3 100644
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@@ -70,9 +70,18 @@ author_sort_copy_method = 'comma'
 author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
                         'MD', 'M.D', 'I', 'II', 'III', 'IV',
                         'Junior', 'Senior')
+author_name_prefixes = ('Mr', 'Mrs', 'Ms', 'Dr', 'Prof')
 author_name_copywords = ('Corporation', 'Company', 'Co.', 'Agency', 'Council',
         'Committee', 'Inc.', 'Institute', 'Society', 'Club', 'Team')
 
+#: Splitting multiple author names
+# By default, calibre splits a string containing multiple author names on
+# ampersands and on the words "and" and "with". To customize the splitting,
+# change the regular expression below: strings are split wherever it matches.
+# If the expression is not a valid regular expression, the default is used.
+# Default: r'(?i),?\s+(and|with)\s+'
+authors_split_regex = r'(?i),?\s+(and|with)\s+'
+
 #: Use author sort in Tag Browser
 # Set which author field to display in the tags pane (the list of authors,
 # series, publishers etc on the left hand side). The choices are author and
diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py
index a9816db5ae..c3a229fe3c 100644
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -10,11 +10,17 @@ import os, sys, re
 from urllib import unquote, quote
 from urlparse import urlparse
 
-from calibre import relpath, guess_type, remove_bracketed_text
+from calibre import relpath, guess_type, remove_bracketed_text, prints
 
 from calibre.utils.config import tweaks
 
-_author_pat = re.compile(',?\s+(and|with)\s+', re.IGNORECASE)
+try:  # the author-splitting pattern is user-configurable via a tweak
+    _author_pat = re.compile(tweaks['authors_split_regex'])
+except Exception:  # bad tweak value; do not trap KeyboardInterrupt/SystemExit
+    prints ('Author split regexp:', tweaks['authors_split_regex'],
+            'is invalid, using default')
+    _author_pat = re.compile(r'(?i),?\s+(and|with)\s+')  # built-in default
+
 def string_to_authors(raw):
     raw = raw.replace('&&', u'\uffff')
     raw = _author_pat.sub('&', raw)
@@ -45,6 +51,17 @@ def author_to_author_sort(author, method=None):
     if method == u'copy':
         return author
 
+    prefixes = set([x.lower() for x in tweaks['author_name_prefixes']])
+    prefixes |= set([x+u'.' for x in prefixes])
+    while True:
+        if not tokens:
+            return author
+        tok = tokens[0].lower()
+        if tok in prefixes:
+            tokens = tokens[1:]
+        else:
+            break
+
     suffixes = set([x.lower() for x in tweaks['author_name_suffixes']])
     suffixes |= set([x+u'.' for x in suffixes])