From 2a4d733eace934ebab115324033f981d41c4198e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 18 Jul 2018 14:30:23 +0530
Subject: [PATCH] Intelligent name capitalization for the author mapper

---
 src/calibre/ebooks/metadata/author_mapper.py | 37 ++++++++++++++++++--
 src/calibre/ebooks/metadata/sources/base.py  | 30 +---------------
 2 files changed, 35 insertions(+), 32 deletions(-)
diff --git a/src/calibre/ebooks/metadata/author_mapper.py b/src/calibre/ebooks/metadata/author_mapper.py
index 38996cb83f..d4e10d118b 100644
--- a/src/calibre/ebooks/metadata/author_mapper.py
+++ b/src/calibre/ebooks/metadata/author_mapper.py
@@ -2,10 +2,41 @@
 # vim:fileencoding=utf-8
 # License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
 
-from __future__ import (unicode_literals, division, absolute_import,
-                        print_function)
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import re
 from collections import deque
 
+from calibre.utils.icu import capitalize, lower, upper
+
+
+def cap_author_token(token):
+    lt = lower(token)
+    if lt in ('von', 'de', 'el', 'van', 'le'):
+        return lt
+    # Initials only: two or more non-digit word characters, each followed by a period (no digits, no special characters)
+    if re.match(r'([^\d\W]\.){2,}$', lt, re.UNICODE) is not None:
+        # Normalize tokens of the form J.K. to J. K.
+        parts = token.split('.')
+        return '. '.join(map(capitalize, parts)).strip()
+    scots_name = None
+    for x in ('mc', 'mac'):
+        if (token.lower().startswith(x) and len(token) > len(x) and
+                (
+                    token[len(x)] == upper(token[len(x)]) or
+                    lt == token
+                )):
+            scots_name = len(x)
+            break
+    ans = capitalize(token)
+    if scots_name is not None:
+        ans = ans[:scots_name] + upper(ans[scots_name]) + ans[scots_name+1:]
+    for x in ('-', "'"):
+        idx = ans.find(x)
+        if idx > -1 and len(ans) > idx+2:
+            ans = ans[:idx+1] + upper(ans[idx+1]) + ans[idx+2:]
+    return ans
+
 
 def compile_pat(pat):
     import regex
@@ -74,7 +105,7 @@ def apply_rules(author, rules):
                         authors.appendleft(author)
                     break
                 if ac == 'capitalize':
-                    ans.append(author.capitalize())
+                    ans.append(' '.join(map(cap_author_token, author.split())))
                     break
                 if ac == 'lower':
                     ans.append(icu_lower(author))
diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index c8dfd02ac5..baf793b4d0 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -12,8 +12,8 @@ from future_builtins import map
 
 from calibre import browser, random_user_agent
 from calibre.customize import Plugin
-from calibre.utils.icu import capitalize, lower, upper
 from calibre.ebooks.metadata import check_isbn
+from calibre.ebooks.metadata.author_mapper import cap_author_token
 from calibre.utils.localization import canonicalize_lang, get_lang
 
 
@@ -128,34 +128,6 @@ def load_caches(dump):
             p.load_caches(cache)
 
 
-def cap_author_token(token):
-    lt = lower(token)
-    if lt in ('von', 'de', 'el', 'van', 'le'):
-        return lt
-    # no digits no spez. characters
-    if re.match(r'([^\d\W]\.){2,}$', lt, re.UNICODE) is not None:
-        # Normalize tokens of the form J.K. to J. K.
-        parts = token.split('.')
-        return '. '.join(map(capitalize, parts)).strip()
-    scots_name = None
-    for x in ('mc', 'mac'):
-        if (token.lower().startswith(x) and len(token) > len(x) and
-                (
-                    token[len(x)] == upper(token[len(x)]) or
-                    lt == token
-                )):
-            scots_name = len(x)
-            break
-    ans = capitalize(token)
-    if scots_name is not None:
-        ans = ans[:scots_name] + upper(ans[scots_name]) + ans[scots_name+1:]
-    for x in ('-', "'"):
-        idx = ans.find(x)
-        if idx > -1 and len(ans) > idx+2:
-            ans = ans[:idx+1] + upper(ans[idx+1]) + ans[idx+2:]
-    return ans
-
-
 def fixauthors(authors):
     if not authors:
         return authors