From f699b436a048e68ace6b49feea2dee5537ea84cc Mon Sep 17 00:00:00 2001
From: "Joseph R. Fox-Rabinovitz" <jfoxrabinovitz@gmail.com>
Date: Wed, 20 Jan 2021 12:31:35 -0500
Subject: [PATCH] Add author_use_surname_prefixes tweak, 'National' copyword, and tests

---
 resources/default_tweaks.py             |  6 ++--
 src/calibre/ebooks/metadata/__init__.py | 43 +++++++++++++++++++------
 2 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py
index cb54af8f9d..bb48897314 100644
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@@ -67,8 +67,10 @@ author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
                         'MD', 'M.D', 'I', 'II', 'III', 'IV',
                         'Junior', 'Senior')
 author_name_prefixes = ('Mr', 'Mrs', 'Ms', 'Dr', 'Prof')
-author_name_copywords = ('Corporation', 'Company', 'Co.', 'Agency', 'Council',
-        'Committee', 'Inc.', 'Institute', 'Society', 'Club', 'Team')
+author_name_copywords = ('Agency', 'Corporation', 'Company', 'Co.', 'Council',
+                         'Committee', 'Inc.', 'Institute', 'National',
+                         'Society', 'Club', 'Team')
+author_use_surname_prefixes = False
 author_surname_prefixes = ('da', 'de', 'di', 'la', 'le', 'van', 'von')
 
 #: Splitting multiple author names
diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py
index a517d566fb..cecd12d3ae 100644
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -86,9 +86,11 @@ def author_to_author_sort(author, method=None):
     if ltoks.intersection(copy_words):
         return author
 
-    author_surname_prefixes = frozenset(x.lower() for x in tweaks['author_surname_prefixes'])
-    if len(tokens) == 2 and tokens[0].lower() in author_surname_prefixes:
-        return author
+    author_use_surname_prefixes = tweaks['author_use_surname_prefixes']
+    if author_use_surname_prefixes:
+        author_surname_prefixes = frozenset(x.lower() for x in tweaks['author_surname_prefixes'])
+        if len(tokens) == 2 and tokens[0].lower() in author_surname_prefixes:
+            return author
 
     prefixes = {force_unicode(y).lower() for y in tweaks['author_name_prefixes']}
     prefixes |= {y+'.' for y in prefixes}
@@ -110,9 +112,10 @@ def author_to_author_sort(author, method=None):
 
     suffix = ' '.join(tokens[last + 1:])
 
-    if last > first and tokens[last - 1].lower() in author_surname_prefixes:
-        tokens[last - 1] += ' ' + tokens[last]
-        last -= 1
+    if author_use_surname_prefixes:
+        if last > first and tokens[last - 1].lower() in author_surname_prefixes:
+            tokens[last - 1] += ' ' + tokens[last]
+            last -= 1
 
     atokens = tokens[last:last + 1] + tokens[first:last]
     num_toks = len(atokens)
@@ -448,6 +451,7 @@ def rating_to_stars(value, allow_half_stars=False, star='★', half='⯨'):
 
 def find_tests():
     import unittest
+    from calibre.utils.config_base import Tweak
 
     class TestRemoveBracketedText(unittest.TestCase):
         def test_brackets(self):
@@ -495,6 +499,23 @@ def find_tests():
             self.check_all_methods('Don "Team" Smith', 'Smith, Don "Team"', 'Smith Don "Team"')
             self.check_all_methods('Don Team Smith')
 
+        def test_national(self):
+            """Check 'National Lampoon' with and without the 'National' copyword."""
+            c = tweaks['author_name_copywords']
+            try:
+                # .index() and the slicing/concatenation below assume a list or tuple
+                i = c.index('National')
+            except ValueError:
+                # "National" is absent: check without it first, then temporarily add it
+                self.check_all_methods('National Lampoon', 'Lampoon, National', 'Lampoon National')
+                with Tweak('author_name_copywords', c + type(c)(['National'])):
+                    self.check_all_methods('National Lampoon')
+            else:
+                # "National" is present: check with it first, then temporarily remove it
+                self.check_all_methods('National Lampoon')
+                with Tweak('author_name_copywords', c[:i] + c[i + 1:]):
+                    self.check_all_methods('National Lampoon', 'Lampoon, National', 'Lampoon National')
+
         def test_method(self):
             self.check_all_methods('Jane Doe', 'Doe, Jane', 'Doe Jane')
 
@@ -502,15 +523,19 @@ def find_tests():
             self.check_all_methods('Mrs. Jane Q. Doe III', 'Doe, Jane Q. III', 'Doe Jane Q. III')
 
         def test_surname_prefix(self):
-            self.check_all_methods('Leonardo Da Vinci', 'Da Vinci, Leonardo', 'Da Vinci Leonardo')
-            self.check_all_methods('Van Gogh')
+            with Tweak('author_use_surname_prefixes', True):
+                self.check_all_methods('Leonardo Da Vinci', 'Da Vinci, Leonardo', 'Da Vinci Leonardo')
+                self.check_all_methods('Van Gogh')
+            with Tweak('author_use_surname_prefixes', False):
+                self.check_all_methods('Leonardo Da Vinci', 'Vinci, Leonardo Da', 'Vinci Leonardo Da')
+                self.check_all_methods('Van Gogh', 'Gogh, Van', 'Gogh Van')
 
         def test_comma(self):
             self.check_all_methods('James Wesley, Rawles', nocomma='Rawles James Wesley,')
 
         def test_brackets(self):
             self.check_all_methods('Seventh Author [7]', 'Author, Seventh', 'Author Seventh')
-            self.check_all_methods('John [x]von Neumann (III)', 'von Neumann, John', 'von Neumann John')
+            self.check_all_methods('John [x]von Neumann (III)', 'Neumann, John von', 'Neumann John von')
 
         def test_falsy(self):
             self.check_all_methods('')