Intelligent name capitalization for the author mapper

2025-07-09 03:04:10 -04:00 · 2018-07-18 14:30:23 +05:30 · 2018-07-18 14:30:23 +05:30 · 2a4d733eac
commit 2a4d733eac
parent 848a7267d2
2 changed files with 35 additions and 32 deletions
--- a/src/calibre/ebooks/metadata/author_mapper.py
+++ b/src/calibre/ebooks/metadata/author_mapper.py
@ -2,10 +2,41 @@
 # vim:fileencoding=utf-8
 # License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
-from __future__ import (unicode_literals, division, absolute_import,
+from __future__ import absolute_import, division, print_function, unicode_literals
-                        print_function)
+
 import re
 from collections import deque
 from calibre.utils.icu import capitalize, lower, upper
 def cap_author_token(token):
    """Return a capitalized form of one whitespace-free piece of an author name.

    The rules are applied in this order:

    * The particles von/de/el/van/le (compared case-insensitively) are
      returned in lower case.
    * Dotted initials with no spaces, such as ``J.K.``, are normalized to
      ``J. K.``.
    * Otherwise the token is passed through ``capitalize``. For tokens that
      start with ``mc`` or ``mac``, the character right after the prefix is
      additionally upper-cased, provided it was already upper case in the
      input or the whole input was lower case.
    * Finally, the character following the first ``-`` and the first ``'``
      is upper-cased when at least two characters follow that separator.

    :param token: a single name token (the caller splits on whitespace).
    :return: the re-cased token.
    """
    lowered = lower(token)
    if lowered in ('von', 'de', 'el', 'van', 'le'):
        return lowered

    # Two or more "<letter>." groups and nothing else: no digits, no
    # punctuation other than the dots.
    initials = re.match(r'([^\d\W]\.){2,}$', lowered, re.UNICODE)
    if initials is not None:
        # Normalize tokens of the form J.K. to J. K.
        pieces = [capitalize(piece) for piece in token.split('.')]
        return '. '.join(pieces).strip()

    # Length of a matching mc/mac prefix whose following letter must be
    # re-capitalized, or None when no such prefix applies.
    prefix_len = None
    for prefix in ('mc', 'mac'):
        n = len(prefix)
        if len(token) <= n or not token.lower().startswith(prefix):
            continue
        following = token[n]
        if following == upper(following) or lowered == token:
            prefix_len = n
            break

    result = capitalize(token)
    if prefix_len is not None:
        head = result[:prefix_len]
        tail = result[prefix_len+1:]
        result = head + upper(result[prefix_len]) + tail

    for separator in ('-', "'"):
        pos = result.find(separator)
        # Skip when the separator is absent or fewer than two characters
        # come after it.
        if pos < 0 or len(result) <= pos + 2:
            continue
        after = pos + 1
        result = result[:after] + upper(result[after]) + result[after+1:]
    return result
 def compile_pat(pat):
    import regex
@ -74,7 +105,7 @@ def apply_rules(author, rules):
                        authors.appendleft(author)
                    break
                if ac == 'capitalize':
-                    ans.append(author.capitalize())
+                    ans.append(' '.join(map(cap_author_token, author.split())))
                    break
                if ac == 'lower':
                    ans.append(icu_lower(author))
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@ -12,8 +12,8 @@ from future_builtins import map
 from calibre import browser, random_user_agent
 from calibre.customize import Plugin
 from calibre.utils.icu import capitalize, lower, upper
 from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.author_mapper import cap_author_token
 from calibre.utils.localization import canonicalize_lang, get_lang
@ -128,34 +128,6 @@ def load_caches(dump):
            p.load_caches(cache)
 def cap_author_token(token):
    """Return a capitalized form of a single author-name token.

    Lower-cases the particles von/de/el/van/le, rewrites dotted initials
    such as ``J.K.`` as ``J. K.``, and otherwise capitalizes the token with
    extra handling for ``mc``/``mac`` prefixes and for the character after
    the first ``-`` and the first ``'``.

    :param token: one name token; presumably contains no whitespace
        (NOTE(review): confirm against callers).
    :return: the re-cased token.
    """
    lt = lower(token)
    # Name particles are returned in lower case.
    if lt in ('von', 'de', 'el', 'van', 'le'):
        return lt
    # Two or more "<word character>." groups and nothing else: no digits,
    # no special characters.
    if re.match(r'([^\d\W]\.){2,}$', lt, re.UNICODE) is not None:
        # Normalize tokens of the form J.K. to J. K.
        parts = token.split('.')
        # The trailing empty part leaves a trailing space, removed by strip().
        return '. '.join(map(capitalize, parts)).strip()
    # Length of a matching mc/mac prefix, or None when no prefix applies.
    scots_name = None
    for x in ('mc', 'mac'):
        # The prefix must be followed by at least one character, and that
        # character must already be upper case unless the whole token was
        # given in lower case.
        if (token.lower().startswith(x) and len(token) > len(x) and
                (
                    token[len(x)] == upper(token[len(x)]) or
                    lt == token
                )):
            scots_name = len(x)
            break
    ans = capitalize(token)
    if scots_name is not None:
        # Upper-case the character right after the mc/mac prefix.
        ans = ans[:scots_name] + upper(ans[scots_name]) + ans[scots_name+1:]
    for x in ('-', "'"):
        # Only the first occurrence of each separator is considered.
        idx = ans.find(x)
        # Requires at least two characters after the separator.
        if idx > -1 and len(ans) > idx+2:
            ans = ans[:idx+1] + upper(ans[idx+1]) + ans[idx+2:]
    return ans
 def fixauthors(authors):
    if not authors:
        return authors