mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Intelligent name capitalization for the author mapper
This commit is contained in:
parent
848a7267d2
commit
2a4d733eac
@ -2,10 +2,41 @@
|
||||
# vim:fileencoding=utf-8
|
||||
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import re
|
||||
from collections import deque
|
||||
|
||||
from calibre.utils.icu import capitalize, lower, upper
|
||||
|
||||
|
||||
def cap_author_token(token):
    """Return a single author-name token with name-aware capitalization.

    The token is processed in this order:

    * If its lowercased form is one of ``von``, ``de``, ``el``, ``van`` or
      ``le``, the lowercased form is returned unchanged.
    * If it consists solely of two or more "word character + dot" groups
      (for example ``j.k.``), every dot-separated piece is capitalized and
      the pieces are re-joined with ``'. '`` (``J. K.``).
    * Otherwise the token is capitalized, the character following a leading
      ``mc``/``mac`` prefix is upper-cased when the heuristic below matches,
      and the character following the first ``-`` and the first ``'`` is
      upper-cased.

    ``lower``, ``upper`` and ``capitalize`` are the helpers imported from
    ``calibre.utils.icu``.
    """
    lowered = lower(token)

    # These particles are always returned entirely in lower case.
    if lowered in ('von', 'de', 'el', 'van', 'le'):
        return lowered

    # Initials: only word characters that are not digits, each followed by
    # a dot, repeated at least twice (no digits, no special characters).
    if re.match(r'([^\d\W]\.){2,}$', lowered, re.UNICODE) is not None:
        # Normalize tokens of the form J.K. to J. K.
        pieces = token.split('.')
        return '. '.join(capitalize(piece) for piece in pieces).strip()

    result = capitalize(token)

    # Mc/Mac prefixes: upper-case the character after the prefix when that
    # character was already upper case in the input, or when the whole
    # input token was lower case. Only the first matching prefix is applied.
    plain_lowered = token.lower()
    for prefix in ('mc', 'mac'):
        cut = len(prefix)
        if not plain_lowered.startswith(prefix) or len(token) <= cut:
            continue
        if token[cut] == upper(token[cut]) or lowered == token:
            result = result[:cut] + upper(result[cut]) + result[cut + 1:]
            break

    # Upper-case the character after the first '-' and the first "'", but
    # only when at least two characters follow the separator.
    # NOTE(review): presumably this keeps a trailing possessive such as
    # "'s" untouched -- confirm before relaxing the length check.
    for separator in ('-', "'"):
        pos = result.find(separator)
        if pos != -1 and pos + 2 < len(result):
            after = pos + 1
            result = result[:after] + upper(result[after]) + result[after + 1:]

    return result
|
||||
|
||||
|
||||
def compile_pat(pat):
|
||||
import regex
|
||||
@ -74,7 +105,7 @@ def apply_rules(author, rules):
|
||||
authors.appendleft(author)
|
||||
break
|
||||
if ac == 'capitalize':
|
||||
ans.append(author.capitalize())
|
||||
ans.append(' '.join(map(cap_author_token, author.split())))
|
||||
break
|
||||
if ac == 'lower':
|
||||
ans.append(icu_lower(author))
|
||||
|
@ -12,8 +12,8 @@ from future_builtins import map
|
||||
|
||||
from calibre import browser, random_user_agent
|
||||
from calibre.customize import Plugin
|
||||
from calibre.utils.icu import capitalize, lower, upper
|
||||
from calibre.ebooks.metadata import check_isbn
|
||||
from calibre.ebooks.metadata.author_mapper import cap_author_token
|
||||
from calibre.utils.localization import canonicalize_lang, get_lang
|
||||
|
||||
|
||||
@ -128,34 +128,6 @@ def load_caches(dump):
|
||||
p.load_caches(cache)
|
||||
|
||||
|
||||
def cap_author_token(token):
    """Return a single author-name token with name-aware capitalization.

    The token is processed in this order:

    * If its lowercased form is one of ``von``, ``de``, ``el``, ``van`` or
      ``le``, the lowercased form is returned unchanged.
    * If it consists solely of two or more "word character + dot" groups
      (for example ``j.k.``), every dot-separated piece is capitalized and
      the pieces are re-joined with ``'. '`` (``J. K.``).
    * Otherwise the token is capitalized, the character following a leading
      ``mc``/``mac`` prefix is upper-cased when the heuristic below matches,
      and the character following the first ``-`` and the first ``'`` is
      upper-cased.

    ``lower``, ``upper`` and ``capitalize`` are the helpers imported from
    ``calibre.utils.icu``.
    """
    lowered = lower(token)

    # These particles are always returned entirely in lower case.
    if lowered in ('von', 'de', 'el', 'van', 'le'):
        return lowered

    # Initials: only word characters that are not digits, each followed by
    # a dot, repeated at least twice (no digits, no special characters).
    if re.match(r'([^\d\W]\.){2,}$', lowered, re.UNICODE) is not None:
        # Normalize tokens of the form J.K. to J. K.
        pieces = token.split('.')
        return '. '.join(capitalize(piece) for piece in pieces).strip()

    result = capitalize(token)

    # Mc/Mac prefixes: upper-case the character after the prefix when that
    # character was already upper case in the input, or when the whole
    # input token was lower case. Only the first matching prefix is applied.
    plain_lowered = token.lower()
    for prefix in ('mc', 'mac'):
        cut = len(prefix)
        if not plain_lowered.startswith(prefix) or len(token) <= cut:
            continue
        if token[cut] == upper(token[cut]) or lowered == token:
            result = result[:cut] + upper(result[cut]) + result[cut + 1:]
            break

    # Upper-case the character after the first '-' and the first "'", but
    # only when at least two characters follow the separator.
    # NOTE(review): presumably this keeps a trailing possessive such as
    # "'s" untouched -- confirm before relaxing the length check.
    for separator in ('-', "'"):
        pos = result.find(separator)
        if pos != -1 and pos + 2 < len(result):
            after = pos + 1
            result = result[:after] + upper(result[after]) + result[after + 1:]

    return result
|
||||
|
||||
|
||||
def fixauthors(authors):
|
||||
if not authors:
|
||||
return authors
|
||||
|
Loading…
x
Reference in New Issue
Block a user