When automatically generating the author sort for an author name, ignore common prefixes like Mr., Dr., etc. This is controllable via a tweak. Also add a tweak to allow control of how a string is split up into multiple authors. Fixes #795984 ([Request] Extra author sorting options)

This commit is contained in:
Kovid Goyal 2011-09-04 09:52:22 -06:00
parent 91b769bd1f
commit c77684a033
2 changed files with 28 additions and 2 deletions

View File

@ -70,9 +70,18 @@ author_sort_copy_method = 'comma'
author_name_suffixes = (
        'Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
        'MD', 'M.D', 'I', 'II', 'III', 'IV',
        'Junior', 'Senior')
author_name_prefixes = ('Mr', 'Mrs', 'Ms', 'Dr', 'Prof')
author_name_copywords = (
        'Corporation', 'Company', 'Co.', 'Agency', 'Council',
        'Committee', 'Inc.', 'Institute', 'Society', 'Club', 'Team')
#: Splitting multiple author names
# By default, calibre splits a string containing multiple author names on
# ampersands and the words "and" and "with". You can customize the splitting
# by changing the regular expression below. Whatever the specified regular
# expression matches is treated as a separator between authors, in addition
# to ampersands. If the regular expression is invalid, the default is used.
# Default: r'(?i),?\s+(and|with)\s+'
authors_split_regex = r'(?i),?\s+(and|with)\s+'
#: Use author sort in Tag Browser
# Set which author field to display in the tags pane (the list of authors,
# series, publishers etc on the left hand side). The choices are author and

View File

@ -10,11 +10,17 @@ import os, sys, re
from urllib import unquote, quote
from urlparse import urlparse
from calibre import relpath, guess_type, remove_bracketed_text
from calibre import relpath, guess_type, remove_bracketed_text, prints
from calibre.utils.config import tweaks
_author_pat = re.compile(',?\s+(and|with)\s+', re.IGNORECASE)
# Compile the user-configurable author-splitting pattern once at import time.
# If the configured value cannot be compiled, report it and fall back to the
# built-in default so that importing this module never fails because of a
# bad tweak.
try:
    _author_pat = re.compile(tweaks['authors_split_regex'])
except Exception:
    # Catch Exception rather than using a bare except so that
    # KeyboardInterrupt and SystemExit still propagate.
    prints('Author split regexp:', tweaks['authors_split_regex'],
            'is invalid, using default')
    _author_pat = re.compile(r'(?i),?\s+(and|with)\s+')
def string_to_authors(raw):
raw = raw.replace('&&', u'\uffff')
raw = _author_pat.sub('&', raw)
@ -45,6 +51,17 @@ def author_to_author_sort(author, method=None):
if method == u'copy':
return author
prefixes = set([x.lower() for x in tweaks['author_name_prefixes']])
prefixes |= set([x+u'.' for x in prefixes])
while True:
if not tokens:
return author
tok = tokens[0].lower()
if tok in prefixes:
tokens = tokens[1:]
else:
break
suffixes = set([x.lower() for x in tweaks['author_name_suffixes']])
suffixes |= set([x+u'.' for x in suffixes])