diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index e91b4a62d5..691a82fc36 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -41,14 +41,19 @@ authors_completer_append_separator = False #: Author sort name algorithm # The algorithm used to copy author to author_sort # Possible values are: -# invert: use "fn ln" -> "ln, fn" (the default algorithm) +# invert: use "fn ln" -> "ln, fn" # copy : copy author to author_sort without modification # comma : use 'copy' if there is a ',' in the name, otherwise use 'invert' # nocomma : "fn ln" -> "ln fn" (without the comma) # When this tweak is changed, the author_sort values stored with each author # must be recomputed by right-clicking on an author in the left-hand tags pane, # selecting 'manage authors', and pressing 'Recalculate all author sort values'. +# The author name suffixes are words that are kept at the end of the sort +# value, instead of being used as the surname, when they end an author name. +# The case of the suffix is ignored and a trailing period is allowed. 
# ---------------------------------------------------------------------------
# resources/default_tweaks.py
# ---------------------------------------------------------------------------
# Default author-sort settings.
# NOTE(review): presumably these are surfaced at runtime through the `tweaks`
# mapping that author_to_author_sort() reads below -- confirm.
author_sort_copy_method = 'comma'
author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
                        'MD', 'M.D', 'I', 'II', 'III', 'IV')


# ---------------------------------------------------------------------------
# src/calibre/__init__.py
# ---------------------------------------------------------------------------
def remove_bracketed_text(src, brackets=None):
    """Return ``src`` with every bracketed span removed.

    Nesting is honoured: text is kept only while no bracket of any kind is
    open. The bracket characters themselves are never kept, and a closing
    bracket with no matching opener is dropped as well.

    :param src: The text to filter. Byte strings are decoded with
        ``force_unicode`` first.
    :param brackets: Mapping of opening character to closing character.
        Defaults to ``()``, ``[]`` and ``{}``.
    :return: The unicode text that lies outside all brackets.
    """
    from collections import Counter

    if brackets is None:
        # Built per call so that no mutable default is shared between calls.
        brackets = {u'(': u')', u'[': u']', u'{': u'}'}
    if isinstance(src, bytes):
        # force_unicode is not defined in the visible part of this module.
        # NOTE(review): assumes it returns text input unchanged, so only
        # byte strings need to go through it -- confirm.
        src = force_unicode(src)

    closers = {closer: opener for opener, closer in brackets.items()}
    open_counts = Counter()  # currently open brackets, per opening character
    depth = 0                # total open brackets; always sum(open_counts)
    kept = []
    for char in src:
        if char in brackets:
            open_counts[char] += 1
            depth += 1
        elif char in closers:
            opener = closers[char]
            # Only a closer that matches an open bracket changes the depth.
            if open_counts[opener] > 0:
                open_counts[opener] -= 1
                depth -= 1
        elif depth < 1:
            kept.append(char)
    return u''.join(kept)


# ---------------------------------------------------------------------------
# src/calibre/ebooks/metadata/__init__.py
# (this module imports `tweaks` from calibre.utils.config and
#  `remove_bracketed_text` from calibre)
# ---------------------------------------------------------------------------
def author_to_author_sort(author, method=None, suffixes=None):
    """Compute the sort form of a single author name.

    Bracketed text is removed before the name is split into words. Names
    with fewer than two words are returned unchanged. Otherwise trailing
    suffix words (matched case-insensitively, with or without a final
    period) are set aside, the last remaining word is moved to the front,
    and the suffixes are re-attached at the end.

    :param author: The author name. A false value yields ``u''``.
    :param method: ``'invert'``, ``'copy'``, ``'comma'`` or ``'nocomma'``.
        When ``None``, ``tweaks['author_sort_copy_method']`` is used.
    :param suffixes: Iterable of suffix words. When ``None``,
        ``tweaks['author_name_suffixes']`` is used.
    :return: The sort string, or ``author`` itself when it is copied.
    """
    if not author:
        return u''
    tokens = remove_bracketed_text(author).strip().split()
    if len(tokens) < 2:
        return author

    if method is None:
        method = tweaks['author_sort_copy_method']
    if method == u'copy':
        return author

    if suffixes is None:
        suffixes = tweaks['author_name_suffixes']
    known_suffixes = set(word.lower() for word in suffixes)
    known_suffixes |= set(word + u'.' for word in known_suffixes)

    # Peel off every trailing suffix, not just one, so that a name such as
    # "John Smith Jr. PhD" still sorts under "Smith".
    trailing = []
    while tokens and tokens[-1].lower() in known_suffixes:
        trailing.append(tokens.pop())
    if not tokens:
        # The name consisted only of suffix words; nothing to invert.
        return author
    trailing.reverse()

    if method == u'comma' and u',' in u''.join(tokens):
        return author

    sort_tokens = tokens[-1:] + tokens[:-1]
    # Decide on the comma before re-attaching suffixes: a lone name followed
    # by a suffix ("Smith Jr") must not become "Smith, Jr".
    if method != u'nocomma' and len(sort_tokens) > 1:
        sort_tokens[0] += u','
    sort_tokens.extend(trailing)

    return u' '.join(sort_tokens)


def authors_to_sort_string(authors):
    """Join the sort form of each name in ``authors`` with ``' & '``."""
    return ' & '.join(author_to_author_sort(name) for name in authors)