Review notes (free text ahead of the first "diff --git" header).
Layout: line breaks and indentation were restored from a whitespace-collapsed
copy of this patch; confirm context-line whitespace against the tree before
applying.
Change 1: resources/default_tweaks.py gains the author_name_prefixes and
authors_split_regex tweaks.
Change 2: src/calibre/ebooks/metadata/__init__.py compiles the author split
pattern from tweaks['authors_split_regex'], falling back to the built-in
default pattern, and author_to_author_sort drops leading tokens listed in
tweaks['author_name_prefixes'] (with or without a trailing '.').
Review edit: the fallback handler uses "except Exception:" rather than a bare
"except:"; the post-image blob id on the second index line predates this edit.

diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py
index b385511d56..ead9995eb3 100644
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@@ -70,9 +70,18 @@ author_sort_copy_method = 'comma'
 author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
                         'MD', 'M.D', 'I', 'II', 'III', 'IV',
                         'Junior', 'Senior')
+author_name_prefixes = ('Mr', 'Mrs', 'Ms', 'Dr', 'Prof')
 author_name_copywords = ('Corporation', 'Company', 'Co.', 'Agency', 'Council',
                          'Committee', 'Inc.', 'Institute', 'Society', 'Club', 'Team')
 
+#: Splitting multiple author names
+# By default, calibre splits a string containing multiple author names on
+# ampersands and the words "and" and "with". You can customize the splitting
+# by changing the regular expression below. Strings are split on whatever the
+# specified regular expression matches.
+# Default: r'(?i),?\s+(and|with)\s+'
+authors_split_regex = r'(?i),?\s+(and|with)\s+'
+
 #: Use author sort in Tag Browser
 # Set which author field to display in the tags pane (the list of authors,
 # series, publishers etc on the left hand side). The choices are author and
diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py
index a9816db5ae..c3a229fe3c 100644
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -10,11 +10,17 @@ import os, sys, re
 from urllib import unquote, quote
 from urlparse import urlparse
 
-from calibre import relpath, guess_type, remove_bracketed_text
+from calibre import relpath, guess_type, remove_bracketed_text, prints
 
 from calibre.utils.config import tweaks
 
-_author_pat = re.compile(',?\s+(and|with)\s+', re.IGNORECASE)
+try:
+    _author_pat = re.compile(tweaks['authors_split_regex'])
+except Exception:  # keep the fallback, but let KeyboardInterrupt/SystemExit through
+    prints('Author split regexp:', tweaks['authors_split_regex'],
+           'is invalid, using default')
+    _author_pat = re.compile(r'(?i),?\s+(and|with)\s+')
+
 def string_to_authors(raw):
     raw = raw.replace('&&', u'\uffff')
     raw = _author_pat.sub('&', raw)
@@ -45,6 +51,17 @@ def author_to_author_sort(author, method=None):
     if method == u'copy':
         return author
 
+    prefixes = set([x.lower() for x in tweaks['author_name_prefixes']])
+    prefixes |= set([x+u'.' for x in prefixes])
+    while True:
+        if not tokens:
+            return author
+        tok = tokens[0].lower()
+        if tok in prefixes:
+            tokens = tokens[1:]
+        else:
+            break
+
     suffixes = set([x.lower() for x in tweaks['author_name_suffixes']])
     suffixes |= set([x+u'.' for x in suffixes])
 