diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index ec96c7cd3c..8c82f1d4e6 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -201,15 +201,49 @@ save_template_title_series_sorting = 'library_order' #: Set the list of words considered to be "articles" for sort strings # Set the list of words that are to be considered 'articles' when computing the -# title sort strings. The list is a regular expression, with the articles -# separated by 'or' bars. Comparisons are case insensitive, and that cannot be -# changed. Changes to this tweak won't have an effect until the book is modified -# in some way. If you enter an invalid pattern, it is silently ignored. -# To disable use the expression: '^$' -# This expression is designed for articles that are followed by spaces. If you -# also need to match articles that are followed by other characters, for example L' -# in French, use: "^(A\s+|The\s+|An\s+|L')" instead. -# Default: '^(A|The|An)\s+' +# title sort strings. The articles differ by language. By default, calibre uses +# a combination of articles from English and whatever language the calibre user +# interface is set to. In addition, in some contexts where the book language is +# available, the language of the book is used. You can change the list of +# articles for a given language or add a new language by editing +# per_language_title_sort_articles. To tell calibre to use a language other +# than the user interface language, set default_language_for_title_sort. For +# example, to use German, set it to 'deu'. A value of None means the user +# interface language is used. The setting title_sort_articles is ignored +# (present only for legacy reasons). 
+per_language_title_sort_articles = { + # English + 'eng' : (r'A\s+', r'The\s+', r'An\s+'), + # Spanish + 'spa' : (r'El\s+', r'La\s+', r'Lo\s+', r'Los\s+', r'Las\s+', r'Un\s+', + r'Una\s+', r'Unos\s+', r'Unas\s+'), + # French + 'fra' : (r'Le\s+', r'La\s+', r"L'", r'Les\s+', r'Un\s+', r'Une\s+', + r'Des\s+'), + # Italian + 'ita' : (r'Lo\s+', r'Il\s+', r"L'", r'La\s+', r'Gli\s+', r'I\s+', + r'Le\s+', ), + # Portuguese + 'por' : (r'A\s+', r'O\s+', r'Os\s+', r'As\s+', r'Um\s+', r'Uns\s+', + r'Uma\s+', r'Umas\s+', ), + # Romanian + 'ron' : (r'Un\s+', r'O\s+', r'Nişte\s+', ), + # German + 'deu' : (r'Der\s+', r'Die\s+', r'Das\s+', r'Den\s+', r'Ein\s+', + r'Eine\s+', r'Einen\s+', ), + # Dutch + 'nld' : (r'De\s+', r'Het\s+', r'Een\s+', ), + # Swedish + 'swe' : (r'En\s+', r'Ett\s+', r'Det\s+', r'Den\s+', r'De\s+', ), + # Turkish + 'tur' : (r'Bir\s+', ), + # Afrikaans + 'afr' : (r"'n\s+", r'Die\s+', ), + # Greek + 'ell' : (r'O\s+', r'I\s+', r'To\s+', r'Ta\s+', r'Tus\s+', r'Tis\s+', + r"'Enas\s+", r"'Mia\s+", r"'Ena\s+", r"'Enan\s+", ), +} +default_language_for_title_sort = None title_sort_articles=r'^(A|The|An)\s+' #: Specify a folder calibre should connect to at startup diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py index a6b3c1ad21..d9399e9a3e 100644 --- a/src/calibre/ebooks/metadata/__init__.py +++ b/src/calibre/ebooks/metadata/__init__.py @@ -95,18 +95,33 @@ def author_to_author_sort(author, method=None): def authors_to_sort_string(authors): return ' & '.join(map(author_to_author_sort, authors)) -try: - _title_pat = re.compile(tweaks.get('title_sort_articles', - r'^(A|The|An)\s+'), re.IGNORECASE) -except: - print 'Error in title sort pattern' - import traceback - traceback.print_exc() - _title_pat = re.compile('^(A|The|An)\s+', re.IGNORECASE) +_title_pats = {} +def get_title_sort_pat(lang=None): + ans = _title_pats.get(lang, None) + if ans is not None: + return ans + q = lang + from calibre.utils.localization import 
canonicalize_lang, get_lang + if lang is None: + q = tweaks['default_language_for_title_sort'] + if q is None: + q = get_lang() + q = canonicalize_lang(q) if q else q + data = tweaks['per_language_title_sort_articles'] + ans = data.get(q, None) + if ans is None: + ans = data['eng'] + ans = frozenset(ans + data['eng']) + ans = '|'.join(ans) + ans = '^(%s)'%ans + ans = re.compile(ans, re.IGNORECASE) + _title_pats[lang] = ans + return ans -_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033]) +_ignore_starts = u'\'"'+u''.join(unichr(x) for x in + range(0x2018, 0x201e)+[0x2032, 0x2033]) -def title_sort(title, order=None): +def title_sort(title, order=None, lang=None): if order is None: order = tweaks['title_series_sorting'] title = title.strip() @@ -114,7 +129,7 @@ def title_sort(title, order=None): return title if title and title[0] in _ignore_starts: title = title[1:] - match = _title_pat.search(title) + match = get_title_sort_pat(lang).search(title) if match: try: prep = match.group(1) diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py index 3dee6ad179..27112eba9a 100644 --- a/src/calibre/gui2/metadata/basic_widgets.py +++ b/src/calibre/gui2/metadata/basic_widgets.py @@ -138,9 +138,10 @@ class TitleSortEdit(TitleEdit): ' For example, The Exorcist might be sorted as Exorcist, The.') LABEL = _('Title &sort:') - def __init__(self, parent, title_edit, autogen_button): + def __init__(self, parent, title_edit, autogen_button, languages_edit): TitleEdit.__init__(self, parent) self.title_edit = title_edit + self.languages_edit = languages_edit base = self.TOOLTIP ok_tooltip = '

' + textwrap.fill(base+'

'+ @@ -157,10 +158,20 @@ class TitleSortEdit(TitleEdit): self.autogen_button = autogen_button autogen_button.clicked.connect(self.auto_generate) + languages_edit.editTextChanged.connect(self.update_state) + languages_edit.currentIndexChanged.connect(self.update_state) self.update_state() + @property + def book_lang(self): + try: + book_lang = self.languages_edit.lang_codes[0] + except: + book_lang = None + return book_lang + def update_state(self, *args): - ts = title_sort(self.title_edit.current_val) + ts = title_sort(self.title_edit.current_val, lang=self.book_lang) normal = ts == self.current_val if normal: col = 'rgb(0, 255, 0, 20%)' @@ -173,7 +184,8 @@ class TitleSortEdit(TitleEdit): self.setWhatsThis(tt) def auto_generate(self, *args): - self.current_val = title_sort(self.title_edit.current_val) + self.current_val = title_sort(self.title_edit.current_val, + lang=self.book_lang) def break_cycles(self): try: diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py index 3e2886f3bf..2cb9c74890 100644 --- a/src/calibre/gui2/metadata/single.py +++ b/src/calibre/gui2/metadata/single.py @@ -109,6 +109,9 @@ class MetadataSingleDialogBase(ResizableDialog): def create_basic_metadata_widgets(self): # {{{ self.basic_metadata_widgets = [] + self.languages = LanguagesEdit(self) + self.basic_metadata_widgets.append(self.languages) + self.title = TitleEdit(self) self.title.textChanged.connect(self.update_window_title) self.deduce_title_sort_button = QToolButton(self) @@ -119,7 +122,7 @@ class MetadataSingleDialogBase(ResizableDialog): self.deduce_title_sort_button.setWhatsThis( self.deduce_title_sort_button.toolTip()) self.title_sort = TitleSortEdit(self, self.title, - self.deduce_title_sort_button) + self.deduce_title_sort_button, self.languages) self.basic_metadata_widgets.extend([self.title, self.title_sort]) self.deduce_author_sort_button = b = QToolButton(self) @@ -203,9 +206,6 @@ class MetadataSingleDialogBase(ResizableDialog): 
self.publisher = PublisherEdit(self) self.basic_metadata_widgets.append(self.publisher) - self.languages = LanguagesEdit(self) - self.basic_metadata_widgets.append(self.languages) - self.timestamp = DateEdit(self) self.pubdate = PubdateEdit(self) self.basic_metadata_widgets.extend([self.timestamp, self.pubdate]) @@ -282,7 +282,6 @@ class MetadataSingleDialogBase(ResizableDialog): # Commented out as it doesn't play nice with Next, Prev buttons #self.fetch_metadata_button.setFocus(Qt.OtherFocusReason) - # Miscellaneous interaction methods {{{ def update_window_title(self, *args): title = self.title.current_val diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index f19db7a33f..a917aa09b2 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -15,7 +15,7 @@ from math import ceil from calibre import prints from calibre.ebooks.metadata import (title_sort, author_to_author_sort, - string_to_authors, authors_to_string) + string_to_authors, authors_to_string, get_title_sort_pat) from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre.library.database import LibraryDatabase from calibre.library.field_metadata import FieldMetadata, TagsIcons @@ -1004,10 +1004,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): return False def find_identical_books(self, mi): - fuzzy_title_patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in + fuzzy_title_patterns = [(re.compile(pat, re.IGNORECASE) if + isinstance(pat, basestring) else pat, repl) for pat, repl in [ (r'[\[\](){}<>\'";,:#]', ''), - (tweaks.get('title_sort_articles', r'^(a|the|an)\s+'), ''), + (get_title_sort_pat(), ''), (r'[-._]', ' '), (r'\s+', ' ') ]