diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index ec96c7cd3c..8c82f1d4e6 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -201,15 +201,49 @@ save_template_title_series_sorting = 'library_order' #: Set the list of words considered to be "articles" for sort strings # Set the list of words that are to be considered 'articles' when computing the -# title sort strings. The list is a regular expression, with the articles -# separated by 'or' bars. Comparisons are case insensitive, and that cannot be -# changed. Changes to this tweak won't have an effect until the book is modified -# in some way. If you enter an invalid pattern, it is silently ignored. -# To disable use the expression: '^$' -# This expression is designed for articles that are followed by spaces. If you -# also need to match articles that are followed by other characters, for example L' -# in French, use: "^(A\s+|The\s+|An\s+|L')" instead. -# Default: '^(A|The|An)\s+' +# title sort strings. The articles differ by language. By default, calibre uses +# a combination of articles from English and whatever language the calibre user +# interface is set to. In addition, in some contexts where the book language is +# available, the language of the book is used. You can change the list of +# articles for a given language or add a new language by editing +# per_language_title_sort_articles. To tell calibre to use a language other +# than the user interface language, set default_language_for_title_sort. For +# example, to use German, set it to 'deu'. A value of None means the user +# interface language is used. The setting title_sort_articles is ignored +# (present only for legacy reasons). 
+per_language_title_sort_articles = {
+        # English
+        'eng' : (r'A\s+', r'The\s+', r'An\s+'),
+        # Spanish
+        'spa' : (r'El\s+', r'La\s+', r'Lo\s+', r'Los\s+', r'Las\s+', r'Un\s+',
+                 r'Una\s+', r'Unos\s+', r'Unas\s+'),
+        # French
+        'fra' : (r'Le\s+', r'La\s+', r"L'", r'Les\s+', r'Un\s+', r'Une\s+',
+                 r'Des\s+'),
+        # Italian
+        'ita' : (r'Lo\s+', r'Il\s+', r"L'", r'La\s+', r'Gli\s+', r'I\s+',
+                 r'Le\s+', ),
+        # Portuguese
+        'por' : (r'A\s+', r'O\s+', r'Os\s+', r'As\s+', r'Um\s+', r'Uns\s+',
+                 r'Uma\s+', r'Umas\s+', ),
+        # Romanian
+        'ron' : (r'Un\s+', r'O\s+', r'Nişte\s+', ),
+        # German
+        'deu' : (r'Der\s+', r'Die\s+', r'Das\s+', r'Den\s+', r'Ein\s+',
+                 r'Eine\s+', r'Einen\s+', ),
+        # Dutch
+        'nld' : (r'De\s+', r'Het\s+', r'Een\s+', ),
+        # Swedish
+        'swe' : (r'En\s+', r'Ett\s+', r'Det\s+', r'Den\s+', r'De\s+', ),
+        # Turkish
+        'tur' : (r'Bir\s+', ),
+        # Afrikaans
+        'afr' : (r"'n\s+", r'Die\s+', ),
+        # Greek
+        'ell' : (r'O\s+', r'I\s+', r'To\s+', r'Ta\s+', r'Tus\s+', r'Tis\s+',
+                 r"'Enas\s+", r"'Mia\s+", r"'Ena\s+", r"'Enan\s+", ),
+}
+default_language_for_title_sort = None
 title_sort_articles=r'^(A|The|An)\s+'
 #: Specify a folder calibre should connect to at startup
diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py
index a6b3c1ad21..d9399e9a3e 100644
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -95,18 +95,43 @@ def author_to_author_sort(author, method=None):
 def authors_to_sort_string(authors):
     return ' & '.join(map(author_to_author_sort, authors))
-try:
-    _title_pat = re.compile(tweaks.get('title_sort_articles',
-                         r'^(A|The|An)\s+'), re.IGNORECASE)
-except:
-    print 'Error in title sort pattern'
-    import traceback
-    traceback.print_exc()
-    _title_pat = re.compile('^(A|The|An)\s+', re.IGNORECASE)
+_title_pats = {}
+def get_title_sort_pat(lang=None):
+    '''
+    Return the compiled, case insensitive regular expression used to find a
+    leading article in a title. When lang is None, the language is taken
+    from the default_language_for_title_sort tweak and, if that is also
+    None, from get_lang(). The articles for that language (English only, if
+    the language has no entry in per_language_title_sort_articles) are
+    combined with the English ones. The result is cached per value of lang.
+    '''
+    ans = _title_pats.get(lang, None)
+    if ans is not None:
+        return ans
+    q = lang
+    from calibre.utils.localization import canonicalize_lang, get_lang
+    if lang is None:
+        q = tweaks['default_language_for_title_sort']
+        if q is None:
+            q = get_lang()
+    q = canonicalize_lang(q) if q else q
+    data = tweaks['per_language_title_sort_articles']
+    ans = data.get(q, None)
+    if ans is None:
+        ans = data['eng']
+    # Keep the order given in the tweak (a set would make the order of the
+    # alternatives arbitrary) and accept lists as well as tuples
+    ans = tuple(ans) + tuple(data['eng'])
+    ans = '|'.join(ans)
+    ans = '^(%s)'%ans
+    ans = re.compile(ans, re.IGNORECASE)
+    _title_pats[lang] = ans
+    return ans
-_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
+_ignore_starts = u'\'"'+u''.join(unichr(x) for x in
+        range(0x2018, 0x201e)+[0x2032, 0x2033])
-def title_sort(title, order=None):
+def title_sort(title, order=None, lang=None):
     if order is None:
         order = tweaks['title_series_sorting']
     title = title.strip()
@@ -114,7 +139,7 @@ def title_sort(title, order=None):
         return title
     if title and title[0] in _ignore_starts:
         title = title[1:]
-    match = _title_pat.search(title)
+    match = get_title_sort_pat(lang).search(title)
     if match:
         try:
             prep = match.group(1)
diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py
index 3dee6ad179..27112eba9a 100644
--- a/src/calibre/gui2/metadata/basic_widgets.py
+++ b/src/calibre/gui2/metadata/basic_widgets.py
@@ -138,9 +138,10 @@ class TitleSortEdit(TitleEdit):
             ' For example, The Exorcist might be sorted as Exorcist, The.')
     LABEL = _('Title &sort:')
-    def __init__(self, parent, title_edit, autogen_button):
+    def __init__(self, parent, title_edit, autogen_button, languages_edit):
         TitleEdit.__init__(self, parent)
         self.title_edit = title_edit
+        self.languages_edit = languages_edit
         base = self.TOOLTIP
         ok_tooltip = '
' + textwrap.fill(base+'
'+
@@ -157,10 +158,20 @@ class TitleSortEdit(TitleEdit):
self.autogen_button = autogen_button
autogen_button.clicked.connect(self.auto_generate)
+ languages_edit.editTextChanged.connect(self.update_state)
+ languages_edit.currentIndexChanged.connect(self.update_state)
self.update_state()
+    @property
+    def book_lang(self):
+        '''First language code in the languages editor, or None if it cannot be read.'''
+        try:
+            return self.languages_edit.lang_codes[0]
+        except Exception:
+            return None
+
def update_state(self, *args):
- ts = title_sort(self.title_edit.current_val)
+ ts = title_sort(self.title_edit.current_val, lang=self.book_lang)
normal = ts == self.current_val
if normal:
col = 'rgb(0, 255, 0, 20%)'
@@ -173,7 +184,8 @@ class TitleSortEdit(TitleEdit):
self.setWhatsThis(tt)
def auto_generate(self, *args):
-        self.current_val = title_sort(self.title_edit.current_val)
+        title = self.title_edit.current_val  # the sort value is derived from the title
+        self.current_val = title_sort(title, lang=self.book_lang)
def break_cycles(self):
try:
diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py
index 3e2886f3bf..2cb9c74890 100644
--- a/src/calibre/gui2/metadata/single.py
+++ b/src/calibre/gui2/metadata/single.py
@@ -109,6 +109,9 @@ class MetadataSingleDialogBase(ResizableDialog):
def create_basic_metadata_widgets(self): # {{{
self.basic_metadata_widgets = []
+ self.languages = LanguagesEdit(self)
+ self.basic_metadata_widgets.append(self.languages)
+
self.title = TitleEdit(self)
self.title.textChanged.connect(self.update_window_title)
self.deduce_title_sort_button = QToolButton(self)
@@ -119,7 +122,7 @@ class MetadataSingleDialogBase(ResizableDialog):
self.deduce_title_sort_button.setWhatsThis(
self.deduce_title_sort_button.toolTip())
self.title_sort = TitleSortEdit(self, self.title,
- self.deduce_title_sort_button)
+ self.deduce_title_sort_button, self.languages)
self.basic_metadata_widgets.extend([self.title, self.title_sort])
self.deduce_author_sort_button = b = QToolButton(self)
@@ -203,9 +206,6 @@ class MetadataSingleDialogBase(ResizableDialog):
self.publisher = PublisherEdit(self)
self.basic_metadata_widgets.append(self.publisher)
- self.languages = LanguagesEdit(self)
- self.basic_metadata_widgets.append(self.languages)
-
self.timestamp = DateEdit(self)
self.pubdate = PubdateEdit(self)
self.basic_metadata_widgets.extend([self.timestamp, self.pubdate])
@@ -282,7 +282,6 @@ class MetadataSingleDialogBase(ResizableDialog):
# Commented out as it doesn't play nice with Next, Prev buttons
#self.fetch_metadata_button.setFocus(Qt.OtherFocusReason)
-
# Miscellaneous interaction methods {{{
def update_window_title(self, *args):
title = self.title.current_val
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index f19db7a33f..a917aa09b2 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -15,7 +15,7 @@ from math import ceil
from calibre import prints
from calibre.ebooks.metadata import (title_sort, author_to_author_sort,
- string_to_authors, authors_to_string)
+ string_to_authors, authors_to_string, get_title_sort_pat)
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.library.database import LibraryDatabase
from calibre.library.field_metadata import FieldMetadata, TagsIcons
@@ -1004,10 +1004,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
return False
def find_identical_books(self, mi):
- fuzzy_title_patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in
+ fuzzy_title_patterns = [(re.compile(pat, re.IGNORECASE) if
+ isinstance(pat, basestring) else pat, repl) for pat, repl in
[
(r'[\[\](){}<>\'";,:#]', ''),
- (tweaks.get('title_sort_articles', r'^(a|the|an)\s+'), ''),
+ (get_title_sort_pat(), ''),
(r'[-._]', ' '),
(r'\s+', ' ')
]