mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Change the algorithm that generates title sort strings to strip leading articles from both English and the current language set for the calibre user interface. In addition, in the edit metadata dialog, calibre will use the book's language when calculating the sort string. This behavior can be adjusted via Preferences->Tweaks. Fixes #886763 ([Enhancement] multi-lingual adjustment of (in)definite articles in title_sort)
This commit is contained in:
parent
6d155607fd
commit
e4575abba4
@ -201,15 +201,49 @@ save_template_title_series_sorting = 'library_order'
|
||||
|
||||
#: Set the list of words considered to be "articles" for sort strings
|
||||
# Set the list of words that are to be considered 'articles' when computing the
|
||||
# title sort strings. The list is a regular expression, with the articles
|
||||
# separated by 'or' bars. Comparisons are case insensitive, and that cannot be
|
||||
# changed. Changes to this tweak won't have an effect until the book is modified
|
||||
# in some way. If you enter an invalid pattern, it is silently ignored.
|
||||
# To disable use the expression: '^$'
|
||||
# This expression is designed for articles that are followed by spaces. If you
|
||||
# also need to match articles that are followed by other characters, for example L'
|
||||
# in French, use: "^(A\s+|The\s+|An\s+|L')" instead.
|
||||
# Default: '^(A|The|An)\s+'
|
||||
# title sort strings. The articles differ by language. By default, calibre uses
|
||||
# a combination of articles from English and whatever language the calibre user
|
||||
# interface is set to. In addition, in some contexts where the book language is
|
||||
# available, the language of the book is used. You can change the list of
|
||||
# articles for a given language or add a new language by editing
|
||||
# per_language_title_sort_articles. To tell calibre to use a language other
|
||||
# than the user interface language, set default_language_for_title_sort. For
|
||||
# example, to use German, set it to 'deu'. A value of None means the user
|
||||
# interface language is used. The setting title_sort_articles is ignored
|
||||
# (present only for legacy reasons).
|
||||
per_language_title_sort_articles = {
|
||||
# English
|
||||
'eng' : (r'A\s+', r'The\s+', r'An\s+'),
|
||||
# Spanish
|
||||
'spa' : (r'El\s+', r'La\s+', r'Lo\s+', r'Los\s+', r'Las\s+', r'Un\s+',
|
||||
r'Una\s+', r'Unos\s+', r'Unas\s+'),
|
||||
# French
|
||||
'fra' : (r'Le\s+', r'La\s+', r"L'", r'Les\s+', r'Un\s+', r'Une\s+',
|
||||
r'Des\s+'),
|
||||
# Italian
|
||||
'ita' : (r'Lo\s+', r'Il\s+', r"L'", r'La\s+', r'Gli\s+', r'I\s+',
|
||||
r'Le\s+', ),
|
||||
# Portuguese
|
||||
'por' : (r'A\s+', r'O\s+', r'Os\s+', r'As\s+', r'Um\s+', r'Uns\s+',
|
||||
r'Uma\s+', r'Umas\s+', ),
|
||||
# Romanian
|
||||
'ron' : (r'Un\s+', r'O\s+', r'Nişte\s+', ),
|
||||
# German
|
||||
'deu' : (r'Der\s+', r'Die\s+', r'Das\s+', r'Den\s+', r'Ein\s+',
|
||||
r'Eine\s+', r'Einen\s+', ),
|
||||
# Dutch
|
||||
'nld' : (r'De\s+', r'Het\s+', r'Een\s+', ),
|
||||
# Swedish
|
||||
'swe' : (r'En\s+', r'Ett\s+', r'Det\s+', r'Den\s+', r'De\s+', ),
|
||||
# Turkish
|
||||
'tur' : (r'Bir\s+', ),
|
||||
# Afrikaans
|
||||
'afr' : (r"'n\s+", r'Die\s+', ),
|
||||
# Greek
|
||||
'ell' : (r'O\s+', r'I\s+', r'To\s+', r'Ta\s+', r'Tus\s+', r'Tis\s+',
|
||||
r"'Enas\s+", r"'Mia\s+", r"'Ena\s+", r"'Enan\s+", ),
|
||||
}
|
||||
default_language_for_title_sort = None
|
||||
title_sort_articles=r'^(A|The|An)\s+'
|
||||
|
||||
#: Specify a folder calibre should connect to at startup
|
||||
|
@ -95,18 +95,33 @@ def author_to_author_sort(author, method=None):
|
||||
def authors_to_sort_string(authors):
|
||||
return ' & '.join(map(author_to_author_sort, authors))
|
||||
|
||||
try:
|
||||
_title_pat = re.compile(tweaks.get('title_sort_articles',
|
||||
r'^(A|The|An)\s+'), re.IGNORECASE)
|
||||
except:
|
||||
print 'Error in title sort pattern'
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
_title_pat = re.compile('^(A|The|An)\s+', re.IGNORECASE)
|
||||
_title_pats = {}
|
||||
def get_title_sort_pat(lang=None):
    '''
    Return a compiled, case insensitive regular expression that matches a
    leading article at the start of a title.

    :param lang: Language code whose articles should be used. When None, the
        ``default_language_for_title_sort`` tweak is used, and if that is
        also None, the language returned by ``get_lang()``.
    :return: A compiled pattern of the form ``^(art1|art2|...)`` built from
        the articles of the resolved language followed by the English
        articles. Group 1 holds the matched article.

    Compiled patterns are cached in ``_title_pats`` keyed by the ``lang``
    argument exactly as passed in, so the tweaks are only consulted once per
    distinct argument.
    '''
    cached = _title_pats.get(lang, None)
    if cached is not None:
        return cached

    from calibre.utils.localization import canonicalize_lang, get_lang

    q = lang
    if q is None:
        # Use .get() so that a missing tweak behaves like an unset one
        # instead of raising KeyError.
        q = tweaks.get('default_language_for_title_sort', None)
        if q is None:
            q = get_lang()
    q = canonicalize_lang(q) if q else q

    data = tweaks['per_language_title_sort_articles']
    # Normalise to tuples: a user-edited tweak may hold a list for one
    # language and a tuple for another, and list + tuple raises TypeError.
    english = tuple(data['eng'])
    articles = data.get(q, None)
    articles = english if articles is None else tuple(articles)

    # Remove duplicates while keeping the declared order, so the generated
    # alternation is the same on every run.
    seen = set()
    unique = []
    for article in articles + english:
        if article not in seen:
            seen.add(article)
            unique.append(article)

    pat = re.compile('^(%s)' % '|'.join(unique), re.IGNORECASE)
    _title_pats[lang] = pat
    return pat
|
||||
|
||||
_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
|
||||
_ignore_starts = u'\'"'+u''.join(unichr(x) for x in
|
||||
range(0x2018, 0x201e)+[0x2032, 0x2033])
|
||||
|
||||
def title_sort(title, order=None):
|
||||
def title_sort(title, order=None, lang=None):
|
||||
if order is None:
|
||||
order = tweaks['title_series_sorting']
|
||||
title = title.strip()
|
||||
@ -114,7 +129,7 @@ def title_sort(title, order=None):
|
||||
return title
|
||||
if title and title[0] in _ignore_starts:
|
||||
title = title[1:]
|
||||
match = _title_pat.search(title)
|
||||
match = get_title_sort_pat(lang).search(title)
|
||||
if match:
|
||||
try:
|
||||
prep = match.group(1)
|
||||
|
@ -138,9 +138,10 @@ class TitleSortEdit(TitleEdit):
|
||||
' For example, The Exorcist might be sorted as Exorcist, The.')
|
||||
LABEL = _('Title &sort:')
|
||||
|
||||
def __init__(self, parent, title_edit, autogen_button):
|
||||
def __init__(self, parent, title_edit, autogen_button, languages_edit):
|
||||
TitleEdit.__init__(self, parent)
|
||||
self.title_edit = title_edit
|
||||
self.languages_edit = languages_edit
|
||||
|
||||
base = self.TOOLTIP
|
||||
ok_tooltip = '<p>' + textwrap.fill(base+'<br><br>'+
|
||||
@ -157,10 +158,20 @@ class TitleSortEdit(TitleEdit):
|
||||
|
||||
self.autogen_button = autogen_button
|
||||
autogen_button.clicked.connect(self.auto_generate)
|
||||
languages_edit.editTextChanged.connect(self.update_state)
|
||||
languages_edit.currentIndexChanged.connect(self.update_state)
|
||||
self.update_state()
|
||||
|
||||
@property
def book_lang(self):
    '''
    The first language code currently set in the languages editor, or None
    if it cannot be determined (for example when no language is set).
    '''
    try:
        return self.languages_edit.lang_codes[0]
    except Exception:
        # Best effort: any failure to read the language means "unknown".
        # Only Exception is caught, so KeyboardInterrupt and SystemExit
        # still propagate.
        return None
|
||||
|
||||
def update_state(self, *args):
|
||||
ts = title_sort(self.title_edit.current_val)
|
||||
ts = title_sort(self.title_edit.current_val, lang=self.book_lang)
|
||||
normal = ts == self.current_val
|
||||
if normal:
|
||||
col = 'rgb(0, 255, 0, 20%)'
|
||||
@ -173,7 +184,8 @@ class TitleSortEdit(TitleEdit):
|
||||
self.setWhatsThis(tt)
|
||||
|
||||
def auto_generate(self, *args):
|
||||
self.current_val = title_sort(self.title_edit.current_val)
|
||||
self.current_val = title_sort(self.title_edit.current_val,
|
||||
lang=self.book_lang)
|
||||
|
||||
def break_cycles(self):
|
||||
try:
|
||||
|
@ -109,6 +109,9 @@ class MetadataSingleDialogBase(ResizableDialog):
|
||||
def create_basic_metadata_widgets(self): # {{{
|
||||
self.basic_metadata_widgets = []
|
||||
|
||||
self.languages = LanguagesEdit(self)
|
||||
self.basic_metadata_widgets.append(self.languages)
|
||||
|
||||
self.title = TitleEdit(self)
|
||||
self.title.textChanged.connect(self.update_window_title)
|
||||
self.deduce_title_sort_button = QToolButton(self)
|
||||
@ -119,7 +122,7 @@ class MetadataSingleDialogBase(ResizableDialog):
|
||||
self.deduce_title_sort_button.setWhatsThis(
|
||||
self.deduce_title_sort_button.toolTip())
|
||||
self.title_sort = TitleSortEdit(self, self.title,
|
||||
self.deduce_title_sort_button)
|
||||
self.deduce_title_sort_button, self.languages)
|
||||
self.basic_metadata_widgets.extend([self.title, self.title_sort])
|
||||
|
||||
self.deduce_author_sort_button = b = QToolButton(self)
|
||||
@ -203,9 +206,6 @@ class MetadataSingleDialogBase(ResizableDialog):
|
||||
self.publisher = PublisherEdit(self)
|
||||
self.basic_metadata_widgets.append(self.publisher)
|
||||
|
||||
self.languages = LanguagesEdit(self)
|
||||
self.basic_metadata_widgets.append(self.languages)
|
||||
|
||||
self.timestamp = DateEdit(self)
|
||||
self.pubdate = PubdateEdit(self)
|
||||
self.basic_metadata_widgets.extend([self.timestamp, self.pubdate])
|
||||
@ -282,7 +282,6 @@ class MetadataSingleDialogBase(ResizableDialog):
|
||||
# Commented out as it doesn't play nice with Next, Prev buttons
|
||||
#self.fetch_metadata_button.setFocus(Qt.OtherFocusReason)
|
||||
|
||||
|
||||
# Miscellaneous interaction methods {{{
|
||||
def update_window_title(self, *args):
|
||||
title = self.title.current_val
|
||||
|
@ -15,7 +15,7 @@ from math import ceil
|
||||
|
||||
from calibre import prints
|
||||
from calibre.ebooks.metadata import (title_sort, author_to_author_sort,
|
||||
string_to_authors, authors_to_string)
|
||||
string_to_authors, authors_to_string, get_title_sort_pat)
|
||||
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
||||
from calibre.library.database import LibraryDatabase
|
||||
from calibre.library.field_metadata import FieldMetadata, TagsIcons
|
||||
@ -1004,10 +1004,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
||||
return False
|
||||
|
||||
def find_identical_books(self, mi):
|
||||
fuzzy_title_patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in
|
||||
fuzzy_title_patterns = [(re.compile(pat, re.IGNORECASE) if
|
||||
isinstance(pat, basestring) else pat, repl) for pat, repl in
|
||||
[
|
||||
(r'[\[\](){}<>\'";,:#]', ''),
|
||||
(tweaks.get('title_sort_articles', r'^(a|the|an)\s+'), ''),
|
||||
(get_title_sort_pat(), ''),
|
||||
(r'[-._]', ' '),
|
||||
(r'\s+', ' ')
|
||||
]
|
||||
|
Loading…
x
Reference in New Issue
Block a user