From c64828c6c8b01e62aebcac4335525257013f516e Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 9 Nov 2014 21:45:17 +0530
Subject: [PATCH] A spot of refactoring

---
 src/calibre/db/cache.py | 21 +++------------------
 src/calibre/db/utils.py | 25 ++++++++++++++++++++++++-
 2 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py
index 11ce1de526..4f91a382cd 100644
--- a/src/calibre/db/cache.py
+++ b/src/calibre/db/cache.py
@@ -7,7 +7,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal '
 __docformat__ = 'restructuredtext en'
 
-import os, traceback, random, shutil, re, operator
+import os, traceback, random, shutil, operator
 from io import BytesIO
 from collections import defaultdict
 from functools import wraps, partial
@@ -26,7 +26,7 @@ from calibre.db.tables import VirtualTable
 from calibre.db.write import get_series_values, uniq
 from calibre.db.lazy import FormatMetadata, FormatsList, ProxyMetadata
 from calibre.ebooks import check_ebook_format
-from calibre.ebooks.metadata import string_to_authors, author_to_author_sort, get_title_sort_pat
+from calibre.ebooks.metadata import string_to_authors, author_to_author_sort
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ptempfile import (base_dir, PersistentTemporaryFile,
@@ -1778,22 +1778,7 @@ class Cache(object):
     def find_identical_books(self, mi, search_restriction='', book_ids=None):
         ''' Finds books that have a superset of the authors in mi and the same
         title (title is fuzzy matched) '''
-        fuzzy_title_patterns = [(re.compile(pat, re.IGNORECASE) if
-            isinstance(pat, basestring) else pat, repl) for pat, repl in
-                [
-                    (r'[\[\](){}<>\'";,:#]', ''),
-                    (get_title_sort_pat(), ''),
-                    (r'[-._]', ' '),
-                    (r'\s+', ' ')
-                ]
-        ]
-
-        def fuzzy_title(title):
-            title = icu_lower(title.strip())
-            for pat, repl in fuzzy_title_patterns:
-                title = pat.sub(repl, title)
-            return title
-
+        from calibre.db.utils import fuzzy_title
         identical_book_ids = set()
         if mi.authors:
             try:
diff --git a/src/calibre/db/utils.py b/src/calibre/db/utils.py
index dad05c2022..2026c287ed 100644
--- a/src/calibre/db/utils.py
+++ b/src/calibre/db/utils.py
@@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal '
 
-import os, errno, cPickle, sys
+import os, errno, cPickle, sys, re
 from collections import OrderedDict, namedtuple
 from future_builtins import map
 from threading import Lock
@@ -30,6 +30,29 @@ def force_to_bool(val):
             val = None
     return val
 
+_fuzzy_title_patterns = None
+
+def fuzzy_title_patterns():
+    global _fuzzy_title_patterns
+    if _fuzzy_title_patterns is None:
+        from calibre.ebooks.metadata import get_title_sort_pat
+        _fuzzy_title_patterns = tuple((re.compile(pat, re.IGNORECASE) if
+            isinstance(pat, basestring) else pat, repl) for pat, repl in
+                [
+                    (r'[\[\](){}<>\'";,:#]', ''),
+                    (get_title_sort_pat(), ''),
+                    (r'[-._]', ' '),
+                    (r'\s+', ' ')
+                ]
+        )
+    return _fuzzy_title_patterns
+
+def fuzzy_title(title):
+    title = icu_lower(title.strip())
+    for pat, repl in fuzzy_title_patterns():
+        title = pat.sub(repl, title)
+    return title
+
 Entry = namedtuple('Entry', 'path size timestamp thumbnail_size')
 class CacheError(Exception):
     pass