A spot of refactoring

This commit is contained in:
Kovid Goyal 2014-11-09 21:45:17 +05:30
parent 5198962763
commit c64828c6c8
2 changed files with 27 additions and 19 deletions

View File

@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, traceback, random, shutil, re, operator import os, traceback, random, shutil, operator
from io import BytesIO from io import BytesIO
from collections import defaultdict from collections import defaultdict
from functools import wraps, partial from functools import wraps, partial
@ -26,7 +26,7 @@ from calibre.db.tables import VirtualTable
from calibre.db.write import get_series_values, uniq from calibre.db.write import get_series_values, uniq
from calibre.db.lazy import FormatMetadata, FormatsList, ProxyMetadata from calibre.db.lazy import FormatMetadata, FormatsList, ProxyMetadata
from calibre.ebooks import check_ebook_format from calibre.ebooks import check_ebook_format
from calibre.ebooks.metadata import string_to_authors, author_to_author_sort, get_title_sort_pat from calibre.ebooks.metadata import string_to_authors, author_to_author_sort
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ptempfile import (base_dir, PersistentTemporaryFile, from calibre.ptempfile import (base_dir, PersistentTemporaryFile,
@ -1778,22 +1778,7 @@ class Cache(object):
def find_identical_books(self, mi, search_restriction='', book_ids=None): def find_identical_books(self, mi, search_restriction='', book_ids=None):
''' Finds books that have a superset of the authors in mi and the same ''' Finds books that have a superset of the authors in mi and the same
title (title is fuzzy matched) ''' title (title is fuzzy matched) '''
fuzzy_title_patterns = [(re.compile(pat, re.IGNORECASE) if from calibre.db.utils import fuzzy_title
isinstance(pat, basestring) else pat, repl) for pat, repl in
[
(r'[\[\](){}<>\'";,:#]', ''),
(get_title_sort_pat(), ''),
(r'[-._]', ' '),
(r'\s+', ' ')
]
]
def fuzzy_title(title):
title = icu_lower(title.strip())
for pat, repl in fuzzy_title_patterns:
title = pat.sub(repl, title)
return title
identical_book_ids = set() identical_book_ids = set()
if mi.authors: if mi.authors:
try: try:

View File

@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os, errno, cPickle, sys import os, errno, cPickle, sys, re
from collections import OrderedDict, namedtuple from collections import OrderedDict, namedtuple
from future_builtins import map from future_builtins import map
from threading import Lock from threading import Lock
@ -30,6 +30,29 @@ def force_to_bool(val):
val = None val = None
return val return val
# Module-level cache for fuzzy_title_patterns(); None until the first call.
_fuzzy_title_patterns = None


def fuzzy_title_patterns():
    ''' Return the tuple of (compiled pattern, replacement) pairs used to
    normalize titles for fuzzy matching.

    The tuple is built on the first call and stored in the module-level
    ``_fuzzy_title_patterns``; subsequent calls return the stored tuple. '''
    global _fuzzy_title_patterns
    if _fuzzy_title_patterns is not None:
        return _fuzzy_title_patterns

    # Imported at call time rather than at module import time.
    from calibre.ebooks.metadata import get_title_sort_pat

    # Order matters: the rules are applied one after another by the caller.
    raw_rules = [
        (r'[\[\](){}<>\'";,:#]', ''),  # drop brackets, quotes and punctuation
        (get_title_sort_pat(), ''),    # presumably title-sort prefixes -- confirm against get_title_sort_pat
        (r'[-._]', ' '),               # separators become spaces
        (r'\s+', ' ')                  # collapse whitespace runs
    ]

    compiled_rules = []
    for rule, replacement in raw_rules:
        # String rules are compiled case-insensitively; anything else is
        # used exactly as supplied.
        if isinstance(rule, basestring):
            rule = re.compile(rule, re.IGNORECASE)
        compiled_rules.append((rule, replacement))

    _fuzzy_title_patterns = tuple(compiled_rules)
    return _fuzzy_title_patterns

def fuzzy_title(title):
    ''' Return a normalized form of ``title`` for fuzzy comparison.

    The title is stripped of surrounding whitespace and lower-cased with
    ``icu_lower``, then every (pattern, replacement) pair returned by
    :func:`fuzzy_title_patterns` is applied in order. '''
    normalized = icu_lower(title.strip())
    for regex, replacement in fuzzy_title_patterns():
        normalized = regex.sub(replacement, normalized)
    return normalized

Entry = namedtuple('Entry', 'path size timestamp thumbnail_size') Entry = namedtuple('Entry', 'path size timestamp thumbnail_size')
class CacheError(Exception): class CacheError(Exception):
pass pass