From 3ff0beb626400f9e00d668daf0bdb512478f52f3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 24 Feb 2016 20:40:04 +0530 Subject: [PATCH] Allow creating rules to ignore files when adding books based on the filename, useful when using the auto-add feature or when adding in bulk. Preferences->Adding Books->Rules to filter added files --- src/calibre/db/adding.py | 50 +++++++++++++++++++++----- src/calibre/db/tests/filesystem.py | 21 +++++++++++ src/calibre/gui2/add.py | 12 +++++-- src/calibre/gui2/auto_add.py | 24 ++++++++++++- src/calibre/gui2/preferences/adding.py | 23 ++++++++++-- src/calibre/gui2/preferences/adding.ui | 7 ++++ 6 files changed, 124 insertions(+), 13 deletions(-) diff --git a/src/calibre/db/adding.py b/src/calibre/db/adding.py index 79e8ac1d61..128e012f5d 100644 --- a/src/calibre/db/adding.py +++ b/src/calibre/db/adding.py @@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import, __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' -import os, time +import os, time, re from collections import defaultdict from future_builtins import map @@ -22,6 +22,34 @@ def formats_ok(formats): def path_ok(path): return not os.path.isdir(path) and os.access(path, os.R_OK) +def compile_glob(pat): + import fnmatch + return re.compile(fnmatch.translate(pat), flags=re.I) + +def compile_rule(rule): + mt = rule['match_type'] + if 'with' in mt: + q = icu_lower(rule['query']) + if 'startswith' in mt: + func = lambda filename: icu_lower(filename).startswith(q) + else: + func = lambda filename: icu_lower(filename).endswith(q) + elif 'glob' in mt: + q = compile_glob(rule['query']) + func = lambda filename: q.match(filename) is not None + else: + q = re.compile(rule['query']) + func = lambda filename: q.match(filename) is not None + ans = func + if mt.startswith('not_'): + ans = lambda filename: not func(filename) + return ans, rule['action'] == 'add' + +def filter_filename(compiled_rules, filename): + for q, action in compiled_rules: + 
if q(filename): + return action + _metadata_extensions = None def metadata_extensions(): @@ -43,24 +71,30 @@ def listdir(root, sort_by_mtime=False): items = sorted(items, key=safe_mtime) for path in items: - yield path + if path_ok(path): + yield path -def find_books_in_directory(dirpath, single_book_per_directory): +def allow_path(path, ext, compiled_rules): + ans = filter_filename(compiled_rules, os.path.basename(path)) + if ans is None: + ans = ext in metadata_extensions() + return ans + +def find_books_in_directory(dirpath, single_book_per_directory, compiled_rules=(), listdir_impl=listdir): dirpath = os.path.abspath(dirpath) - book_extentions = metadata_extensions() if single_book_per_directory: formats = {} - for path in listdir(dirpath): + for path in listdir_impl(dirpath): key, ext = splitext(path) - if ext in book_extentions and path_ok(path): + if allow_path(path, ext, compiled_rules): formats[ext] = path if formats_ok(formats): yield list(formats.itervalues()) else: books = defaultdict(dict) - for path in listdir(dirpath, sort_by_mtime=True): + for path in listdir_impl(dirpath, sort_by_mtime=True): key, ext = splitext(path) - if ext in book_extentions and path_ok(path): + if allow_path(path, ext, compiled_rules): books[icu_lower(key) if isinstance(key, unicode) else key.lower()][ext] = path for formats in books.itervalues(): diff --git a/src/calibre/db/tests/filesystem.py b/src/calibre/db/tests/filesystem.py index a4def468d2..f7fa341df1 100644 --- a/src/calibre/db/tests/filesystem.py +++ b/src/calibre/db/tests/filesystem.py @@ -161,3 +161,24 @@ class FilesystemTest(BaseTest): for fmt in cache.formats(book_id): self.assertEqual(cache.format(book_id, fmt), ic.format(book_id, fmt)) self.assertEqual(cache.format_metadata(book_id, fmt)['mtime'], cache.format_metadata(book_id, fmt)['mtime']) + + def test_find_books_in_directory(self): + from calibre.db.adding import find_books_in_directory, compile_rule + strip = lambda files: frozenset({os.path.basename(x) 
for x in files}) + def q(one, two): + one, two = {strip(a) for a in one}, {strip(b) for b in two} + self.assertEqual(one, two) + def r(action='ignore', match_type='startswith', query=''): + return {'action':action, 'match_type':match_type, 'query':query} + def c(*rules): + return tuple(map(compile_rule, rules)) + + files = ['added.epub', 'ignored.md', 'non-book.other'] + q(['added.epub ignored.md'.split()], find_books_in_directory('', True, listdir_impl=lambda x: files)) + q([['added.epub'], ['ignored.md']], find_books_in_directory('', False, listdir_impl=lambda x, **k: files)) + for rules in ( + c(r(query='ignored.'), r(action='add', match_type='endswith', query='.OTHER')), + c(r(match_type='glob', query='*.md'), r(action='add', match_type='matches', query=r'.+\.other$')), + c(r(match_type='not_startswith', query='IGnored.', action='add'), r(query='ignored.md')), + ): + q(['added.epub non-book.other'.split()], find_books_in_directory('', True, compiled_rules=rules, listdir_impl=lambda x: files)) diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py index 9aa8a0c138..0b6f02c4d0 100644 --- a/src/calibre/gui2/add.py +++ b/src/calibre/gui2/add.py @@ -11,13 +11,14 @@ from threading import Thread from collections import OrderedDict from Queue import Empty from io import BytesIO +from future_builtins import map from PyQt5.Qt import QObject, Qt, pyqtSignal from calibre import prints, as_unicode from calibre.constants import DEBUG from calibre.customize.ui import run_plugins_on_postimport, run_plugins_on_postadd -from calibre.db.adding import find_books_in_directory +from calibre.db.adding import find_books_in_directory, compile_rule from calibre.db.utils import find_identical_books from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.opf2 import OPF @@ -119,9 +120,16 @@ class Adder(QObject): def scan(self): + try: + compiled_rules = tuple(map(compile_rule, gprefs.get('add_filter_rules', ()))) + except Exception: + compiled_rules = 
() + import traceback + traceback.print_exc() + def find_files(root): for dirpath, dirnames, filenames in os.walk(root): - for files in find_books_in_directory(dirpath, self.single_book_per_directory): + for files in find_books_in_directory(dirpath, self.single_book_per_directory, compiled_rules=compiled_rules): if self.abort_scan: return self.file_groups[len(self.file_groups)] = files diff --git a/src/calibre/gui2/auto_add.py b/src/calibre/gui2/auto_add.py index 44df81876c..69f81e12e4 100644 --- a/src/calibre/gui2/auto_add.py +++ b/src/calibre/gui2/auto_add.py @@ -9,11 +9,13 @@ __docformat__ = 'restructuredtext en' import os, tempfile, shutil, time from threading import Thread, Event +from future_builtins import map from PyQt5.Qt import (QFileSystemWatcher, QObject, Qt, pyqtSignal, QTimer) from calibre import prints from calibre.ptempfile import PersistentTemporaryDirectory +from calibre.db.adding import filter_filename, compile_rule from calibre.ebooks import BOOK_EXTENSIONS from calibre.gui2 import gprefs from calibre.gui2.dialogs.duplicates import DuplicatesQuestion @@ -47,6 +49,22 @@ class Worker(Thread): self.path, self.callback = path, callback self.staging = set() self.allowed = allowed_formats() + self.read_rules() + + def read_rules(self): + try: + self.compiled_rules = tuple(map(compile_rule, gprefs.get('add_filter_rules', ()))) + except Exception: + self.compiled_rules = () + import traceback + traceback.print_exc() + + def is_filename_allowed(self, filename): + allowed = filter_filename(self.compiled_rules, filename) + if allowed is None: + ext = os.path.splitext(filename)[1][1:].lower() + allowed = ext in self.allowed + return allowed def run(self): self.tdir = PersistentTemporaryDirectory('_auto_adder') @@ -76,7 +94,7 @@ class Worker(Thread): # Must have read and write permissions os.access(os.path.join(self.path, x), os.R_OK|os.W_OK) and # Must be a known ebook file type - os.path.splitext(x)[1][1:].lower() in self.allowed + 
self.is_filename_allowed(x) ] data = {} # Give any in progress copies time to complete @@ -149,6 +167,10 @@ class AutoAdder(QObject): prints(path, 'is not a valid directory to watch for new ebooks, ignoring') + def read_rules(self): + if hasattr(self, 'worker'): + self.worker.read_rules() + def initialize(self): try: if os.listdir(self.worker.path): diff --git a/src/calibre/gui2/preferences/adding.py b/src/calibre/gui2/preferences/adding.py index 4d50295be1..cf659fcd9b 100644 --- a/src/calibre/gui2/preferences/adding.py +++ b/src/calibre/gui2/preferences/adding.py @@ -50,8 +50,9 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): for signal in ('Activated', 'Changed', 'DoubleClicked', 'Clicked'): signal = getattr(self.opt_blocked_auto_formats, 'item'+signal) signal.connect(self.blocked_auto_formats_changed) - self.tag_map_rules = None + self.tag_map_rules = self.add_filter_rules = None self.tag_map_rules_button.clicked.connect(self.change_tag_map_rules) + self.add_filter_rules_button.clicked.connect(self.change_add_filter_rules) def change_tag_map_rules(self): from calibre.gui2.tag_mapper import RulesDialog @@ -62,6 +63,15 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): self.tag_map_rules = d.rules self.changed_signal.emit() + def change_add_filter_rules(self): + from calibre.gui2.add_filters import RulesDialog + d = RulesDialog(self) + if gprefs.get('add_filter_rules'): + d.rules = gprefs['add_filter_rules'] + if d.exec_() == d.Accepted: + self.add_filter_rules = d.rules + self.changed_signal.emit() + def choose_aa_path(self): path = choose_dir(self, 'auto add path choose', _('Choose a folder')) @@ -75,7 +85,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): self.filename_pattern.blockSignals(False) self.init_blocked_auto_formats() self.opt_automerge.setEnabled(self.opt_add_formats_to_existing.isChecked()) - self.tag_map_rules = None + self.tag_map_rules = self.add_filter_rules = None # Blocked auto formats {{{ def blocked_auto_formats_changed(self, *args): @@ 
-115,6 +125,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): ConfigWidgetBase.restore_defaults(self) self.filename_pattern.initialize(defaults=True) self.init_blocked_auto_formats(defaults=True) + self.tag_map_rules = [] + self.add_filter_rules = [] def commit(self): path = unicode(self.opt_auto_add_path.text()).strip() @@ -150,12 +162,19 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): gprefs['tag_map_on_add_rules'] = self.tag_map_rules else: gprefs.pop('tag_map_on_add_rules', None) + if self.add_filter_rules is not None: + if self.add_filter_rules: + gprefs['add_filter_rules'] = self.add_filter_rules + else: + gprefs.pop('add_filter_rules', None) ret = ConfigWidgetBase.commit(self) return changed or ret def refresh_gui(self, gui): # Ensure worker process reads updated settings gui.spare_pool().shutdown() + # Update rules used in the auto adder + gui.auto_adder.read_rules() if __name__ == '__main__': from PyQt5.Qt import QApplication diff --git a/src/calibre/gui2/preferences/adding.ui b/src/calibre/gui2/preferences/adding.ui index d792d0200d..0b6efa4b2f 100644 --- a/src/calibre/gui2/preferences/adding.ui +++ b/src/calibre/gui2/preferences/adding.ui @@ -185,6 +185,13 @@ Title match ignores leading indefinite articles ("the", "a", + + + + Rules to &filter added files + + +