Allow creating rules to ignore files when adding books based on the filename, useful when using the auto-add feature or when adding in bulk. Preferences->Adding Books->Rules to filter added files

This commit is contained in:
Kovid Goyal 2016-02-24 20:40:04 +05:30
parent 5f8b99eca5
commit 3ff0beb626
6 changed files with 124 additions and 13 deletions

View File

@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os, time
import os, time, re
from collections import defaultdict
from future_builtins import map
@ -22,6 +22,34 @@ def formats_ok(formats):
def path_ok(path):
    '''
    Return True if ``path`` is not a directory and is readable by the
    current process, False otherwise.
    '''
    if os.path.isdir(path):
        return False
    return os.access(path, os.R_OK)
def compile_glob(pat):
    '''
    Translate the shell-style glob ``pat`` into a compiled regular
    expression that matches case-insensitively.
    '''
    from fnmatch import translate
    return re.compile(translate(pat), flags=re.IGNORECASE)
def compile_rule(rule):
    '''
    Compile a filter rule into a ``(predicate, is_add)`` pair.

    ``rule`` is a mapping with the keys ``'match_type'``, ``'query'`` and
    ``'action'``. The match type selects how the query is tested against a
    filename:

      * contains ``'with'``: the filename, lower-cased with icu_lower(),
        must start with (``'startswith'``) or otherwise end with the
        lower-cased query
      * contains ``'glob'``: the query is compiled with compile_glob()
      * anything else: the query is compiled as a regular expression

    Glob and regular expression queries are applied with ``match()``, i.e.
    anchored at the start of the filename. A match type beginning with
    ``'not_'`` inverts the result of the test.

    ``predicate`` takes a filename and returns a bool; ``is_add`` is True
    only when the rule's action is ``'add'``.
    '''
    match_type = rule['match_type']
    if 'with' in match_type:
        needle = icu_lower(rule['query'])
        if 'startswith' in match_type:
            def matcher(filename):
                return icu_lower(filename).startswith(needle)
        else:
            def matcher(filename):
                return icu_lower(filename).endswith(needle)
    else:
        if 'glob' in match_type:
            pattern = compile_glob(rule['query'])
        else:
            pattern = re.compile(rule['query'])

        def matcher(filename):
            return pattern.match(filename) is not None

    if match_type.startswith('not_'):
        def predicate(filename):
            return not matcher(filename)
    else:
        predicate = matcher
    return predicate, rule['action'] == 'add'
def filter_filename(compiled_rules, filename):
    '''
    Return the action of the first rule in ``compiled_rules`` whose
    predicate accepts ``filename``, or None if no rule matches.

    ``compiled_rules`` is an iterable of ``(predicate, action)`` pairs, as
    produced by compile_rule().
    '''
    return next(
        (action for matches, action in compiled_rules if matches(filename)),
        None)
_metadata_extensions = None
def metadata_extensions():
@ -43,24 +71,30 @@ def listdir(root, sort_by_mtime=False):
items = sorted(items, key=safe_mtime)
for path in items:
yield path
if path_ok(path):
yield path
def find_books_in_directory(dirpath, single_book_per_directory):
def allow_path(path, ext, compiled_rules):
    '''
    Decide whether the file at ``path`` should be treated as a book.

    The filter rules are tried against the basename of ``path`` first. If
    no rule matches, the file is allowed only when ``ext`` is one of the
    extensions returned by metadata_extensions().
    '''
    verdict = filter_filename(compiled_rules, os.path.basename(path))
    if verdict is not None:
        return verdict
    return ext in metadata_extensions()
def find_books_in_directory(dirpath, single_book_per_directory, compiled_rules=(), listdir_impl=listdir):
dirpath = os.path.abspath(dirpath)
book_extentions = metadata_extensions()
if single_book_per_directory:
formats = {}
for path in listdir(dirpath):
for path in listdir_impl(dirpath):
key, ext = splitext(path)
if ext in book_extentions and path_ok(path):
if allow_path(path, ext, compiled_rules):
formats[ext] = path
if formats_ok(formats):
yield list(formats.itervalues())
else:
books = defaultdict(dict)
for path in listdir(dirpath, sort_by_mtime=True):
for path in listdir_impl(dirpath, sort_by_mtime=True):
key, ext = splitext(path)
if ext in book_extentions and path_ok(path):
if allow_path(path, ext, compiled_rules):
books[icu_lower(key) if isinstance(key, unicode) else key.lower()][ext] = path
for formats in books.itervalues():

View File

@ -161,3 +161,24 @@ class FilesystemTest(BaseTest):
for fmt in cache.formats(book_id):
self.assertEqual(cache.format(book_id, fmt), ic.format(book_id, fmt))
self.assertEqual(cache.format_metadata(book_id, fmt)['mtime'], cache.format_metadata(book_id, fmt)['mtime'])
def test_find_books_in_directory(self):
    'Test grouping of files by find_books_in_directory(), with and without filter rules'
    from calibre.db.adding import find_books_in_directory, compile_rule

    def basenames(paths):
        return frozenset({os.path.basename(p) for p in paths})

    def assert_same_books(expected, actual):
        # Compare as sets of frozensets so neither the order of the books
        # nor the order of the files within a book matters
        expected = {basenames(group) for group in expected}
        actual = {basenames(group) for group in actual}
        self.assertEqual(expected, actual)

    def rule(action='ignore', match_type='startswith', query=''):
        return {'action':action, 'match_type':match_type, 'query':query}

    def compiled(*rules):
        return tuple(compile_rule(x) for x in rules)

    files = ['added.epub', 'ignored.md', 'non-book.other']

    def list_files(dirpath):
        return files

    def list_files_with_options(dirpath, **kwargs):
        return files

    # Without any rules, only files with known book extensions are picked up
    assert_same_books(
        ['added.epub ignored.md'.split()],
        find_books_in_directory('', True, listdir_impl=list_files))
    assert_same_books(
        [['added.epub'], ['ignored.md']],
        find_books_in_directory('', False, listdir_impl=list_files_with_options))

    # Three equivalent rule sets: each ignores ignored.md and adds non-book.other
    rule_sets = (
        compiled(rule(query='ignored.'), rule(action='add', match_type='endswith', query='.OTHER')),
        compiled(rule(match_type='glob', query='*.md'), rule(action='add', match_type='matches', query=r'.+\.other$')),
        compiled(rule(match_type='not_startswith', query='IGnored.', action='add'), rule(query='ignored.md')),
    )
    for rules in rule_sets:
        assert_same_books(
            ['added.epub non-book.other'.split()],
            find_books_in_directory('', True, compiled_rules=rules, listdir_impl=list_files))

View File

@ -11,13 +11,14 @@ from threading import Thread
from collections import OrderedDict
from Queue import Empty
from io import BytesIO
from future_builtins import map
from PyQt5.Qt import QObject, Qt, pyqtSignal
from calibre import prints, as_unicode
from calibre.constants import DEBUG
from calibre.customize.ui import run_plugins_on_postimport, run_plugins_on_postadd
from calibre.db.adding import find_books_in_directory
from calibre.db.adding import find_books_in_directory, compile_rule
from calibre.db.utils import find_identical_books
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.opf2 import OPF
@ -119,9 +120,16 @@ class Adder(QObject):
def scan(self):
try:
compiled_rules = tuple(map(compile_rule, gprefs.get('add_filter_rules', ())))
except Exception:
compiled_rules = ()
import traceback
traceback.print_exc()
def find_files(root):
for dirpath, dirnames, filenames in os.walk(root):
for files in find_books_in_directory(dirpath, self.single_book_per_directory):
for files in find_books_in_directory(dirpath, self.single_book_per_directory, compiled_rules=compiled_rules):
if self.abort_scan:
return
self.file_groups[len(self.file_groups)] = files

View File

@ -9,11 +9,13 @@ __docformat__ = 'restructuredtext en'
import os, tempfile, shutil, time
from threading import Thread, Event
from future_builtins import map
from PyQt5.Qt import (QFileSystemWatcher, QObject, Qt, pyqtSignal, QTimer)
from calibre import prints
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.db.adding import filter_filename, compile_rule
from calibre.ebooks import BOOK_EXTENSIONS
from calibre.gui2 import gprefs
from calibre.gui2.dialogs.duplicates import DuplicatesQuestion
@ -47,6 +49,22 @@ class Worker(Thread):
self.path, self.callback = path, callback
self.staging = set()
self.allowed = allowed_formats()
self.read_rules()
def read_rules(self):
    '''
    (Re)load the 'add_filter_rules' preference and store the compiled
    rules in self.compiled_rules. If the rules cannot be read or compiled,
    the traceback is printed and no rules are used.
    '''
    try:
        stored_rules = gprefs.get('add_filter_rules', ())
        self.compiled_rules = tuple(compile_rule(r) for r in stored_rules)
    except Exception:
        # Best effort: a broken rule must not prevent the worker from running
        self.compiled_rules = ()
        import traceback
        traceback.print_exc()
def is_filename_allowed(self, filename):
    '''
    Return whether ``filename`` may be added. The compiled filter rules
    are consulted first; if none of them matches, the file is allowed
    only when its lower-cased extension is in self.allowed.
    '''
    verdict = filter_filename(self.compiled_rules, filename)
    if verdict is not None:
        return verdict
    extension = os.path.splitext(filename)[1][1:].lower()
    return extension in self.allowed
def run(self):
self.tdir = PersistentTemporaryDirectory('_auto_adder')
@ -76,7 +94,7 @@ class Worker(Thread):
# Must have read and write permissions
os.access(os.path.join(self.path, x), os.R_OK|os.W_OK) and
# Must be a known ebook file type
os.path.splitext(x)[1][1:].lower() in self.allowed
self.is_filename_allowed(x)
]
data = {}
# Give any in progress copies time to complete
@ -149,6 +167,10 @@ class AutoAdder(QObject):
prints(path,
'is not a valid directory to watch for new ebooks, ignoring')
def read_rules(self):
    'Ask the worker, if this object has one, to re-read the filter rules'
    if not hasattr(self, 'worker'):
        return
    self.worker.read_rules()
def initialize(self):
try:
if os.listdir(self.worker.path):

View File

@ -50,8 +50,9 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
for signal in ('Activated', 'Changed', 'DoubleClicked', 'Clicked'):
signal = getattr(self.opt_blocked_auto_formats, 'item'+signal)
signal.connect(self.blocked_auto_formats_changed)
self.tag_map_rules = None
self.tag_map_rules = self.add_filter_rules = None
self.tag_map_rules_button.clicked.connect(self.change_tag_map_rules)
self.add_filter_rules_button.clicked.connect(self.change_add_filter_rules)
def change_tag_map_rules(self):
from calibre.gui2.tag_mapper import RulesDialog
@ -62,6 +63,15 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.tag_map_rules = d.rules
self.changed_signal.emit()
def change_add_filter_rules(self):
    '''
    Show the dialog for editing the rules that filter added files,
    pre-populated with the rules currently stored in gprefs. If the
    dialog is accepted, remember the edited rules in
    self.add_filter_rules and emit changed_signal.
    '''
    from calibre.gui2.add_filters import RulesDialog
    dialog = RulesDialog(self)
    if gprefs.get('add_filter_rules'):
        dialog.rules = gprefs['add_filter_rules']
    if dialog.exec_() != dialog.Accepted:
        return
    self.add_filter_rules = dialog.rules
    self.changed_signal.emit()
def choose_aa_path(self):
path = choose_dir(self, 'auto add path choose',
_('Choose a folder'))
@ -75,7 +85,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.filename_pattern.blockSignals(False)
self.init_blocked_auto_formats()
self.opt_automerge.setEnabled(self.opt_add_formats_to_existing.isChecked())
self.tag_map_rules = None
self.tag_map_rules = self.add_filter_rules = None
# Blocked auto formats {{{
def blocked_auto_formats_changed(self, *args):
@ -115,6 +125,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
ConfigWidgetBase.restore_defaults(self)
self.filename_pattern.initialize(defaults=True)
self.init_blocked_auto_formats(defaults=True)
self.tag_map_rules = []
self.add_filter_rules = []
def commit(self):
path = unicode(self.opt_auto_add_path.text()).strip()
@ -150,12 +162,19 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
gprefs['tag_map_on_add_rules'] = self.tag_map_rules
else:
gprefs.pop('tag_map_on_add_rules', None)
if self.add_filter_rules is not None:
if self.add_filter_rules:
gprefs['add_filter_rules'] = self.add_filter_rules
else:
gprefs.pop('add_filter_rules', None)
ret = ConfigWidgetBase.commit(self)
return changed or ret
def refresh_gui(self, gui):
    # Shut down the spare pool to ensure the worker process reads the
    # updated settings
    pool = gui.spare_pool()
    pool.shutdown()
    # Update the rules used in the auto adder
    adder = gui.auto_adder
    adder.read_rules()
if __name__ == '__main__':
from PyQt5.Qt import QApplication

View File

@ -185,6 +185,13 @@ Title match ignores leading indefinite articles (&quot;the&quot;, &quot;a&quot;,
</property>
</widget>
</item>
<item row="8" column="2">
<widget class="QPushButton" name="add_filter_rules_button">
<property name="text">
<string>Rules to &amp;filter added files</string>
</property>
</widget>
</item>
</layout>
</widget>
<widget class="QWidget" name="tab_4">