Allow creating rules to ignore files when adding books based on the filename, useful when using the auto-add feature or when adding in bulk. Preferences->Adding Books->Rules to filter added files

This commit is contained in:
Kovid Goyal 2016-02-24 20:40:04 +05:30
parent 5f8b99eca5
commit 3ff0beb626
6 changed files with 124 additions and 13 deletions

View File

@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os, time import os, time, re
from collections import defaultdict from collections import defaultdict
from future_builtins import map from future_builtins import map
@ -22,6 +22,34 @@ def formats_ok(formats):
def path_ok(path): def path_ok(path):
return not os.path.isdir(path) and os.access(path, os.R_OK) return not os.path.isdir(path) and os.access(path, os.R_OK)
def compile_glob(pat):
    """Compile the shell-style glob *pat* into a regular expression object.

    The glob is translated with ``fnmatch.translate`` and compiled with the
    ignore-case flag, so matching does not depend on letter case.
    """
    from fnmatch import translate
    return re.compile(translate(pat), re.IGNORECASE)
def compile_rule(rule):
    """Turn a filter rule mapping into a ``(predicate, is_add)`` pair.

    *rule* is read through the keys ``'match_type'``, ``'query'`` and
    ``'action'``. The match type selects how the query is tested against a
    filename:

    * contains ``'startswith'`` / any other ``'...with'`` type -- lower-cased
      prefix / suffix comparison (via ``icu_lower``),
    * contains ``'glob'`` -- case-insensitive glob (see ``compile_glob``),
    * anything else -- the query is compiled as a regular expression and
      matched from the start of the filename, with no extra flags.

    A match type beginning with ``'not_'`` inverts the test. The second
    element of the returned pair is True exactly when the action is ``'add'``.
    """
    match_type = rule['match_type']

    if 'with' in match_type:
        # startswith/endswith: compare lower-cased text on both sides
        needle = icu_lower(rule['query'])
        if 'startswith' in match_type:
            def matches(filename):
                return icu_lower(filename).startswith(needle)
        else:
            def matches(filename):
                return icu_lower(filename).endswith(needle)
    else:
        # glob and regex rules both end up as a compiled pattern
        if 'glob' in match_type:
            pattern = compile_glob(rule['query'])
        else:
            pattern = re.compile(rule['query'])

        def matches(filename):
            return pattern.match(filename) is not None

    if match_type.startswith('not_'):
        def predicate(filename):
            return not matches(filename)
    else:
        predicate = matches

    return predicate, rule['action'] == 'add'
def filter_filename(compiled_rules, filename):
    """Return the action of the first rule whose predicate accepts *filename*.

    *compiled_rules* is an iterable of ``(predicate, action)`` pairs, tried in
    order. If no predicate matches, None is returned so the caller can apply
    its own default.
    """
    return next(
        (action for predicate, action in compiled_rules if predicate(filename)),
        None)
_metadata_extensions = None _metadata_extensions = None
def metadata_extensions(): def metadata_extensions():
@ -43,24 +71,30 @@ def listdir(root, sort_by_mtime=False):
items = sorted(items, key=safe_mtime) items = sorted(items, key=safe_mtime)
for path in items: for path in items:
yield path if path_ok(path):
yield path
def find_books_in_directory(dirpath, single_book_per_directory): def allow_path(path, ext, compiled_rules):
ans = filter_filename(compiled_rules, os.path.basename(path))
if ans is None:
ans = ext in metadata_extensions()
return ans
def find_books_in_directory(dirpath, single_book_per_directory, compiled_rules=(), listdir_impl=listdir):
dirpath = os.path.abspath(dirpath) dirpath = os.path.abspath(dirpath)
book_extentions = metadata_extensions()
if single_book_per_directory: if single_book_per_directory:
formats = {} formats = {}
for path in listdir(dirpath): for path in listdir_impl(dirpath):
key, ext = splitext(path) key, ext = splitext(path)
if ext in book_extentions and path_ok(path): if allow_path(path, ext, compiled_rules):
formats[ext] = path formats[ext] = path
if formats_ok(formats): if formats_ok(formats):
yield list(formats.itervalues()) yield list(formats.itervalues())
else: else:
books = defaultdict(dict) books = defaultdict(dict)
for path in listdir(dirpath, sort_by_mtime=True): for path in listdir_impl(dirpath, sort_by_mtime=True):
key, ext = splitext(path) key, ext = splitext(path)
if ext in book_extentions and path_ok(path): if allow_path(path, ext, compiled_rules):
books[icu_lower(key) if isinstance(key, unicode) else key.lower()][ext] = path books[icu_lower(key) if isinstance(key, unicode) else key.lower()][ext] = path
for formats in books.itervalues(): for formats in books.itervalues():

View File

@ -161,3 +161,24 @@ class FilesystemTest(BaseTest):
for fmt in cache.formats(book_id): for fmt in cache.formats(book_id):
self.assertEqual(cache.format(book_id, fmt), ic.format(book_id, fmt)) self.assertEqual(cache.format(book_id, fmt), ic.format(book_id, fmt))
self.assertEqual(cache.format_metadata(book_id, fmt)['mtime'], cache.format_metadata(book_id, fmt)['mtime']) self.assertEqual(cache.format_metadata(book_id, fmt)['mtime'], cache.format_metadata(book_id, fmt)['mtime'])
def test_find_books_in_directory(self):
    """Check find_books_in_directory() with and without filename filter rules."""
    from calibre.db.adding import find_books_in_directory, compile_rule

    def strip(files):
        # Reduce a group of paths to an order-independent set of base names
        return frozenset({os.path.basename(x) for x in files})

    def q(one, two):
        # Compare two collections of file groups, ignoring order and directories
        expected_groups = {strip(group) for group in one}
        found_groups = {strip(group) for group in two}
        self.assertEqual(expected_groups, found_groups)

    def r(action='ignore', match_type='startswith', query=''):
        # Build a raw rule mapping; defaults describe an "ignore if starts with" rule
        return {
            'action': action,
            'match_type': match_type,
            'query': query,
        }

    def c(*rules):
        return tuple(compile_rule(rule) for rule in rules)

    files = ['added.epub', 'ignored.md', 'non-book.other']

    # Stand-ins for the directory listing, so no real filesystem is needed
    def list_plain(dirpath):
        return files

    def list_with_options(dirpath, **kwargs):
        return files

    # Without rules: the .epub and .md files are picked up, the .other file is not
    q([['added.epub', 'ignored.md']],
      find_books_in_directory('', True, listdir_impl=list_plain))
    q([['added.epub'], ['ignored.md']],
      find_books_in_directory('', False, listdir_impl=list_with_options))

    # Three equivalent rule sets: each drops ignored.md and admits non-book.other
    rule_sets = (
        c(r(query='ignored.'),
          r(action='add', match_type='endswith', query='.OTHER')),
        c(r(match_type='glob', query='*.md'),
          r(action='add', match_type='matches', query=r'.+\.other$')),
        c(r(match_type='not_startswith', query='IGnored.', action='add'),
          r(query='ignored.md')),
    )
    for rules in rule_sets:
        q([['added.epub', 'non-book.other']],
          find_books_in_directory('', True, compiled_rules=rules, listdir_impl=list_plain))

View File

@ -11,13 +11,14 @@ from threading import Thread
from collections import OrderedDict from collections import OrderedDict
from Queue import Empty from Queue import Empty
from io import BytesIO from io import BytesIO
from future_builtins import map
from PyQt5.Qt import QObject, Qt, pyqtSignal from PyQt5.Qt import QObject, Qt, pyqtSignal
from calibre import prints, as_unicode from calibre import prints, as_unicode
from calibre.constants import DEBUG from calibre.constants import DEBUG
from calibre.customize.ui import run_plugins_on_postimport, run_plugins_on_postadd from calibre.customize.ui import run_plugins_on_postimport, run_plugins_on_postadd
from calibre.db.adding import find_books_in_directory from calibre.db.adding import find_books_in_directory, compile_rule
from calibre.db.utils import find_identical_books from calibre.db.utils import find_identical_books
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata.opf2 import OPF
@ -119,9 +120,16 @@ class Adder(QObject):
def scan(self): def scan(self):
try:
compiled_rules = tuple(map(compile_rule, gprefs.get('add_filter_rules', ())))
except Exception:
compiled_rules = ()
import traceback
traceback.print_exc()
def find_files(root): def find_files(root):
for dirpath, dirnames, filenames in os.walk(root): for dirpath, dirnames, filenames in os.walk(root):
for files in find_books_in_directory(dirpath, self.single_book_per_directory): for files in find_books_in_directory(dirpath, self.single_book_per_directory, compiled_rules=compiled_rules):
if self.abort_scan: if self.abort_scan:
return return
self.file_groups[len(self.file_groups)] = files self.file_groups[len(self.file_groups)] = files

View File

@ -9,11 +9,13 @@ __docformat__ = 'restructuredtext en'
import os, tempfile, shutil, time import os, tempfile, shutil, time
from threading import Thread, Event from threading import Thread, Event
from future_builtins import map
from PyQt5.Qt import (QFileSystemWatcher, QObject, Qt, pyqtSignal, QTimer) from PyQt5.Qt import (QFileSystemWatcher, QObject, Qt, pyqtSignal, QTimer)
from calibre import prints from calibre import prints
from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.db.adding import filter_filename, compile_rule
from calibre.ebooks import BOOK_EXTENSIONS from calibre.ebooks import BOOK_EXTENSIONS
from calibre.gui2 import gprefs from calibre.gui2 import gprefs
from calibre.gui2.dialogs.duplicates import DuplicatesQuestion from calibre.gui2.dialogs.duplicates import DuplicatesQuestion
@ -47,6 +49,22 @@ class Worker(Thread):
self.path, self.callback = path, callback self.path, self.callback = path, callback
self.staging = set() self.staging = set()
self.allowed = allowed_formats() self.allowed = allowed_formats()
self.read_rules()
def read_rules(self):
    """(Re)load the 'add_filter_rules' preference into self.compiled_rules.

    Best effort: if reading or compiling the rules raises, the rule set
    falls back to an empty tuple and the traceback is printed.
    """
    try:
        compiled = tuple(
            compile_rule(rule) for rule in gprefs.get('add_filter_rules', ()))
    except Exception:
        self.compiled_rules = ()
        from traceback import print_exc
        print_exc()
    else:
        self.compiled_rules = compiled
def is_filename_allowed(self, filename):
    """Decide whether *filename* should be picked up.

    The compiled filter rules are consulted first; when none of them
    matches, the decision falls back to whether the lower-cased file
    extension is in self.allowed.
    """
    verdict = filter_filename(self.compiled_rules, filename)
    if verdict is not None:
        return verdict
    extension = os.path.splitext(filename)[1][1:].lower()
    return extension in self.allowed
def run(self): def run(self):
self.tdir = PersistentTemporaryDirectory('_auto_adder') self.tdir = PersistentTemporaryDirectory('_auto_adder')
@ -76,7 +94,7 @@ class Worker(Thread):
# Must have read and write permissions # Must have read and write permissions
os.access(os.path.join(self.path, x), os.R_OK|os.W_OK) and os.access(os.path.join(self.path, x), os.R_OK|os.W_OK) and
# Must be a known ebook file type # Must be a known ebook file type
os.path.splitext(x)[1][1:].lower() in self.allowed self.is_filename_allowed(x)
] ]
data = {} data = {}
# Give any in progress copies time to complete # Give any in progress copies time to complete
@ -149,6 +167,10 @@ class AutoAdder(QObject):
prints(path, prints(path,
'is not a valid directory to watch for new ebooks, ignoring') 'is not a valid directory to watch for new ebooks, ignoring')
def read_rules(self):
    """Ask the worker, if one has been created, to reload its filter rules."""
    if not hasattr(self, 'worker'):
        return
    self.worker.read_rules()
def initialize(self): def initialize(self):
try: try:
if os.listdir(self.worker.path): if os.listdir(self.worker.path):

View File

@ -50,8 +50,9 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
for signal in ('Activated', 'Changed', 'DoubleClicked', 'Clicked'): for signal in ('Activated', 'Changed', 'DoubleClicked', 'Clicked'):
signal = getattr(self.opt_blocked_auto_formats, 'item'+signal) signal = getattr(self.opt_blocked_auto_formats, 'item'+signal)
signal.connect(self.blocked_auto_formats_changed) signal.connect(self.blocked_auto_formats_changed)
self.tag_map_rules = None self.tag_map_rules = self.add_filter_rules = None
self.tag_map_rules_button.clicked.connect(self.change_tag_map_rules) self.tag_map_rules_button.clicked.connect(self.change_tag_map_rules)
self.add_filter_rules_button.clicked.connect(self.change_add_filter_rules)
def change_tag_map_rules(self): def change_tag_map_rules(self):
from calibre.gui2.tag_mapper import RulesDialog from calibre.gui2.tag_mapper import RulesDialog
@ -62,6 +63,15 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.tag_map_rules = d.rules self.tag_map_rules = d.rules
self.changed_signal.emit() self.changed_signal.emit()
def change_add_filter_rules(self):
    """Open the filter-rules dialog and remember the edited rules if accepted.

    The dialog is pre-filled from the stored 'add_filter_rules' preference
    when that is non-empty. On acceptance the rules are kept on the widget
    in self.add_filter_rules and changed_signal is emitted.
    """
    from calibre.gui2.add_filters import RulesDialog
    dialog = RulesDialog(self)
    stored_rules = gprefs.get('add_filter_rules')
    if stored_rules:
        dialog.rules = stored_rules
    if dialog.exec_() != dialog.Accepted:
        return
    self.add_filter_rules = dialog.rules
    self.changed_signal.emit()
def choose_aa_path(self): def choose_aa_path(self):
path = choose_dir(self, 'auto add path choose', path = choose_dir(self, 'auto add path choose',
_('Choose a folder')) _('Choose a folder'))
@ -75,7 +85,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.filename_pattern.blockSignals(False) self.filename_pattern.blockSignals(False)
self.init_blocked_auto_formats() self.init_blocked_auto_formats()
self.opt_automerge.setEnabled(self.opt_add_formats_to_existing.isChecked()) self.opt_automerge.setEnabled(self.opt_add_formats_to_existing.isChecked())
self.tag_map_rules = None self.tag_map_rules = self.add_filter_rules = None
# Blocked auto formats {{{ # Blocked auto formats {{{
def blocked_auto_formats_changed(self, *args): def blocked_auto_formats_changed(self, *args):
@ -115,6 +125,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
ConfigWidgetBase.restore_defaults(self) ConfigWidgetBase.restore_defaults(self)
self.filename_pattern.initialize(defaults=True) self.filename_pattern.initialize(defaults=True)
self.init_blocked_auto_formats(defaults=True) self.init_blocked_auto_formats(defaults=True)
self.tag_map_rules = []
self.add_filter_rules = []
def commit(self): def commit(self):
path = unicode(self.opt_auto_add_path.text()).strip() path = unicode(self.opt_auto_add_path.text()).strip()
@ -150,12 +162,19 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
gprefs['tag_map_on_add_rules'] = self.tag_map_rules gprefs['tag_map_on_add_rules'] = self.tag_map_rules
else: else:
gprefs.pop('tag_map_on_add_rules', None) gprefs.pop('tag_map_on_add_rules', None)
if self.add_filter_rules is not None:
if self.add_filter_rules:
gprefs['add_filter_rules'] = self.add_filter_rules
else:
gprefs.pop('add_filter_rules', None)
ret = ConfigWidgetBase.commit(self) ret = ConfigWidgetBase.commit(self)
return changed or ret return changed or ret
def refresh_gui(self, gui): def refresh_gui(self, gui):
# Ensure worker process reads updated settings # Ensure worker process reads updated settings
gui.spare_pool().shutdown() gui.spare_pool().shutdown()
# Update rules used in the auto adder
gui.auto_adder.read_rules()
if __name__ == '__main__': if __name__ == '__main__':
from PyQt5.Qt import QApplication from PyQt5.Qt import QApplication

View File

@ -185,6 +185,13 @@ Title match ignores leading indefinite articles (&quot;the&quot;, &quot;a&quot;,
</property> </property>
</widget> </widget>
</item> </item>
<item row="8" column="2">
<widget class="QPushButton" name="add_filter_rules_button">
<property name="text">
<string>Rules to &amp;filter added files</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
<widget class="QWidget" name="tab_4"> <widget class="QWidget" name="tab_4">