calibredb add: Allow specifying filters to control adding of books from directories

This commit is contained in:
Kovid Goyal 2016-02-24 22:06:53 +05:30
parent 3ff0beb626
commit 8d560375f5
5 changed files with 62 additions and 27 deletions

View File

@ -111,6 +111,11 @@ def generate_calibredb_help(preamble, app):
lines += [''] lines += ['']
lines += render_options('calibredb '+cmd, groups, False) lines += render_options('calibredb '+cmd, groups, False)
lines += [''] lines += ['']
for group in parser.option_groups:
if not getattr(group, 'is_global_options', False):
lines.extend(render_options(
'calibredb_' + cmd, [[group.title.capitalize(), group.description, group.option_list]], False, False, header_level='^'))
lines += ['']
raw = preamble + '\n\n'+'.. contents::\n :local:'+ '\n\n' + global_options+'\n\n'+'\n'.join(lines) raw = preamble + '\n\n'+'.. contents::\n :local:'+ '\n\n' + global_options+'\n\n'+'\n'.join(lines)
update_cli_doc('calibredb', raw, app) update_cli_doc('calibredb', raw, app)
@ -169,7 +174,7 @@ def update_cli_doc(name, raw, app):
os.makedirs(p) os.makedirs(p)
open(path, 'wb').write(raw) open(path, 'wb').write(raw)
def render_options(cmd, groups, options_header=True, add_program=True): def render_options(cmd, groups, options_header=True, add_program=True, header_level='~'):
lines = [''] lines = ['']
if options_header: if options_header:
lines = ['[options]', '-'*15, ''] lines = ['[options]', '-'*15, '']
@ -177,7 +182,7 @@ def render_options(cmd, groups, options_header=True, add_program=True):
lines += ['.. program:: '+cmd, ''] lines += ['.. program:: '+cmd, '']
for title, desc, options in groups: for title, desc, options in groups:
if title: if title:
lines.extend([title, '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~']) lines.extend([title, header_level * (len(title) + 4)])
lines.append('') lines.append('')
if desc: if desc:
lines.extend([desc, '']) lines.extend([desc, ''])

View File

@ -102,11 +102,11 @@ def find_books_in_directory(dirpath, single_book_per_directory, compiled_rules=(
yield list(formats.itervalues()) yield list(formats.itervalues())
def import_book_directory_multiple(db, dirpath, callback=None, def import_book_directory_multiple(db, dirpath, callback=None,
added_ids=None): added_ids=None, compiled_rules=()):
from calibre.ebooks.metadata.meta import metadata_from_formats from calibre.ebooks.metadata.meta import metadata_from_formats
duplicates = [] duplicates = []
for formats in find_books_in_directory(dirpath, False): for formats in find_books_in_directory(dirpath, False, compiled_rules=compiled_rules):
mi = metadata_from_formats(formats) mi = metadata_from_formats(formats)
if mi.title is None: if mi.title is None:
continue continue
@ -121,11 +121,11 @@ def import_book_directory_multiple(db, dirpath, callback=None,
break break
return duplicates return duplicates
def import_book_directory(db, dirpath, callback=None, added_ids=None): def import_book_directory(db, dirpath, callback=None, added_ids=None, compiled_rules=()):
from calibre.ebooks.metadata.meta import metadata_from_formats from calibre.ebooks.metadata.meta import metadata_from_formats
dirpath = os.path.abspath(dirpath) dirpath = os.path.abspath(dirpath)
formats = None formats = None
for formats in find_books_in_directory(dirpath, True): for formats in find_books_in_directory(dirpath, True, compiled_rules=compiled_rules):
break break
if not formats: if not formats:
return return
@ -141,14 +141,14 @@ def import_book_directory(db, dirpath, callback=None, added_ids=None):
callback(mi.title) callback(mi.title)
def recursive_import(db, root, single_book_per_directory=True, def recursive_import(db, root, single_book_per_directory=True,
callback=None, added_ids=None): callback=None, added_ids=None, compiled_rules=()):
root = os.path.abspath(root) root = os.path.abspath(root)
duplicates = [] duplicates = []
for dirpath in os.walk(root): for dirpath in os.walk(root):
res = (import_book_directory(db, dirpath[0], callback=callback, res = (import_book_directory(db, dirpath[0], callback=callback,
added_ids=added_ids) if single_book_per_directory else added_ids=added_ids, compiled_rules=compiled_rules) if single_book_per_directory else
import_book_directory_multiple(db, dirpath[0], import_book_directory_multiple(db, dirpath[0],
callback=callback, added_ids=added_ids)) callback=callback, added_ids=added_ids, compiled_rules=compiled_rules))
if res is not None: if res is not None:
duplicates.extend(res) duplicates.extend(res)
if callable(callback): if callable(callback):

View File

@ -247,19 +247,18 @@ class LibraryDatabase(object):
self.notify('add', book_ids) self.notify('add', book_ids)
return book_ids[0] return book_ids[0]
def find_books_in_directory(self, dirpath, single_book_per_directory): def find_books_in_directory(self, dirpath, single_book_per_directory, compiled_rules=()):
return find_books_in_directory(dirpath, single_book_per_directory) return find_books_in_directory(dirpath, single_book_per_directory, compiled_rules=compiled_rules)
def import_book_directory_multiple(self, dirpath, callback=None, def import_book_directory_multiple(self, dirpath, callback=None, added_ids=None, compiled_rules=()):
added_ids=None): return import_book_directory_multiple(self, dirpath, callback=callback, added_ids=added_ids, compiled_rules=compiled_rules)
return import_book_directory_multiple(self, dirpath, callback=callback, added_ids=added_ids)
def import_book_directory(self, dirpath, callback=None, added_ids=None): def import_book_directory(self, dirpath, callback=None, added_ids=None, compiled_rules=()):
return import_book_directory(self, dirpath, callback=callback, added_ids=added_ids) return import_book_directory(self, dirpath, callback=callback, added_ids=added_ids, compiled_rules=compiled_rules)
def recursive_import(self, root, single_book_per_directory=True, def recursive_import(self, root, single_book_per_directory=True, callback=None, added_ids=None, compiled_rules=()):
callback=None, added_ids=None): return recursive_import(
return recursive_import(self, root, single_book_per_directory=single_book_per_directory, callback=callback, added_ids=added_ids) self, root, single_book_per_directory=single_book_per_directory, callback=callback, added_ids=added_ids, compiled_rules=compiled_rules)
def add_catalog(self, path, title): def add_catalog(self, path, title):
book_id, new_book_added = add_catalog(self.new_api, path, title, dbapi=self) book_id, new_book_added = add_catalog(self.new_api, path, title, dbapi=self)

View File

@ -418,6 +418,8 @@ class LegacyTest(BaseTest):
'migrate_old', # no longer supported 'migrate_old', # no longer supported
'remove_unused_series', # superseded by clean API 'remove_unused_series', # superseded by clean API
'move_library_to', # API changed, no code uses old API 'move_library_to', # API changed, no code uses old API
# Signatures gained a compiled_rules parameter for calibredb add
'find_books_in_directory', 'import_book_directory', 'import_book_directory_multiple', 'recursive_import',
# Internal API # Internal API
'clean_user_categories', 'cleanup_tags', 'books_list_filter', 'conn', 'connect', 'construct_file_name', 'clean_user_categories', 'cleanup_tags', 'books_list_filter', 'conn', 'connect', 'construct_file_name',

View File

@ -10,9 +10,11 @@ Command line interface to the calibre database.
import sys, os, cStringIO, re import sys, os, cStringIO, re
import unicodedata import unicodedata
from textwrap import TextWrapper from textwrap import TextWrapper
from optparse import OptionValueError, OptionGroup
from calibre import preferred_encoding, prints, isbytestring, patheq from calibre import preferred_encoding, prints, isbytestring, patheq
from calibre.constants import iswindows from calibre.constants import iswindows
from calibre.db.adding import compile_rule
from calibre.db.legacy import LibraryDatabase from calibre.db.legacy import LibraryDatabase
from calibre.utils.config import OptionParser, prefs, tweaks from calibre.utils.config import OptionParser, prefs, tweaks
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
@ -48,10 +50,13 @@ def write_dirtied(db):
def get_parser(usage): def get_parser(usage):
parser = OptionParser(usage) parser = OptionParser(usage)
go = parser.add_option_group(_('GLOBAL OPTIONS')) go = parser.add_option_group(_('GLOBAL OPTIONS'))
go.is_global_options = True
go.add_option('--library-path', '--with-library', default=None, help=_('Path to the calibre library. Default is to use the path stored in the settings.')) go.add_option('--library-path', '--with-library', default=None, help=_('Path to the calibre library. Default is to use the path stored in the settings.'))
go.add_option('--dont-notify-gui', default=False, action='store_true', go.add_option('--dont-notify-gui', default=False, action='store_true',
help=_('Do not notify the running calibre GUI (if any) that the database has' help=_('Do not notify the running calibre GUI (if any) that the database has'
' changed. Use with care, as it can lead to database corruption!')) ' changed. Use with care, as it can lead to database corruption!'))
go.add_option('-h', '--help', help=_('show this help message and exit'), action='help')
go.add_option('--version', help=_("show program's version number and exit"), action='version')
return parser return parser
@ -255,7 +260,7 @@ class DevNull(object):
NULL = DevNull() NULL = DevNull()
def do_add(db, paths, one_book_per_directory, recurse, add_duplicates, otitle, def do_add(db, paths, one_book_per_directory, recurse, add_duplicates, otitle,
oauthors, oisbn, otags, oseries, oseries_index, ocover, oidentifiers, olanguages): oauthors, oisbn, otags, oseries, oseries_index, ocover, oidentifiers, olanguages, compiled_rules):
orig = sys.stdout orig = sys.stdout
# sys.stdout = NULL # sys.stdout = NULL
try: try:
@ -307,14 +312,15 @@ def do_add(db, paths, one_book_per_directory, recurse, add_duplicates, otitle,
added_ids |= set(ids) added_ids |= set(ids)
dir_dups = [] dir_dups = []
for dir in dirs: for dir in dirs:
if recurse: if recurse:
dir_dups.extend(db.recursive_import(dir, dir_dups.extend(db.recursive_import(dir,
single_book_per_directory=one_book_per_directory, single_book_per_directory=one_book_per_directory,
added_ids=added_ids)) added_ids=added_ids, compiled_rules=compiled_rules))
else: else:
func = db.import_book_directory if one_book_per_directory else db.import_book_directory_multiple func = db.import_book_directory if one_book_per_directory else db.import_book_directory_multiple
dups = func(dir, added_ids=added_ids) dups = func(dir, added_ids=added_ids, compiled_rules=compiled_rules)
if not dups: if not dups:
dups = [] dups = []
dir_dups.extend(dups) dir_dups.extend(dups)
@ -362,10 +368,6 @@ Add the specified files as books to the database. You can also specify directori
the directory related options below. the directory related options below.
''' '''
)) ))
parser.add_option('-1', '--one-book-per-directory', action='store_true', default=False,
help=_('Assume that each directory has only a single logical book and that all files in it are different e-book formats of that book'))
parser.add_option('-r', '--recurse', action='store_true', default=False,
help=_('Process directories recursively'))
parser.add_option('-d', '--duplicates', action='store_true', default=False, parser.add_option('-d', '--duplicates', action='store_true', default=False,
help=_('Add books to database even if they already exist. Comparison is done based on book titles.')) help=_('Add books to database even if they already exist. Comparison is done based on book titles.'))
parser.add_option('-e', '--empty', action='store_true', default=False, parser.add_option('-e', '--empty', action='store_true', default=False,
@ -389,6 +391,33 @@ the directory related options below.
parser.add_option('-l', '--languages', default=None, parser.add_option('-l', '--languages', default=None,
help=_('A comma separated list of languages (best to use ISO639 language codes, though some language names may also be recognized)')) help=_('A comma separated list of languages (best to use ISO639 language codes, though some language names may also be recognized)'))
g = OptionGroup(parser, _('ADDING FROM DIRECTORIES'), _(
'Options to control the adding of books from directories. By default only files that have extensions of known e-book file types are added.'))
def filter_pat(option, opt, value, parser, action):
    """optparse callback: compile the glob *value* into a rule and record it.

    The rule is built with ``compile_rule`` using match type ``glob``, the
    pattern text as the query and *action* (supplied through
    ``callback_args``) as the rule action. The result is stored on
    ``parser.values`` under ``option.dest``.

    Raises OptionValueError if the pattern cannot be compiled.
    """
    # Only the compilation is guarded, so unrelated failures are not
    # misreported as an invalid pattern.
    try:
        rule = compile_rule({'match_type':'glob', 'query':value, 'action':action})
    except Exception:
        raise OptionValueError('%r is not a valid filename pattern' % value)
    # optparse hands out the option's default list object itself; extend a
    # copy so the default is never mutated and a repeated parse starts empty.
    rules = list(getattr(parser.values, option.dest, None) or ())
    rules.append(rule)
    setattr(parser.values, option.dest, rules)
g.add_option('-1', '--one-book-per-directory', action='store_true', default=False,
help=_('Assume that each directory has only a single logical book and that all files in it are different e-book formats of that book'))
g.add_option('-r', '--recurse', action='store_true', default=False,
help=_('Process directories recursively'))
def fadd(opt, action, help):
    """Register the filter option *opt* on the option group ``g``.

    The option takes a single string argument and is handled by the
    ``filter_pat`` callback, which receives *action* as its extra argument.
    Every option registered here stores into the ``filters`` destination.
    *help* is the text shown for the option.
    """
    settings = dict(
        action='callback', type='string', nargs=1, default=[],
        callback=filter_pat, dest='filters', callback_args=(action,),
        metavar=_('GLOB PATTERN'), help=help,
    )
    g.add_option(opt, **settings)
fadd('--ignore', 'ignore', _(
'A filename (glob) pattern, files matching this pattern will be ignored when scanning directories for files.'
' Can be specified multiple times for multiple patterns. For e.g.: *.pdf will ignore all pdf files'))
fadd('--add', 'add', _(
'A filename (glob) pattern, files matching this pattern will be added when scanning directories for files,'
' even if they are not of a known ebook file type. Can be specified multiple times for multiple patterns.'))
parser.add_option_group(g)
return parser return parser
def do_add_empty(db, title, authors, isbn, tags, series, series_index, cover, identifiers, languages): def do_add_empty(db, title, authors, isbn, tags, series, series_index, cover, identifiers, languages):
@ -436,7 +465,7 @@ def command_add(args, dbpath):
return 1 return 1
do_add(get_db(dbpath, opts), args[1:], opts.one_book_per_directory, do_add(get_db(dbpath, opts), args[1:], opts.one_book_per_directory,
opts.recurse, opts.duplicates, opts.title, aut, opts.isbn, opts.recurse, opts.duplicates, opts.title, aut, opts.isbn,
tags, opts.series, opts.series_index, opts.cover, identifiers, lcodes) tags, opts.series, opts.series_index, opts.cover, identifiers, lcodes, opts.filters)
return 0 return 0
def do_remove(db, ids): def do_remove(db, ids):