mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Implement recursive import assuming multiple ebooks per directory
This commit is contained in:
parent
c7cd76bdc7
commit
ac4a0f0dfc
@ -45,7 +45,7 @@ class MetaInformation(object):
|
|||||||
ans = MetaInformation(mi.title, mi.authors)
|
ans = MetaInformation(mi.title, mi.authors)
|
||||||
for attr in ('author_sort', 'title_sort', 'comments', 'category',
|
for attr in ('author_sort', 'title_sort', 'comments', 'category',
|
||||||
'publisher', 'series', 'series_index', 'rating',
|
'publisher', 'series', 'series_index', 'rating',
|
||||||
'isbn', 'tags', 'cover_data'):
|
'isbn', 'tags', 'cover_data', 'libprs_id'):
|
||||||
if hasattr(mi, attr):
|
if hasattr(mi, attr):
|
||||||
setattr(ans, attr, getattr(mi, attr))
|
setattr(ans, attr, getattr(mi, attr))
|
||||||
|
|
||||||
@ -76,6 +76,7 @@ class MetaInformation(object):
|
|||||||
self.isbn = None if not mi else mi.isbn
|
self.isbn = None if not mi else mi.isbn
|
||||||
self.tags = [] if not mi else mi.tags
|
self.tags = [] if not mi else mi.tags
|
||||||
self.cover_data = mi.cover_data if (mi and hasattr(mi, 'cover_data')) else (None, None)
|
self.cover_data = mi.cover_data if (mi and hasattr(mi, 'cover_data')) else (None, None)
|
||||||
|
self.libprs_id = mi.libprs_id if (mi and hasattr(mi, 'libprs_id')) else None
|
||||||
|
|
||||||
|
|
||||||
def smart_update(self, mi):
|
def smart_update(self, mi):
|
||||||
@ -91,7 +92,7 @@ class MetaInformation(object):
|
|||||||
|
|
||||||
for attr in ('author_sort', 'title_sort', 'comments', 'category',
|
for attr in ('author_sort', 'title_sort', 'comments', 'category',
|
||||||
'publisher', 'series', 'series_index', 'rating',
|
'publisher', 'series', 'series_index', 'rating',
|
||||||
'isbn'):
|
'isbn', 'libprs_id'):
|
||||||
if hasattr(mi, attr):
|
if hasattr(mi, attr):
|
||||||
val = getattr(mi, attr)
|
val = getattr(mi, attr)
|
||||||
if val is not None:
|
if val is not None:
|
||||||
|
@ -13,7 +13,7 @@
|
|||||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
import os, re
|
import os, re, collections
|
||||||
|
|
||||||
from libprs500.ebooks.metadata.rtf import get_metadata as rtf_metadata
|
from libprs500.ebooks.metadata.rtf import get_metadata as rtf_metadata
|
||||||
from libprs500.ebooks.lrf.meta import get_metadata as lrf_metadata
|
from libprs500.ebooks.lrf.meta import get_metadata as lrf_metadata
|
||||||
@ -28,33 +28,66 @@ from libprs500.ebooks.lrf.meta import set_metadata as set_lrf_metadata
|
|||||||
|
|
||||||
from libprs500.ebooks.metadata import MetaInformation
|
from libprs500.ebooks.metadata import MetaInformation
|
||||||
|
|
||||||
|
_METADATA_PRIORITIES = [
|
||||||
|
'html', 'htm', 'xhtml', 'xhtm',
|
||||||
|
'rtf', 'pdf', 'prc',
|
||||||
|
'epub', 'lit', 'lrf', 'mobi',
|
||||||
|
]
|
||||||
|
|
||||||
|
# The priorities for loading metadata from different file types
|
||||||
|
# Higher values should be used to update metadata from lower values
|
||||||
|
METADATA_PRIORITIES = collections.defaultdict(lambda:0)
|
||||||
|
for i, ext in enumerate(_METADATA_PRIORITIES):
|
||||||
|
METADATA_PRIORITIES[ext] = i
|
||||||
|
|
||||||
|
def path_to_ext(path):
|
||||||
|
return os.path.splitext(path)[1][1:].lower()
|
||||||
|
|
||||||
|
def metadata_from_formats(formats):
|
||||||
|
mi = MetaInformation(None, None)
|
||||||
|
formats.sort(cmp=lambda x,y: cmp(METADATA_PRIORITIES[path_to_ext(x)],
|
||||||
|
METADATA_PRIORITIES[path_to_ext(y)]))
|
||||||
|
for path in formats:
|
||||||
|
ext = path_to_ext(path)
|
||||||
|
stream = open(path, 'rb')
|
||||||
|
mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=True))
|
||||||
|
if getattr(mi, 'libprs_id', None) is not None:
|
||||||
|
return mi
|
||||||
|
|
||||||
|
return mi
|
||||||
|
|
||||||
def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
|
def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
|
||||||
if stream_type: stream_type = stream_type.lower()
|
if stream_type: stream_type = stream_type.lower()
|
||||||
if stream_type in ('html', 'html', 'xhtml', 'xhtm'):
|
if stream_type in ('html', 'html', 'xhtml', 'xhtm'):
|
||||||
stream_type = 'html'
|
stream_type = 'html'
|
||||||
if stream_type in ('mobi', 'prc'):
|
if stream_type in ('mobi', 'prc'):
|
||||||
stream_type = 'mobi'
|
stream_type = 'mobi'
|
||||||
if use_libprs_metadata and hasattr(stream, 'name'):
|
|
||||||
mi = libprs_metadata(stream.name)
|
opf = None
|
||||||
if mi is not None:
|
if hasattr(stream, 'name'):
|
||||||
return mi
|
c = os.path.splitext(stream.name)[0]+'.opf'
|
||||||
|
if os.access(c, os.R_OK):
|
||||||
|
opf = opf_metadata(os.path.abspath(c))
|
||||||
|
|
||||||
|
if use_libprs_metadata and getattr(opf, 'libprs_id', None) is not None:
|
||||||
|
return opf
|
||||||
|
|
||||||
try:
|
try:
|
||||||
func = eval(stream_type + '_metadata')
|
func = eval(stream_type + '_metadata')
|
||||||
mi = func(stream)
|
mi = func(stream)
|
||||||
except NameError:
|
except NameError:
|
||||||
mi = MetaInformation(None, None)
|
mi = MetaInformation(None, None)
|
||||||
|
|
||||||
name = os.path.basename(stream.name) if hasattr(stream, 'name') else ''
|
name = os.path.basename(getattr(stream, 'name', ''))
|
||||||
base = metadata_from_filename(name)
|
base = metadata_from_filename(name)
|
||||||
if not base.authors:
|
if not base.authors:
|
||||||
base.authors = ['Unknown']
|
base.authors = ['Unknown']
|
||||||
|
if not base.title:
|
||||||
|
base.title = 'Unknown'
|
||||||
base.smart_update(mi)
|
base.smart_update(mi)
|
||||||
if hasattr(stream, 'name'):
|
if opf is not None:
|
||||||
opfpath = os.path.abspath(os.path.splitext(stream.name)[0]+'.opf')
|
base.update(opf)
|
||||||
if os.access(opfpath, os.R_OK):
|
|
||||||
mi = opf_metadata(opfpath)
|
|
||||||
if mi is not None:
|
|
||||||
base.smart_update(mi)
|
|
||||||
return base
|
return base
|
||||||
|
|
||||||
def set_metadata(stream, mi, stream_type='lrf'):
|
def set_metadata(stream, mi, stream_type='lrf'):
|
||||||
@ -125,12 +158,3 @@ def opf_metadata(opfpath):
|
|||||||
return mi
|
return mi
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def libprs_metadata(name):
|
|
||||||
if os.path.basename(name) != 'metadata.opf':
|
|
||||||
name = os.path.join(os.path.dirname(name), 'metadata.opf')
|
|
||||||
name = os.path.abspath(name)
|
|
||||||
if os.access(name, os.R_OK):
|
|
||||||
return opf_metadata(name)
|
|
||||||
|
|
@ -304,12 +304,12 @@ class Main(MainWindow, Ui_MainWindow):
|
|||||||
|
|
||||||
if duplicates:
|
if duplicates:
|
||||||
files = _('<p>Books with the same title as the following already exist in the database. Add them anyway?<ul>')
|
files = _('<p>Books with the same title as the following already exist in the database. Add them anyway?<ul>')
|
||||||
for mi, path in duplicates:
|
for mi, formats in duplicates:
|
||||||
files += '<li>'+mi.title+'</li>\n'
|
files += '<li>'+mi.title+'</li>\n'
|
||||||
d = question_dialog(self, _('Duplicates found!'), files+'</ul></p>')
|
d = question_dialog(self, _('Duplicates found!'), files+'</ul></p>')
|
||||||
if d.exec_() == QMessageBox.Yes:
|
if d.exec_() == QMessageBox.Yes:
|
||||||
for mi, path in duplicates:
|
for mi, formats in duplicates:
|
||||||
self.library_view.model().db.import_book_directory(path, add_duplicates=True)
|
self.library_view.model().db.import_book(mi, formats )
|
||||||
|
|
||||||
self.library_view.model().resort()
|
self.library_view.model().resort()
|
||||||
self.library_view.model().research()
|
self.library_view.model().research()
|
||||||
|
@ -20,7 +20,7 @@ import datetime, re, os, cPickle, traceback
|
|||||||
from zlib import compress, decompress
|
from zlib import compress, decompress
|
||||||
|
|
||||||
from libprs500 import sanitize_file_name
|
from libprs500 import sanitize_file_name
|
||||||
from libprs500.ebooks.metadata.meta import set_metadata, get_metadata
|
from libprs500.ebooks.metadata.meta import set_metadata, metadata_from_formats
|
||||||
from libprs500.ebooks.metadata.opf import OPFCreator
|
from libprs500.ebooks.metadata.opf import OPFCreator
|
||||||
from libprs500.ebooks.metadata import MetaInformation
|
from libprs500.ebooks.metadata import MetaInformation
|
||||||
from libprs500.ebooks import BOOK_EXTENSIONS
|
from libprs500.ebooks import BOOK_EXTENSIONS
|
||||||
@ -1325,24 +1325,26 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
|
|||||||
id = str(self.id(idx))
|
id = str(self.id(idx))
|
||||||
if not single_dir and not os.path.exists(tpath):
|
if not single_dir and not os.path.exists(tpath):
|
||||||
os.mkdir(tpath)
|
os.mkdir(tpath)
|
||||||
|
|
||||||
|
name = au + ' - ' + title if byauthor else title + ' - ' + au
|
||||||
|
name += '_'+id
|
||||||
|
base = dir if single_dir else tpath
|
||||||
|
|
||||||
mi = OPFCreator(self.get_metadata(idx))
|
mi = OPFCreator(self.get_metadata(idx))
|
||||||
cover = self.cover(idx)
|
cover = self.cover(idx)
|
||||||
if not single_dir:
|
|
||||||
if cover is not None:
|
if cover is not None:
|
||||||
f = open(os.path.join(tpath, 'cover.jpg'), 'wb')
|
cname = name + '.jpg'
|
||||||
f.write(cover)
|
cpath = os.path.join(base, cname)
|
||||||
mi.cover = 'cover.jpg'
|
open(cpath, 'wb').write(cover)
|
||||||
f.close()
|
mi.cover = cname
|
||||||
f = open(os.path.join(tpath, 'metadata.opf'), 'wb')
|
f = open(os.path.join(base, name+'.opf'), 'wb')
|
||||||
mi.write(f)
|
mi.write(f)
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
for fmt in self.formats(idx).split(','):
|
for fmt in self.formats(idx).split(','):
|
||||||
data = self.format(idx, fmt)
|
data = self.format(idx, fmt)
|
||||||
name = au + ' - ' + title if byauthor else title + ' - ' + au
|
fname = name +'.'+fmt.lower()
|
||||||
fname = name +'_'+id+'.'+fmt.lower()
|
|
||||||
fname = sanitize_file_name(fname)
|
fname = sanitize_file_name(fname)
|
||||||
base = dir if single_dir else tpath
|
|
||||||
f = open(os.path.join(base, fname), 'w+b')
|
f = open(os.path.join(base, fname), 'w+b')
|
||||||
f.write(data)
|
f.write(data)
|
||||||
f.flush()
|
f.flush()
|
||||||
@ -1355,12 +1357,29 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
|
|||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
|
|
||||||
def import_book_directory_multiple(self, dirpath, add_duplicates=False):
|
def import_book(self, mi, formats):
|
||||||
mi = MetaInformation(None, None)
|
series_index = 1 if mi.series_index is None else mi.series_index
|
||||||
|
obj = self.conn.execute('INSERT INTO books(title, uri, series_index) VALUES (?, ?, ?)',
|
||||||
|
(mi.title, None, series_index))
|
||||||
|
id = obj.lastrowid
|
||||||
|
self.conn.commit()
|
||||||
|
self.set_metadata(id, mi)
|
||||||
|
for path in formats:
|
||||||
|
ext = os.path.splitext(path)[1][1:].lower()
|
||||||
|
stream = open(path, 'rb')
|
||||||
|
stream.seek(0, 2)
|
||||||
|
usize = stream.tell()
|
||||||
|
stream.seek(0)
|
||||||
|
self.conn.execute('INSERT INTO data(book, format, uncompressed_size, data) VALUES (?,?,?,?)',
|
||||||
|
(id, ext, usize, sqlite.Binary(compress(stream.read()))))
|
||||||
|
self.conn.commit()
|
||||||
|
|
||||||
|
def import_book_directory_multiple(self, dirpath):
|
||||||
dirpath = os.path.abspath(dirpath)
|
dirpath = os.path.abspath(dirpath)
|
||||||
duplicates = []
|
duplicates = []
|
||||||
|
books = {}
|
||||||
for path in os.listdir(dirpath):
|
for path in os.listdir(dirpath):
|
||||||
path = os.path.join(dirpath, path)
|
path = os.path.abspath(os.path.join(dirpath, path))
|
||||||
if os.path.isdir(path) or not os.access(path, os.R_OK):
|
if os.path.isdir(path) or not os.access(path, os.R_OK):
|
||||||
continue
|
continue
|
||||||
ext = os.path.splitext(path)[1]
|
ext = os.path.splitext(path)[1]
|
||||||
@ -1369,34 +1388,30 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
|
|||||||
ext = ext[1:].lower()
|
ext = ext[1:].lower()
|
||||||
if ext not in BOOK_EXTENSIONS:
|
if ext not in BOOK_EXTENSIONS:
|
||||||
continue
|
continue
|
||||||
stream = open(path, 'rb')
|
|
||||||
mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=False))
|
key = os.path.splitext(path)[0]
|
||||||
|
if not books.has_key(key):
|
||||||
|
books[key] = []
|
||||||
|
|
||||||
|
books[key].append(path)
|
||||||
|
|
||||||
|
for formats in books.values():
|
||||||
|
mi = metadata_from_formats(formats)
|
||||||
if mi.title is None:
|
if mi.title is None:
|
||||||
continue
|
continue
|
||||||
if not add_duplicates and self.conn.execute('SELECT id FROM books where title=?', (mi.title,)).fetchone():
|
if self.has_book(mi):
|
||||||
duplicates.append((mi, path))
|
duplicates.append((mi, formats))
|
||||||
continue
|
continue
|
||||||
series_index = 1 if mi.series_index is None else mi.series_index
|
self.import_book(mi, formats)
|
||||||
obj = self.conn.execute('INSERT INTO books(title, uri, series_index) VALUES (?, ?, ?)',
|
|
||||||
(mi.title, None, series_index))
|
|
||||||
id = obj.lastrowid
|
|
||||||
self.conn.commit()
|
|
||||||
self.set_metadata(id, mi)
|
|
||||||
stream.seek(0, 2)
|
|
||||||
usize = stream.tell()
|
|
||||||
stream.seek(0)
|
|
||||||
self.conn.execute('INSERT INTO data(book, format, uncompressed_size, data) VALUES (?,?,?,?)',
|
|
||||||
(id, ext, usize, sqlite.Binary(compress(stream.read()))))
|
|
||||||
self.conn.commit()
|
|
||||||
return duplicates
|
return duplicates
|
||||||
|
|
||||||
|
|
||||||
def import_book_directory(self, dirpath, add_duplicates=False):
|
def import_book_directory(self, dirpath):
|
||||||
mi = MetaInformation(None, None)
|
|
||||||
dirpath = os.path.abspath(dirpath)
|
dirpath = os.path.abspath(dirpath)
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
for path in os.listdir(dirpath):
|
for path in os.listdir(dirpath):
|
||||||
path = os.path.join(dirpath, path)
|
path = os.path.abspath(os.path.join(dirpath, path))
|
||||||
if os.path.isdir(path) or not os.access(path, os.R_OK):
|
if os.path.isdir(path) or not os.access(path, os.R_OK):
|
||||||
continue
|
continue
|
||||||
ext = os.path.splitext(path)[1]
|
ext = os.path.splitext(path)[1]
|
||||||
@ -1405,29 +1420,20 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
|
|||||||
ext = ext[1:].lower()
|
ext = ext[1:].lower()
|
||||||
if ext not in BOOK_EXTENSIONS:
|
if ext not in BOOK_EXTENSIONS:
|
||||||
continue
|
continue
|
||||||
f = open(path, 'rb')
|
formats.append(path)
|
||||||
mi.smart_update(get_metadata(f, stream_type=ext, use_libprs_metadata=True))
|
|
||||||
f.close()
|
|
||||||
formats.append((ext, path))
|
|
||||||
if mi.title is None or not formats:
|
|
||||||
return
|
|
||||||
if not add_duplicates and self.conn.execute('SELECT id FROM books where title=?', (mi.title,)).fetchone():
|
|
||||||
return mi, dirpath
|
|
||||||
series_index = 1 if mi.series_index is None else mi.series_index
|
|
||||||
obj = self.conn.execute('INSERT INTO books(title, uri, series_index) VALUES (?, ?, ?)',
|
|
||||||
(mi.title, None, series_index))
|
|
||||||
id = obj.lastrowid
|
|
||||||
self.conn.commit()
|
|
||||||
self.set_metadata(id, mi)
|
|
||||||
for ext, path in formats:
|
|
||||||
stream = open(path, 'rb')
|
|
||||||
stream.seek(0, 2)
|
|
||||||
usize = stream.tell()
|
|
||||||
stream.seek(0)
|
|
||||||
self.conn.execute('INSERT INTO data(book, format, uncompressed_size, data) VALUES (?,?,?,?)',
|
|
||||||
(id, ext, usize, sqlite.Binary(compress(stream.read()))))
|
|
||||||
self.conn.commit()
|
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
return
|
||||||
|
mi = metadata_from_formats(formats)
|
||||||
|
if mi.title is None:
|
||||||
|
return
|
||||||
|
if self.has_book(mi):
|
||||||
|
return [(mi, formats)]
|
||||||
|
self.import_book(mi, formats)
|
||||||
|
|
||||||
|
|
||||||
|
def has_book(self, mi):
|
||||||
|
return bool(self.conn.execute('SELECT id FROM books where title=?', (mi.title,)).fetchone())
|
||||||
|
|
||||||
def recursive_import(self, root, single_book_per_directory=True):
|
def recursive_import(self, root, single_book_per_directory=True):
|
||||||
root = os.path.abspath(root)
|
root = os.path.abspath(root)
|
||||||
@ -1435,9 +1441,6 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
|
|||||||
for dirpath in os.walk(root):
|
for dirpath in os.walk(root):
|
||||||
res = self.import_book_directory(dirpath[0]) if single_book_per_directory else self.import_book_directory_multiple(dirpath[0])
|
res = self.import_book_directory(dirpath[0]) if single_book_per_directory else self.import_book_directory_multiple(dirpath[0])
|
||||||
if res is not None:
|
if res is not None:
|
||||||
if single_book_per_directory:
|
|
||||||
duplicates.append(res)
|
|
||||||
else:
|
|
||||||
duplicates.extend(res)
|
duplicates.extend(res)
|
||||||
return duplicates
|
return duplicates
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user