diff --git a/src/libprs500/ebooks/metadata/__init__.py b/src/libprs500/ebooks/metadata/__init__.py index 47a96f06b5..1605ccb2f6 100644 --- a/src/libprs500/ebooks/metadata/__init__.py +++ b/src/libprs500/ebooks/metadata/__init__.py @@ -45,7 +45,7 @@ class MetaInformation(object): ans = MetaInformation(mi.title, mi.authors) for attr in ('author_sort', 'title_sort', 'comments', 'category', 'publisher', 'series', 'series_index', 'rating', - 'isbn', 'tags', 'cover_data'): + 'isbn', 'tags', 'cover_data', 'libprs_id'): if hasattr(mi, attr): setattr(ans, attr, getattr(mi, attr)) @@ -76,6 +76,7 @@ class MetaInformation(object): self.isbn = None if not mi else mi.isbn self.tags = [] if not mi else mi.tags self.cover_data = mi.cover_data if (mi and hasattr(mi, 'cover_data')) else (None, None) + self.libprs_id = mi.libprs_id if (mi and hasattr(mi, 'libprs_id')) else None def smart_update(self, mi): @@ -91,7 +92,7 @@ class MetaInformation(object): for attr in ('author_sort', 'title_sort', 'comments', 'category', 'publisher', 'series', 'series_index', 'rating', - 'isbn'): + 'isbn', 'libprs_id'): if hasattr(mi, attr): val = getattr(mi, attr) if val is not None: diff --git a/src/libprs500/ebooks/metadata/meta.py b/src/libprs500/ebooks/metadata/meta.py index 48e24690ec..8e2f3e5524 100644 --- a/src/libprs500/ebooks/metadata/meta.py +++ b/src/libprs500/ebooks/metadata/meta.py @@ -13,7 +13,7 @@ ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -import os, re +import os, re, collections from libprs500.ebooks.metadata.rtf import get_metadata as rtf_metadata from libprs500.ebooks.lrf.meta import get_metadata as lrf_metadata @@ -28,33 +28,66 @@ from libprs500.ebooks.lrf.meta import set_metadata as set_lrf_metadata from libprs500.ebooks.metadata import MetaInformation +_METADATA_PRIORITIES = [ + 'html', 'htm', 'xhtml', 'xhtm', + 'rtf', 'pdf', 'prc', + 'epub', 'lit', 'lrf', 'mobi', + ] + +# The priorities for loading metadata from different file types +# Higher values should be used to update metadata from lower values +METADATA_PRIORITIES = collections.defaultdict(lambda:0) +for i, ext in enumerate(_METADATA_PRIORITIES): + METADATA_PRIORITIES[ext] = i + +def path_to_ext(path): + return os.path.splitext(path)[1][1:].lower() + +def metadata_from_formats(formats): + mi = MetaInformation(None, None) + formats.sort(cmp=lambda x,y: cmp(METADATA_PRIORITIES[path_to_ext(x)], + METADATA_PRIORITIES[path_to_ext(y)])) + for path in formats: + ext = path_to_ext(path) + stream = open(path, 'rb') + mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=True)) + if getattr(mi, 'libprs_id', None) is not None: + return mi + + return mi + def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False): if stream_type: stream_type = stream_type.lower() if stream_type in ('html', 'html', 'xhtml', 'xhtm'): stream_type = 'html' if stream_type in ('mobi', 'prc'): stream_type = 'mobi' - if use_libprs_metadata and hasattr(stream, 'name'): - mi = libprs_metadata(stream.name) - if mi is not None: - return mi + + opf = None + if hasattr(stream, 'name'): + c = os.path.splitext(stream.name)[0]+'.opf' + if os.access(c, os.R_OK): + opf = opf_metadata(os.path.abspath(c)) + + if use_libprs_metadata and getattr(opf, 'libprs_id', None) is not None: + return opf + try: func = eval(stream_type + '_metadata') mi = func(stream) except NameError: mi = MetaInformation(None, None) - name = os.path.basename(stream.name) if hasattr(stream, 'name') else '' + name = os.path.basename(getattr(stream, 'name', '')) base = metadata_from_filename(name) if not base.authors: base.authors = ['Unknown'] + if not base.title: + base.title = 'Unknown' base.smart_update(mi) - if hasattr(stream, 'name'): - opfpath = os.path.abspath(os.path.splitext(stream.name)[0]+'.opf') - if os.access(opfpath, os.R_OK): - mi = opf_metadata(opfpath) - if mi is not None: - base.smart_update(mi) + if opf is not None: + base.update(opf) + return base def set_metadata(stream, mi, stream_type='lrf'): @@ -125,12 +158,3 @@ def opf_metadata(opfpath): return mi except: pass - - -def libprs_metadata(name): - if os.path.basename(name) != 'metadata.opf': - name = os.path.join(os.path.dirname(name), 'metadata.opf') - name = os.path.abspath(name) - if os.access(name, os.R_OK): - return opf_metadata(name) - \ No newline at end of file diff --git a/src/libprs500/gui2/main.py b/src/libprs500/gui2/main.py index 02865c7be8..b829a3d7bd 100644 --- a/src/libprs500/gui2/main.py +++ b/src/libprs500/gui2/main.py @@ -304,12 +304,12 @@ class Main(MainWindow, Ui_MainWindow): if duplicates: files = _('

Books with the same title as the following already exist in the database. Add them anyway?

') if d.exec_() == QMessageBox.Yes: - for mi, path in duplicates: - self.library_view.model().db.import_book_directory(path, add_duplicates=True) + for mi, formats in duplicates: + self.library_view.model().db.import_book(mi, formats ) self.library_view.model().resort() self.library_view.model().research() diff --git a/src/libprs500/library/database.py b/src/libprs500/library/database.py index f44244b2be..94e3f6ee34 100644 --- a/src/libprs500/library/database.py +++ b/src/libprs500/library/database.py @@ -20,7 +20,7 @@ import datetime, re, os, cPickle, traceback from zlib import compress, decompress from libprs500 import sanitize_file_name -from libprs500.ebooks.metadata.meta import set_metadata, get_metadata +from libprs500.ebooks.metadata.meta import set_metadata, metadata_from_formats from libprs500.ebooks.metadata.opf import OPFCreator from libprs500.ebooks.metadata import MetaInformation from libprs500.ebooks import BOOK_EXTENSIONS @@ -1325,24 +1325,26 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE; id = str(self.id(idx)) if not single_dir and not os.path.exists(tpath): os.mkdir(tpath) + + name = au + ' - ' + title if byauthor else title + ' - ' + au + name += '_'+id + base = dir if single_dir else tpath + mi = OPFCreator(self.get_metadata(idx)) cover = self.cover(idx) - if not single_dir: - if cover is not None: - f = open(os.path.join(tpath, 'cover.jpg'), 'wb') - f.write(cover) - mi.cover = 'cover.jpg' - f.close() - f = open(os.path.join(tpath, 'metadata.opf'), 'wb') - mi.write(f) - f.close() + if cover is not None: + cname = name + '.jpg' + cpath = os.path.join(base, cname) + open(cpath, 'wb').write(cover) + mi.cover = cname + f = open(os.path.join(base, name+'.opf'), 'wb') + mi.write(f) + f.close() for fmt in self.formats(idx).split(','): data = self.format(idx, fmt) - name = au + ' - ' + title if byauthor else title + ' - ' + au - fname = name +'_'+id+'.'+fmt.lower() + fname = name +'.'+fmt.lower() fname = sanitize_file_name(fname) - base = dir if single_dir else tpath f = open(os.path.join(base, fname), 'w+b') f.write(data) f.flush() @@ -1355,90 +1357,91 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE; f.close() - def import_book_directory_multiple(self, dirpath, add_duplicates=False): - mi = MetaInformation(None, None) - dirpath = os.path.abspath(dirpath) - duplicates = [] - for path in os.listdir(dirpath): - path = os.path.join(dirpath, path) - if os.path.isdir(path) or not os.access(path, os.R_OK): - continue - ext = os.path.splitext(path)[1] - if not ext: - continue - ext = ext[1:].lower() - if ext not in BOOK_EXTENSIONS: - continue - stream = open(path, 'rb') - mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=False)) - if mi.title is None: - continue - if not add_duplicates and self.conn.execute('SELECT id FROM books where title=?', (mi.title,)).fetchone(): - duplicates.append((mi, path)) - continue - series_index = 1 if mi.series_index is None else mi.series_index - obj = self.conn.execute('INSERT INTO books(title, uri, series_index) VALUES (?, ?, ?)', - (mi.title, None, series_index)) - id = obj.lastrowid - self.conn.commit() - self.set_metadata(id, mi) - stream.seek(0, 2) - usize = stream.tell() - stream.seek(0) - self.conn.execute('INSERT INTO data(book, format, uncompressed_size, data) VALUES (?,?,?,?)', - (id, ext, usize, sqlite.Binary(compress(stream.read())))) - self.conn.commit() - return duplicates - - - def import_book_directory(self, dirpath, add_duplicates=False): - mi = MetaInformation(None, None) - dirpath = os.path.abspath(dirpath) - formats = [] - for path in os.listdir(dirpath): - path = os.path.join(dirpath, path) - if os.path.isdir(path) or not os.access(path, os.R_OK): - continue - ext = os.path.splitext(path)[1] - if not ext: - continue - ext = ext[1:].lower() - if ext not in BOOK_EXTENSIONS: - continue - f = open(path, 'rb') - mi.smart_update(get_metadata(f, stream_type=ext, use_libprs_metadata=True)) - f.close() - formats.append((ext, path)) - if mi.title is None or not formats: - return - if not add_duplicates and self.conn.execute('SELECT id FROM books where title=?', (mi.title,)).fetchone(): - return mi, dirpath + def import_book(self, mi, formats): series_index = 1 if mi.series_index is None else mi.series_index obj = self.conn.execute('INSERT INTO books(title, uri, series_index) VALUES (?, ?, ?)', (mi.title, None, series_index)) id = obj.lastrowid self.conn.commit() self.set_metadata(id, mi) - for ext, path in formats: + for path in formats: + ext = os.path.splitext(path)[1][1:].lower() stream = open(path, 'rb') stream.seek(0, 2) usize = stream.tell() stream.seek(0) self.conn.execute('INSERT INTO data(book, format, uncompressed_size, data) VALUES (?,?,?,?)', (id, ext, usize, sqlite.Binary(compress(stream.read())))) - self.conn.commit() + self.conn.commit() + + def import_book_directory_multiple(self, dirpath): + dirpath = os.path.abspath(dirpath) + duplicates = [] + books = {} + for path in os.listdir(dirpath): + path = os.path.abspath(os.path.join(dirpath, path)) + if os.path.isdir(path) or not os.access(path, os.R_OK): + continue + ext = os.path.splitext(path)[1] + if not ext: + continue + ext = ext[1:].lower() + if ext not in BOOK_EXTENSIONS: + continue + + key = os.path.splitext(path)[0] + if not books.has_key(key): + books[key] = [] + + books[key].append(path) + + for formats in books.values(): + mi = metadata_from_formats(formats) + if mi.title is None: + continue + if self.has_book(mi): + duplicates.append((mi, formats)) + continue + self.import_book(mi, formats) + return duplicates + + + def import_book_directory(self, dirpath): + dirpath = os.path.abspath(dirpath) + formats = [] + + for path in os.listdir(dirpath): + path = os.path.abspath(os.path.join(dirpath, path)) + if os.path.isdir(path) or not os.access(path, os.R_OK): + continue + ext = os.path.splitext(path)[1] + if not ext: + continue + ext = ext[1:].lower() + if ext not in BOOK_EXTENSIONS: + continue + formats.append(path) + + if not formats: + return + mi = metadata_from_formats(formats) + if mi.title is None: + return + if self.has_book(mi): + return [(mi, formats)] + self.import_book(mi, formats) + def has_book(self, mi): + return bool(self.conn.execute('SELECT id FROM books where title=?', (mi.title,)).fetchone()) + def recursive_import(self, root, single_book_per_directory=True): root = os.path.abspath(root) duplicates = [] for dirpath in os.walk(root): res = self.import_book_directory(dirpath[0]) if single_book_per_directory else self.import_book_directory_multiple(dirpath[0]) if res is not None: - if single_book_per_directory: - duplicates.append(res) - else: - duplicates.extend(res) + duplicates.extend(res) return duplicates