diff --git a/src/libprs500/ebooks/metadata/__init__.py b/src/libprs500/ebooks/metadata/__init__.py
index 47a96f06b5..1605ccb2f6 100644
--- a/src/libprs500/ebooks/metadata/__init__.py
+++ b/src/libprs500/ebooks/metadata/__init__.py
@@ -45,7 +45,7 @@ class MetaInformation(object):
ans = MetaInformation(mi.title, mi.authors)
for attr in ('author_sort', 'title_sort', 'comments', 'category',
'publisher', 'series', 'series_index', 'rating',
- 'isbn', 'tags', 'cover_data'):
+ 'isbn', 'tags', 'cover_data', 'libprs_id'):
if hasattr(mi, attr):
setattr(ans, attr, getattr(mi, attr))
@@ -76,6 +76,7 @@ class MetaInformation(object):
self.isbn = None if not mi else mi.isbn
self.tags = [] if not mi else mi.tags
self.cover_data = mi.cover_data if (mi and hasattr(mi, 'cover_data')) else (None, None)
+ self.libprs_id = mi.libprs_id if (mi and hasattr(mi, 'libprs_id')) else None
def smart_update(self, mi):
@@ -91,7 +92,7 @@ class MetaInformation(object):
for attr in ('author_sort', 'title_sort', 'comments', 'category',
'publisher', 'series', 'series_index', 'rating',
- 'isbn'):
+ 'isbn', 'libprs_id'):
if hasattr(mi, attr):
val = getattr(mi, attr)
if val is not None:
diff --git a/src/libprs500/ebooks/metadata/meta.py b/src/libprs500/ebooks/metadata/meta.py
index 48e24690ec..8e2f3e5524 100644
--- a/src/libprs500/ebooks/metadata/meta.py
+++ b/src/libprs500/ebooks/metadata/meta.py
@@ -13,7 +13,7 @@
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-import os, re
+import os, re, collections
from libprs500.ebooks.metadata.rtf import get_metadata as rtf_metadata
from libprs500.ebooks.lrf.meta import get_metadata as lrf_metadata
@@ -28,33 +28,66 @@ from libprs500.ebooks.lrf.meta import set_metadata as set_lrf_metadata
from libprs500.ebooks.metadata import MetaInformation
+_METADATA_PRIORITIES = [
+ 'html', 'htm', 'xhtml', 'xhtm',
+ 'rtf', 'pdf', 'prc',
+ 'epub', 'lit', 'lrf', 'mobi',
+ ]
+
+# Priority of each file type as a metadata source; extensions not listed above get priority 0.
+# Metadata from higher-priority types is applied on top of metadata from lower-priority ones.
+METADATA_PRIORITIES = collections.defaultdict(lambda:0)
+for i, ext in enumerate(_METADATA_PRIORITIES):
+ METADATA_PRIORITIES[ext] = i
+
+def path_to_ext(path):
+ return os.path.splitext(path)[1][1:].lower()
+
+def metadata_from_formats(formats):
+ mi = MetaInformation(None, None)
+ formats.sort(cmp=lambda x,y: cmp(METADATA_PRIORITIES[path_to_ext(x)],
+ METADATA_PRIORITIES[path_to_ext(y)]))
+ for path in formats:
+ ext = path_to_ext(path)
+ stream = open(path, 'rb')
+ mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=True))
+ if getattr(mi, 'libprs_id', None) is not None:
+ return mi
+
+ return mi
+
def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
if stream_type: stream_type = stream_type.lower()
if stream_type in ('html', 'html', 'xhtml', 'xhtm'):
stream_type = 'html'
if stream_type in ('mobi', 'prc'):
stream_type = 'mobi'
- if use_libprs_metadata and hasattr(stream, 'name'):
- mi = libprs_metadata(stream.name)
- if mi is not None:
- return mi
+
+ opf = None
+ if hasattr(stream, 'name'):
+ c = os.path.splitext(stream.name)[0]+'.opf'
+ if os.access(c, os.R_OK):
+ opf = opf_metadata(os.path.abspath(c))
+
+ if use_libprs_metadata and getattr(opf, 'libprs_id', None) is not None:
+ return opf
+
try:
func = eval(stream_type + '_metadata')
mi = func(stream)
except NameError:
mi = MetaInformation(None, None)
- name = os.path.basename(stream.name) if hasattr(stream, 'name') else ''
+ name = os.path.basename(getattr(stream, 'name', ''))
base = metadata_from_filename(name)
if not base.authors:
base.authors = ['Unknown']
+ if not base.title:
+ base.title = 'Unknown'
base.smart_update(mi)
- if hasattr(stream, 'name'):
- opfpath = os.path.abspath(os.path.splitext(stream.name)[0]+'.opf')
- if os.access(opfpath, os.R_OK):
- mi = opf_metadata(opfpath)
- if mi is not None:
- base.smart_update(mi)
+ if opf is not None:
+ base.update(opf)
+
return base
def set_metadata(stream, mi, stream_type='lrf'):
@@ -125,12 +158,3 @@ def opf_metadata(opfpath):
return mi
except:
pass
-
-
-def libprs_metadata(name):
- if os.path.basename(name) != 'metadata.opf':
- name = os.path.join(os.path.dirname(name), 'metadata.opf')
- name = os.path.abspath(name)
- if os.access(name, os.R_OK):
- return opf_metadata(name)
-
\ No newline at end of file
diff --git a/src/libprs500/gui2/main.py b/src/libprs500/gui2/main.py
index 02865c7be8..b829a3d7bd 100644
--- a/src/libprs500/gui2/main.py
+++ b/src/libprs500/gui2/main.py
@@ -304,12 +304,12 @@ class Main(MainWindow, Ui_MainWindow):
if duplicates:
files = _('
Books with the same title as the following already exist in the database. Add them anyway?
')
- for mi, path in duplicates:
+ for mi, formats in duplicates:
files += '- '+mi.title+'
\n'
d = question_dialog(self, _('Duplicates found!'), files+'
')
if d.exec_() == QMessageBox.Yes:
- for mi, path in duplicates:
- self.library_view.model().db.import_book_directory(path, add_duplicates=True)
+ for mi, formats in duplicates:
+ self.library_view.model().db.import_book(mi, formats )
self.library_view.model().resort()
self.library_view.model().research()
diff --git a/src/libprs500/library/database.py b/src/libprs500/library/database.py
index f44244b2be..94e3f6ee34 100644
--- a/src/libprs500/library/database.py
+++ b/src/libprs500/library/database.py
@@ -20,7 +20,7 @@ import datetime, re, os, cPickle, traceback
from zlib import compress, decompress
from libprs500 import sanitize_file_name
-from libprs500.ebooks.metadata.meta import set_metadata, get_metadata
+from libprs500.ebooks.metadata.meta import set_metadata, metadata_from_formats
from libprs500.ebooks.metadata.opf import OPFCreator
from libprs500.ebooks.metadata import MetaInformation
from libprs500.ebooks import BOOK_EXTENSIONS
@@ -1325,24 +1325,26 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
id = str(self.id(idx))
if not single_dir and not os.path.exists(tpath):
os.mkdir(tpath)
+
+ name = au + ' - ' + title if byauthor else title + ' - ' + au
+ name += '_'+id
+ base = dir if single_dir else tpath
+
mi = OPFCreator(self.get_metadata(idx))
cover = self.cover(idx)
- if not single_dir:
- if cover is not None:
- f = open(os.path.join(tpath, 'cover.jpg'), 'wb')
- f.write(cover)
- mi.cover = 'cover.jpg'
- f.close()
- f = open(os.path.join(tpath, 'metadata.opf'), 'wb')
- mi.write(f)
- f.close()
+ if cover is not None:
+ cname = name + '.jpg'
+ cpath = os.path.join(base, cname)
+ open(cpath, 'wb').write(cover)
+ mi.cover = cname
+ f = open(os.path.join(base, name+'.opf'), 'wb')
+ mi.write(f)
+ f.close()
for fmt in self.formats(idx).split(','):
data = self.format(idx, fmt)
- name = au + ' - ' + title if byauthor else title + ' - ' + au
- fname = name +'_'+id+'.'+fmt.lower()
+ fname = name +'.'+fmt.lower()
fname = sanitize_file_name(fname)
- base = dir if single_dir else tpath
f = open(os.path.join(base, fname), 'w+b')
f.write(data)
f.flush()
@@ -1355,90 +1357,91 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
f.close()
- def import_book_directory_multiple(self, dirpath, add_duplicates=False):
- mi = MetaInformation(None, None)
- dirpath = os.path.abspath(dirpath)
- duplicates = []
- for path in os.listdir(dirpath):
- path = os.path.join(dirpath, path)
- if os.path.isdir(path) or not os.access(path, os.R_OK):
- continue
- ext = os.path.splitext(path)[1]
- if not ext:
- continue
- ext = ext[1:].lower()
- if ext not in BOOK_EXTENSIONS:
- continue
- stream = open(path, 'rb')
- mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=False))
- if mi.title is None:
- continue
- if not add_duplicates and self.conn.execute('SELECT id FROM books where title=?', (mi.title,)).fetchone():
- duplicates.append((mi, path))
- continue
- series_index = 1 if mi.series_index is None else mi.series_index
- obj = self.conn.execute('INSERT INTO books(title, uri, series_index) VALUES (?, ?, ?)',
- (mi.title, None, series_index))
- id = obj.lastrowid
- self.conn.commit()
- self.set_metadata(id, mi)
- stream.seek(0, 2)
- usize = stream.tell()
- stream.seek(0)
- self.conn.execute('INSERT INTO data(book, format, uncompressed_size, data) VALUES (?,?,?,?)',
- (id, ext, usize, sqlite.Binary(compress(stream.read()))))
- self.conn.commit()
- return duplicates
-
-
- def import_book_directory(self, dirpath, add_duplicates=False):
- mi = MetaInformation(None, None)
- dirpath = os.path.abspath(dirpath)
- formats = []
- for path in os.listdir(dirpath):
- path = os.path.join(dirpath, path)
- if os.path.isdir(path) or not os.access(path, os.R_OK):
- continue
- ext = os.path.splitext(path)[1]
- if not ext:
- continue
- ext = ext[1:].lower()
- if ext not in BOOK_EXTENSIONS:
- continue
- f = open(path, 'rb')
- mi.smart_update(get_metadata(f, stream_type=ext, use_libprs_metadata=True))
- f.close()
- formats.append((ext, path))
- if mi.title is None or not formats:
- return
- if not add_duplicates and self.conn.execute('SELECT id FROM books where title=?', (mi.title,)).fetchone():
- return mi, dirpath
+ def import_book(self, mi, formats):
series_index = 1 if mi.series_index is None else mi.series_index
obj = self.conn.execute('INSERT INTO books(title, uri, series_index) VALUES (?, ?, ?)',
(mi.title, None, series_index))
id = obj.lastrowid
self.conn.commit()
self.set_metadata(id, mi)
- for ext, path in formats:
+ for path in formats:
+ ext = os.path.splitext(path)[1][1:].lower()
stream = open(path, 'rb')
stream.seek(0, 2)
usize = stream.tell()
stream.seek(0)
self.conn.execute('INSERT INTO data(book, format, uncompressed_size, data) VALUES (?,?,?,?)',
(id, ext, usize, sqlite.Binary(compress(stream.read()))))
- self.conn.commit()
+ self.conn.commit()
+
+ def import_book_directory_multiple(self, dirpath):
+ dirpath = os.path.abspath(dirpath)
+ duplicates = []
+ books = {}
+ for path in os.listdir(dirpath):
+ path = os.path.abspath(os.path.join(dirpath, path))
+ if os.path.isdir(path) or not os.access(path, os.R_OK):
+ continue
+ ext = os.path.splitext(path)[1]
+ if not ext:
+ continue
+ ext = ext[1:].lower()
+ if ext not in BOOK_EXTENSIONS:
+ continue
+
+ key = os.path.splitext(path)[0]
+ if not books.has_key(key):
+ books[key] = []
+
+ books[key].append(path)
+
+ for formats in books.values():
+ mi = metadata_from_formats(formats)
+ if mi.title is None:
+ continue
+ if self.has_book(mi):
+ duplicates.append((mi, formats))
+ continue
+ self.import_book(mi, formats)
+ return duplicates
+
+
+ def import_book_directory(self, dirpath):
+ dirpath = os.path.abspath(dirpath)
+ formats = []
+
+ for path in os.listdir(dirpath):
+ path = os.path.abspath(os.path.join(dirpath, path))
+ if os.path.isdir(path) or not os.access(path, os.R_OK):
+ continue
+ ext = os.path.splitext(path)[1]
+ if not ext:
+ continue
+ ext = ext[1:].lower()
+ if ext not in BOOK_EXTENSIONS:
+ continue
+ formats.append(path)
+
+ if not formats:
+ return
+ mi = metadata_from_formats(formats)
+ if mi.title is None:
+ return
+ if self.has_book(mi):
+ return [(mi, formats)]
+ self.import_book(mi, formats)
+ def has_book(self, mi):
+ return bool(self.conn.execute('SELECT id FROM books where title=?', (mi.title,)).fetchone())
+
def recursive_import(self, root, single_book_per_directory=True):
root = os.path.abspath(root)
duplicates = []
for dirpath in os.walk(root):
res = self.import_book_directory(dirpath[0]) if single_book_per_directory else self.import_book_directory_multiple(dirpath[0])
if res is not None:
- if single_book_per_directory:
- duplicates.append(res)
- else:
- duplicates.extend(res)
+ duplicates.extend(res)
return duplicates