Implement recursive import assuming multiple ebooks per directory

2025-11-05 12:03:03 -05:00 · 2008-02-22 22:58:17 +00:00 · 2008-02-22 22:58:17 +00:00 · ac4a0f0dfc
commit ac4a0f0dfc
parent c7cd76bdc7
4 changed files with 131 additions and 103 deletions
--- a/src/libprs500/ebooks/metadata/__init__.py
+++ b/src/libprs500/ebooks/metadata/__init__.py
@@ -45,7 +45,7 @@ class MetaInformation(object):
        ans = MetaInformation(mi.title, mi.authors)
        for attr in ('author_sort', 'title_sort', 'comments', 'category',
                     'publisher', 'series', 'series_index', 'rating',
-                     'isbn', 'tags', 'cover_data'):
+                     'isbn', 'tags', 'cover_data', 'libprs_id'):
            if hasattr(mi, attr):
                setattr(ans, attr, getattr(mi, attr))
        
@@ -76,6 +76,7 @@ class MetaInformation(object):
        self.isbn         = None if not mi else mi.isbn
        self.tags         = []  if not mi else mi.tags
        self.cover_data   = mi.cover_data if (mi and hasattr(mi, 'cover_data')) else (None, None)
+        self.libprs_id    = mi.libprs_id  if (mi and hasattr(mi, 'libprs_id')) else None
         
    
    def smart_update(self, mi):
@@ -91,7 +92,7 @@ class MetaInformation(object):
            
        for attr in ('author_sort', 'title_sort', 'comments', 'category',
                     'publisher', 'series', 'series_index', 'rating',
-                     'isbn'):
+                     'isbn', 'libprs_id'):
            if hasattr(mi, attr):
                val = getattr(mi, attr)
                if val is not None:
--- a/src/libprs500/ebooks/metadata/meta.py
+++ b/src/libprs500/ebooks/metadata/meta.py
@@ -13,7 +13,7 @@
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

-import os, re
+import os, re, collections

 from libprs500.ebooks.metadata.rtf  import get_metadata as rtf_metadata
 from libprs500.ebooks.lrf.meta      import get_metadata as lrf_metadata
@@ -28,33 +28,66 @@ from libprs500.ebooks.lrf.meta      import set_metadata as set_lrf_metadata

 from libprs500.ebooks.metadata import MetaInformation

+_METADATA_PRIORITIES = [
+                       'html', 'htm', 'xhtml', 'xhtm',
+                       'rtf', 'pdf', 'prc',
+                       'epub', 'lit', 'lrf', 'mobi',
+                      ]
+
+# The priorities for loading metadata from different file types.
+# Metadata from file types with higher values overrides metadata from file types with lower values.
+METADATA_PRIORITIES = collections.defaultdict(lambda:0)
+for i, ext in enumerate(_METADATA_PRIORITIES):
+    METADATA_PRIORITIES[ext] = i 
+
+def path_to_ext(path):
+    return os.path.splitext(path)[1][1:].lower()
+
+def metadata_from_formats(formats):
+    mi = MetaInformation(None, None)
+    formats.sort(cmp=lambda x,y: cmp(METADATA_PRIORITIES[path_to_ext(x)],  
+                                     METADATA_PRIORITIES[path_to_ext(y)]))
+    for path in formats:
+        ext = path_to_ext(path)
+        stream = open(path, 'rb')
+        mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=True))
+        if getattr(mi, 'libprs_id', None) is not None:
+            return mi
+    
+    return mi
+
 def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
    if stream_type: stream_type = stream_type.lower()
    if stream_type in ('html', 'html', 'xhtml', 'xhtm'):
        stream_type = 'html'
    if stream_type in ('mobi', 'prc'):
        stream_type = 'mobi'
-    if use_libprs_metadata and hasattr(stream, 'name'):
-        mi = libprs_metadata(stream.name)
-        if mi is not None:
-            return mi
+        
+    opf = None
+    if hasattr(stream, 'name'):
+        c = os.path.splitext(stream.name)[0]+'.opf'
+        if os.access(c, os.R_OK):
+            opf = opf_metadata(os.path.abspath(c))
+        
+    if use_libprs_metadata and getattr(opf, 'libprs_id', None) is not None:
+        return opf
+    
    try:
        func = eval(stream_type + '_metadata')
        mi = func(stream)
    except NameError:
        mi = MetaInformation(None, None)
        
-    name = os.path.basename(stream.name) if hasattr(stream, 'name') else ''
+    name = os.path.basename(getattr(stream, 'name', ''))
    base = metadata_from_filename(name)
    if not base.authors:
        base.authors = ['Unknown']
+    if not base.title:
+        base.title = 'Unknown'
    base.smart_update(mi)
-    if hasattr(stream, 'name'):
-        opfpath = os.path.abspath(os.path.splitext(stream.name)[0]+'.opf')
-        if os.access(opfpath, os.R_OK):
-            mi = opf_metadata(opfpath)
-            if mi is not None:
-                base.smart_update(mi)
+    if opf is not None:
+        base.update(opf)
+    
    return base

 def set_metadata(stream, mi, stream_type='lrf'):
@@ -125,12 +158,3 @@ def opf_metadata(opfpath):
            return mi
    except:
        pass
-    
-    
-def libprs_metadata(name):
-    if os.path.basename(name) != 'metadata.opf':
-        name = os.path.join(os.path.dirname(name), 'metadata.opf')
-    name = os.path.abspath(name)
-    if os.access(name, os.R_OK):
-        return opf_metadata(name)
-    
--- a/src/libprs500/gui2/main.py
+++ b/src/libprs500/gui2/main.py
@@ -304,12 +304,12 @@ class Main(MainWindow, Ui_MainWindow):
        
        if duplicates:
            files = _('<p>Books with the same title as the following already exist in the database. Add them anyway?<ul>')
-            for mi, path in duplicates:
+            for mi, formats in duplicates:
                files += '<li>'+mi.title+'</li>\n'
            d = question_dialog(self, _('Duplicates found!'), files+'</ul></p>')
            if d.exec_() == QMessageBox.Yes:
-                for mi, path in duplicates:
-                    self.library_view.model().db.import_book_directory(path, add_duplicates=True)
+                for mi, formats in duplicates:
+                    self.library_view.model().db.import_book(mi, formats )
        
        self.library_view.model().resort()
        self.library_view.model().research()
--- a/src/libprs500/library/database.py
+++ b/src/libprs500/library/database.py
@@ -20,7 +20,7 @@ import datetime, re, os, cPickle, traceback
 from zlib import compress, decompress

 from libprs500 import sanitize_file_name
-from libprs500.ebooks.metadata.meta import set_metadata, get_metadata
+from libprs500.ebooks.metadata.meta import set_metadata, metadata_from_formats
 from libprs500.ebooks.metadata.opf import OPFCreator
 from libprs500.ebooks.metadata import MetaInformation
 from libprs500.ebooks import BOOK_EXTENSIONS
@@ -1325,24 +1325,26 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
                id = str(self.id(idx))
                if not single_dir and not os.path.exists(tpath):
                    os.mkdir(tpath)
+                
+                name = au + ' - ' + title if byauthor else title + ' - ' + au
+                name += '_'+id
+                base  = dir if single_dir else tpath
+                
                mi = OPFCreator(self.get_metadata(idx))
                cover = self.cover(idx)
-                if not single_dir:
-                    if cover is not None:
-                        f = open(os.path.join(tpath, 'cover.jpg'), 'wb')
-                        f.write(cover)
-                        mi.cover = 'cover.jpg'
-                        f.close()
-                    f = open(os.path.join(tpath, 'metadata.opf'), 'wb')
-                    mi.write(f)
-                    f.close()
+                if cover is not None:
+                    cname = name + '.jpg'
+                    cpath = os.path.join(base, cname)
+                    open(cpath, 'wb').write(cover)
+                    mi.cover = cname
+                f = open(os.path.join(base, name+'.opf'), 'wb')
+                mi.write(f)
+                f.close()
                
                for fmt in self.formats(idx).split(','):
                    data = self.format(idx, fmt)
-                    name = au + ' - ' + title if byauthor else title + ' - ' + au
-                    fname = name +'_'+id+'.'+fmt.lower()
+                    fname = name +'.'+fmt.lower()
                    fname = sanitize_file_name(fname)
-                    base  = dir if single_dir else tpath 
                    f = open(os.path.join(base, fname), 'w+b')
                    f.write(data)
                    f.flush()
@@ -1355,90 +1357,91 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
                    f.close()
                    
    
-    def import_book_directory_multiple(self, dirpath, add_duplicates=False):
-        mi = MetaInformation(None, None)
-        dirpath = os.path.abspath(dirpath)
-        duplicates = []
-        for path in os.listdir(dirpath):
-            path = os.path.join(dirpath, path)
-            if os.path.isdir(path) or not os.access(path, os.R_OK):
-                continue
-            ext = os.path.splitext(path)[1]
-            if not ext:
-                continue
-            ext = ext[1:].lower()
-            if ext not in BOOK_EXTENSIONS:
-                continue
-            stream = open(path, 'rb')
-            mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=False))
-            if mi.title is None: 
-                continue
-            if not add_duplicates and self.conn.execute('SELECT id FROM books where title=?', (mi.title,)).fetchone():
-                duplicates.append((mi, path))
-                continue
-            series_index = 1 if mi.series_index is None else mi.series_index
-            obj = self.conn.execute('INSERT INTO books(title, uri, series_index) VALUES (?, ?, ?)', 
-                              (mi.title, None, series_index))
-            id = obj.lastrowid
-            self.conn.commit()
-            self.set_metadata(id, mi)
-            stream.seek(0, 2)
-            usize = stream.tell()
-            stream.seek(0)
-            self.conn.execute('INSERT INTO data(book, format, uncompressed_size, data) VALUES (?,?,?,?)',
-                              (id, ext, usize, sqlite.Binary(compress(stream.read()))))
-            self.conn.commit()
-        return duplicates
-                      
-    
-    def import_book_directory(self, dirpath, add_duplicates=False):
-        mi = MetaInformation(None, None)
-        dirpath = os.path.abspath(dirpath)
-        formats = []
-        for path in os.listdir(dirpath):
-            path = os.path.join(dirpath, path)
-            if os.path.isdir(path) or not os.access(path, os.R_OK):
-                continue
-            ext = os.path.splitext(path)[1]
-            if not ext:
-                continue
-            ext = ext[1:].lower()
-            if ext not in BOOK_EXTENSIONS:
-                continue
-            f = open(path, 'rb')
-            mi.smart_update(get_metadata(f, stream_type=ext, use_libprs_metadata=True))
-            f.close()
-            formats.append((ext, path))
-        if mi.title is None or not formats:
-            return
-        if not add_duplicates and self.conn.execute('SELECT id FROM books where title=?', (mi.title,)).fetchone():
-            return mi, dirpath
+    def import_book(self, mi, formats):
        series_index = 1 if mi.series_index is None else mi.series_index
        obj = self.conn.execute('INSERT INTO books(title, uri, series_index) VALUES (?, ?, ?)', 
                          (mi.title, None, series_index))
        id = obj.lastrowid
        self.conn.commit()
        self.set_metadata(id, mi)
-        for ext, path in formats:
+        for path in formats:
+            ext = os.path.splitext(path)[1][1:].lower()
            stream = open(path, 'rb')
            stream.seek(0, 2)
            usize = stream.tell()
            stream.seek(0)
            self.conn.execute('INSERT INTO data(book, format, uncompressed_size, data) VALUES (?,?,?,?)',
                              (id, ext, usize, sqlite.Binary(compress(stream.read()))))
-        self.conn.commit()   
+        self.conn.commit()
+    
+    def import_book_directory_multiple(self, dirpath):
+        dirpath = os.path.abspath(dirpath)
+        duplicates = []
+        books = {}
+        for path in os.listdir(dirpath):
+            path = os.path.abspath(os.path.join(dirpath, path))
+            if os.path.isdir(path) or not os.access(path, os.R_OK):
+                continue
+            ext = os.path.splitext(path)[1]
+            if not ext:
+                continue
+            ext = ext[1:].lower()
+            if ext not in BOOK_EXTENSIONS:
+                continue
+            
+            key = os.path.splitext(path)[0]
+            if not books.has_key(key):
+                books[key] = []
+                
+            books[key].append(path)
+            
+        for formats in books.values():
+            mi = metadata_from_formats(formats)
+            if mi.title is None:
+                continue
+            if self.has_book(mi):
+                duplicates.append((mi, formats))
+                continue
+            self.import_book(mi, formats)
+        return duplicates
+                      
+    
+    def import_book_directory(self, dirpath):
+        dirpath = os.path.abspath(dirpath)
+        formats = []
+        
+        for path in os.listdir(dirpath):
+            path = os.path.abspath(os.path.join(dirpath, path))
+            if os.path.isdir(path) or not os.access(path, os.R_OK):
+                continue
+            ext = os.path.splitext(path)[1]
+            if not ext:
+                continue
+            ext = ext[1:].lower()
+            if ext not in BOOK_EXTENSIONS:
+                continue
+            formats.append(path)
+        
+        if not formats:
+            return
+        mi = metadata_from_formats(formats)
+        if mi.title is None:
+            return
+        if self.has_book(mi):
+            return [(mi, formats)]
+        self.import_book(mi, formats)
            
                    
+    def has_book(self, mi):
+        return bool(self.conn.execute('SELECT id FROM books where title=?', (mi.title,)).fetchone())
+    
    def recursive_import(self, root, single_book_per_directory=True):
        root = os.path.abspath(root)
        duplicates  = []
        for dirpath in os.walk(root):
            res = self.import_book_directory(dirpath[0]) if single_book_per_directory else self.import_book_directory_multiple(dirpath[0])
            if res is not None:
-                if single_book_per_directory:
-                    duplicates.append(res)
-                else:
-                    duplicates.extend(res)
+                duplicates.extend(res)
        return duplicates