mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-04 03:27:00 -05:00 
			
		
		
		
	Implement recursive import assuming multiple ebooks per directory
This commit is contained in:
		
							parent
							
								
									c7cd76bdc7
								
							
						
					
					
						commit
						ac4a0f0dfc
					
				@ -45,7 +45,7 @@ class MetaInformation(object):
 | 
			
		||||
        ans = MetaInformation(mi.title, mi.authors)
 | 
			
		||||
        for attr in ('author_sort', 'title_sort', 'comments', 'category',
 | 
			
		||||
                     'publisher', 'series', 'series_index', 'rating',
 | 
			
		||||
                     'isbn', 'tags', 'cover_data'):
 | 
			
		||||
                     'isbn', 'tags', 'cover_data', 'libprs_id'):
 | 
			
		||||
            if hasattr(mi, attr):
 | 
			
		||||
                setattr(ans, attr, getattr(mi, attr))
 | 
			
		||||
        
 | 
			
		||||
@ -76,6 +76,7 @@ class MetaInformation(object):
 | 
			
		||||
        self.isbn         = None if not mi else mi.isbn
 | 
			
		||||
        self.tags         = []  if not mi else mi.tags
 | 
			
		||||
        self.cover_data   = mi.cover_data if (mi and hasattr(mi, 'cover_data')) else (None, None)
 | 
			
		||||
        self.libprs_id    = mi.libprs_id  if (mi and hasattr(mi, 'libprs_id')) else None
 | 
			
		||||
         
 | 
			
		||||
    
 | 
			
		||||
    def smart_update(self, mi):
 | 
			
		||||
@ -91,7 +92,7 @@ class MetaInformation(object):
 | 
			
		||||
            
 | 
			
		||||
        for attr in ('author_sort', 'title_sort', 'comments', 'category',
 | 
			
		||||
                     'publisher', 'series', 'series_index', 'rating',
 | 
			
		||||
                     'isbn'):
 | 
			
		||||
                     'isbn', 'libprs_id'):
 | 
			
		||||
            if hasattr(mi, attr):
 | 
			
		||||
                val = getattr(mi, attr)
 | 
			
		||||
                if val is not None:
 | 
			
		||||
 | 
			
		||||
@ -13,7 +13,7 @@
 | 
			
		||||
##    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
import os, re
 | 
			
		||||
import os, re, collections
 | 
			
		||||
 | 
			
		||||
from libprs500.ebooks.metadata.rtf  import get_metadata as rtf_metadata
 | 
			
		||||
from libprs500.ebooks.lrf.meta      import get_metadata as lrf_metadata
 | 
			
		||||
@ -28,33 +28,66 @@ from libprs500.ebooks.lrf.meta      import set_metadata as set_lrf_metadata
 | 
			
		||||
 | 
			
		||||
from libprs500.ebooks.metadata import MetaInformation
 | 
			
		||||
 | 
			
		||||
# Known file extensions, listed from lowest to highest metadata priority.
_METADATA_PRIORITIES = [
    'html', 'htm', 'xhtml', 'xhtm',
    'rtf', 'pdf', 'prc',
    'epub', 'lit', 'lrf', 'mobi',
]

# Maps a file extension to its priority for loading metadata, i.e. its
# position in _METADATA_PRIORITIES. Extensions that are not listed map to 0.
# Metadata from higher-valued formats should be used to update metadata
# obtained from lower-valued ones.
METADATA_PRIORITIES = collections.defaultdict(int)
METADATA_PRIORITIES.update(
    zip(_METADATA_PRIORITIES, range(len(_METADATA_PRIORITIES))))
 | 
			
		||||
 | 
			
		||||
def path_to_ext(path):
    """
    Return the extension of `path` in lower case and without the leading
    dot. The result is the empty string when `path` has no extension.
    """
    extension = os.path.splitext(path)[1]
    return extension[1:].lower()
 | 
			
		||||
 | 
			
		||||
def metadata_from_formats(formats):
    """
    Merge the metadata read from several files into one MetaInformation.

    `formats` is a list of file paths. It is sorted in place by the
    METADATA_PRIORITIES value of each path's extension (ascending), so
    metadata from higher-priority formats is merged in last. Each file is
    read with get_metadata(..., use_libprs_metadata=True) and merged via
    smart_update().

    Reading stops early as soon as the merged metadata carries a
    `libprs_id` that is not None.

    Returns the merged MetaInformation object.
    """
    mi = MetaInformation(None, None)
    # key-based sort is stable, exactly like the cmp-based sort it replaces
    formats.sort(key=lambda path: METADATA_PRIORITIES[path_to_ext(path)])
    for path in formats:
        ext = path_to_ext(path)
        stream = open(path, 'rb')
        try:
            mi.smart_update(get_metadata(stream, stream_type=ext,
                                         use_libprs_metadata=True))
        finally:
            # Always release the file handle, even if reading metadata fails
            stream.close()
        if getattr(mi, 'libprs_id', None) is not None:
            break

    return mi
 | 
			
		||||
 | 
			
		||||
def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
 | 
			
		||||
    if stream_type: stream_type = stream_type.lower()
 | 
			
		||||
    if stream_type in ('html', 'html', 'xhtml', 'xhtm'):
 | 
			
		||||
        stream_type = 'html'
 | 
			
		||||
    if stream_type in ('mobi', 'prc'):
 | 
			
		||||
        stream_type = 'mobi'
 | 
			
		||||
    if use_libprs_metadata and hasattr(stream, 'name'):
 | 
			
		||||
        mi = libprs_metadata(stream.name)
 | 
			
		||||
        if mi is not None:
 | 
			
		||||
            return mi
 | 
			
		||||
        
 | 
			
		||||
    opf = None
 | 
			
		||||
    if hasattr(stream, 'name'):
 | 
			
		||||
        c = os.path.splitext(stream.name)[0]+'.opf'
 | 
			
		||||
        if os.access(c, os.R_OK):
 | 
			
		||||
            opf = opf_metadata(os.path.abspath(c))
 | 
			
		||||
        
 | 
			
		||||
    if use_libprs_metadata and getattr(opf, 'libprs_id', None) is not None:
 | 
			
		||||
        return opf
 | 
			
		||||
    
 | 
			
		||||
    try:
 | 
			
		||||
        func = eval(stream_type + '_metadata')
 | 
			
		||||
        mi = func(stream)
 | 
			
		||||
    except NameError:
 | 
			
		||||
        mi = MetaInformation(None, None)
 | 
			
		||||
        
 | 
			
		||||
    name = os.path.basename(stream.name) if hasattr(stream, 'name') else ''
 | 
			
		||||
    name = os.path.basename(getattr(stream, 'name', ''))
 | 
			
		||||
    base = metadata_from_filename(name)
 | 
			
		||||
    if not base.authors:
 | 
			
		||||
        base.authors = ['Unknown']
 | 
			
		||||
    if not base.title:
 | 
			
		||||
        base.title = 'Unknown'
 | 
			
		||||
    base.smart_update(mi)
 | 
			
		||||
    if hasattr(stream, 'name'):
 | 
			
		||||
        opfpath = os.path.abspath(os.path.splitext(stream.name)[0]+'.opf')
 | 
			
		||||
        if os.access(opfpath, os.R_OK):
 | 
			
		||||
            mi = opf_metadata(opfpath)
 | 
			
		||||
            if mi is not None:
 | 
			
		||||
                base.smart_update(mi)
 | 
			
		||||
    if opf is not None:
 | 
			
		||||
        base.update(opf)
 | 
			
		||||
    
 | 
			
		||||
    return base
 | 
			
		||||
 | 
			
		||||
def set_metadata(stream, mi, stream_type='lrf'):
 | 
			
		||||
@ -125,12 +158,3 @@ def opf_metadata(opfpath):
 | 
			
		||||
            return mi
 | 
			
		||||
    except:
 | 
			
		||||
        pass
 | 
			
		||||
    
 | 
			
		||||
    
 | 
			
		||||
def libprs_metadata(name):
 | 
			
		||||
    if os.path.basename(name) != 'metadata.opf':
 | 
			
		||||
        name = os.path.join(os.path.dirname(name), 'metadata.opf')
 | 
			
		||||
    name = os.path.abspath(name)
 | 
			
		||||
    if os.access(name, os.R_OK):
 | 
			
		||||
        return opf_metadata(name)
 | 
			
		||||
    
 | 
			
		||||
@ -304,12 +304,12 @@ class Main(MainWindow, Ui_MainWindow):
 | 
			
		||||
        
 | 
			
		||||
        if duplicates:
 | 
			
		||||
            files = _('<p>Books with the same title as the following already exist in the database. Add them anyway?<ul>')
 | 
			
		||||
            for mi, path in duplicates:
 | 
			
		||||
            for mi, formats in duplicates:
 | 
			
		||||
                files += '<li>'+mi.title+'</li>\n'
 | 
			
		||||
            d = question_dialog(self, _('Duplicates found!'), files+'</ul></p>')
 | 
			
		||||
            if d.exec_() == QMessageBox.Yes:
 | 
			
		||||
                for mi, path in duplicates:
 | 
			
		||||
                    self.library_view.model().db.import_book_directory(path, add_duplicates=True)
 | 
			
		||||
                for mi, formats in duplicates:
 | 
			
		||||
                    self.library_view.model().db.import_book(mi, formats )
 | 
			
		||||
        
 | 
			
		||||
        self.library_view.model().resort()
 | 
			
		||||
        self.library_view.model().research()
 | 
			
		||||
 | 
			
		||||
@ -20,7 +20,7 @@ import datetime, re, os, cPickle, traceback
 | 
			
		||||
from zlib import compress, decompress
 | 
			
		||||
 | 
			
		||||
from libprs500 import sanitize_file_name
 | 
			
		||||
from libprs500.ebooks.metadata.meta import set_metadata, get_metadata
 | 
			
		||||
from libprs500.ebooks.metadata.meta import set_metadata, metadata_from_formats
 | 
			
		||||
from libprs500.ebooks.metadata.opf import OPFCreator
 | 
			
		||||
from libprs500.ebooks.metadata import MetaInformation
 | 
			
		||||
from libprs500.ebooks import BOOK_EXTENSIONS
 | 
			
		||||
@ -1325,24 +1325,26 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
 | 
			
		||||
                id = str(self.id(idx))
 | 
			
		||||
                if not single_dir and not os.path.exists(tpath):
 | 
			
		||||
                    os.mkdir(tpath)
 | 
			
		||||
                
 | 
			
		||||
                name = au + ' - ' + title if byauthor else title + ' - ' + au
 | 
			
		||||
                name += '_'+id
 | 
			
		||||
                base  = dir if single_dir else tpath
 | 
			
		||||
                
 | 
			
		||||
                mi = OPFCreator(self.get_metadata(idx))
 | 
			
		||||
                cover = self.cover(idx)
 | 
			
		||||
                if not single_dir:
 | 
			
		||||
                    if cover is not None:
 | 
			
		||||
                        f = open(os.path.join(tpath, 'cover.jpg'), 'wb')
 | 
			
		||||
                        f.write(cover)
 | 
			
		||||
                        mi.cover = 'cover.jpg'
 | 
			
		||||
                        f.close()
 | 
			
		||||
                    f = open(os.path.join(tpath, 'metadata.opf'), 'wb')
 | 
			
		||||
                    mi.write(f)
 | 
			
		||||
                    f.close()
 | 
			
		||||
                if cover is not None:
 | 
			
		||||
                    cname = name + '.jpg'
 | 
			
		||||
                    cpath = os.path.join(base, cname)
 | 
			
		||||
                    open(cpath, 'wb').write(cover)
 | 
			
		||||
                    mi.cover = cname
 | 
			
		||||
                f = open(os.path.join(base, name+'.opf'), 'wb')
 | 
			
		||||
                mi.write(f)
 | 
			
		||||
                f.close()
 | 
			
		||||
                
 | 
			
		||||
                for fmt in self.formats(idx).split(','):
 | 
			
		||||
                    data = self.format(idx, fmt)
 | 
			
		||||
                    name = au + ' - ' + title if byauthor else title + ' - ' + au
 | 
			
		||||
                    fname = name +'_'+id+'.'+fmt.lower()
 | 
			
		||||
                    fname = name +'.'+fmt.lower()
 | 
			
		||||
                    fname = sanitize_file_name(fname)
 | 
			
		||||
                    base  = dir if single_dir else tpath 
 | 
			
		||||
                    f = open(os.path.join(base, fname), 'w+b')
 | 
			
		||||
                    f.write(data)
 | 
			
		||||
                    f.flush()
 | 
			
		||||
@ -1355,71 +1357,15 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
 | 
			
		||||
                    f.close()
 | 
			
		||||
                    
 | 
			
		||||
    
 | 
			
		||||
    def import_book_directory_multiple(self, dirpath, add_duplicates=False):
 | 
			
		||||
        mi = MetaInformation(None, None)
 | 
			
		||||
        dirpath = os.path.abspath(dirpath)
 | 
			
		||||
        duplicates = []
 | 
			
		||||
        for path in os.listdir(dirpath):
 | 
			
		||||
            path = os.path.join(dirpath, path)
 | 
			
		||||
            if os.path.isdir(path) or not os.access(path, os.R_OK):
 | 
			
		||||
                continue
 | 
			
		||||
            ext = os.path.splitext(path)[1]
 | 
			
		||||
            if not ext:
 | 
			
		||||
                continue
 | 
			
		||||
            ext = ext[1:].lower()
 | 
			
		||||
            if ext not in BOOK_EXTENSIONS:
 | 
			
		||||
                continue
 | 
			
		||||
            stream = open(path, 'rb')
 | 
			
		||||
            mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=False))
 | 
			
		||||
            if mi.title is None: 
 | 
			
		||||
                continue
 | 
			
		||||
            if not add_duplicates and self.conn.execute('SELECT id FROM books where title=?', (mi.title,)).fetchone():
 | 
			
		||||
                duplicates.append((mi, path))
 | 
			
		||||
                continue
 | 
			
		||||
            series_index = 1 if mi.series_index is None else mi.series_index
 | 
			
		||||
            obj = self.conn.execute('INSERT INTO books(title, uri, series_index) VALUES (?, ?, ?)', 
 | 
			
		||||
                              (mi.title, None, series_index))
 | 
			
		||||
            id = obj.lastrowid
 | 
			
		||||
            self.conn.commit()
 | 
			
		||||
            self.set_metadata(id, mi)
 | 
			
		||||
            stream.seek(0, 2)
 | 
			
		||||
            usize = stream.tell()
 | 
			
		||||
            stream.seek(0)
 | 
			
		||||
            self.conn.execute('INSERT INTO data(book, format, uncompressed_size, data) VALUES (?,?,?,?)',
 | 
			
		||||
                              (id, ext, usize, sqlite.Binary(compress(stream.read()))))
 | 
			
		||||
            self.conn.commit()
 | 
			
		||||
        return duplicates
 | 
			
		||||
                      
 | 
			
		||||
    
 | 
			
		||||
    def import_book_directory(self, dirpath, add_duplicates=False):
 | 
			
		||||
        mi = MetaInformation(None, None)
 | 
			
		||||
        dirpath = os.path.abspath(dirpath)
 | 
			
		||||
        formats = []
 | 
			
		||||
        for path in os.listdir(dirpath):
 | 
			
		||||
            path = os.path.join(dirpath, path)
 | 
			
		||||
            if os.path.isdir(path) or not os.access(path, os.R_OK):
 | 
			
		||||
                continue
 | 
			
		||||
            ext = os.path.splitext(path)[1]
 | 
			
		||||
            if not ext:
 | 
			
		||||
                continue
 | 
			
		||||
            ext = ext[1:].lower()
 | 
			
		||||
            if ext not in BOOK_EXTENSIONS:
 | 
			
		||||
                continue
 | 
			
		||||
            f = open(path, 'rb')
 | 
			
		||||
            mi.smart_update(get_metadata(f, stream_type=ext, use_libprs_metadata=True))
 | 
			
		||||
            f.close()
 | 
			
		||||
            formats.append((ext, path))
 | 
			
		||||
        if mi.title is None or not formats:
 | 
			
		||||
            return
 | 
			
		||||
        if not add_duplicates and self.conn.execute('SELECT id FROM books where title=?', (mi.title,)).fetchone():
 | 
			
		||||
            return mi, dirpath
 | 
			
		||||
    def import_book(self, mi, formats):
 | 
			
		||||
        series_index = 1 if mi.series_index is None else mi.series_index
 | 
			
		||||
        obj = self.conn.execute('INSERT INTO books(title, uri, series_index) VALUES (?, ?, ?)', 
 | 
			
		||||
                          (mi.title, None, series_index))
 | 
			
		||||
        id = obj.lastrowid
 | 
			
		||||
        self.conn.commit()
 | 
			
		||||
        self.set_metadata(id, mi)
 | 
			
		||||
        for ext, path in formats:
 | 
			
		||||
        for path in formats:
 | 
			
		||||
            ext = os.path.splitext(path)[1][1:].lower()
 | 
			
		||||
            stream = open(path, 'rb')
 | 
			
		||||
            stream.seek(0, 2)
 | 
			
		||||
            usize = stream.tell()
 | 
			
		||||
@ -1428,6 +1374,66 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
 | 
			
		||||
                              (id, ext, usize, sqlite.Binary(compress(stream.read()))))
 | 
			
		||||
        self.conn.commit()
 | 
			
		||||
    
 | 
			
		||||
    def import_book_directory_multiple(self, dirpath):
        """
        Import the books found directly inside `dirpath`, assuming the
        directory may hold several books. Files that share the same path
        minus extension are treated as different formats of one book.

        Sub-directories, unreadable files, files without an extension and
        files whose extension is not in BOOK_EXTENSIONS are ignored. Books
        whose merged metadata has no title are skipped.

        Returns a list of (mi, formats) tuples for the books that were not
        imported because has_book() reported them as already present.
        """
        dirpath = os.path.abspath(dirpath)
        duplicates = []
        books = {}
        for path in os.listdir(dirpath):
            path = os.path.abspath(os.path.join(dirpath, path))
            if os.path.isdir(path) or not os.access(path, os.R_OK):
                continue
            key, ext = os.path.splitext(path)
            if not ext or ext[1:].lower() not in BOOK_EXTENSIONS:
                continue
            # Group every format of a book under its extension-less path
            books.setdefault(key, []).append(path)

        for formats in books.values():
            mi = metadata_from_formats(formats)
            if mi.title is None:
                continue
            if self.has_book(mi):
                duplicates.append((mi, formats))
                continue
            self.import_book(mi, formats)
        return duplicates
 | 
			
		||||
                      
 | 
			
		||||
    
 | 
			
		||||
    def import_book_directory(self, dirpath):
        """
        Import the contents of `dirpath` as one book, treating every readable
        file directly inside it whose extension is in BOOK_EXTENSIONS as a
        format of that book.

        Returns None when there is nothing to import (no matching files, or
        the merged metadata has no title) and also after a successful import.
        Returns the one-element list [(mi, formats)] when has_book() reports
        that the book already exists; nothing is imported in that case.
        """
        dirpath = os.path.abspath(dirpath)
        book_files = []

        for entry in os.listdir(dirpath):
            candidate = os.path.abspath(os.path.join(dirpath, entry))
            if os.path.isdir(candidate) or not os.access(candidate, os.R_OK):
                continue
            suffix = os.path.splitext(candidate)[1]
            if suffix and suffix[1:].lower() in BOOK_EXTENSIONS:
                book_files.append(candidate)

        if not book_files:
            return None

        mi = metadata_from_formats(book_files)
        if mi.title is None:
            return None

        if self.has_book(mi):
            return [(mi, book_files)]

        self.import_book(mi, book_files)
        return None
 | 
			
		||||
            
 | 
			
		||||
                    
 | 
			
		||||
    def has_book(self, mi):
        """
        Return True if the books table already has a row whose title matches
        `mi.title` (as compared by the database), False otherwise.
        """
        cursor = self.conn.execute('SELECT id FROM books where title=?', (mi.title,))
        row = cursor.fetchone()
        return True if row else False
 | 
			
		||||
    
 | 
			
		||||
    def recursive_import(self, root, single_book_per_directory=True):
 | 
			
		||||
        root = os.path.abspath(root)
 | 
			
		||||
@ -1435,10 +1441,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
 | 
			
		||||
        for dirpath in os.walk(root):
 | 
			
		||||
            res = self.import_book_directory(dirpath[0]) if single_book_per_directory else self.import_book_directory_multiple(dirpath[0])
 | 
			
		||||
            if res is not None:
 | 
			
		||||
                if single_book_per_directory:
 | 
			
		||||
                    duplicates.append(res)
 | 
			
		||||
                else:
 | 
			
		||||
                    duplicates.extend(res)
 | 
			
		||||
                duplicates.extend(res)
 | 
			
		||||
        return duplicates
 | 
			
		||||
                
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user