Fix a regression in 2.10 that causes the "Adding books from sub-folders, one book per folder" mode to incorrectly add OPF files present in the sub-folders as an extra format. See #1392864 (Errors in book import in 2.10)

2025-07-09 03:04:10 -04:00 · 2014-11-15 10:35:01 +05:30 · 2014-11-15 10:35:01 +05:30 · 870ad92a0d
commit 870ad92a0d
parent 8291e02f12
2 changed files with 51 additions and 33 deletions
--- a/src/calibre/db/adding.py
+++ b/src/calibre/db/adding.py
@@ -7,44 +7,58 @@ __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

 import os
+from collections import defaultdict
+from future_builtins import map
+
 from calibre.ebooks import BOOK_EXTENSIONS

+def splitext(path):
+    key, ext = os.path.splitext(path)
+    return key, ext[1:].lower()
+
+def formats_ok(formats):
+    if formats and (len(formats) > 1 or tuple(formats.iterkeys()) != ('opf',)):
+        return True
+    return False
+
+def path_ok(path):
+    return not os.path.isdir(path) and os.access(path, os.R_OK)
+
+_metadata_extensions = None
+
+def metadata_extensions():
+    # Set of all known book extensions + OPF (the OPF is used to read metadata,
+    # but not actually added)
+    global _metadata_extensions
+    if _metadata_extensions is None:
+        _metadata_extensions =  frozenset(map(unicode, BOOK_EXTENSIONS)) | {'opf'}
+    return _metadata_extensions
+
+def listdir(root):
+    for path in os.listdir(root):
+        yield os.path.abspath(os.path.join(root, path))
+
 def find_books_in_directory(dirpath, single_book_per_directory):
    dirpath = os.path.abspath(dirpath)
+    book_extentions = metadata_extensions()
    if single_book_per_directory:
-        formats = []
-        for path in os.listdir(dirpath):
-            path = os.path.abspath(os.path.join(dirpath, path))
-            if os.path.isdir(path) or not os.access(path, os.R_OK):
-                continue
-            ext = os.path.splitext(path)[1]
-            if not ext:
-                continue
-            ext = ext[1:].lower()
-            if ext not in BOOK_EXTENSIONS and ext != 'opf':
-                continue
-            formats.append(path)
-        yield formats
+        formats = {}
+        for path in listdir(dirpath):
+            key, ext = splitext(path)
+            if ext in book_extentions and path_ok(path):
+                formats[ext] = path
+        if formats_ok(formats):
+            yield list(formats.itervalues())
    else:
-        books = {}
-        for path in os.listdir(dirpath):
-            path = os.path.abspath(os.path.join(dirpath, path))
-            if os.path.isdir(path) or not os.access(path, os.R_OK):
-                continue
-            ext = os.path.splitext(path)[1]
-            if not ext:
-                continue
-            ext = ext[1:].lower()
-            if ext not in BOOK_EXTENSIONS:
-                continue
+        books = defaultdict(dict)
+        for path in listdir(dirpath):
+            key, ext = splitext(path)
+            if ext in book_extentions and path_ok(path):
+                books[icu_lower(key) if isinstance(key, unicode) else key.lower()][ext] = path

-            key = os.path.splitext(path)[0]
-            if key not in books:
-                books[key] = []
-            books[key].append(path)
-
-        for formats in books.values():
-            yield formats
+        for formats in books.itervalues():
+            if formats_ok(formats):
+                yield list(formats.itervalues())

 def import_book_directory_multiple(db, dirpath, callback=None,
        added_ids=None):
--- a/src/calibre/gui2/add.py
+++ b/src/calibre/gui2/add.py
@@ -62,6 +62,7 @@ class Adder(QObject):
            return
        QObject.__init__(self, parent)
        self.single_book_per_directory = single_book_per_directory
+        self.ignore_opf = False
        self.list_of_archives = list_of_archives
        self.callback = callback
        self.add_formats_to_existing = prefs['add_formats_to_existing']
@@ -123,7 +124,6 @@ class Adder(QObject):
                for files in find_books_in_directory(dirpath, self.single_book_per_directory):
                    if self.abort_scan:
                        return
-                    if files:
                    self.file_groups[len(self.file_groups)] = files

        def extract(source):
@@ -145,6 +145,7 @@ class Adder(QObject):
        try:
            if isinstance(self.source, basestring):
                find_files(self.source)
+                self.ignore_opf = True
            else:
                unreadable_files = []
                for path in self.source:
@@ -153,6 +154,7 @@ class Adder(QObject):
                    if os.access(path, os.R_OK):
                        if self.list_of_archives:
                            find_files(extract(path))
+                            self.ignore_opf = True
                        else:
                            self.file_groups[len(self.file_groups)] = [path]
                    else:
@@ -377,6 +379,8 @@ class Adder(QObject):
        for fmt, path in fmap.iteritems():
            # The onimport plugins have already been run by the read metadata
            # worker
+            if self.ignore_opf and fmt.lower() == 'opf':
+                continue
            try:
                if self.db.add_format(book_id, fmt, path, run_hooks=False, replace=replace):
                    run_plugins_on_postimport(self.dbref(), book_id, fmt)