Fix #6641 (Copy to library ignores "merge" preference)

This commit is contained in:
Kovid Goyal 2010-09-01 11:05:11 -06:00
commit f4adce1a95
3 changed files with 61 additions and 44 deletions

View File

@ -5,6 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os
from functools import partial from functools import partial
from threading import Thread from threading import Thread
@ -13,6 +14,7 @@ from PyQt4.Qt import QMenu, QToolButton
from calibre.gui2.actions import InterfaceAction from calibre.gui2.actions import InterfaceAction
from calibre.gui2 import error_dialog, Dispatcher from calibre.gui2 import error_dialog, Dispatcher
from calibre.gui2.dialogs.progress import ProgressDialog from calibre.gui2.dialogs.progress import ProgressDialog
from calibre.utils.config import prefs
class Worker(Thread): class Worker(Thread):
@ -38,6 +40,13 @@ class Worker(Thread):
self.done() self.done()
def add_formats(self, id, paths, newdb, replace=True):
for path in paths:
fmt = os.path.splitext(path)[-1].replace('.', '').upper()
with open(path, 'rb') as f:
newdb.add_format(id, fmt, f, index_is_id=True,
notify=False, replace=replace)
def doit(self): def doit(self):
from calibre.library.database2 import LibraryDatabase2 from calibre.library.database2 import LibraryDatabase2
newdb = LibraryDatabase2(self.loc) newdb = LibraryDatabase2(self.loc)
@ -49,12 +58,18 @@ class Worker(Thread):
else: fmts = fmts.split(',') else: fmts = fmts.split(',')
paths = [self.db.format_abspath(x, fmt, index_is_id=True) for fmt in paths = [self.db.format_abspath(x, fmt, index_is_id=True) for fmt in
fmts] fmts]
newdb.import_book(mi, paths, notify=False, import_hooks=False) added = False
co = self.db.conversion_options(x, 'PIPE') if prefs['add_formats_to_existing']:
if co is not None: identical_book_list = newdb.find_identical_books(mi)
newdb.set_conversion_options(x, 'PIPE', co) if identical_book_list: # books with same author and nearly same title exist in newdb
added = True
for identical_book in identical_book_list:
self.add_formats(identical_book, paths, newdb, replace=False)
if not added:
newdb.import_book(mi, paths, notify=False, import_hooks=False)
co = self.db.conversion_options(x, 'PIPE')
if co is not None:
newdb.set_conversion_options(x, 'PIPE', co)
class CopyToLibraryAction(InterfaceAction): class CopyToLibraryAction(InterfaceAction):

View File

@ -1,7 +1,7 @@
''' '''
UI for adding books to the database and saving books to disk UI for adding books to the database and saving books to disk
''' '''
import os, shutil, time, re import os, shutil, time
from Queue import Queue, Empty from Queue import Queue, Empty
from threading import Thread from threading import Thread
@ -94,14 +94,6 @@ class DBAdder(Thread): # {{{
self.daemon = True self.daemon = True
self.input_queue = Queue() self.input_queue = Queue()
self.output_queue = Queue() self.output_queue = Queue()
self.fuzzy_title_patterns = [(re.compile(pat), repl) for pat, repl in
[
(r'[\[\](){}<>\'";,:#]', ''),
(r'^(the|a|an) ', ''),
(r'[-._]', ' '),
(r'\s+', ' ')
]
]
self.merged_books = set([]) self.merged_books = set([])
def run(self): def run(self):
@ -138,33 +130,6 @@ class DBAdder(Thread): # {{{
fmts[-1] = fmt fmts[-1] = fmt
return fmts return fmts
def fuzzy_title(self, title):
title = title.strip().lower()
for pat, repl in self.fuzzy_title_patterns:
title = pat.sub(repl, title)
return title
def find_identical_books(self, mi):
identical_book_ids = set([])
if mi.authors:
try:
query = u' and '.join([u'author:"=%s"'%(a.replace('"', '')) for a in
mi.authors])
except ValueError:
return identical_book_ids
try:
book_ids = self.db.data.parse(query)
except:
import traceback
traceback.print_exc()
return identical_book_ids
for book_id in book_ids:
fbook_title = self.db.title(book_id, index_is_id=True)
fbook_title = self.fuzzy_title(fbook_title)
mbook_title = self.fuzzy_title(mi.title)
if fbook_title == mbook_title:
identical_book_ids.add(book_id)
return identical_book_ids
def add(self, id, opf, cover, name): def add(self, id, opf, cover, name):
formats = self.ids.pop(id) formats = self.ids.pop(id)
@ -191,7 +156,7 @@ class DBAdder(Thread): # {{{
orig_formats = formats orig_formats = formats
formats = [f for f in formats if not f.lower().endswith('.opf')] formats = [f for f in formats if not f.lower().endswith('.opf')]
if prefs['add_formats_to_existing']: if prefs['add_formats_to_existing']:
identical_book_list = self.find_identical_books(mi) identical_book_list = self.db.find_identical_books(mi)
if identical_book_list: # books with same author and nearly same title exist in db if identical_book_list: # books with same author and nearly same title exist in db
self.merged_books.add(mi.title) self.merged_books.add(mi.title)

View File

@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
''' '''
The database used to store ebook metadata The database used to store ebook metadata
''' '''
import os, sys, shutil, cStringIO, glob, time, functools, traceback import os, sys, shutil, cStringIO, glob, time, functools, traceback, re
from itertools import repeat from itertools import repeat
from math import floor from math import floor
@ -550,6 +550,43 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
return bool(self.conn.get('SELECT id FROM books where title=?', (title,), all=False)) return bool(self.conn.get('SELECT id FROM books where title=?', (title,), all=False))
return False return False
def find_identical_books(self, mi):
fuzzy_title_patterns = [(re.compile(pat), repl) for pat, repl in
[
(r'[\[\](){}<>\'";,:#]', ''),
(r'^(the|a|an) ', ''),
(r'[-._]', ' '),
(r'\s+', ' ')
]
]
def fuzzy_title(title):
title = title.strip().lower()
for pat, repl in fuzzy_title_patterns:
title = pat.sub(repl, title)
return title
identical_book_ids = set([])
if mi.authors:
try:
query = u' and '.join([u'author:"=%s"'%(a.replace('"', '')) for a in
mi.authors])
except ValueError:
return identical_book_ids
try:
book_ids = self.data.parse(query)
except:
import traceback
traceback.print_exc()
return identical_book_ids
for book_id in book_ids:
fbook_title = self.title(book_id, index_is_id=True)
fbook_title = fuzzy_title(fbook_title)
mbook_title = fuzzy_title(mi.title)
if fbook_title == mbook_title:
identical_book_ids.add(book_id)
return identical_book_ids
def has_cover(self, index, index_is_id=False): def has_cover(self, index, index_is_id=False):
id = index if index_is_id else self.id(index) id = index if index_is_id else self.id(index)
path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg') path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')