diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py index f741d2201d..5e8edc0c81 100644 --- a/src/calibre/ebooks/metadata/__init__.py +++ b/src/calibre/ebooks/metadata/__init__.py @@ -20,7 +20,7 @@ def string_to_authors(raw): raw = raw.replace('&&', u'\uffff') raw = _author_pat.sub('&', raw) authors = [a.strip().replace(u'\uffff', '&') for a in raw.split('&')] - return authors + return [a for a in authors if a] def authors_to_string(authors): if authors is not None: diff --git a/src/calibre/ebooks/metadata/topaz.py b/src/calibre/ebooks/metadata/topaz.py index 07c7af3bf6..6fe858df53 100644 --- a/src/calibre/ebooks/metadata/topaz.py +++ b/src/calibre/ebooks/metadata/topaz.py @@ -4,10 +4,9 @@ __copyright__ = '2010, Greg Riker ' __docformat__ = 'restructuredtext en' ''' Read/write metadata from Amazon's topaz format ''' -import copy, StringIO, sys -from struct import pack, unpack +import StringIO, sys +from struct import pack -from calibre import prints from calibre.ebooks.metadata import MetaInformation class StreamSlicer(object): @@ -200,7 +199,6 @@ class MetadataUpdater(object): # Build a dict of topaz_header records topaz_headers = {} for x in range(self.header_records): - c_marker = self.data[offset] offset += 1 taglen, consumed = self.decode_vwi(self.data[offset:offset+4]) offset += consumed @@ -259,7 +257,6 @@ class MetadataUpdater(object): self.metadata = {} for x in range(self.md_header['num_recs']): - md_record = {} taglen, consumed = self.decode_vwi(self.data[offset:offset+4]) offset += consumed tag = self.data[offset:offset+taglen] @@ -380,7 +377,6 @@ def set_metadata(stream, mi): return if __name__ == '__main__': - import cStringIO, sys #print get_metadata(open(sys.argv[1], 'rb')) mi = MetaInformation(title="My New Title", authors=['Smith, John']) set_metadata(open(sys.argv[1], 'rb'), mi) diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py index 0b37fe2515..3f9d1925b5 100644 --- a/src/calibre/gui2/add.py +++ b/src/calibre/gui2/add.py @@ -1,7 +1,7 @@ ''' UI for adding books to the database and saving books to disk ''' -import os, shutil, time +import os, shutil, time, re from Queue import Queue, Empty from threading import Thread @@ -13,9 +13,10 @@ from calibre.gui2 import question_dialog, error_dialog, info_dialog from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata import MetaInformation from calibre.constants import preferred_encoding, filesystem_encoding +from calibre.utils.config import prefs class DuplicatesAdder(QThread): - + # Add duplicate books def __init__(self, parent, db, duplicates, db_adder): QThread.__init__(self, parent) self.db, self.db_adder = db, db_adder @@ -27,6 +28,7 @@ class DuplicatesAdder(QThread): formats = [f for f in formats if not f.lower().endswith('.opf')] id = self.db.create_book_entry(mi, cover=cover, add_duplicates=True) + # here we add all the formats for dupe book record created above self.db_adder.add_formats(id, formats) self.db_adder.number_of_books_added += 1 self.emit(SIGNAL('added(PyQt_PyObject)'), count) @@ -90,6 +92,15 @@ class DBAdder(Thread): self.daemon = True self.input_queue = Queue() self.output_queue = Queue() + self.fuzzy_title_patterns = [(re.compile(pat), repl) for pat, repl in + [ + (r'[\[\](){}<>\'";,:#]', ''), + (r'^(the|a|an) ', ''), + (r'[-._]', ' '), + (r'\s+', ' ') + ] + ] + self.merged_books = set([]) def run(self): while not self.end: @@ -125,6 +136,34 @@ class DBAdder(Thread): fmts[-1] = fmt return fmts + def fuzzy_title(self, title): + title = title.strip().lower() + for pat, repl in self.fuzzy_title_patterns: + title = pat.sub(repl, title) + return title + + def find_identical_books(self, mi): + identical_book_ids = set([]) + if mi.authors: + try: + query = u' and '.join([u'author:"=%s"'%(a.replace('"', '')) for a in + mi.authors]) + except ValueError: + return identical_book_ids + try: + book_ids = self.db.data.parse(query) + except: + import traceback + traceback.print_exc() + return identical_book_ids + for book_id in book_ids: + fbook_title = self.db.title(book_id, index_is_id=True) + fbook_title = self.fuzzy_title(fbook_title) + mbook_title = self.fuzzy_title(mi.title) + if fbook_title == mbook_title: + identical_book_ids.add(book_id) + return identical_book_ids + def add(self, id, opf, cover, name): formats = self.ids.pop(id) if opf.endswith('.error'): @@ -145,25 +184,38 @@ class DBAdder(Thread): if self.db is not None: if cover: cover = open(cover, 'rb').read() - id = self.db.create_book_entry(mi, cover=cover, add_duplicates=False) - self.number_of_books_added += 1 - if id is None: - self.duplicates.append((mi, cover, formats)) + orig_formats = formats + formats = [f for f in formats if not f.lower().endswith('.opf')] + if prefs['add_formats_to_existing']: + identical_book_list = self.find_identical_books(mi) + + if identical_book_list: # books with same author and nearly same title exist in db + self.merged_books.add(mi.title) + for identical_book in identical_book_list: + self.add_formats(identical_book, formats, replace=False) + else: + id = self.db.create_book_entry(mi, cover=cover, add_duplicates=True) + self.number_of_books_added += 1 + self.add_formats(id, formats) else: - formats = [f for f in formats if not f.lower().endswith('.opf')] - self.add_formats(id, formats) + id = self.db.create_book_entry(mi, cover=cover, add_duplicates=False) + self.number_of_books_added += 1 + if id is None: + self.duplicates.append((mi, cover, orig_formats)) + else: + self.add_formats(id, formats) else: self.names.append(name) self.paths.append(formats[0]) self.infos.append(mi) return mi.title - def add_formats(self, id, formats): + def add_formats(self, id, formats, replace=True): for path in formats: fmt = os.path.splitext(path)[-1].replace('.', '').upper() with open(path, 'rb') as f: self.db.add_format(id, fmt, f, index_is_id=True, - notify=False) + notify=False, replace=replace) class Adder(QObject): @@ -330,6 +382,11 @@ class Adder(QObject): return getattr(getattr(self, 'db_adder', None), 'number_of_books_added', 0) + @property + def merged_books(self): + return getattr(getattr(self, 'db_adder', None), 'merged_books', + set([])) + @property def critical(self): return getattr(getattr(self, 'db_adder', None), 'critical', diff --git a/src/calibre/gui2/dialogs/config/add_save.py b/src/calibre/gui2/dialogs/config/add_save.py index 3c1e30ff01..aff995d84f 100644 --- a/src/calibre/gui2/dialogs/config/add_save.py +++ b/src/calibre/gui2/dialogs/config/add_save.py @@ -44,6 +44,7 @@ class AddSave(QTabWidget, Ui_TabWidget): self.filename_pattern = FilenamePattern(self) self.metadata_box.layout().insertWidget(0, self.filename_pattern) self.opt_swap_author_names.setChecked(prefs['swap_author_names']) + self.opt_add_formats_to_existing.setChecked(prefs['add_formats_to_existing']) help = '\n'.join(textwrap.wrap(c.get_option('template').help, 75)) self.save_template.initialize('save_to_disk', opts.template, help) self.send_template.initialize('send_to_device', opts.send_template, help) @@ -69,6 +70,7 @@ class AddSave(QTabWidget, Ui_TabWidget): pattern = self.filename_pattern.commit() prefs['filename_pattern'] = pattern prefs['swap_author_names'] = bool(self.opt_swap_author_names.isChecked()) + prefs['add_formats_to_existing'] = bool(self.opt_add_formats_to_existing.isChecked()) return True diff --git a/src/calibre/gui2/dialogs/config/add_save.ui b/src/calibre/gui2/dialogs/config/add_save.ui index fbf9ceaf2a..7fda2dbc7f 100644 --- a/src/calibre/gui2/dialogs/config/add_save.ui +++ b/src/calibre/gui2/dialogs/config/add_save.ui @@ -6,7 +6,7 @@ 0 0 - 645 + 588 516 @@ -49,6 +49,19 @@ + + + If an existing book with a similar title and author is found that does not have the format being added, the format is added +to the existing book, instead of creating a new entry. If the existing book already has the format, then it is silently ignored. + +Title match ignores leading indefinite articles ("the", "a", "an"), punctuation, case, etc. Author match is exact. + + + If books with similar titles and authors found, &merge the new files automatically + + + + &Configure metadata from file name diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index 46c72a540d..8db2a52a3c 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -24,7 +24,7 @@ from PyQt4.QtSvg import QSvgRenderer from calibre import prints, patheq, strftime from calibre.constants import __version__, __appname__, isfrozen, islinux, \ - iswindows, isosx, filesystem_encoding + iswindows, isosx, filesystem_encoding, preferred_encoding from calibre.utils.filenames import ascii_filename from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.config import prefs, dynamic @@ -1244,6 +1244,13 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): self.library_view.model().books_added(self._adder.number_of_books_added) if hasattr(self, 'db_images'): self.db_images.reset() + if getattr(self._adder, 'merged_books', False): + books = u'\n'.join([x if isinstance(x, unicode) else + x.decode(preferred_encoding, 'replace') for x in + self._adder.merged_books]) + info_dialog(self, _('Merged some books'), + _('Some duplicates were found and merged into the ' + 'following existing books:'), det_msg=books, show=True) if getattr(self._adder, 'critical', None): det_msg = [] for name, log in self._adder.critical.items(): diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 0580d76e51..750e600cee 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -998,12 +998,15 @@ class LibraryDatabase2(LibraryDatabase): return self.add_format(index, format, stream, index_is_id=index_is_id, path=path, notify=notify) - def add_format(self, index, format, stream, index_is_id=False, path=None, notify=True): + def add_format(self, index, format, stream, index_is_id=False, path=None, + notify=True, replace=True): id = index if index_is_id else self.id(index) if path is None: path = os.path.join(self.library_path, self.path(id, index_is_id=True)) name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False) if name: + if not replace: + return False self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format)) name = self.construct_file_name(id) ext = ('.' + format.lower()) if format else '' @@ -1021,6 +1024,7 @@ class LibraryDatabase2(LibraryDatabase): self.refresh_ids([id]) if notify: self.notify('metadata', [id]) + return True def delete_book(self, id, notify=True): ''' diff --git a/src/calibre/utils/config.py b/src/calibre/utils/config.py index 4bde124c40..316fc1de64 100644 --- a/src/calibre/utils/config.py +++ b/src/calibre/utils/config.py @@ -670,6 +670,8 @@ def _prefs(): help=_('The priority of worker processes')) c.add_opt('swap_author_names', default=False, help=_('Swap author first and last names when reading metadata')) + c.add_opt('add_formats_to_existing', default=False, + help=_('Add new formats to existing book records')) c.add_opt('migrated', default=False, help='For Internal use. Don\'t modify.') return c