mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement #5016 (Add formats to existing ebook records)
This commit is contained in:
commit
a0f2163403
@ -20,7 +20,7 @@ def string_to_authors(raw):
|
||||
raw = raw.replace('&&', u'\uffff')
|
||||
raw = _author_pat.sub('&', raw)
|
||||
authors = [a.strip().replace(u'\uffff', '&') for a in raw.split('&')]
|
||||
return authors
|
||||
return [a for a in authors if a]
|
||||
|
||||
def authors_to_string(authors):
|
||||
if authors is not None:
|
||||
|
@ -4,10 +4,9 @@ __copyright__ = '2010, Greg Riker <griker@hotmail.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
''' Read/write metadata from Amazon's topaz format '''
|
||||
import copy, StringIO, sys
|
||||
from struct import pack, unpack
|
||||
import StringIO, sys
|
||||
from struct import pack
|
||||
|
||||
from calibre import prints
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
|
||||
class StreamSlicer(object):
|
||||
@ -200,7 +199,6 @@ class MetadataUpdater(object):
|
||||
# Build a dict of topaz_header records
|
||||
topaz_headers = {}
|
||||
for x in range(self.header_records):
|
||||
c_marker = self.data[offset]
|
||||
offset += 1
|
||||
taglen, consumed = self.decode_vwi(self.data[offset:offset+4])
|
||||
offset += consumed
|
||||
@ -259,7 +257,6 @@ class MetadataUpdater(object):
|
||||
|
||||
self.metadata = {}
|
||||
for x in range(self.md_header['num_recs']):
|
||||
md_record = {}
|
||||
taglen, consumed = self.decode_vwi(self.data[offset:offset+4])
|
||||
offset += consumed
|
||||
tag = self.data[offset:offset+taglen]
|
||||
@ -380,7 +377,6 @@ def set_metadata(stream, mi):
|
||||
return
|
||||
|
||||
if __name__ == '__main__':
|
||||
import cStringIO, sys
|
||||
#print get_metadata(open(sys.argv[1], 'rb'))
|
||||
mi = MetaInformation(title="My New Title", authors=['Smith, John'])
|
||||
set_metadata(open(sys.argv[1], 'rb'), mi)
|
||||
|
@ -1,7 +1,7 @@
|
||||
'''
|
||||
UI for adding books to the database and saving books to disk
|
||||
'''
|
||||
import os, shutil, time
|
||||
import os, shutil, time, re
|
||||
from Queue import Queue, Empty
|
||||
from threading import Thread
|
||||
|
||||
@ -13,9 +13,10 @@ from calibre.gui2 import question_dialog, error_dialog, info_dialog
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.constants import preferred_encoding, filesystem_encoding
|
||||
from calibre.utils.config import prefs
|
||||
|
||||
class DuplicatesAdder(QThread):
|
||||
|
||||
# Add duplicate books
|
||||
def __init__(self, parent, db, duplicates, db_adder):
|
||||
QThread.__init__(self, parent)
|
||||
self.db, self.db_adder = db, db_adder
|
||||
@ -27,6 +28,7 @@ class DuplicatesAdder(QThread):
|
||||
formats = [f for f in formats if not f.lower().endswith('.opf')]
|
||||
id = self.db.create_book_entry(mi, cover=cover,
|
||||
add_duplicates=True)
|
||||
# Add all the formats to the duplicate book record created above
|
||||
self.db_adder.add_formats(id, formats)
|
||||
self.db_adder.number_of_books_added += 1
|
||||
self.emit(SIGNAL('added(PyQt_PyObject)'), count)
|
||||
@ -90,6 +92,15 @@ class DBAdder(Thread):
|
||||
self.daemon = True
|
||||
self.input_queue = Queue()
|
||||
self.output_queue = Queue()
|
||||
self.fuzzy_title_patterns = [(re.compile(pat), repl) for pat, repl in
|
||||
[
|
||||
(r'[\[\](){}<>\'";,:#]', ''),
|
||||
(r'^(the|a|an) ', ''),
|
||||
(r'[-._]', ' '),
|
||||
(r'\s+', ' ')
|
||||
]
|
||||
]
|
||||
self.merged_books = set([])
|
||||
|
||||
def run(self):
|
||||
while not self.end:
|
||||
@ -125,6 +136,34 @@ class DBAdder(Thread):
|
||||
fmts[-1] = fmt
|
||||
return fmts
|
||||
|
||||
def fuzzy_title(self, title):
    '''
    Reduce ``title`` to a normalised form used for approximate title
    comparison.

    The title is stripped and lower-cased, then every ``(pattern, repl)``
    pair in ``self.fuzzy_title_patterns`` is applied in order. Surrounding
    whitespace is removed from the result.

    :param title: the title string to normalise
    :return: the normalised title
    '''
    title = title.strip().lower()
    for pat, repl in self.fuzzy_title_patterns:
        title = pat.sub(repl, title)
    # A substitution can turn a leading/trailing character into a space
    # (e.g. the final '.' of "Book."). Strip again so that such titles
    # still compare equal to their unpunctuated form.
    return title.strip()
|
||||
|
||||
def find_identical_books(self, mi):
    '''
    Return the ids of books already in the database that look identical
    to ``mi``.

    Candidates are found with a search query that requires an
    ``author:"=..."`` term for every entry in ``mi.authors``. A candidate
    is kept when its title, normalised by ``self.fuzzy_title``, equals the
    normalised ``mi.title``.

    :param mi: metadata object; its ``authors`` and ``title`` attributes
               are read
    :return: set of matching book ids. The set is empty when ``mi`` has no
             authors, when the query cannot be built, or when the search
             fails (the traceback is printed in that case).
    '''
    identical_book_ids = set([])
    if not mi.authors:
        # Without authors there is nothing to search on.
        return identical_book_ids

    try:
        # Double quotes are dropped from author names because each name is
        # embedded inside a double-quoted search term.
        query = u' and '.join([u'author:"=%s"'%(a.replace('"', ''))
                               for a in mi.authors])
    except ValueError:
        return identical_book_ids

    try:
        book_ids = self.db.data.parse(query)
    except Exception:
        # Best effort: a failed search must not abort adding the book,
        # but interpreter-exit exceptions are no longer swallowed.
        import traceback
        traceback.print_exc()
        return identical_book_ids

    # The normalised title of the incoming book is the same for every
    # candidate. It is computed once, on the first candidate, so that
    # mi.title is still left untouched when there are no candidates.
    mbook_title = None
    for book_id in book_ids:
        if mbook_title is None:
            mbook_title = self.fuzzy_title(mi.title)
        fbook_title = self.fuzzy_title(
            self.db.title(book_id, index_is_id=True))
        if fbook_title == mbook_title:
            identical_book_ids.add(book_id)
    return identical_book_ids
|
||||
|
||||
def add(self, id, opf, cover, name):
|
||||
formats = self.ids.pop(id)
|
||||
if opf.endswith('.error'):
|
||||
@ -145,25 +184,38 @@ class DBAdder(Thread):
|
||||
if self.db is not None:
|
||||
if cover:
|
||||
cover = open(cover, 'rb').read()
|
||||
id = self.db.create_book_entry(mi, cover=cover, add_duplicates=False)
|
||||
self.number_of_books_added += 1
|
||||
if id is None:
|
||||
self.duplicates.append((mi, cover, formats))
|
||||
orig_formats = formats
|
||||
formats = [f for f in formats if not f.lower().endswith('.opf')]
|
||||
if prefs['add_formats_to_existing']:
|
||||
identical_book_list = self.find_identical_books(mi)
|
||||
|
||||
if identical_book_list: # books with same author and nearly same title exist in db
|
||||
self.merged_books.add(mi.title)
|
||||
for identical_book in identical_book_list:
|
||||
self.add_formats(identical_book, formats, replace=False)
|
||||
else:
|
||||
id = self.db.create_book_entry(mi, cover=cover, add_duplicates=True)
|
||||
self.number_of_books_added += 1
|
||||
self.add_formats(id, formats)
|
||||
else:
|
||||
formats = [f for f in formats if not f.lower().endswith('.opf')]
|
||||
self.add_formats(id, formats)
|
||||
id = self.db.create_book_entry(mi, cover=cover, add_duplicates=False)
|
||||
self.number_of_books_added += 1
|
||||
if id is None:
|
||||
self.duplicates.append((mi, cover, orig_formats))
|
||||
else:
|
||||
self.add_formats(id, formats)
|
||||
else:
|
||||
self.names.append(name)
|
||||
self.paths.append(formats[0])
|
||||
self.infos.append(mi)
|
||||
return mi.title
|
||||
|
||||
def add_formats(self, id, formats):
|
||||
def add_formats(self, id, formats, replace=True):
|
||||
for path in formats:
|
||||
fmt = os.path.splitext(path)[-1].replace('.', '').upper()
|
||||
with open(path, 'rb') as f:
|
||||
self.db.add_format(id, fmt, f, index_is_id=True,
|
||||
notify=False)
|
||||
notify=False, replace=replace)
|
||||
|
||||
|
||||
class Adder(QObject):
|
||||
@ -330,6 +382,11 @@ class Adder(QObject):
|
||||
return getattr(getattr(self, 'db_adder', None), 'number_of_books_added',
|
||||
0)
|
||||
|
||||
@property
def merged_books(self):
    '''
    The ``merged_books`` attribute of ``self.db_adder``.

    Falls back to an empty set when ``self`` has no ``db_adder`` attribute
    or when that object has no ``merged_books`` attribute.
    '''
    adder = getattr(self, 'db_adder', None)
    return getattr(adder, 'merged_books', set([]))
|
||||
|
||||
@property
|
||||
def critical(self):
|
||||
return getattr(getattr(self, 'db_adder', None), 'critical',
|
||||
|
@ -44,6 +44,7 @@ class AddSave(QTabWidget, Ui_TabWidget):
|
||||
self.filename_pattern = FilenamePattern(self)
|
||||
self.metadata_box.layout().insertWidget(0, self.filename_pattern)
|
||||
self.opt_swap_author_names.setChecked(prefs['swap_author_names'])
|
||||
self.opt_add_formats_to_existing.setChecked(prefs['add_formats_to_existing'])
|
||||
help = '\n'.join(textwrap.wrap(c.get_option('template').help, 75))
|
||||
self.save_template.initialize('save_to_disk', opts.template, help)
|
||||
self.send_template.initialize('send_to_device', opts.send_template, help)
|
||||
@ -69,6 +70,7 @@ class AddSave(QTabWidget, Ui_TabWidget):
|
||||
pattern = self.filename_pattern.commit()
|
||||
prefs['filename_pattern'] = pattern
|
||||
prefs['swap_author_names'] = bool(self.opt_swap_author_names.isChecked())
|
||||
prefs['add_formats_to_existing'] = bool(self.opt_add_formats_to_existing.isChecked())
|
||||
|
||||
return True
|
||||
|
||||
|
@ -6,7 +6,7 @@
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>645</width>
|
||||
<width>588</width>
|
||||
<height>516</height>
|
||||
</rect>
|
||||
</property>
|
||||
@ -49,6 +49,19 @@
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="0" colspan="2">
|
||||
<widget class="QCheckBox" name="opt_add_formats_to_existing">
|
||||
<property name="toolTip">
|
||||
<string>If an existing book with a similar title and author is found that does not have the format being added, the format is added
|
||||
to the existing book, instead of creating a new entry. If the existing book already has the format, then it is silently ignored.
|
||||
|
||||
Title match ignores leading indefinite articles ("the", "a", "an"), punctuation, case, etc. Author match is exact.</string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>If books with similar titles and authors found, &merge the new files automatically</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="0" colspan="2">
|
||||
<widget class="QGroupBox" name="metadata_box">
|
||||
<property name="title">
|
||||
<string>&Configure metadata from file name</string>
|
||||
|
@ -24,7 +24,7 @@ from PyQt4.QtSvg import QSvgRenderer
|
||||
|
||||
from calibre import prints, patheq, strftime
|
||||
from calibre.constants import __version__, __appname__, isfrozen, islinux, \
|
||||
iswindows, isosx, filesystem_encoding
|
||||
iswindows, isosx, filesystem_encoding, preferred_encoding
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.utils.config import prefs, dynamic
|
||||
@ -1244,6 +1244,13 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
||||
self.library_view.model().books_added(self._adder.number_of_books_added)
|
||||
if hasattr(self, 'db_images'):
|
||||
self.db_images.reset()
|
||||
if getattr(self._adder, 'merged_books', False):
|
||||
books = u'\n'.join([x if isinstance(x, unicode) else
|
||||
x.decode(preferred_encoding, 'replace') for x in
|
||||
self._adder.merged_books])
|
||||
info_dialog(self, _('Merged some books'),
|
||||
_('Some duplicates were found and merged into the '
|
||||
'following existing books:'), det_msg=books, show=True)
|
||||
if getattr(self._adder, 'critical', None):
|
||||
det_msg = []
|
||||
for name, log in self._adder.critical.items():
|
||||
|
@ -998,12 +998,15 @@ class LibraryDatabase2(LibraryDatabase):
|
||||
return self.add_format(index, format, stream,
|
||||
index_is_id=index_is_id, path=path, notify=notify)
|
||||
|
||||
def add_format(self, index, format, stream, index_is_id=False, path=None, notify=True):
|
||||
def add_format(self, index, format, stream, index_is_id=False, path=None,
|
||||
notify=True, replace=True):
|
||||
id = index if index_is_id else self.id(index)
|
||||
if path is None:
|
||||
path = os.path.join(self.library_path, self.path(id, index_is_id=True))
|
||||
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
|
||||
if name:
|
||||
if not replace:
|
||||
return False
|
||||
self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format))
|
||||
name = self.construct_file_name(id)
|
||||
ext = ('.' + format.lower()) if format else ''
|
||||
@ -1021,6 +1024,7 @@ class LibraryDatabase2(LibraryDatabase):
|
||||
self.refresh_ids([id])
|
||||
if notify:
|
||||
self.notify('metadata', [id])
|
||||
return True
|
||||
|
||||
def delete_book(self, id, notify=True):
|
||||
'''
|
||||
|
@ -670,6 +670,8 @@ def _prefs():
|
||||
help=_('The priority of worker processes'))
|
||||
c.add_opt('swap_author_names', default=False,
|
||||
help=_('Swap author first and last names when reading metadata'))
|
||||
c.add_opt('add_formats_to_existing', default=False,
|
||||
help=_('Add new formats to existing book records'))
|
||||
|
||||
c.add_opt('migrated', default=False, help='For Internal use. Don\'t modify.')
|
||||
return c
|
||||
|
Loading…
x
Reference in New Issue
Block a user