mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement #5016 (Add formats to existing ebook records)
This commit is contained in:
commit
a0f2163403
@ -20,7 +20,7 @@ def string_to_authors(raw):
|
|||||||
raw = raw.replace('&&', u'\uffff')
|
raw = raw.replace('&&', u'\uffff')
|
||||||
raw = _author_pat.sub('&', raw)
|
raw = _author_pat.sub('&', raw)
|
||||||
authors = [a.strip().replace(u'\uffff', '&') for a in raw.split('&')]
|
authors = [a.strip().replace(u'\uffff', '&') for a in raw.split('&')]
|
||||||
return authors
|
return [a for a in authors if a]
|
||||||
|
|
||||||
def authors_to_string(authors):
|
def authors_to_string(authors):
|
||||||
if authors is not None:
|
if authors is not None:
|
||||||
|
@ -4,10 +4,9 @@ __copyright__ = '2010, Greg Riker <griker@hotmail.com>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
''' Read/write metadata from Amazon's topaz format '''
|
''' Read/write metadata from Amazon's topaz format '''
|
||||||
import copy, StringIO, sys
|
import StringIO, sys
|
||||||
from struct import pack, unpack
|
from struct import pack
|
||||||
|
|
||||||
from calibre import prints
|
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
|
||||||
class StreamSlicer(object):
|
class StreamSlicer(object):
|
||||||
@ -200,7 +199,6 @@ class MetadataUpdater(object):
|
|||||||
# Build a dict of topaz_header records
|
# Build a dict of topaz_header records
|
||||||
topaz_headers = {}
|
topaz_headers = {}
|
||||||
for x in range(self.header_records):
|
for x in range(self.header_records):
|
||||||
c_marker = self.data[offset]
|
|
||||||
offset += 1
|
offset += 1
|
||||||
taglen, consumed = self.decode_vwi(self.data[offset:offset+4])
|
taglen, consumed = self.decode_vwi(self.data[offset:offset+4])
|
||||||
offset += consumed
|
offset += consumed
|
||||||
@ -259,7 +257,6 @@ class MetadataUpdater(object):
|
|||||||
|
|
||||||
self.metadata = {}
|
self.metadata = {}
|
||||||
for x in range(self.md_header['num_recs']):
|
for x in range(self.md_header['num_recs']):
|
||||||
md_record = {}
|
|
||||||
taglen, consumed = self.decode_vwi(self.data[offset:offset+4])
|
taglen, consumed = self.decode_vwi(self.data[offset:offset+4])
|
||||||
offset += consumed
|
offset += consumed
|
||||||
tag = self.data[offset:offset+taglen]
|
tag = self.data[offset:offset+taglen]
|
||||||
@ -380,7 +377,6 @@ def set_metadata(stream, mi):
|
|||||||
return
|
return
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import cStringIO, sys
|
|
||||||
#print get_metadata(open(sys.argv[1], 'rb'))
|
#print get_metadata(open(sys.argv[1], 'rb'))
|
||||||
mi = MetaInformation(title="My New Title", authors=['Smith, John'])
|
mi = MetaInformation(title="My New Title", authors=['Smith, John'])
|
||||||
set_metadata(open(sys.argv[1], 'rb'), mi)
|
set_metadata(open(sys.argv[1], 'rb'), mi)
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
'''
|
'''
|
||||||
UI for adding books to the database and saving books to disk
|
UI for adding books to the database and saving books to disk
|
||||||
'''
|
'''
|
||||||
import os, shutil, time
|
import os, shutil, time, re
|
||||||
from Queue import Queue, Empty
|
from Queue import Queue, Empty
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
|
||||||
@ -13,9 +13,10 @@ from calibre.gui2 import question_dialog, error_dialog, info_dialog
|
|||||||
from calibre.ebooks.metadata.opf2 import OPF
|
from calibre.ebooks.metadata.opf2 import OPF
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.constants import preferred_encoding, filesystem_encoding
|
from calibre.constants import preferred_encoding, filesystem_encoding
|
||||||
|
from calibre.utils.config import prefs
|
||||||
|
|
||||||
class DuplicatesAdder(QThread):
|
class DuplicatesAdder(QThread):
|
||||||
|
# Add duplicate books
|
||||||
def __init__(self, parent, db, duplicates, db_adder):
|
def __init__(self, parent, db, duplicates, db_adder):
|
||||||
QThread.__init__(self, parent)
|
QThread.__init__(self, parent)
|
||||||
self.db, self.db_adder = db, db_adder
|
self.db, self.db_adder = db, db_adder
|
||||||
@ -27,6 +28,7 @@ class DuplicatesAdder(QThread):
|
|||||||
formats = [f for f in formats if not f.lower().endswith('.opf')]
|
formats = [f for f in formats if not f.lower().endswith('.opf')]
|
||||||
id = self.db.create_book_entry(mi, cover=cover,
|
id = self.db.create_book_entry(mi, cover=cover,
|
||||||
add_duplicates=True)
|
add_duplicates=True)
|
||||||
|
# here we add all the formats for dupe book record created above
|
||||||
self.db_adder.add_formats(id, formats)
|
self.db_adder.add_formats(id, formats)
|
||||||
self.db_adder.number_of_books_added += 1
|
self.db_adder.number_of_books_added += 1
|
||||||
self.emit(SIGNAL('added(PyQt_PyObject)'), count)
|
self.emit(SIGNAL('added(PyQt_PyObject)'), count)
|
||||||
@ -90,6 +92,15 @@ class DBAdder(Thread):
|
|||||||
self.daemon = True
|
self.daemon = True
|
||||||
self.input_queue = Queue()
|
self.input_queue = Queue()
|
||||||
self.output_queue = Queue()
|
self.output_queue = Queue()
|
||||||
|
self.fuzzy_title_patterns = [(re.compile(pat), repl) for pat, repl in
|
||||||
|
[
|
||||||
|
(r'[\[\](){}<>\'";,:#]', ''),
|
||||||
|
(r'^(the|a|an) ', ''),
|
||||||
|
(r'[-._]', ' '),
|
||||||
|
(r'\s+', ' ')
|
||||||
|
]
|
||||||
|
]
|
||||||
|
self.merged_books = set([])
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
while not self.end:
|
while not self.end:
|
||||||
@ -125,6 +136,34 @@ class DBAdder(Thread):
|
|||||||
fmts[-1] = fmt
|
fmts[-1] = fmt
|
||||||
return fmts
|
return fmts
|
||||||
|
|
||||||
|
def fuzzy_title(self, title):
    '''
    Normalise ``title`` so that near-identical titles compare equal.

    The title is stripped and lower-cased, then every
    ``(compiled pattern, replacement)`` pair in
    ``self.fuzzy_title_patterns`` is applied in order.

    :param title: The title string to normalise.
    :return: The normalised title, without leading/trailing whitespace.
    '''
    title = title.strip().lower()
    for pat, repl in self.fuzzy_title_patterns:
        title = pat.sub(repl, title)
    # A pattern may replace punctuation at either end of the title with a
    # space (e.g. "Book." -> "book "). Strip again so that such titles
    # still compare equal to their unpunctuated form.
    return title.strip()
|
||||||
|
|
||||||
|
def find_identical_books(self, mi):
    '''
    Find books in the database that match ``mi`` by author and fuzzy title.

    A search query is built with one ``author:"=<name>"`` term per entry
    in ``mi.authors`` (double quotes removed from the names), joined with
    ``and``, and passed to ``self.db.data.parse``. Every returned book
    whose title, after :meth:`fuzzy_title`, equals the fuzzy form of
    ``mi.title`` is considered identical.

    :param mi: Metadata object providing ``authors`` and ``title``.
    :return: A set of matching book ids. Empty if ``mi`` has no authors,
             or if the query could not be built or run.
    '''
    identical_book_ids = set()
    if not mi.authors:
        return identical_book_ids

    try:
        query = u' and '.join([u'author:"=%s"' % (a.replace('"', ''))
                               for a in mi.authors])
    except ValueError:
        # NOTE(review): presumably guards against UnicodeDecodeError (a
        # ValueError subclass) for non-ASCII byte-string author names --
        # confirm.
        return identical_book_ids

    try:
        book_ids = self.db.data.parse(query)
    except Exception:
        # Best effort: a failed search must not abort adding the book, but
        # do not swallow KeyboardInterrupt/SystemExit as a bare except would.
        import traceback
        traceback.print_exc()
        return identical_book_ids

    if not book_ids:
        return identical_book_ids

    # The fuzzy form of the incoming title is the same for every candidate,
    # so compute it once instead of once per book.
    mbook_title = self.fuzzy_title(mi.title)
    for book_id in book_ids:
        fbook_title = self.fuzzy_title(
            self.db.title(book_id, index_is_id=True))
        if fbook_title == mbook_title:
            identical_book_ids.add(book_id)
    return identical_book_ids
|
||||||
|
|
||||||
def add(self, id, opf, cover, name):
|
def add(self, id, opf, cover, name):
|
||||||
formats = self.ids.pop(id)
|
formats = self.ids.pop(id)
|
||||||
if opf.endswith('.error'):
|
if opf.endswith('.error'):
|
||||||
@ -145,25 +184,38 @@ class DBAdder(Thread):
|
|||||||
if self.db is not None:
|
if self.db is not None:
|
||||||
if cover:
|
if cover:
|
||||||
cover = open(cover, 'rb').read()
|
cover = open(cover, 'rb').read()
|
||||||
id = self.db.create_book_entry(mi, cover=cover, add_duplicates=False)
|
orig_formats = formats
|
||||||
self.number_of_books_added += 1
|
formats = [f for f in formats if not f.lower().endswith('.opf')]
|
||||||
if id is None:
|
if prefs['add_formats_to_existing']:
|
||||||
self.duplicates.append((mi, cover, formats))
|
identical_book_list = self.find_identical_books(mi)
|
||||||
|
|
||||||
|
if identical_book_list: # books with same author and nearly same title exist in db
|
||||||
|
self.merged_books.add(mi.title)
|
||||||
|
for identical_book in identical_book_list:
|
||||||
|
self.add_formats(identical_book, formats, replace=False)
|
||||||
|
else:
|
||||||
|
id = self.db.create_book_entry(mi, cover=cover, add_duplicates=True)
|
||||||
|
self.number_of_books_added += 1
|
||||||
|
self.add_formats(id, formats)
|
||||||
else:
|
else:
|
||||||
formats = [f for f in formats if not f.lower().endswith('.opf')]
|
id = self.db.create_book_entry(mi, cover=cover, add_duplicates=False)
|
||||||
self.add_formats(id, formats)
|
self.number_of_books_added += 1
|
||||||
|
if id is None:
|
||||||
|
self.duplicates.append((mi, cover, orig_formats))
|
||||||
|
else:
|
||||||
|
self.add_formats(id, formats)
|
||||||
else:
|
else:
|
||||||
self.names.append(name)
|
self.names.append(name)
|
||||||
self.paths.append(formats[0])
|
self.paths.append(formats[0])
|
||||||
self.infos.append(mi)
|
self.infos.append(mi)
|
||||||
return mi.title
|
return mi.title
|
||||||
|
|
||||||
def add_formats(self, id, formats, replace=True):
    '''
    Add every file in ``formats`` to the book ``id`` in ``self.db``.

    The format name is the upper-cased file extension without its dot.
    Each file is opened in binary mode and handed to
    ``self.db.add_format`` with ``index_is_id=True`` and ``notify=False``.

    :param id: Id of the book record receiving the files.
    :param formats: Iterable of file paths.
    :param replace: Passed through to ``self.db.add_format``.
    '''
    for fpath in formats:
        ext = os.path.splitext(fpath)[-1]
        fmt = ext.replace('.', '').upper()
        with open(fpath, 'rb') as stream:
            self.db.add_format(id, fmt, stream, index_is_id=True,
                    notify=False, replace=replace)
|
||||||
|
|
||||||
|
|
||||||
class Adder(QObject):
|
class Adder(QObject):
|
||||||
@ -330,6 +382,11 @@ class Adder(QObject):
|
|||||||
return getattr(getattr(self, 'db_adder', None), 'number_of_books_added',
|
return getattr(getattr(self, 'db_adder', None), 'number_of_books_added',
|
||||||
0)
|
0)
|
||||||
|
|
||||||
|
@property
def merged_books(self):
    '''
    The ``merged_books`` attribute of ``self.db_adder``.

    Falls back to an empty set when ``self.db_adder`` does not exist or
    has no ``merged_books`` attribute.
    '''
    adder = getattr(self, 'db_adder', None)
    return getattr(adder, 'merged_books', set([]))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def critical(self):
|
def critical(self):
|
||||||
return getattr(getattr(self, 'db_adder', None), 'critical',
|
return getattr(getattr(self, 'db_adder', None), 'critical',
|
||||||
|
@ -44,6 +44,7 @@ class AddSave(QTabWidget, Ui_TabWidget):
|
|||||||
self.filename_pattern = FilenamePattern(self)
|
self.filename_pattern = FilenamePattern(self)
|
||||||
self.metadata_box.layout().insertWidget(0, self.filename_pattern)
|
self.metadata_box.layout().insertWidget(0, self.filename_pattern)
|
||||||
self.opt_swap_author_names.setChecked(prefs['swap_author_names'])
|
self.opt_swap_author_names.setChecked(prefs['swap_author_names'])
|
||||||
|
self.opt_add_formats_to_existing.setChecked(prefs['add_formats_to_existing'])
|
||||||
help = '\n'.join(textwrap.wrap(c.get_option('template').help, 75))
|
help = '\n'.join(textwrap.wrap(c.get_option('template').help, 75))
|
||||||
self.save_template.initialize('save_to_disk', opts.template, help)
|
self.save_template.initialize('save_to_disk', opts.template, help)
|
||||||
self.send_template.initialize('send_to_device', opts.send_template, help)
|
self.send_template.initialize('send_to_device', opts.send_template, help)
|
||||||
@ -69,6 +70,7 @@ class AddSave(QTabWidget, Ui_TabWidget):
|
|||||||
pattern = self.filename_pattern.commit()
|
pattern = self.filename_pattern.commit()
|
||||||
prefs['filename_pattern'] = pattern
|
prefs['filename_pattern'] = pattern
|
||||||
prefs['swap_author_names'] = bool(self.opt_swap_author_names.isChecked())
|
prefs['swap_author_names'] = bool(self.opt_swap_author_names.isChecked())
|
||||||
|
prefs['add_formats_to_existing'] = bool(self.opt_add_formats_to_existing.isChecked())
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
<rect>
|
<rect>
|
||||||
<x>0</x>
|
<x>0</x>
|
||||||
<y>0</y>
|
<y>0</y>
|
||||||
<width>645</width>
|
<width>588</width>
|
||||||
<height>516</height>
|
<height>516</height>
|
||||||
</rect>
|
</rect>
|
||||||
</property>
|
</property>
|
||||||
@ -49,6 +49,19 @@
|
|||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="2" column="0" colspan="2">
|
<item row="2" column="0" colspan="2">
|
||||||
|
<widget class="QCheckBox" name="opt_add_formats_to_existing">
|
||||||
|
<property name="toolTip">
|
||||||
|
<string>If an existing book with a similar title and author is found that does not have the format being added, the format is added
|
||||||
|
to the existing book, instead of creating a new entry. If the existing book already has the format, then it is silently ignored.
|
||||||
|
|
||||||
|
Title match ignores leading articles ("the", "a", "an"), punctuation, case, etc. Author match is exact.</string>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>If books with similar titles and authors are found, &merge the new files automatically</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="3" column="0" colspan="2">
|
||||||
<widget class="QGroupBox" name="metadata_box">
|
<widget class="QGroupBox" name="metadata_box">
|
||||||
<property name="title">
|
<property name="title">
|
||||||
<string>&Configure metadata from file name</string>
|
<string>&Configure metadata from file name</string>
|
||||||
|
@ -24,7 +24,7 @@ from PyQt4.QtSvg import QSvgRenderer
|
|||||||
|
|
||||||
from calibre import prints, patheq, strftime
|
from calibre import prints, patheq, strftime
|
||||||
from calibre.constants import __version__, __appname__, isfrozen, islinux, \
|
from calibre.constants import __version__, __appname__, isfrozen, islinux, \
|
||||||
iswindows, isosx, filesystem_encoding
|
iswindows, isosx, filesystem_encoding, preferred_encoding
|
||||||
from calibre.utils.filenames import ascii_filename
|
from calibre.utils.filenames import ascii_filename
|
||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
from calibre.utils.config import prefs, dynamic
|
from calibre.utils.config import prefs, dynamic
|
||||||
@ -1244,6 +1244,13 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
self.library_view.model().books_added(self._adder.number_of_books_added)
|
self.library_view.model().books_added(self._adder.number_of_books_added)
|
||||||
if hasattr(self, 'db_images'):
|
if hasattr(self, 'db_images'):
|
||||||
self.db_images.reset()
|
self.db_images.reset()
|
||||||
|
if getattr(self._adder, 'merged_books', False):
|
||||||
|
books = u'\n'.join([x if isinstance(x, unicode) else
|
||||||
|
x.decode(preferred_encoding, 'replace') for x in
|
||||||
|
self._adder.merged_books])
|
||||||
|
info_dialog(self, _('Merged some books'),
|
||||||
|
_('Some duplicates were found and merged into the '
|
||||||
|
'following existing books:'), det_msg=books, show=True)
|
||||||
if getattr(self._adder, 'critical', None):
|
if getattr(self._adder, 'critical', None):
|
||||||
det_msg = []
|
det_msg = []
|
||||||
for name, log in self._adder.critical.items():
|
for name, log in self._adder.critical.items():
|
||||||
|
@ -998,12 +998,15 @@ class LibraryDatabase2(LibraryDatabase):
|
|||||||
return self.add_format(index, format, stream,
|
return self.add_format(index, format, stream,
|
||||||
index_is_id=index_is_id, path=path, notify=notify)
|
index_is_id=index_is_id, path=path, notify=notify)
|
||||||
|
|
||||||
def add_format(self, index, format, stream, index_is_id=False, path=None, notify=True):
|
def add_format(self, index, format, stream, index_is_id=False, path=None,
|
||||||
|
notify=True, replace=True):
|
||||||
id = index if index_is_id else self.id(index)
|
id = index if index_is_id else self.id(index)
|
||||||
if path is None:
|
if path is None:
|
||||||
path = os.path.join(self.library_path, self.path(id, index_is_id=True))
|
path = os.path.join(self.library_path, self.path(id, index_is_id=True))
|
||||||
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
|
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
|
||||||
if name:
|
if name:
|
||||||
|
if not replace:
|
||||||
|
return False
|
||||||
self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format))
|
self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format))
|
||||||
name = self.construct_file_name(id)
|
name = self.construct_file_name(id)
|
||||||
ext = ('.' + format.lower()) if format else ''
|
ext = ('.' + format.lower()) if format else ''
|
||||||
@ -1021,6 +1024,7 @@ class LibraryDatabase2(LibraryDatabase):
|
|||||||
self.refresh_ids([id])
|
self.refresh_ids([id])
|
||||||
if notify:
|
if notify:
|
||||||
self.notify('metadata', [id])
|
self.notify('metadata', [id])
|
||||||
|
return True
|
||||||
|
|
||||||
def delete_book(self, id, notify=True):
|
def delete_book(self, id, notify=True):
|
||||||
'''
|
'''
|
||||||
|
@ -670,6 +670,8 @@ def _prefs():
|
|||||||
help=_('The priority of worker processes'))
|
help=_('The priority of worker processes'))
|
||||||
c.add_opt('swap_author_names', default=False,
|
c.add_opt('swap_author_names', default=False,
|
||||||
help=_('Swap author first and last names when reading metadata'))
|
help=_('Swap author first and last names when reading metadata'))
|
||||||
|
c.add_opt('add_formats_to_existing', default=False,
|
||||||
|
help=_('Add new formats to existing book records'))
|
||||||
|
|
||||||
c.add_opt('migrated', default=False, help='For Internal use. Don\'t modify.')
|
c.add_opt('migrated', default=False, help='For Internal use. Don\'t modify.')
|
||||||
return c
|
return c
|
||||||
|
Loading…
x
Reference in New Issue
Block a user