Saving to disk is now fully customizable. The user can control the file and directory structure of the saved files as well as various other aspects of the save process.

This commit is contained in:
Kovid Goyal 2009-08-18 15:23:11 -06:00
parent 6973d80602
commit 181802da53
10 changed files with 534 additions and 353 deletions

View File

@ -153,14 +153,17 @@ def read_metadata(paths, result_queue, chunk=50, spare_server=None):
t.start() t.start()
return t return t
###########################################################################
############ Saving #####################
###########################################################################
class SaveWorker(Thread): class SaveWorker(Thread):
def __init__(self, result_queue, db, ids, path, by_author=False, def __init__(self, result_queue, db, ids, path, opts, spare_server=None):
single_dir=False, single_format=None, spare_server=None):
Thread.__init__(self) Thread.__init__(self)
self.daemon = True self.daemon = True
self.path, self.by_author = path, by_author self.path, self.opts = path, opts
self.single_dir, self.single_format = single_dir, single_format
self.ids = ids self.ids = ids
self.library_path = db.library_path self.library_path = db.library_path
self.canceled = False self.canceled = False
@ -170,17 +173,22 @@ class SaveWorker(Thread):
self.start() self.start()
def run(self): def run(self):
from calibre.library.save_to_disk import config
server = Server() if self.spare_server is None else self.spare_server server = Server() if self.spare_server is None else self.spare_server
ids = set(self.ids) ids = set(self.ids)
tasks = server.split(list(ids)) tasks = server.split(list(ids))
jobs = set([]) jobs = set([])
c = config()
recs = {}
for pref in c.preferences:
recs[pref.name] = getattr(self.opts, pref.name)
for i, task in enumerate(tasks): for i, task in enumerate(tasks):
tids = [x[-1] for x in task] tids = [x[-1] for x in task]
job = ParallelJob('save_book', job = ParallelJob('save_book',
'Save books (%d of %d)'%(i, len(tasks)), 'Save books (%d of %d)'%(i, len(tasks)),
lambda x,y:x, lambda x,y:x,
args=[tids, self.library_path, self.path, self.single_dir, args=[tids, self.library_path, self.path, recs])
self.single_format, self.by_author])
jobs.add(job) jobs.add(job)
server.add_job(job) server.add_job(job)
@ -192,9 +200,9 @@ class SaveWorker(Thread):
job.update(consume_notifications=False) job.update(consume_notifications=False)
while True: while True:
try: try:
id, title, ok = job.notifications.get_nowait()[0] id, title, ok, tb = job.notifications.get_nowait()[0]
if id in ids: if id in ids:
self.result_queue.put((id, title, ok)) self.result_queue.put((id, title, ok, tb))
ids.remove(id) ids.remove(id)
except Empty: except Empty:
break break
@ -221,23 +229,18 @@ class SaveWorker(Thread):
pass pass
def save_book(task, library_path, path, single_dir, single_format, def save_book(task, library_path, path, recs, notification=lambda x,y:x):
by_author, notification=lambda x,y:x):
from calibre.library.database2 import LibraryDatabase2 from calibre.library.database2 import LibraryDatabase2
db = LibraryDatabase2(library_path) db = LibraryDatabase2(library_path)
from calibre.library.save_to_disk import config, save_to_disk
opts = config().parse()
for name in recs:
setattr(opts, name, recs[name])
def callback(id, title):
notification((id, title, True)) def callback(id, title, failed, tb):
notification((id, title, not failed, tb))
return True return True
if single_format is None: save_to_disk(db, task, path, opts, callback)
failures = []
db.export_to_dir(path, task, index_is_id=True, byauthor=by_author,
callback=callback, single_dir=single_dir)
else:
failures = db.export_single_format_to_dir(path, task, single_format,
index_is_id=True, callback=callback)
for id, title in failures:
notification((id, title, False))

View File

@ -295,13 +295,13 @@ class Adder(QObject):
class Saver(QObject): class Saver(QObject):
def __init__(self, parent, db, callback, rows, path, def __init__(self, parent, db, callback, rows, path, opts,
by_author=False, single_dir=False, single_format=None,
spare_server=None): spare_server=None):
QObject.__init__(self, parent) QObject.__init__(self, parent)
self.pd = ProgressDialog(_('Saving...'), parent=parent) self.pd = ProgressDialog(_('Saving...'), parent=parent)
self.spare_server = spare_server self.spare_server = spare_server
self.db = db self.db = db
self.opts = opts
self.pd.setModal(True) self.pd.setModal(True)
self.pd.show() self.pd.show()
self.pd.set_min(0) self.pd.set_min(0)
@ -315,8 +315,8 @@ class Saver(QObject):
self.failures = set([]) self.failures = set([])
from calibre.ebooks.metadata.worker import SaveWorker from calibre.ebooks.metadata.worker import SaveWorker
self.worker = SaveWorker(self.rq, db, self.ids, path, by_author, self.worker = SaveWorker(self.rq, db, self.ids, path, self.opts,
single_dir, single_format, spare_server=self.spare_server) spare_server=self.spare_server)
self.connect(self.pd, SIGNAL('canceled()'), self.canceled) self.connect(self.pd, SIGNAL('canceled()'), self.canceled)
self.timer = QTimer(self) self.timer = QTimer(self)
self.connect(self.timer, SIGNAL('timeout()'), self.update) self.connect(self.timer, SIGNAL('timeout()'), self.update)
@ -344,15 +344,14 @@ class Saver(QObject):
return return
try: try:
id, title, ok = self.rq.get_nowait() id, title, ok, tb = self.rq.get_nowait()
except Empty: except Empty:
return return
self.pd.value += 1 self.pd.value += 1
self.ids.remove(id) self.ids.remove(id)
if not isinstance(title, unicode): if not isinstance(title, unicode):
title = str(title).decode('utf-8', preferred_encoding) title = str(title).decode(preferred_encoding, 'replace')
self.pd.set_msg(_('Saved')+' '+title) self.pd.set_msg(_('Saved')+' '+title)
if not ok: if not ok:
self.failures.add(title) self.failures.add((title, tb))

View File

@ -52,13 +52,10 @@ class AddSave(QTabWidget, Ui_TabWidget):
table = u'<table>%s</table>'%(u'\n'.join(rows)) table = u'<table>%s</table>'%(u'\n'.join(rows))
self.template_variables.setText(table) self.template_variables.setText(table)
self.opt_read_metadata_from_filename.setChecked(prefs['read_file_metadata']) self.opt_read_metadata_from_filename.setChecked(not prefs['read_file_metadata'])
self.metadata_box.setEnabled(self.opt_read_metadata_from_filename.isChecked())
self.filename_pattern = FilenamePattern(self) self.filename_pattern = FilenamePattern(self)
self.metadata_box.layout().insertWidget(0, self.filename_pattern) self.metadata_box.layout().insertWidget(0, self.filename_pattern)
def validate(self): def validate(self):
tmpl = preprocess_template(self.opt_template.text()) tmpl = preprocess_template(self.opt_template.text())
fa = {} fa = {}
@ -82,7 +79,7 @@ class AddSave(QTabWidget, Ui_TabWidget):
for x in ('formats', 'template', 'timefmt'): for x in ('formats', 'template', 'timefmt'):
c.set(x, unicode(getattr(self, 'opt_'+x).text()).strip()) c.set(x, unicode(getattr(self, 'opt_'+x).text()).strip())
self.opt_template.save_history('save_to_disk_template_history') self.opt_template.save_history('save_to_disk_template_history')
prefs['read_file_metadata'] = bool(self.opt_read_metadata_from_filename.isChecked()) prefs['read_file_metadata'] = not bool(self.opt_read_metadata_from_filename.isChecked())
pattern = self.filename_pattern.commit() pattern = self.filename_pattern.commit()
prefs['filename_pattern'] = pattern prefs['filename_pattern'] = pattern

View File

@ -34,7 +34,7 @@
<item> <item>
<widget class="QCheckBox" name="opt_read_metadata_from_filename"> <widget class="QCheckBox" name="opt_read_metadata_from_filename">
<property name="text"> <property name="text">
<string>Read metadata from &amp;file name</string> <string>Read metadata only from &amp;file name</string>
</property> </property>
</widget> </widget>
</item> </item>
@ -174,22 +174,5 @@
</customwidget> </customwidget>
</customwidgets> </customwidgets>
<resources/> <resources/>
<connections> <connections/>
<connection>
<sender>opt_read_metadata_from_filename</sender>
<signal>toggled(bool)</signal>
<receiver>metadata_box</receiver>
<slot>setEnabled(bool)</slot>
<hints>
<hint type="sourcelabel">
<x>159</x>
<y>81</y>
</hint>
<hint type="destinationlabel">
<x>178</x>
<y>122</y>
</hint>
</hints>
</connection>
</connections>
</ui> </ui>

View File

@ -1064,11 +1064,14 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
if self.current_view() is self.library_view: if self.current_view() is self.library_view:
from calibre.gui2.add import Saver from calibre.gui2.add import Saver
from calibre.library.save_to_disk import config
opts = config().parse()
if single_format is not None:
opts.formats = single_format
if single_dir:
opts.template = '{title} - {authors}'
self._saver = Saver(self, self.library_view.model().db, self._saver = Saver(self, self.library_view.model().db,
Dispatcher(self._books_saved), rows, path, Dispatcher(self._books_saved), rows, path, opts,
by_author=self.library_view.model().by_author,
single_dir=single_dir,
single_format=single_format,
spare_server=self.spare_server) spare_server=self.spare_server)
else: else:
@ -1078,19 +1081,20 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
def _books_saved(self, path, failures, error): def _books_saved(self, path, failures, error):
single_format = self._saver.worker.single_format
self._saver = None self._saver = None
if error: if error:
return error_dialog(self, _('Error while saving'), return error_dialog(self, _('Error while saving'),
_('There was an error while saving.'), _('There was an error while saving.'),
error, show=True) error, show=True)
if failures and single_format: if failures:
single_format = single_format.upper() failures = [u'%s\n\t%s'%
(title, '\n\t'.join(err.splitlines())) for title, err in
failures]
warning_dialog(self, _('Could not save some books'), warning_dialog(self, _('Could not save some books'),
_('Could not save some books') + ', ' + _('Could not save some books') + ', ' +
(_('as the %s format is not available for them.')%single_format) +
_('Click the show details button to see which ones.'), _('Click the show details button to see which ones.'),
'\n'.join(failures), show=True) u'\n\n'.join(failures), show=True)
QDesktopServices.openUrl(QUrl.fromLocalFile(path)) QDesktopServices.openUrl(QUrl.fromLocalFile(path))
def books_saved(self, job): def books_saved(self, job):

View File

@ -498,10 +498,7 @@ def do_export(db, ids, dir, opts):
prints('Failed to save the following books:') prints('Failed to save the following books:')
for id, title, tb in failures: for id, title, tb in failures:
prints(str(id)+':', title) prints(str(id)+':', title)
if tb:
prints('\t'+'\n\t'.join(tb.splitlines())) prints('\t'+'\n\t'.join(tb.splitlines()))
else:
prints('\tRequested formats not available')
prints(' ') prints(' ')
def command_export(args, dbpath): def command_export(args, dbpath):

View File

@ -176,7 +176,8 @@ def save_book_to_disk(id, db, root, opts, length):
traceback.print_exc() traceback.print_exc()
stream.seek(0) stream.seek(0)
data = stream.read() data = stream.read()
with open(base_path+'.'+fmt, 'wb') as f: fmt_path = base_path+'.'+str(fmt)
with open(fmt_path, 'wb') as f:
f.write(data) f.write(data)
return not written, id, mi.title return not written, id, mi.title
@ -189,7 +190,9 @@ def save_to_disk(db, ids, root, opts=None, callback=None):
:param:`ids` iterable of book ids to save from the database. :param:`ids` iterable of book ids to save from the database.
:param:`callback` is an optional callable that is called on after each :param:`callback` is an optional callable that is called on after each
book is processed with the arguments: id, title and failed book is processed with the arguments: id, title, failed, traceback.
If the callback returns False, further processing is terminated and
the function returns.
:return: A list of failures. Each element of the list is a tuple :return: A list of failures. Each element of the list is a tuple
(id, title, traceback) (id, title, traceback)
''' '''
@ -209,13 +212,14 @@ def save_to_disk(db, ids, root, opts=None, callback=None):
tb = '' tb = ''
try: try:
failed, id, title = save_book_to_disk(x, db, root, opts, length) failed, id, title = save_book_to_disk(x, db, root, opts, length)
tb = _('Requested formats not available')
except: except:
failed, id, title = True, x, db.title(x, index_is_id=True) failed, id, title = True, x, db.title(x, index_is_id=True)
tb = traceback.format_exc() tb = traceback.format_exc()
if failed: if failed:
failures.append((id, title, tb)) failures.append((id, title, tb))
if callable(callback): if callable(callback):
if not callback(int(id), title, failed): if not callback(int(id), title, failed, tb):
break break
return failures return failures

File diff suppressed because it is too large Load Diff

View File

@ -379,6 +379,7 @@ class ConfigInterface(object):
self.remove_opt = self.remove = self.option_set.remove_opt self.remove_opt = self.remove = self.option_set.remove_opt
self.parse_string = self.option_set.parse_string self.parse_string = self.option_set.parse_string
self.get_option = self.option_set.get_option self.get_option = self.option_set.get_option
self.preferences = self.option_set.preferences
def update(self, other): def update(self, other):
self.option_set.update(other.option_set) self.option_set.update(other.option_set)

View File

@ -8,6 +8,7 @@ www.guardian.co.uk
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Guardian(BasicNewsRecipe): class Guardian(BasicNewsRecipe):
@ -16,14 +17,33 @@ class Guardian(BasicNewsRecipe):
language = _('English') language = _('English')
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 20 max_articles_per_feed = 20
remove_javascript = True
timefmt = ' [%a, %d %b %Y]' timefmt = ' [%a, %d %b %Y]'
keep_only_tags = [
dict(name='div', attrs={'id':["content","article_header","main-article-info",]}),
]
remove_tags = [
dict(name='div', attrs={'class':["video-content","videos-third-column"]}),
dict(name='div', attrs={'id':["article-toolbox","subscribe-feeds",]}),
dict(name='ul', attrs={'class':["pagination"]}),
dict(name='ul', attrs={'id':["content-actions"]}),
]
use_embedded_content = False
remove_tags_before = dict(id='main-article-info')
remove_tags_after = dict(id='article-wrapper')
remove_tags_after = dict(id='content')
no_stylesheets = True no_stylesheets = True
extra_css = 'h2 {font-size: medium;} \n h1 {text-align: left;}' extra_css = '''
.article-attributes{font-size: x-small; font-family:Arial,Helvetica,sans-serif;}
.h1{font-size: large ;font-family:georgia,serif; font-weight:bold;}
.stand-first-alone{color:#666666; font-size:small; font-family:Arial,Helvetica,sans-serif;}
.caption{color:#666666; font-size:x-small; font-family:Arial,Helvetica,sans-serif;}
#article-wrapper{font-size:small; font-family:Arial,Helvetica,sans-serif;}
.main-article-info{font-family:Arial,Helvetica,sans-serif;}
#full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;}
#match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [ feeds = [
('Front Page', 'http://www.guardian.co.uk/rss'), ('Front Page', 'http://www.guardian.co.uk/rss'),
@ -37,3 +57,21 @@ class Guardian(BasicNewsRecipe):
('Comment','http://www.guardian.co.uk/commentisfree/rss'), ('Comment','http://www.guardian.co.uk/commentisfree/rss'),
] ]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(face=True):
del item['face']
for tag in soup.findAll(name=['ul','li']):
tag.name = 'div'
return soup