Sync to trunk.

This commit is contained in:
John Schember 2011-04-12 18:54:07 -04:00
commit 05d902046b
7 changed files with 346 additions and 15 deletions

View File

@ -1,6 +1,6 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
novosti.rs
'''
@ -21,34 +21,71 @@ class Novosti(BasicNewsRecipe):
encoding = 'utf-8'
language = 'sr'
publication_type = 'newspaper'
masthead_url = 'http://www.novosti.rs/images/basic/logo-print.png'
extra_css = """ @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
.article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
.author{font-size: small}
.articleLead{font-size: large; font-weight: bold}
img{display: block; margin-bottom: 1em; margin-top: 1em}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [dict(attrs={'class':['articleTitle','author','articleLead','articleBody']})]
remove_tags = [dict(name=['embed','object','iframe','base','link','meta'])]
feeds = [(u'Vesti', u'http://www.novosti.rs/rss/rss-vesti')]
keep_only_tags = [dict(attrs={'class':['articleTitle','articleInfo','articleLead','singlePhoto fl','articleBody']})]
remove_tags = [
dict(name=['embed','object','iframe','base','link','meta'])
,dict(name='a', attrs={'class':'loadComments topCommentsLink'})
]
remove_attributes = ['lang','xmlns:fb']
feeds = [
(u'Politika' , u'http://www.novosti.rs/rss/2-Sve%20vesti')
,(u'Drustvo' , u'http://www.novosti.rs/rss/1-Sve%20vesti')
,(u'Ekonomija' , u'http://www.novosti.rs/rss/3-Sve%20vesti')
,(u'Hronika' , u'http://www.novosti.rs/rss/4-Sve%20vesti')
,(u'Dosije' , u'http://www.novosti.rs/rss/5-Sve%20vesti')
,(u'Reportaze' , u'http://www.novosti.rs/rss/6-Sve%20vesti')
,(u'Tehnologije' , u'http://www.novosti.rs/rss/35-Sve%20vesti')
,(u'Zanimljivosti', u'http://www.novosti.rs/rss/26-Sve%20vesti')
,(u'Auto' , u'http://www.novosti.rs/rss/50-Sve%20vesti')
,(u'Sport' , u'http://www.novosti.rs/rss/11|47|12|14|13-Sve%20vesti')
,(u'Svet' , u'http://www.novosti.rs/rss/7-Sve%20vesti')
,(u'Region' , u'http://www.novosti.rs/rss/8-Sve%20vesti')
,(u'Dijaspora' , u'http://www.novosti.rs/rss/9-Sve%20vesti')
,(u'Spektakl' , u'http://www.novosti.rs/rss/10-Sve%20vesti')
,(u'Kultura' , u'http://www.novosti.rs/rss/31-Sve%20vesti')
,(u'Srbija' , u'http://www.novosti.rs/rss/15-Sve%20vesti')
,(u'Beograd' , u'http://www.novosti.rs/rss/16-Sve%20vesti')
,(u'Zivot+' , u'http://www.novosti.rs/rss/24|33|34|25|20|18|32|19-Sve%20vesti')
,(u'Turizam' , u'http://www.novosti.rs/rss/36-Sve%20vesti')
]
def preprocess_html(self, soup):
    '''
    Tidy a downloaded article page and return the modified ``soup``.

    * inline ``style`` attributes are removed from every tag that has one
    * ``span`` tags with class ``author`` are renamed to ``p``
    * every ``a`` tag that exposes a ``.string`` is replaced by that string;
      otherwise it becomes an attribute-less ``div`` when it contains an
      ``img``, or is replaced by ``self.tag_to_string()`` of itself
    * ``img`` tags without an ``alt`` attribute get ``alt="image"``
    '''
    for styled in soup.findAll(style=True):
        del styled['style']

    for byline in soup.findAll('span', attrs={'class': 'author'}):
        byline.name = 'p'

    for anchor in soup.findAll('a'):
        wrapped_img = anchor.find('img')
        link_text = anchor.string
        if link_text is not None:
            anchor.replaceWith(link_text)
        elif wrapped_img:
            # Keep the image, but drop the link and all of its attributes.
            anchor.name = 'div'
            anchor.attrs = []
        else:
            anchor.replaceWith(self.tag_to_string(anchor))

    for image in soup.findAll('img'):
        if not image.has_key('alt'):
            image['alt'] = 'image'

    return soup

View File

@ -244,7 +244,7 @@ class EEEREADER(USBMS):
FORMATS = ['epub', 'fb2', 'txt', 'pdf']
VENDOR_ID = [0x0b05]
PRODUCT_ID = [0x178f]
PRODUCT_ID = [0x178f, 0x17a1]
BCD = [0x0319]
EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Book'

View File

@ -495,6 +495,10 @@ class MobiMLizer(object):
vtag.append(child)
return
if tag == 'blockquote':
old_mim = self.opts.mobi_ignore_margins
self.opts.mobi_ignore_margins = False
if text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS:
self.mobimlize_content(tag, text, bstate, istates)
for child in elem:
@ -510,6 +514,8 @@ class MobiMLizer(object):
if tail:
self.mobimlize_content(tag, tail, bstate, istates)
if tag == 'blockquote':
self.opts.mobi_ignore_margins = old_mim
if bstate.content and style['page-break-after'] in PAGE_BREAKS:
bstate.pbreak = True

View File

@ -10,7 +10,7 @@ from functools import partial
from PyQt4.Qt import Qt, QMenu, QModelIndex
from calibre.gui2 import error_dialog, config
from calibre.gui2 import error_dialog, config, Dispatcher
from calibre.gui2.dialogs.metadata_single import MetadataSingleDialog
from calibre.gui2.dialogs.metadata_bulk import MetadataBulkDialog
from calibre.gui2.dialogs.confirm_delete import confirm
@ -88,6 +88,16 @@ class EditMetadataAction(InterfaceAction):
_('No books selected'), show=True)
db = self.gui.library_view.model().db
ids = [db.id(row.row()) for row in rows]
from calibre.gui2.metadata.bulk_download2 import start_download
start_download(self.gui, ids,
Dispatcher(self.bulk_metadata_downloaded), identify, covers)
def bulk_metadata_downloaded(self, job):
    '''
    Handle a finished bulk metadata download ``job``.

    A failed job is reported through ``self.job_exception``; any other job
    is passed, together with ``self.gui``, to ``bulk_download2.proceed``.
    '''
    if not job.failed:
        from calibre.gui2.metadata.bulk_download2 import proceed
        proceed(self.gui, job)
        return
    self.job_exception(job, dialog_title=_('Failed to download metadata'))
def download_metadata_old(self, checked, covers=True, set_metadata=True,
set_social_metadata=None):

View File

@ -7,5 +7,280 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from functools import partial
from itertools import izip
from PyQt4.Qt import (QIcon, QDialog, QVBoxLayout, QTextBrowser, QSize,
QDialogButtonBox, QApplication, QTimer, QLabel, QProgressBar)
from calibre.gui2.dialogs.message_box import MessageBox
from calibre.gui2.threaded_jobs import ThreadedJob
from calibre.utils.icu import lower
from calibre.ebooks.metadata import authors_to_string
from calibre.gui2 import question_dialog, error_dialog
from calibre.ebooks.metadata.sources.identify import identify, msprefs
from calibre.ebooks.metadata.sources.covers import download_cover
from calibre.ebooks.metadata.book.base import Metadata
from calibre.customize.ui import metadata_plugins
from calibre.ptempfile import PersistentTemporaryFile
def show_config(gui, parent):
    '''
    Show the 'Metadata download' configuration widget from the 'Sharing'
    category, using ``parent`` as its parent and ``never_shutdown=True``.
    '''
    from calibre.gui2.preferences import show_config_widget
    widget_options = dict(parent=parent, gui=gui, never_shutdown=True)
    show_config_widget('Sharing', 'Metadata download', **widget_options)
def start_download(gui, ids, callback, identify, covers):
    '''
    Ask the user for confirmation and, if given, queue a background job
    that runs :func:`download`.

    :param gui: Used as the dialog parent; its ``current_db`` is passed to
        the job and its ``job_manager`` runs the job.
    :param ids: The books to process; ``len(ids)`` is shown to the user.
    :param callback: Handed to ``ThreadedJob`` as the job callback.
    :param identify: Forwarded to :func:`download` as ``do_identify``.
    :param covers: Forwarded to :func:`download` as ``covers``.

    Returns ``None``. Nothing is queued unless the dialog is accepted.
    '''
    book_count = len(ids)
    paragraphs = [
        _('The download of metadata for the <b>%d selected book(s)</b> will'
            ' run in the background. Proceed?') % book_count,
        _('You can monitor the progress of the download '
            'by clicking the rotating spinner in the bottom right '
            'corner.'),
        _('When the download completes you will be asked for'
            ' confirmation before calibre applies the downloaded metadata.'),
    ]
    question = ''.join('<p>' + text for text in paragraphs)

    dialog = MessageBox(MessageBox.QUESTION, _('Schedule download?'),
            question, show_copy_button=False, parent=gui)
    config_button = dialog.bb.addButton(_('Configure download'),
            dialog.bb.ActionRole)
    config_button.setIcon(QIcon(I('config.png')))
    config_button.clicked.connect(partial(show_config, gui, dialog))
    dialog.det_msg_toggle.setVisible(False)

    accepted = dialog.exec_() == dialog.Accepted
    # Disconnect the configure button once the dialog has been closed.
    config_button.clicked.disconnect()
    if not accepted:
        return

    description = _('Download metadata for %d books') % book_count
    job_args = (ids, gui.current_db, identify, covers)
    gui.job_manager.run_threaded_job(ThreadedJob('metadata bulk download',
        description, download, job_args, {}, callback))
class ViewLog(QDialog): # {{{
    '''
    Non-modal dialog that displays a chunk of HTML (the download log) and
    offers a button to copy its plain-text form to the clipboard.
    '''

    def __init__(self, html, parent=None):
        '''Build the dialog around ``html`` and show it immediately.'''
        QDialog.__init__(self, parent)

        layout = QVBoxLayout()
        self.l = layout
        self.setLayout(layout)

        # The supplied HTML is wrapped in a monospace <pre> block.
        browser = QTextBrowser(self)
        browser.setHtml('<pre style="font-family: monospace">%s</pre>' % html)
        self.tb = browser
        layout.addWidget(browser)

        buttons = QDialogButtonBox(QDialogButtonBox.Ok)
        self.bb = buttons
        buttons.accepted.connect(self.accept)
        buttons.rejected.connect(self.reject)

        copy_button = buttons.addButton(_('Copy to clipboard'),
                buttons.ActionRole)
        self.copy_button = copy_button
        copy_button.setIcon(QIcon(I('edit-copy.png')))
        copy_button.clicked.connect(self.copy_to_clipboard)
        layout.addWidget(buttons)

        self.setModal(False)
        self.resize(QSize(500, 400))
        self.setWindowTitle(_('Download log'))
        self.setWindowIcon(QIcon(I('debug.png')))
        self.show()

    def copy_to_clipboard(self):
        '''Copy the plain text of the displayed log to the clipboard.'''
        QApplication.clipboard().setText(self.tb.toPlainText())
# Module-level reference to the most recently opened ViewLog dialog
# (presumably kept so the non-modal dialog is not garbage collected -- confirm).
_vl = None
def view_log(job, parent):
    '''Open a :class:`ViewLog` showing ``job.html_details``.'''
    global _vl
    log_html = job.html_details
    _vl = ViewLog(log_html, parent)
# }}}
class ApplyDialog(QDialog):
    '''
    Progress dialog that applies downloaded metadata to the library.

    The constructor schedules the first :meth:`do_one` call and then runs
    the dialog with ``exec_()``, so constructing an instance does not return
    until the dialog has been accepted or rejected. Books are processed one
    per timer callback, which lets the progress bar and the Cancel button
    stay responsive.
    '''

    def __init__(self, id_map, gui):
        '''
        :param id_map: Mapping of book id to the metadata object to apply.
        :param gui: Parent widget; its ``current_db`` receives the metadata.
        '''
        QDialog.__init__(self, gui)

        self.l = l = QVBoxLayout()
        self.setLayout(l)
        l.addWidget(QLabel(_('Applying downloaded metadata to your library')))

        self.pb = QProgressBar(self)
        l.addWidget(self.pb)
        self.pb.setMinimum(0)
        self.pb.setMaximum(len(id_map))

        self.bb = QDialogButtonBox(QDialogButtonBox.Cancel)
        self.bb.rejected.connect(self.reject)
        self.bb.accepted.connect(self.accept)
        l.addWidget(self.bb)

        self.db = gui.current_db
        self.id_map = list(id_map.iteritems())
        self.current_idx = 0

        # List of (book id, formatted traceback) for books that failed
        self.failures = []
        self.canceled = False

        QTimer.singleShot(20, self.do_one)
        self.exec_()

    def do_one(self):
        '''Apply the metadata of the current book, then schedule the next.'''
        if self.canceled:
            return
        if self.current_idx >= len(self.id_map):
            # Nothing (left) to apply, e.g. an empty id_map. Finish cleanly
            # instead of raising IndexError inside the timer callback.
            self.finalize()
            return

        i, mi = self.id_map[self.current_idx]
        try:
            set_title = not mi.is_null('title')
            set_authors = not mi.is_null('authors')
            # NOTE(review): commit=False is used here and nothing in this
            # class commits afterwards -- confirm a commit happens elsewhere.
            self.db.set_metadata(i, mi, commit=False, set_title=set_title,
                    set_authors=set_authors)
        except Exception:
            # Record the failure and carry on with the remaining books.
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            import traceback
            self.failures.append((i, traceback.format_exc()))

        try:
            if mi.cover:
                os.remove(mi.cover)
        except Exception:
            # Best effort: failing to delete the cover file must not stop
            # the remaining books from being processed.
            pass

        self.pb.setValue(self.pb.value()+1)

        if self.current_idx >= len(self.id_map) - 1:
            self.finalize()
        else:
            self.current_idx += 1
            QTimer.singleShot(20, self.do_one)

    def reject(self):
        '''Mark the run as canceled so that pending callbacks do nothing.'''
        self.canceled = True
        QDialog.reject(self)

    def finalize(self):
        '''Report the collected failures (if any) and accept the dialog.'''
        if self.canceled:
            return
        if self.failures:
            msg = []
            for i, tb in self.failures:
                title = self.db.title(i, index_is_id=True)
                authors = self.db.authors(i, index_is_id=True)
                if authors:
                    authors = [x.replace('|', ',') for x in authors.split(',')]
                    title += ' - ' + authors_to_string(authors)
                msg.append(title+'\n\n'+tb+'\n'+('*'*80))

            error_dialog(self, _('Some failures'),
                _('Failed to apply updated metadata for some books'
                    ' in your library. Click "Show Details" to see '
                    'details.'), det_msg='\n\n'.join(msg), show=True)
        self.accept()
# Module-level reference to the most recently created ApplyDialog
_amd = None
def apply_metadata(job, gui, q, result):
    '''
    Slot for the ``finished`` signal of the dialog ``q`` created by
    :func:`proceed`.

    Disconnects the signals of ``q``. If ``result`` is ``q.Accepted``, the
    entries of ``job.result`` whose ids are not in ``failed_ids`` are applied
    via :class:`ApplyDialog`. When the library copy of a book was modified
    after the downloaded metadata's ``last_modified``, the user is asked to
    confirm first.
    '''
    global _amd
    q.vlb.clicked.disconnect()
    q.finished.disconnect()
    if result != q.Accepted:
        return

    downloaded, failed_ids = job.result
    id_map = dict((book_id, mi) for book_id, mi in downloaded.iteritems()
            if book_id not in failed_ids)
    if not id_map:
        return

    db = gui.current_db
    changed = set()
    for book_id, mi in id_map.iteritems():
        last_modified = db.metadata_last_modified(book_id, index_is_id=True)
        if last_modified > mi.last_modified:
            label = db.title(book_id, index_is_id=True)
            authors = db.authors(book_id, index_is_id=True)
            if authors:
                names = [a.replace('|', ',') for a in authors.split(',')]
                label += ' - ' + authors_to_string(names)
            changed.add(label)

    if changed:
        changed = sorted(changed, key=lower)
        warning = '<p>' + _('The metadata for some books in your library has'
                ' changed since you started the download. If you'
                ' proceed, some of those changes may be overwritten. '
                'Click "Show details" to see the list of changed books. '
                'Do you want to proceed?')
        proceed_anyway = question_dialog(gui, _('Some books changed'),
                warning, det_msg='\n'.join(changed))
        if not proceed_anyway:
            return

    _amd = ApplyDialog(id_map, gui)
def proceed(gui, job):
    '''
    Show a non-modal dialog reporting the finished download ``job`` and
    asking whether the downloaded metadata should be applied.

    ``job.result`` is expected to be the ``(id_map, failed_ids)`` pair
    returned by :func:`download`. When the dialog finishes,
    :func:`apply_metadata` is called with the dialog result.

    :param gui: Parent widget for the dialog; also passed on to
        :func:`apply_metadata`.
    :param job: The finished job; it is also passed to :func:`view_log`.
    '''
    id_map, failed_ids = job.result
    fmsg = det_msg = ''
    if failed_ids:
        # Start a new paragraph: this text is appended directly to the
        # question below and would otherwise run into its last sentence.
        fmsg = '<p>' + _('Could not download metadata for %d of the books. Click'
                ' "Show details" to see which books.')%len(failed_ids)
        det_msg = '\n'.join([id_map[i].title for i in failed_ids])
    msg = '<p>' + _('Finished downloading metadata for <b>%d book(s)</b>. '
        'Proceed with updating the metadata in your library?')%len(id_map)
    q = MessageBox(MessageBox.QUESTION, _('Download complete'),
            msg + fmsg, det_msg=det_msg, show_copy_button=bool(failed_ids),
            parent=gui)
    # The button is stored on the dialog so apply_metadata can disconnect it
    q.vlb = q.bb.addButton(_('View log'), q.bb.ActionRole)
    q.vlb.setIcon(QIcon(I('debug.png')))
    q.vlb.clicked.connect(partial(view_log, job, q))
    q.det_msg_toggle.setVisible(bool(failed_ids))
    q.setModal(False)
    q.show()
    q.finished.connect(partial(apply_metadata, job, gui, q))
def merge_result(oldmi, newmi):
    '''
    Prepare the downloaded metadata ``newmi`` for being applied over the
    existing metadata ``oldmi``.

    ``newmi`` is modified in place and also returned:

    * every field listed in ``msprefs['ignore_fields']`` is reset to the
      value it has on a blank ``Metadata`` object
    * every field touched by an identify plugin (except ``identifier:*``
      fields) whose non-null value equals the one in ``oldmi`` is reset in
      the same way

    :return: ``newmi``
    '''
    dummy = Metadata(_('Unknown'))
    for f in msprefs['ignore_fields']:
        setattr(newmi, f, getattr(dummy, f))

    fields = set()
    for plugin in metadata_plugins(['identify']):
        fields |= plugin.touched_fields

    for f in fields:
        if f.startswith('identifier:'):
            continue
        # Optimize so that set_metadata does not have to do extra work later
        if not newmi.is_null(f) and getattr(newmi, f) == getattr(oldmi, f):
            setattr(newmi, f, getattr(dummy, f))

    # download() uses the result of this call as the new metadata object,
    # so it has to be returned.
    return newmi
def download(ids, db, do_identify, covers,
        log=None, abort=None, notifications=None):
    '''
    Download metadata and/or covers for the books in ``ids``.

    :param ids: Iterable of book ids.
    :param db: Database object; ``db.get_metadata`` is called once per id.
    :param do_identify: If true, look up metadata with ``identify`` and
        merge the first result via :func:`merge_result`.
    :param covers: If true, download a cover and store it in a persistent
        temporary file whose path is set as ``mi.cover``.
    :param log: Callable log object that also has an ``error`` method.
    :param abort: Object with ``is_set()``; checked before every book.
    :param notifications: Object with ``put()``; receives
        ``(fraction_done, message)`` tuples.

    ``log``, ``abort`` and ``notifications`` default to ``None`` but are used
    unconditionally, so the caller has to supply them.

    :return: ``(ans, failed_ids)``. ``ans`` maps book id to metadata object
        for every processed book; ``failed_ids`` is the set of book ids for
        which no metadata could be found.
    '''
    import traceback

    ids = list(ids)
    metadata = [db.get_metadata(i, index_is_id=True, get_user_categories=False)
        for i in ids]
    total = len(ids)
    failed_ids = set()
    ans = {}
    count = 0
    for i, mi in zip(ids, metadata):
        if abort.is_set():
            log.error('Aborting...')
            break
        title, authors, identifiers = mi.title, mi.authors, mi.identifiers
        if do_identify:
            results = []
            try:
                results = identify(log, abort, title=title, authors=authors,
                        identifiers=identifiers)
            except Exception:
                # A failed lookup is handled as "no results" below, but the
                # reason is written to the log instead of being discarded.
                log.error(traceback.format_exc())
            if results:
                # merge_result() modifies the new metadata in place, so use
                # that object directly rather than relying on the call's
                # return value.
                newmi = results[0]
                merge_result(mi, newmi)
                mi = newmi
                identifiers = mi.identifiers
            else:
                log.error('Failed to download metadata for', title)
                # Record the book id: the consumers of failed_ids look
                # entries up in (and compare them with) the id-keyed map.
                failed_ids.add(i)
        if covers:
            cdata = download_cover(log, title=title, authors=authors,
                    identifiers=identifiers)
            if cdata:
                with PersistentTemporaryFile('.jpg', 'downloaded-cover-') as f:
                    f.write(cdata)
                mi.cover = f.name
        ans[i] = mi
        count += 1
        # float() keeps the fraction correct even where "/" on two ints
        # would truncate.
        notifications.put((float(count)/total,
            _('Downloaded %d of %d')%(count, total)))
    log('Download complete, with %d failures'%len(failed_ids))
    return (ans, failed_ids)

View File

@ -91,7 +91,8 @@ class ThreadedJob(BaseJob):
try:
self.callback(self)
except:
pass
import traceback
traceback.print_exc()
self._cleanup()
def _cleanup(self):
@ -103,6 +104,8 @@ class ThreadedJob(BaseJob):
# No need to keep references to these around anymore
self.func = self.args = self.kwargs = self.notifications = None
# We can't delete self.callback as it might be a Dispatcher object, and if
# it is garbage collected it won't work
def kill(self):
if self.start_time is None:

View File

@ -1781,7 +1781,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
path_changed = True
if set_authors:
if not mi.authors:
mi.authors = [_('Unknown')]
mi.authors = [_('Unknown')]
authors = []
for a in mi.authors:
authors += string_to_authors(a)