diff --git a/imgsrc/plugboard.svg b/imgsrc/plugboard.svg
index 9aa0996193..b8451a6b3a 100644
--- a/imgsrc/plugboard.svg
+++ b/imgsrc/plugboard.svg
@@ -2,8 +2,6 @@
diff --git a/resources/images/plugboard.png b/resources/images/plugboard.png
index 345fa6440e..db9e8e89f0 100644
Binary files a/resources/images/plugboard.png and b/resources/images/plugboard.png differ
diff --git a/resources/recipes/peterschiff.recipe b/resources/recipes/peterschiff.recipe
index 882dabc43b..842da7f733 100644
--- a/resources/recipes/peterschiff.recipe
+++ b/resources/recipes/peterschiff.recipe
@@ -12,15 +12,18 @@ class PeterSchiff(BasicNewsRecipe):
description = 'Economic commentary'
publisher = 'Euro Pacific capital'
category = 'news, politics, economy, USA'
- oldest_article = 15
+ oldest_article = 25
max_articles_per_feed = 200
no_stylesheets = True
- encoding = 'cp1252'
+ encoding = 'utf8'
use_embedded_content = False
language = 'en'
- country = 'US'
remove_empty_feeds = True
- extra_css = ' body{font-family: Verdana,Times,serif } h1{text-align: left} img{margin-bottom: 0.4em} '
+ extra_css = """
+ body{font-family: Verdana,Times,serif }
+ .field-field-commentary-writer-name{font-weight: bold}
+ .field-items{display: inline}
+ """
conversion_options = {
'comment' : description
@@ -30,7 +33,15 @@ class PeterSchiff(BasicNewsRecipe):
, 'linearize_tables' : True
}
- keep_only_tags = [dict(name='tr',attrs={'style':'vertical-align: top;'})]
+ keep_only_tags = [
+ dict(name='h2',attrs={'id':'page-title'})
+ ,dict(name='div',attrs={'class':'node'})
+ ]
+ remove_tags = [
+ dict(name=['meta','link','base','iframe','embed'])
+ ,dict(attrs={'id':'text-zoom'})
+ ]
+ remove_attributes=['track','linktype','lang']
feeds = [(u'Articles', u'http://feeds.feedburner.com/PeterSchiffsEconomicCommentary')]
diff --git a/resources/recipes/rmf24_opinie.recipe b/resources/recipes/rmf24_opinie.recipe
new file mode 100644
index 0000000000..4d2f447dbe
--- /dev/null
+++ b/resources/recipes/rmf24_opinie.recipe
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2010, Tomasz Dlugosz '
+'''
+rmf24.pl
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class RMF24_opinie(BasicNewsRecipe):
+ title = u'Rmf24.pl - Opinie'
+ description = u'Blogi, wywiady i komentarze ze strony rmf24.pl'
+ language = 'pl'
+ oldest_article = 7
+ max_articles_per_feed = 100
+ __author__ = u'Tomasz D\u0142ugosz'
+ no_stylesheets = True
+ remove_javascript = True
+
+ feeds = [(u'Blogi', u'http://www.rmf24.pl/opinie/blogi/feed'),
+ (u'Kontrwywiad', u'http://www.rmf24.pl/opinie/wywiady/kontrwywiad/feed'),
+ (u'Przes\u0142uchanie', u'http://www.rmf24.pl/opinie/wywiady/przesluchanie/feed'),
+ (u'Komentarze', u'http://www.rmf24.pl/opinie/komentarze/feed')]
+
+ keep_only_tags = [
+ dict(name='div', attrs={'class':'box articleSingle print'}),
+ dict(name='div', attrs={'class':'box articleSingle print singleCommentary'}),
+ dict(name='div', attrs={'class':'box articleSingle print blogSingleEntry'})]
+
+ remove_tags = [
+ dict(name='div', attrs={'class':'toTop'}),
+ dict(name='div', attrs={'class':'category'}),
+ dict(name='div', attrs={'class':'REMOVE'}),
+ dict(name='div', attrs={'class':'embed embedAd'})]
+
+ extra_css = '''
+ h1 { font-size: 1.2em; }
+ '''
+
+ # thanks to Kovid Goyal
+ def get_article_url(self, article):
+ link = article.get('link')  # may be None when the feed entry carries no link
+ if link and 'audio' not in link:  # no link, or 'audio' in the URL: fall through and return None
+ return link
+
+ preprocess_regexps = [
+ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
+ [
+ (r'Zdj.cie
', lambda match: ''),
+ (r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">', lambda match: 'REMOVE">'),
+ (r'"
import re
diff --git a/src/calibre/ebooks/metadata/isbndb.py b/src/calibre/ebooks/metadata/isbndb.py
index 6c321bf9d3..6416dcdc39 100644
--- a/src/calibre/ebooks/metadata/isbndb.py
+++ b/src/calibre/ebooks/metadata/isbndb.py
@@ -47,29 +47,43 @@ class ISBNDBMetadata(Metadata):
def __init__(self, book):
Metadata.__init__(self, None, [])
+ def tostring(e):  # stripped text of a parsed element; None when the element or its text is missing/empty
+ if not hasattr(e, 'string'):
+ return None
+ ans = e.string
+ if ans is not None:
+ ans = unicode(ans).strip()
+ if not ans:
+ ans = None
+ return ans
+
self.isbn = unicode(book.get('isbn13', book.get('isbn')))
- self.title = unicode(book.find('titlelong').string)
+ self.title = tostring(book.find('titlelong'))
if not self.title:
- self.title = unicode(book.find('title').string)
+ self.title = tostring(book.find('title'))
+ if not self.title:
+ self.title = _('Unknown')
self.title = unicode(self.title).strip()
- au = unicode(book.find('authorstext').string).strip()
- temp = au.split(',')
self.authors = []
- for au in temp:
- if not au: continue
- self.authors.extend([a.strip() for a in au.split('&')])
+ au = tostring(book.find('authorstext'))
+ if au:
+ au = au.strip()
+ temp = au.split(',')
+ for au in temp:
+ if not au: continue
+ self.authors.extend([a.strip() for a in au.split('&')])
try:
- self.author_sort = book.find('authors').find('person').string
+ self.author_sort = tostring(book.find('authors').find('person'))
if self.authors and self.author_sort == self.authors[0]:
self.author_sort = None
except:
pass
- self.publisher = unicode(book.find('publishertext').string)
+ self.publisher = tostring(book.find('publishertext'))
- summ = book.find('summary')
- if summ and hasattr(summ, 'string') and summ.string:
- self.comments = 'SUMMARY:\n'+unicode(summ.string)
+ summ = tostring(book.find('summary'))
+ if summ:
+ self.comments = 'SUMMARY:\n'+summ  # summ is already the text returned by tostring(); it has no .string attribute
def build_isbn(base_url, opts):
diff --git a/src/calibre/ebooks/metadata/library_thing.py b/src/calibre/ebooks/metadata/library_thing.py
index 669d9478a3..7f312da1d9 100644
--- a/src/calibre/ebooks/metadata/library_thing.py
+++ b/src/calibre/ebooks/metadata/library_thing.py
@@ -12,6 +12,7 @@ import mechanize
from calibre import browser, prints
from calibre.utils.config import OptionParser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.ebooks.chardet import strip_encoding_declarations
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
@@ -110,6 +111,8 @@ def get_social_metadata(title, authors, publisher, isbn, username=None,
+isbn).read()
if not raw:
return mi
+ raw = raw.decode('utf-8', 'replace')
+ raw = strip_encoding_declarations(raw)
root = html.fromstring(raw)
h1 = root.xpath('//div[@class="headsummary"]/h1')
if h1 and not mi.title:
diff --git a/src/calibre/gui2/metadata.py b/src/calibre/gui2/metadata.py
index c71f82c654..a36571fc91 100644
--- a/src/calibre/gui2/metadata.py
+++ b/src/calibre/gui2/metadata.py
@@ -19,6 +19,7 @@ from calibre import prints
from calibre.constants import DEBUG
class Worker(Thread):
+ 'Cover downloader'
def __init__(self):
Thread.__init__(self)
@@ -88,7 +89,7 @@ class DownloadMetadata(Thread):
if mi.isbn:
args['isbn'] = mi.isbn
else:
- if not mi.title or mi.title == _('Unknown'):
+ if mi.is_null('title'):
self.failures[id] = \
(str(id), _('Book has neither title nor ISBN'))
continue
diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py
index cc2975e7a7..937b23b113 100644
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@@ -579,6 +579,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, # {{{
except KeyboardInterrupt:
pass
time.sleep(2)
+ if mb is not None:
+ mb.flush()
self.hide_windows()
return True
diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py
index 0c3904532e..179262dedc 100644
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@@ -40,12 +40,14 @@ class MetadataBackup(Thread): # {{{
self.get_metadata_for_dump = FunctionDispatcher(db.get_metadata_for_dump)
self.clear_dirtied = FunctionDispatcher(db.clear_dirtied)
self.set_dirtied = FunctionDispatcher(db.dirtied)
+ self.in_limbo = None
def stop(self):
self.keep_running = False
def run(self):
while self.keep_running:
+ self.in_limbo = None
try:
time.sleep(0.5) # Limit to two per second
id_ = self.db.dirtied_queue.get(True, 1.45)
@@ -72,6 +74,7 @@ class MetadataBackup(Thread): # {{{
if mi is None:
continue
+ self.in_limbo = id_
# Give the GUI thread a chance to do something. Python threads don't
# have priorities, so this thread would naturally keep the processor
@@ -98,6 +101,15 @@ class MetadataBackup(Thread): # {{{
prints('Failed to write backup metadata for id:', id_,
'again, giving up')
continue
+ self.in_limbo = None
+
+ def flush(self):
+ 'Used during shutdown to ensure that a dirtied book is not missed: re-marks the book held in in_limbo (if any) as dirtied; errors are printed, not raised'
+ id_ = self.in_limbo  # read once: run() resets in_limbo to None from the worker thread
+ if id_ is not None:
+ try:
+ self.db.dirtied([id_])
+ except:
+ traceback.print_exc()
def write(self, path, raw):
with open(path, 'wb') as f:
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index ca8824ae1c..9d9ebf64c5 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -348,10 +348,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
setattr(self, 'title_sort', functools.partial(self.get_property,
loc=self.FIELD_MAP['sort']))
- self.dirtied_cache = set()
d = self.conn.get('SELECT book FROM metadata_dirtied', all=True)
for x in d:
self.dirtied_queue.put(x[0])
+ self.dirtied_cache = set([x[0] for x in d])
self.refresh_ondevice = functools.partial(self.data.refresh_ondevice, self)
self.refresh()
@@ -616,9 +616,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.conn.commit()
def dirtied(self, book_ids, commit=True):
- for book in book_ids:
- if book in self.dirtied_cache:
- continue
+ for book in frozenset(book_ids) - self.dirtied_cache:
try:
self.conn.execute(
'INSERT INTO metadata_dirtied (book) VALUES (?)',