Merge from trunk

This commit is contained in:
Charles Haley 2010-09-30 09:00:50 +01:00
commit fa9c23031e
11 changed files with 341 additions and 7202 deletions

File diff suppressed because it is too large. [Load Diff]

Before

Width:  |  Height:  |  Size: 209 KiB

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.6 KiB

After

Width:  |  Height:  |  Size: 13 KiB

View File

@@ -12,15 +12,18 @@ class PeterSchiff(BasicNewsRecipe):
description = 'Economic commentary'
publisher = 'Euro Pacific capital'
category = 'news, politics, economy, USA'
oldest_article = 15
oldest_article = 25
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'cp1252'
encoding = 'utf8'
use_embedded_content = False
language = 'en'
country = 'US'
remove_empty_feeds = True
extra_css = ' body{font-family: Verdana,Times,serif } h1{text-align: left} img{margin-bottom: 0.4em} '
extra_css = """
body{font-family: Verdana,Times,serif }
.field-field-commentary-writer-name{font-weight: bold}
.field-items{display: inline}
"""
conversion_options = {
'comment' : description
@@ -30,7 +33,15 @@ class PeterSchiff(BasicNewsRecipe):
, 'linearize_tables' : True
}
keep_only_tags = [dict(name='tr',attrs={'style':'vertical-align: top;'})]
keep_only_tags = [
dict(name='h2',attrs={'id':'page-title'})
,dict(name='div',attrs={'class':'node'})
]
remove_tags = [
dict(name=['meta','link','base','iframe','embed'])
,dict(attrs={'id':'text-zoom'})
]
remove_attributes=['track','linktype','lang']
feeds = [(u'Articles', u'http://feeds.feedburner.com/PeterSchiffsEconomicCommentary')]

View File

@@ -0,0 +1,55 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
rmf24.pl
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class RMF24_opinie(BasicNewsRecipe):
    '''Recipe for the opinion section of rmf24.pl: blogs, interviews and
    commentary feeds, with audio-only entries skipped and page furniture
    stripped from the article bodies.'''

    title = u'Rmf24.pl - Opinie'
    description = u'Blogi, wywiady i komentarze ze strony rmf24.pl'
    language = 'pl'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = u'Tomasz D\u0142ugosz'
    no_stylesheets = True
    remove_javascript = True

    feeds = [
        (u'Blogi', u'http://www.rmf24.pl/opinie/blogi/feed'),
        (u'Kontrwywiad', u'http://www.rmf24.pl/opinie/wywiady/kontrwywiad/feed'),
        (u'Przes\u0142uchanie', u'http://www.rmf24.pl/opinie/wywiady/przesluchanie/feed'),
        (u'Komentarze', u'http://www.rmf24.pl/opinie/komentarze/feed')]

    # Every article variant lives in one of these three container divs.
    keep_only_tags = [
        dict(name='div', attrs={'class': klass})
        for klass in ('box articleSingle print',
                      'box articleSingle print singleCommentary',
                      'box articleSingle print blogSingleEntry')]

    # Page furniture removed from the kept containers; 'REMOVE' is the
    # marker class injected by preprocess_regexps below.
    remove_tags = [
        dict(name='div', attrs={'class': klass})
        for klass in ('toTop', 'category', 'REMOVE', 'embed embedAd')]

    extra_css = '''
        h1 { font-size: 1.2em; }
    '''

    # thanks to Kovid Goyal
    def get_article_url(self, article):
        # Returning None drops the entry, so audio-only items are skipped.
        link = article.get('link')
        return None if 'audio' in link else link

    preprocess_regexps = [
        (re.compile(pattern, re.IGNORECASE | re.DOTALL), substitution)
        for pattern, substitution in [
            # Drop the "Zdjecie" (photo) heading.
            (r'<h2>Zdj.cie</h2>', lambda match: ''),
            # Re-tag embedded media containers so remove_tags can delete them.
            (r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">',
             lambda match: 'REMOVE">'),
            # Strip the trailing Facebook promo link.
            (r'<a href="http://www.facebook.com/pages/RMF24pl/.*?>RMF24.pl</a> on Facebook</div>',
             lambda match: '</div>')
        ]
    ]

View File

@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.7.905'
__version__ = '0.7.906'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re

View File

@@ -47,29 +47,43 @@ class ISBNDBMetadata(Metadata):
def __init__(self, book):
    '''Populate metadata fields from one ISBNDB book record.

    :param book: a BeautifulSoup element for a single book entry as
                 returned by the isbndb.com XML API.
    '''
    Metadata.__init__(self, None, [])

    def tostring(e):
        # Robust text extraction: return the element's text as a stripped
        # unicode string, or None when the element is missing, has no
        # usable .string, or contains only whitespace.
        if not hasattr(e, 'string'):
            return None
        ans = e.string
        if ans is not None:
            ans = unicode(ans).strip()
        if not ans:
            ans = None
        return ans

    # Prefer the 13-digit ISBN, falling back to the 10-digit one.
    self.isbn = unicode(book.get('isbn13', book.get('isbn')))

    self.title = tostring(book.find('titlelong'))
    if not self.title:
        self.title = tostring(book.find('title'))
        if not self.title:
            self.title = _('Unknown')

    self.authors = []
    au = tostring(book.find('authorstext'))
    if au:
        au = au.strip()
        # Authors are comma-separated; each entry may itself contain
        # '&amp;'-separated names.
        temp = au.split(',')
        for au in temp:
            if not au: continue
            self.authors.extend([a.strip() for a in au.split('&amp;')])

    try:
        self.author_sort = tostring(book.find('authors').find('person'))
        if self.authors and self.author_sort == self.authors[0]:
            # A sort value identical to the first author adds nothing.
            self.author_sort = None
    except:
        # <authors>/<person> may be absent entirely; best effort only.
        pass

    self.publisher = tostring(book.find('publishertext'))

    summ = tostring(book.find('summary'))
    if summ:
        # BUG FIX: tostring() already returns a plain unicode string, so
        # the previous 'summ.string' raised AttributeError whenever a
        # summary was present. Concatenate the string itself.
        self.comments = 'SUMMARY:\n' + summ
def build_isbn(base_url, opts):

View File

@@ -12,6 +12,7 @@ import mechanize
from calibre import browser, prints
from calibre.utils.config import OptionParser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import strip_encoding_declarations
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
@@ -110,6 +111,8 @@ def get_social_metadata(title, authors, publisher, isbn, username=None,
+isbn).read()
if not raw:
return mi
raw = raw.decode('utf-8', 'replace')
raw = strip_encoding_declarations(raw)
root = html.fromstring(raw)
h1 = root.xpath('//div[@class="headsummary"]/h1')
if h1 and not mi.title:

View File

@@ -19,6 +19,7 @@ from calibre import prints
from calibre.constants import DEBUG
class Worker(Thread):
'Cover downloader'
def __init__(self):
Thread.__init__(self)
@@ -88,7 +89,7 @@ class DownloadMetadata(Thread):
if mi.isbn:
args['isbn'] = mi.isbn
else:
if not mi.title or mi.title == _('Unknown'):
if mi.is_null('title'):
self.failures[id] = \
(str(id), _('Book has neither title nor ISBN'))
continue

View File

@@ -579,6 +579,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, # {{{
except KeyboardInterrupt:
pass
time.sleep(2)
if mb is not None:
mb.flush()
self.hide_windows()
return True

View File

@@ -40,12 +40,14 @@ class MetadataBackup(Thread): # {{{
self.get_metadata_for_dump = FunctionDispatcher(db.get_metadata_for_dump)
self.clear_dirtied = FunctionDispatcher(db.clear_dirtied)
self.set_dirtied = FunctionDispatcher(db.dirtied)
self.in_limbo = None
def stop(self):
self.keep_running = False
def run(self):
while self.keep_running:
self.in_limbo = None
try:
time.sleep(0.5) # Limit to two per second
id_ = self.db.dirtied_queue.get(True, 1.45)
@@ -72,6 +74,7 @@ class MetadataBackup(Thread): # {{{
if mi is None:
continue
self.in_limbo = id_
# Give the GUI thread a chance to do something. Python threads don't
# have priorities, so this thread would naturally keep the processor
@@ -98,6 +101,15 @@ class MetadataBackup(Thread): # {{{
prints('Failed to write backup metadata for id:', id_,
'again, giving up')
continue
self.in_limbo = None
def flush(self):
    '''Used during shutdown to ensure that a dirtied book is not missed.

    If the backup thread was stopped while a record was being written out
    (self.in_limbo holds that book id), re-register the book as dirty in
    the database so its metadata backup is retried on the next run.
    '''
    if self.in_limbo is not None:
        try:
            self.db.dirtied([self.in_limbo])
        except:
            # Best effort during shutdown: report the failure but never
            # block application exit.
            traceback.print_exc()
def write(self, path, raw):
with open(path, 'wb') as f:

View File

@@ -348,10 +348,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
setattr(self, 'title_sort', functools.partial(self.get_property,
loc=self.FIELD_MAP['sort']))
self.dirtied_cache = set()
d = self.conn.get('SELECT book FROM metadata_dirtied', all=True)
for x in d:
self.dirtied_queue.put(x[0])
self.dirtied_cache = set([x[0] for x in d])
self.refresh_ondevice = functools.partial(self.data.refresh_ondevice, self)
self.refresh()
@@ -616,9 +616,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.conn.commit()
def dirtied(self, book_ids, commit=True):
for book in book_ids:
if book in self.dirtied_cache:
continue
for book in frozenset(book_ids) - self.dirtied_cache:
try:
self.conn.execute(
'INSERT INTO metadata_dirtied (book) VALUES (?)',