Merge from trunk

This commit is contained in:
Charles Haley 2010-09-30 09:00:50 +01:00
commit fa9c23031e
11 changed files with 341 additions and 7202 deletions

File diff suppressed because it is too large Load Diff

Before

Width:  |  Height:  |  Size: 209 KiB

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.6 KiB

After

Width:  |  Height:  |  Size: 13 KiB

View File

@ -12,15 +12,18 @@ class PeterSchiff(BasicNewsRecipe):
description = 'Economic commentary' description = 'Economic commentary'
publisher = 'Euro Pacific capital' publisher = 'Euro Pacific capital'
category = 'news, politics, economy, USA' category = 'news, politics, economy, USA'
oldest_article = 15 oldest_article = 25
max_articles_per_feed = 200 max_articles_per_feed = 200
no_stylesheets = True no_stylesheets = True
encoding = 'cp1252' encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
language = 'en' language = 'en'
country = 'US'
remove_empty_feeds = True remove_empty_feeds = True
extra_css = ' body{font-family: Verdana,Times,serif } h1{text-align: left} img{margin-bottom: 0.4em} ' extra_css = """
body{font-family: Verdana,Times,serif }
.field-field-commentary-writer-name{font-weight: bold}
.field-items{display: inline}
"""
conversion_options = { conversion_options = {
'comment' : description 'comment' : description
@ -30,7 +33,15 @@ class PeterSchiff(BasicNewsRecipe):
, 'linearize_tables' : True , 'linearize_tables' : True
} }
keep_only_tags = [dict(name='tr',attrs={'style':'vertical-align: top;'})] keep_only_tags = [
dict(name='h2',attrs={'id':'page-title'})
,dict(name='div',attrs={'class':'node'})
]
remove_tags = [
dict(name=['meta','link','base','iframe','embed'])
,dict(attrs={'id':'text-zoom'})
]
remove_attributes=['track','linktype','lang']
feeds = [(u'Articles', u'http://feeds.feedburner.com/PeterSchiffsEconomicCommentary')] feeds = [(u'Articles', u'http://feeds.feedburner.com/PeterSchiffsEconomicCommentary')]

View File

@ -0,0 +1,55 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
rmf24.pl
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class RMF24_opinie(BasicNewsRecipe):
    """Calibre news recipe: blogs, interviews and commentary from rmf24.pl."""

    title = u'Rmf24.pl - Opinie'
    description = u'Blogi, wywiady i komentarze ze strony rmf24.pl'
    language = 'pl'
    __author__ = u'Tomasz D\u0142ugosz'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    feeds = [
        (u'Blogi', u'http://www.rmf24.pl/opinie/blogi/feed'),
        (u'Kontrwywiad', u'http://www.rmf24.pl/opinie/wywiady/kontrwywiad/feed'),
        (u'Przes\u0142uchanie', u'http://www.rmf24.pl/opinie/wywiady/przesluchanie/feed'),
        (u'Komentarze', u'http://www.rmf24.pl/opinie/komentarze/feed'),
    ]

    # Only the article container divs survive; everything else is dropped.
    keep_only_tags = [
        dict(name='div', attrs={'class': cls})
        for cls in (
            'box articleSingle print',
            'box articleSingle print singleCommentary',
            'box articleSingle print blogSingleEntry',
        )
    ]

    # Navigation/ad chrome stripped from inside the kept containers.
    remove_tags = [
        dict(name='div', attrs={'class': cls})
        for cls in ('toTop', 'category', 'REMOVE', 'embed embedAd')
    ]

    extra_css = '''
    h1 { font-size: 1.2em; }
    '''

    # thanks to Kovid Goyal
    def get_article_url(self, article):
        """Return the feed entry's link, or None to skip audio-only entries."""
        url = article.get('link')
        if 'audio' in url:
            return None
        return url

    # Pre-parse HTML rewrites: drop the photo heading, relabel embed wrappers
    # so remove_tags catches them, and strip the Facebook promo footer.
    preprocess_regexps = [
        (re.compile(pattern, re.IGNORECASE | re.DOTALL), handler)
        for pattern, handler in [
            (r'<h2>Zdj.cie</h2>', lambda match: ''),
            (r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">', lambda match: 'REMOVE">'),
            (r'<a href="http://www.facebook.com/pages/RMF24pl/.*?>RMF24.pl</a> on Facebook</div>', lambda match: '</div>')
        ]
    ]

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = 'calibre' __appname__ = 'calibre'
__version__ = '0.7.905' __version__ = '0.7.906'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>" __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re import re

View File

@ -47,29 +47,43 @@ class ISBNDBMetadata(Metadata):
def __init__(self, book): def __init__(self, book):
Metadata.__init__(self, None, []) Metadata.__init__(self, None, [])
def tostring(e):
if not hasattr(e, 'string'):
return None
ans = e.string
if ans is not None:
ans = unicode(ans).strip()
if not ans:
ans = None
return ans
self.isbn = unicode(book.get('isbn13', book.get('isbn'))) self.isbn = unicode(book.get('isbn13', book.get('isbn')))
self.title = unicode(book.find('titlelong').string) self.title = tostring(book.find('titlelong'))
if not self.title: if not self.title:
self.title = unicode(book.find('title').string) self.title = tostring(book.find('title'))
if not self.title:
self.title = _('Unknown')
self.title = unicode(self.title).strip() self.title = unicode(self.title).strip()
au = unicode(book.find('authorstext').string).strip()
temp = au.split(',')
self.authors = [] self.authors = []
for au in temp: au = tostring(book.find('authorstext'))
if not au: continue if au:
self.authors.extend([a.strip() for a in au.split('&amp;')]) au = au.strip()
temp = au.split(',')
for au in temp:
if not au: continue
self.authors.extend([a.strip() for a in au.split('&amp;')])
try: try:
self.author_sort = book.find('authors').find('person').string self.author_sort = tostring(book.find('authors').find('person'))
if self.authors and self.author_sort == self.authors[0]: if self.authors and self.author_sort == self.authors[0]:
self.author_sort = None self.author_sort = None
except: except:
pass pass
self.publisher = unicode(book.find('publishertext').string) self.publisher = tostring(book.find('publishertext'))
summ = book.find('summary') summ = tostring(book.find('summary'))
if summ and hasattr(summ, 'string') and summ.string: if summ:
self.comments = 'SUMMARY:\n'+unicode(summ.string) self.comments = 'SUMMARY:\n'+summ.string
def build_isbn(base_url, opts): def build_isbn(base_url, opts):

View File

@ -12,6 +12,7 @@ import mechanize
from calibre import browser, prints from calibre import browser, prints
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import strip_encoding_declarations
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false' OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
@ -110,6 +111,8 @@ def get_social_metadata(title, authors, publisher, isbn, username=None,
+isbn).read() +isbn).read()
if not raw: if not raw:
return mi return mi
raw = raw.decode('utf-8', 'replace')
raw = strip_encoding_declarations(raw)
root = html.fromstring(raw) root = html.fromstring(raw)
h1 = root.xpath('//div[@class="headsummary"]/h1') h1 = root.xpath('//div[@class="headsummary"]/h1')
if h1 and not mi.title: if h1 and not mi.title:

View File

@ -19,6 +19,7 @@ from calibre import prints
from calibre.constants import DEBUG from calibre.constants import DEBUG
class Worker(Thread): class Worker(Thread):
'Cover downloader'
def __init__(self): def __init__(self):
Thread.__init__(self) Thread.__init__(self)
@ -88,7 +89,7 @@ class DownloadMetadata(Thread):
if mi.isbn: if mi.isbn:
args['isbn'] = mi.isbn args['isbn'] = mi.isbn
else: else:
if not mi.title or mi.title == _('Unknown'): if mi.is_null('title'):
self.failures[id] = \ self.failures[id] = \
(str(id), _('Book has neither title nor ISBN')) (str(id), _('Book has neither title nor ISBN'))
continue continue

View File

@ -579,6 +579,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, # {{{
except KeyboardInterrupt: except KeyboardInterrupt:
pass pass
time.sleep(2) time.sleep(2)
if mb is not None:
mb.flush()
self.hide_windows() self.hide_windows()
return True return True

View File

@ -40,12 +40,14 @@ class MetadataBackup(Thread): # {{{
self.get_metadata_for_dump = FunctionDispatcher(db.get_metadata_for_dump) self.get_metadata_for_dump = FunctionDispatcher(db.get_metadata_for_dump)
self.clear_dirtied = FunctionDispatcher(db.clear_dirtied) self.clear_dirtied = FunctionDispatcher(db.clear_dirtied)
self.set_dirtied = FunctionDispatcher(db.dirtied) self.set_dirtied = FunctionDispatcher(db.dirtied)
self.in_limbo = None
def stop(self): def stop(self):
self.keep_running = False self.keep_running = False
def run(self): def run(self):
while self.keep_running: while self.keep_running:
self.in_limbo = None
try: try:
time.sleep(0.5) # Limit to two per second time.sleep(0.5) # Limit to two per second
id_ = self.db.dirtied_queue.get(True, 1.45) id_ = self.db.dirtied_queue.get(True, 1.45)
@ -72,6 +74,7 @@ class MetadataBackup(Thread): # {{{
if mi is None: if mi is None:
continue continue
self.in_limbo = id_
# Give the GUI thread a chance to do something. Python threads don't # Give the GUI thread a chance to do something. Python threads don't
# have priorities, so this thread would naturally keep the processor # have priorities, so this thread would naturally keep the processor
@ -98,6 +101,15 @@ class MetadataBackup(Thread): # {{{
prints('Failed to write backup metadata for id:', id_, prints('Failed to write backup metadata for id:', id_,
'again, giving up') 'again, giving up')
continue continue
self.in_limbo = None
def flush(self):
'Used during shutdown to ensure that a dirtied book is not missed'
if self.in_limbo is not None:
try:
self.db.dirtied([self.in_limbo])
except:
traceback.print_exc()
def write(self, path, raw): def write(self, path, raw):
with open(path, 'wb') as f: with open(path, 'wb') as f:

View File

@ -348,10 +348,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
setattr(self, 'title_sort', functools.partial(self.get_property, setattr(self, 'title_sort', functools.partial(self.get_property,
loc=self.FIELD_MAP['sort'])) loc=self.FIELD_MAP['sort']))
self.dirtied_cache = set()
d = self.conn.get('SELECT book FROM metadata_dirtied', all=True) d = self.conn.get('SELECT book FROM metadata_dirtied', all=True)
for x in d: for x in d:
self.dirtied_queue.put(x[0]) self.dirtied_queue.put(x[0])
self.dirtied_cache = set([x[0] for x in d])
self.refresh_ondevice = functools.partial(self.data.refresh_ondevice, self) self.refresh_ondevice = functools.partial(self.data.refresh_ondevice, self)
self.refresh() self.refresh()
@ -616,9 +616,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.conn.commit() self.conn.commit()
def dirtied(self, book_ids, commit=True): def dirtied(self, book_ids, commit=True):
for book in book_ids: for book in frozenset(book_ids) - self.dirtied_cache:
if book in self.dirtied_cache:
continue
try: try:
self.conn.execute( self.conn.execute(
'INSERT INTO metadata_dirtied (book) VALUES (?)', 'INSERT INTO metadata_dirtied (book) VALUES (?)',