mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
fa9c23031e
7401
imgsrc/plugboard.svg
7401
imgsrc/plugboard.svg
File diff suppressed because it is too large
Load Diff
Before Width: | Height: | Size: 209 KiB After Width: | Height: | Size: 14 KiB |
Binary file not shown.
Before Width: | Height: | Size: 3.6 KiB After Width: | Height: | Size: 13 KiB |
@ -12,15 +12,18 @@ class PeterSchiff(BasicNewsRecipe):
|
||||
description = 'Economic commentary'
|
||||
publisher = 'Euro Pacific capital'
|
||||
category = 'news, politics, economy, USA'
|
||||
oldest_article = 15
|
||||
oldest_article = 25
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
language = 'en'
|
||||
country = 'US'
|
||||
remove_empty_feeds = True
|
||||
extra_css = ' body{font-family: Verdana,Times,serif } h1{text-align: left} img{margin-bottom: 0.4em} '
|
||||
extra_css = """
|
||||
body{font-family: Verdana,Times,serif }
|
||||
.field-field-commentary-writer-name{font-weight: bold}
|
||||
.field-items{display: inline}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
@ -30,7 +33,15 @@ class PeterSchiff(BasicNewsRecipe):
|
||||
, 'linearize_tables' : True
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='tr',attrs={'style':'vertical-align: top;'})]
|
||||
keep_only_tags = [
|
||||
dict(name='h2',attrs={'id':'page-title'})
|
||||
,dict(name='div',attrs={'class':'node'})
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name=['meta','link','base','iframe','embed'])
|
||||
,dict(attrs={'id':'text-zoom'})
|
||||
]
|
||||
remove_attributes=['track','linktype','lang']
|
||||
|
||||
|
||||
feeds = [(u'Articles', u'http://feeds.feedburner.com/PeterSchiffsEconomicCommentary')]
|
||||
|
55
resources/recipes/rmf24_opinie.recipe
Normal file
55
resources/recipes/rmf24_opinie.recipe
Normal file
@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
|
||||
'''
|
||||
rmf24.pl
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class RMF24_opinie(BasicNewsRecipe):
    '''
    Recipe for the opinion section (blogs, interviews and commentaries)
    of rmf24.pl.

    Articles are collected from four RSS feeds. Items whose link contains
    'audio' are not given an article URL (see get_article_url).
    '''

    title = u'Rmf24.pl - Opinie'
    description = u'Blogi, wywiady i komentarze ze strony rmf24.pl'
    language = 'pl'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = u'Tomasz D\u0142ugosz'
    no_stylesheets = True
    remove_javascript = True

    feeds = [(u'Blogi', u'http://www.rmf24.pl/opinie/blogi/feed'),
             (u'Kontrwywiad', u'http://www.rmf24.pl/opinie/wywiady/kontrwywiad/feed'),
             (u'Przes\u0142uchanie', u'http://www.rmf24.pl/opinie/wywiady/przesluchanie/feed'),
             (u'Komentarze', u'http://www.rmf24.pl/opinie/komentarze/feed')]

    # The article body lives in one of these <div> containers, depending on
    # whether the page is a plain article, a commentary or a blog entry.
    keep_only_tags = [
        dict(name='div', attrs={'class':'box articleSingle print'}),
        dict(name='div', attrs={'class':'box articleSingle print singleCommentary'}),
        dict(name='div', attrs={'class':'box articleSingle print blogSingleEntry'})]

    # 'REMOVE' is not a class used by the site: preprocess_regexps below
    # rewrites the class attribute of unwanted embeds to 'REMOVE' so that this
    # entry can match them (presumably tag removal runs after the regexps --
    # confirm against BasicNewsRecipe).
    remove_tags = [
        dict(name='div', attrs={'class':'toTop'}),
        dict(name='div', attrs={'class':'category'}),
        dict(name='div', attrs={'class':'REMOVE'}),
        dict(name='div', attrs={'class':'embed embedAd'})]

    extra_css = '''
        h1 { font-size: 1.2em; }
    '''

    # thanks to Kovid Goyal
    def get_article_url(self, article):
        '''
        Return the link of a feed item, or None for items to be skipped.

        :param article: feed item; only ``article.get('link')`` is used.
        :return: the link, or None when the item has no link or the link
                 contains 'audio'.
        '''
        link = article.get('link')
        # A feed item without a link used to raise TypeError on the ``in``
        # test below; treat it like any other item that has to be skipped.
        if link and 'audio' not in link:
            return link
        return None

    # (pattern, replacement) pairs applied to the raw HTML. Every pattern is
    # compiled case-insensitively with DOTALL.
    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            # Drop the "Zdj.cie" (photo) heading.
            (r'<h2>Zdj.cie</h2>', lambda match: ''),
            # Re-label embedded audio/video/article/album boxes so that the
            # 'REMOVE' entry in remove_tags matches them.
            (r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">', lambda match: 'REMOVE">'),
            # Strip the Facebook promotion link, keeping the closing </div>.
            (r'<a href="http://www.facebook.com/pages/RMF24pl/.*?>RMF24.pl</a> on Facebook</div>', lambda match: '</div>')
        ]
    ]
|
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = 'calibre'
|
||||
__version__ = '0.7.905'
|
||||
__version__ = '0.7.906'
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
|
||||
import re
|
||||
|
@ -47,29 +47,43 @@ class ISBNDBMetadata(Metadata):
|
||||
def __init__(self, book):
    '''
    Populate this metadata object from a single ISBNDB result element.

    :param book: parsed element describing one book. It is only accessed
                 through ``book.get(...)`` and ``book.find(...)``
                 (presumably a BeautifulSoup tag -- confirm against caller).
    '''
    Metadata.__init__(self, None, [])

    def tostring(e):
        # Return the stripped text of ``e``, or None when ``e`` has no
        # ``string`` attribute (e.g. find() returned None) or when the text
        # is missing or blank.
        if not hasattr(e, 'string'):
            return None
        ans = e.string
        if ans is not None:
            ans = unicode(ans).strip()
        if not ans:
            ans = None
        return ans

    self.isbn = unicode(book.get('isbn13', book.get('isbn')))

    # Prefer the long title, fall back to the short one, then to 'Unknown'.
    self.title = tostring(book.find('titlelong'))
    if not self.title:
        self.title = tostring(book.find('title'))
    if not self.title:
        self.title = _('Unknown')
    self.title = unicode(self.title).strip()

    # The authors text is a comma separated list whose items may themselves
    # join several names with '&'.
    self.authors = []
    authors_text = tostring(book.find('authorstext'))
    if authors_text:
        for chunk in authors_text.split(','):
            names = [name.strip() for name in chunk.split('&')]
            # Skip blanks left behind by stray separators ("A, " or "A & ").
            self.authors.extend([name for name in names if name])

    # Best effort: book.find('authors') may not return an element, in which
    # case the chained .find() fails and author_sort is left untouched.
    try:
        self.author_sort = tostring(book.find('authors').find('person'))
        if self.authors and self.author_sort == self.authors[0]:
            self.author_sort = None
    except Exception:
        pass

    self.publisher = tostring(book.find('publishertext'))

    summ = tostring(book.find('summary'))
    if summ:
        # tostring() already returned the text itself, so it must not be
        # dereferenced with ``.string`` again.
        self.comments = 'SUMMARY:\n'+summ
|
||||
|
||||
|
||||
def build_isbn(base_url, opts):
|
||||
|
@ -12,6 +12,7 @@ import mechanize
|
||||
from calibre import browser, prints
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.chardet import strip_encoding_declarations
|
||||
|
||||
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
|
||||
|
||||
@ -110,6 +111,8 @@ def get_social_metadata(title, authors, publisher, isbn, username=None,
|
||||
+isbn).read()
|
||||
if not raw:
|
||||
return mi
|
||||
raw = raw.decode('utf-8', 'replace')
|
||||
raw = strip_encoding_declarations(raw)
|
||||
root = html.fromstring(raw)
|
||||
h1 = root.xpath('//div[@class="headsummary"]/h1')
|
||||
if h1 and not mi.title:
|
||||
|
@ -19,6 +19,7 @@ from calibre import prints
|
||||
from calibre.constants import DEBUG
|
||||
|
||||
class Worker(Thread):
|
||||
'Cover downloader'
|
||||
|
||||
def __init__(self):
|
||||
Thread.__init__(self)
|
||||
@ -88,7 +89,7 @@ class DownloadMetadata(Thread):
|
||||
if mi.isbn:
|
||||
args['isbn'] = mi.isbn
|
||||
else:
|
||||
if not mi.title or mi.title == _('Unknown'):
|
||||
if mi.is_null('title'):
|
||||
self.failures[id] = \
|
||||
(str(id), _('Book has neither title nor ISBN'))
|
||||
continue
|
||||
|
@ -579,6 +579,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, # {{{
|
||||
except KeyboardInterrupt:
|
||||
pass
|
||||
time.sleep(2)
|
||||
if mb is not None:
|
||||
mb.flush()
|
||||
self.hide_windows()
|
||||
return True
|
||||
|
||||
|
@ -40,12 +40,14 @@ class MetadataBackup(Thread): # {{{
|
||||
self.get_metadata_for_dump = FunctionDispatcher(db.get_metadata_for_dump)
|
||||
self.clear_dirtied = FunctionDispatcher(db.clear_dirtied)
|
||||
self.set_dirtied = FunctionDispatcher(db.dirtied)
|
||||
self.in_limbo = None
|
||||
|
||||
def stop(self):
|
||||
self.keep_running = False
|
||||
|
||||
def run(self):
|
||||
while self.keep_running:
|
||||
self.in_limbo = None
|
||||
try:
|
||||
time.sleep(0.5) # Limit to two per second
|
||||
id_ = self.db.dirtied_queue.get(True, 1.45)
|
||||
@ -72,6 +74,7 @@ class MetadataBackup(Thread): # {{{
|
||||
|
||||
if mi is None:
|
||||
continue
|
||||
self.in_limbo = id_
|
||||
|
||||
# Give the GUI thread a chance to do something. Python threads don't
|
||||
# have priorities, so this thread would naturally keep the processor
|
||||
@ -98,6 +101,15 @@ class MetadataBackup(Thread): # {{{
|
||||
prints('Failed to write backup metadata for id:', id_,
|
||||
'again, giving up')
|
||||
continue
|
||||
self.in_limbo = None
|
||||
|
||||
def flush(self):
    '''
    Used during shutdown to ensure that a dirtied book is not missed.

    If a book id is recorded in ``self.in_limbo``, it is handed back to
    ``self.db.dirtied``. Failures are printed and never raised.
    '''
    # Read the attribute once: run() resets it to None from the backup
    # thread, so checking and then re-reading it could pass None on.
    book_id = self.in_limbo
    if book_id is None:
        return
    try:
        self.db.dirtied([book_id])
    except Exception:
        traceback.print_exc()
|
||||
|
||||
def write(self, path, raw):
|
||||
with open(path, 'wb') as f:
|
||||
|
@ -348,10 +348,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
||||
setattr(self, 'title_sort', functools.partial(self.get_property,
|
||||
loc=self.FIELD_MAP['sort']))
|
||||
|
||||
self.dirtied_cache = set()
|
||||
d = self.conn.get('SELECT book FROM metadata_dirtied', all=True)
|
||||
for x in d:
|
||||
self.dirtied_queue.put(x[0])
|
||||
self.dirtied_cache = set([x[0] for x in d])
|
||||
|
||||
self.refresh_ondevice = functools.partial(self.data.refresh_ondevice, self)
|
||||
self.refresh()
|
||||
@ -616,9 +616,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
||||
self.conn.commit()
|
||||
|
||||
def dirtied(self, book_ids, commit=True):
|
||||
for book in book_ids:
|
||||
if book in self.dirtied_cache:
|
||||
continue
|
||||
for book in frozenset(book_ids) - self.dirtied_cache:
|
||||
try:
|
||||
self.conn.execute(
|
||||
'INSERT INTO metadata_dirtied (book) VALUES (?)',
|
||||
|
Loading…
x
Reference in New Issue
Block a user