mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
fa9c23031e
7401
imgsrc/plugboard.svg
7401
imgsrc/plugboard.svg
File diff suppressed because it is too large
Load Diff
Before Width: | Height: | Size: 209 KiB After Width: | Height: | Size: 14 KiB |
Binary file not shown.
Before Width: | Height: | Size: 3.6 KiB After Width: | Height: | Size: 13 KiB |
@ -12,15 +12,18 @@ class PeterSchiff(BasicNewsRecipe):
|
|||||||
description = 'Economic commentary'
|
description = 'Economic commentary'
|
||||||
publisher = 'Euro Pacific capital'
|
publisher = 'Euro Pacific capital'
|
||||||
category = 'news, politics, economy, USA'
|
category = 'news, politics, economy, USA'
|
||||||
oldest_article = 15
|
oldest_article = 25
|
||||||
max_articles_per_feed = 200
|
max_articles_per_feed = 200
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'cp1252'
|
encoding = 'utf8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'en'
|
language = 'en'
|
||||||
country = 'US'
|
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
extra_css = ' body{font-family: Verdana,Times,serif } h1{text-align: left} img{margin-bottom: 0.4em} '
|
extra_css = """
|
||||||
|
body{font-family: Verdana,Times,serif }
|
||||||
|
.field-field-commentary-writer-name{font-weight: bold}
|
||||||
|
.field-items{display: inline}
|
||||||
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
@ -30,7 +33,15 @@ class PeterSchiff(BasicNewsRecipe):
|
|||||||
, 'linearize_tables' : True
|
, 'linearize_tables' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
keep_only_tags = [dict(name='tr',attrs={'style':'vertical-align: top;'})]
|
keep_only_tags = [
|
||||||
|
dict(name='h2',attrs={'id':'page-title'})
|
||||||
|
,dict(name='div',attrs={'class':'node'})
|
||||||
|
]
|
||||||
|
remove_tags = [
|
||||||
|
dict(name=['meta','link','base','iframe','embed'])
|
||||||
|
,dict(attrs={'id':'text-zoom'})
|
||||||
|
]
|
||||||
|
remove_attributes=['track','linktype','lang']
|
||||||
|
|
||||||
|
|
||||||
feeds = [(u'Articles', u'http://feeds.feedburner.com/PeterSchiffsEconomicCommentary')]
|
feeds = [(u'Articles', u'http://feeds.feedburner.com/PeterSchiffsEconomicCommentary')]
|
||||||
|
55
resources/recipes/rmf24_opinie.recipe
Normal file
55
resources/recipes/rmf24_opinie.recipe
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
|
||||||
|
'''
|
||||||
|
rmf24.pl
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class RMF24_opinie(BasicNewsRecipe):
    """Recipe for the opinion section of rmf24.pl (blogs, interviews, commentary)."""

    title = u'Rmf24.pl - Opinie'
    description = u'Blogi, wywiady i komentarze ze strony rmf24.pl'
    language = 'pl'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = u'Tomasz D\u0142ugosz'
    no_stylesheets = True
    remove_javascript = True

    feeds = [
        (u'Blogi', u'http://www.rmf24.pl/opinie/blogi/feed'),
        (u'Kontrwywiad', u'http://www.rmf24.pl/opinie/wywiady/kontrwywiad/feed'),
        (u'Przes\u0142uchanie', u'http://www.rmf24.pl/opinie/wywiady/przesluchanie/feed'),
        (u'Komentarze', u'http://www.rmf24.pl/opinie/komentarze/feed'),
    ]

    # The three article container variants used by the site.
    keep_only_tags = [
        dict(name='div', attrs={'class':'box articleSingle print'}),
        dict(name='div', attrs={'class':'box articleSingle print singleCommentary'}),
        dict(name='div', attrs={'class':'box articleSingle print blogSingleEntry'}),
    ]

    # 'REMOVE' is not a class used by the site: it is injected by
    # preprocess_regexps below so that unwanted embeds can be dropped here.
    remove_tags = [
        dict(name='div', attrs={'class':'toTop'}),
        dict(name='div', attrs={'class':'category'}),
        dict(name='div', attrs={'class':'REMOVE'}),
        dict(name='div', attrs={'class':'embed embedAd'}),
    ]

    extra_css = '''
        h1 { font-size: 1.2em; }
    '''

    # thanks to Kovid Goyal
    def get_article_url(self, article):
        """Return the article link, or None for audio entries.

        Entries whose link contains 'audio' are skipped by returning None.
        An entry without any link is treated the same way instead of raising
        TypeError on the substring test.
        """
        link = article.get('link')
        if link and 'audio' not in link:
            return link
        return None

    # (pattern, replacement) pairs, compiled case-insensitively with DOTALL.
    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            # Drop the photo heading ('.' matches the accented letter).
            (r'<h2>Zdj.cie</h2>', lambda match: ''),
            # Re-label embedded media/teaser boxes so remove_tags strips them.
            (r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">', lambda match: 'REMOVE">'),
            # Cut the Facebook promotion link, keeping the closing </div>.
            (r'<a href="http://www.facebook.com/pages/RMF24pl/.*?>RMF24.pl</a> on Facebook</div>', lambda match: '</div>')
        ]
    ]
|
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
__appname__ = 'calibre'
|
__appname__ = 'calibre'
|
||||||
__version__ = '0.7.905'
|
__version__ = '0.7.906'
|
||||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
@ -47,29 +47,43 @@ class ISBNDBMetadata(Metadata):
|
|||||||
def __init__(self, book):
    """Fill in the metadata fields from a single ISBNdb book element.

    ``book`` is queried with ``get`` for the ISBN attributes and with
    ``find`` for child elements (title, authors, publisher, summary).
    Missing or empty elements leave the corresponding field as None
    (the title falls back to the translated 'Unknown').
    """
    Metadata.__init__(self, None, [])

    def tostring(e):
        # Return the stripped text of element ``e`` as unicode, or None
        # when the element is missing, has no string, or is blank.
        if not hasattr(e, 'string'):
            return None
        ans = e.string
        if ans is not None:
            ans = unicode(ans).strip()
        if not ans:
            ans = None
        return ans

    self.isbn = unicode(book.get('isbn13', book.get('isbn')))

    # Prefer the long title, then the short one, then a placeholder.
    self.title = tostring(book.find('titlelong'))
    if not self.title:
        self.title = tostring(book.find('title'))
    if not self.title:
        self.title = _('Unknown')
    self.title = unicode(self.title).strip()

    # Authors are separated by ',' and/or '&'.
    self.authors = []
    authors_text = tostring(book.find('authorstext'))
    if authors_text:
        for part in authors_text.split(','):
            for name in part.split('&'):
                name = name.strip()
                if name:  # skip blanks left by stray separators
                    self.authors.append(name)

    # Best effort: the <authors>/<person> element is optional, so a missing
    # parent (find() returning None) must not abort construction.
    try:
        self.author_sort = tostring(book.find('authors').find('person'))
        if self.authors and self.author_sort == self.authors[0]:
            self.author_sort = None
    except Exception:
        pass

    self.publisher = tostring(book.find('publishertext'))

    # tostring() already returns text, so it is concatenated directly;
    # accessing ``.string`` on it would raise AttributeError.
    summ = tostring(book.find('summary'))
    if summ:
        self.comments = 'SUMMARY:\n'+summ
|
||||||
|
|
||||||
|
|
||||||
def build_isbn(base_url, opts):
|
def build_isbn(base_url, opts):
|
||||||
|
@ -12,6 +12,7 @@ import mechanize
|
|||||||
from calibre import browser, prints
|
from calibre import browser, prints
|
||||||
from calibre.utils.config import OptionParser
|
from calibre.utils.config import OptionParser
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
from calibre.ebooks.chardet import strip_encoding_declarations
|
||||||
|
|
||||||
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
|
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
|
||||||
|
|
||||||
@ -110,6 +111,8 @@ def get_social_metadata(title, authors, publisher, isbn, username=None,
|
|||||||
+isbn).read()
|
+isbn).read()
|
||||||
if not raw:
|
if not raw:
|
||||||
return mi
|
return mi
|
||||||
|
raw = raw.decode('utf-8', 'replace')
|
||||||
|
raw = strip_encoding_declarations(raw)
|
||||||
root = html.fromstring(raw)
|
root = html.fromstring(raw)
|
||||||
h1 = root.xpath('//div[@class="headsummary"]/h1')
|
h1 = root.xpath('//div[@class="headsummary"]/h1')
|
||||||
if h1 and not mi.title:
|
if h1 and not mi.title:
|
||||||
|
@ -19,6 +19,7 @@ from calibre import prints
|
|||||||
from calibre.constants import DEBUG
|
from calibre.constants import DEBUG
|
||||||
|
|
||||||
class Worker(Thread):
|
class Worker(Thread):
|
||||||
|
'Cover downloader'
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
Thread.__init__(self)
|
Thread.__init__(self)
|
||||||
@ -88,7 +89,7 @@ class DownloadMetadata(Thread):
|
|||||||
if mi.isbn:
|
if mi.isbn:
|
||||||
args['isbn'] = mi.isbn
|
args['isbn'] = mi.isbn
|
||||||
else:
|
else:
|
||||||
if not mi.title or mi.title == _('Unknown'):
|
if mi.is_null('title'):
|
||||||
self.failures[id] = \
|
self.failures[id] = \
|
||||||
(str(id), _('Book has neither title nor ISBN'))
|
(str(id), _('Book has neither title nor ISBN'))
|
||||||
continue
|
continue
|
||||||
|
@ -579,6 +579,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, # {{{
|
|||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
pass
|
pass
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
if mb is not None:
|
||||||
|
mb.flush()
|
||||||
self.hide_windows()
|
self.hide_windows()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -40,12 +40,14 @@ class MetadataBackup(Thread): # {{{
|
|||||||
self.get_metadata_for_dump = FunctionDispatcher(db.get_metadata_for_dump)
|
self.get_metadata_for_dump = FunctionDispatcher(db.get_metadata_for_dump)
|
||||||
self.clear_dirtied = FunctionDispatcher(db.clear_dirtied)
|
self.clear_dirtied = FunctionDispatcher(db.clear_dirtied)
|
||||||
self.set_dirtied = FunctionDispatcher(db.dirtied)
|
self.set_dirtied = FunctionDispatcher(db.dirtied)
|
||||||
|
self.in_limbo = None
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
self.keep_running = False
|
self.keep_running = False
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
while self.keep_running:
|
while self.keep_running:
|
||||||
|
self.in_limbo = None
|
||||||
try:
|
try:
|
||||||
time.sleep(0.5) # Limit to two per second
|
time.sleep(0.5) # Limit to two per second
|
||||||
id_ = self.db.dirtied_queue.get(True, 1.45)
|
id_ = self.db.dirtied_queue.get(True, 1.45)
|
||||||
@ -72,6 +74,7 @@ class MetadataBackup(Thread): # {{{
|
|||||||
|
|
||||||
if mi is None:
|
if mi is None:
|
||||||
continue
|
continue
|
||||||
|
self.in_limbo = id_
|
||||||
|
|
||||||
# Give the GUI thread a chance to do something. Python threads don't
|
# Give the GUI thread a chance to do something. Python threads don't
|
||||||
# have priorities, so this thread would naturally keep the processor
|
# have priorities, so this thread would naturally keep the processor
|
||||||
@ -98,6 +101,15 @@ class MetadataBackup(Thread): # {{{
|
|||||||
prints('Failed to write backup metadata for id:', id_,
|
prints('Failed to write backup metadata for id:', id_,
|
||||||
'again, giving up')
|
'again, giving up')
|
||||||
continue
|
continue
|
||||||
|
self.in_limbo = None
|
||||||
|
|
||||||
|
def flush(self):
    '''
    Used during shutdown to ensure that a dirtied book is not missed.

    If a book id is currently "in limbo" (recorded in ``self.in_limbo``
    while its backup is in progress), mark it dirty again through
    ``self.db.dirtied``. Errors are printed, never raised.
    '''
    # Read the attribute once: run() resets it to None on the backup
    # thread, so a second read could see a different value.
    book_id = self.in_limbo
    if book_id is None:
        return
    try:
        self.db.dirtied([book_id])
    except Exception:
        traceback.print_exc()
|
||||||
|
|
||||||
def write(self, path, raw):
|
def write(self, path, raw):
|
||||||
with open(path, 'wb') as f:
|
with open(path, 'wb') as f:
|
||||||
|
@ -348,10 +348,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
setattr(self, 'title_sort', functools.partial(self.get_property,
|
setattr(self, 'title_sort', functools.partial(self.get_property,
|
||||||
loc=self.FIELD_MAP['sort']))
|
loc=self.FIELD_MAP['sort']))
|
||||||
|
|
||||||
self.dirtied_cache = set()
|
|
||||||
d = self.conn.get('SELECT book FROM metadata_dirtied', all=True)
|
d = self.conn.get('SELECT book FROM metadata_dirtied', all=True)
|
||||||
for x in d:
|
for x in d:
|
||||||
self.dirtied_queue.put(x[0])
|
self.dirtied_queue.put(x[0])
|
||||||
|
self.dirtied_cache = set([x[0] for x in d])
|
||||||
|
|
||||||
self.refresh_ondevice = functools.partial(self.data.refresh_ondevice, self)
|
self.refresh_ondevice = functools.partial(self.data.refresh_ondevice, self)
|
||||||
self.refresh()
|
self.refresh()
|
||||||
@ -616,9 +616,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
self.conn.commit()
|
self.conn.commit()
|
||||||
|
|
||||||
def dirtied(self, book_ids, commit=True):
|
def dirtied(self, book_ids, commit=True):
|
||||||
for book in book_ids:
|
for book in frozenset(book_ids) - self.dirtied_cache:
|
||||||
if book in self.dirtied_cache:
|
|
||||||
continue
|
|
||||||
try:
|
try:
|
||||||
self.conn.execute(
|
self.conn.execute(
|
||||||
'INSERT INTO metadata_dirtied (book) VALUES (?)',
|
'INSERT INTO metadata_dirtied (book) VALUES (?)',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user