From f1db17049cf0950ce494b8f246238b1ebe75ca79 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 20 Nov 2008 16:57:59 -0800 Subject: [PATCH] Implement scheduled news download. Add recipes for De Standaard and DeMorgen.be (thanks to Darko Miletic) --- src/calibre/gui2/dialogs/scheduler.py | 211 ++- src/calibre/gui2/dialogs/scheduler.ui | 11 +- src/calibre/gui2/images/scheduler.svg | 1295 +++++++++++++++++ src/calibre/gui2/main.py | 9 +- src/calibre/gui2/news.py | 13 +- src/calibre/gui2/tools.py | 3 +- src/calibre/library/database2.py | 2 +- src/calibre/utils/config.py | 9 +- src/calibre/web/feeds/recipes/__init__.py | 1 + src/calibre/web/feeds/recipes/de_standaard.py | 32 + src/calibre/web/feeds/recipes/demorgen_be.py | 31 + src/calibre/web/fetch/simple.py | 11 +- 12 files changed, 1542 insertions(+), 86 deletions(-) create mode 100644 src/calibre/gui2/images/scheduler.svg create mode 100644 src/calibre/web/feeds/recipes/de_standaard.py create mode 100644 src/calibre/web/feeds/recipes/demorgen_be.py diff --git a/src/calibre/gui2/dialogs/scheduler.py b/src/calibre/gui2/dialogs/scheduler.py index 12988e87c0..b3ee95ec88 100644 --- a/src/calibre/gui2/dialogs/scheduler.py +++ b/src/calibre/gui2/dialogs/scheduler.py @@ -8,30 +8,39 @@ Scheduler for automated recipe downloads ''' import sys, copy -from threading import RLock from datetime import datetime, timedelta from PyQt4.Qt import QDialog, QApplication, QLineEdit, QPalette, SIGNAL, QBrush, \ QColor, QAbstractListModel, Qt, QVariant, QFont, QIcon, \ - QFile, QObject, QTimer + QFile, QObject, QTimer, QMutex from calibre import english_sort from calibre.gui2.dialogs.scheduler_ui import Ui_Dialog from calibre.web.feeds.recipes import recipes, recipe_modules, compile_recipe from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.pyparsing import ParseException -from calibre.gui2 import dynamic, NONE, error_dialog +from calibre.gui2 import NONE, error_dialog +from calibre.utils.config import DynamicConfig + +config = DynamicConfig('scheduler') class Recipe(object): - def __init__(self, id, recipe_class, builtin): - self.id = id - self.title = recipe_class.title - self.description = recipe_class.description - self.last_downloaded = datetime.fromordinal(1) - self.downloading = False - self.builtin = builtin - self.schedule = None - self.needs_subscription = recipe_class.needs_subscription + def __init__(self, id=None, recipe_class=None, builtin=True): + self.id = id + self.title = getattr(recipe_class, 'title', None) + self.description = getattr(recipe_class, 'description', None) + self.last_downloaded = datetime.fromordinal(1) + self.downloading = False + self.builtin = builtin + self.schedule = None + self.needs_subscription = getattr(recipe_class, 'needs_subscription', False) + + def pickle(self): + return self.__dict__.copy() + + def unpickle(self, dict): + self.__dict__.update(dict) + return self def __cmp__(self, other): if self.id == getattr(other, 'id', None): @@ -53,10 +62,17 @@ class Recipe(object): return self.id == getattr(other, 'id', None) def __repr__(self): - return u'%s:%s'%(self.id, self.title) + return u'%s|%s|%s|%s'%(self.id, self.title, self.last_downloaded.ctime(), self.schedule) builtin_recipes = [Recipe(m, r, True) for r, m in zip(recipes, recipe_modules)] +def save_recipes(recipes): + config['scheduled_recipes'] = [r.pickle() for r in recipes] + +def load_recipes(): + config.refresh() + return [Recipe().unpickle(r) for r in config.get('scheduled_recipes', [])] + class RecipeModel(QAbstractListModel, SearchQueryParser): LOCATIONS = ['all'] @@ -70,16 +86,18 @@ class RecipeModel(QAbstractListModel, SearchQueryParser): for x in db.get_recipes(): recipe = compile_recipe(x[1]) self.recipes.append(Recipe(x[0], recipe, False)) - - sr = dynamic['scheduled_recipes'] - if not sr: - sr = [] + self.refresh() + self._map = list(range(len(self.recipes))) + + def refresh(self): + sr = load_recipes() for recipe in self.recipes: if recipe in sr: recipe.schedule = sr[sr.index(recipe)].schedule + recipe.last_downloaded = sr[sr.index(recipe)].last_downloaded self.recipes.sort() - self._map = list(range(len(self.recipes))) + def universal_set(self): return set(self.recipes) @@ -203,7 +221,7 @@ class SchedulerDialog(QDialog, Ui_Dialog): lambda state: self.interval.setEnabled(state == Qt.Checked)) self.connect(self.show_password, SIGNAL('stateChanged(int)'), lambda state: self.password.setEchoMode(self.password.Normal if state == Qt.Checked else self.password.Password)) - self.connect(self.interval, SIGNAL('valueChanged(int)'), self.do_schedule) + self.connect(self.interval, SIGNAL('valueChanged(double)'), self.do_schedule) self.connect(self.search, SIGNAL('search(PyQt_PyObject)'), self._model.search) self.connect(self._model, SIGNAL('modelReset()'), lambda : self.detail_box.setVisible(False)) self.connect(self.download, SIGNAL('clicked()'), self.download_now) @@ -218,32 +236,32 @@ class SchedulerDialog(QDialog, Ui_Dialog): username, password = username.strip(), password.strip() recipe = self._model.data(self.recipes.currentIndex(), Qt.UserRole) key = 'recipe_account_info_%s'%recipe.id - dynamic[key] = (username, password) if username and password else None + config[key] = (username, password) if username and password else None def do_schedule(self, *args): recipe = self.recipes.currentIndex() if not recipe.isValid(): return recipe = self._model.data(recipe, Qt.UserRole) - recipes = dynamic['scheduled_recipes'] + recipes = load_recipes() if self.schedule.checkState() == Qt.Checked: if recipe in recipes: recipe = recipes[recipes.index(recipe)] else: + recipe.last_downloaded = datetime.fromordinal(1) recipes.append(recipe) - recipes.schedule = self.interval.value() - if recipes.schedule == 0.0: - recipes.schedule = 1/24. - if recipe.need_subscription and not dynamic['recipe_account_info_%s'%recipe.id]: + recipe.schedule = self.interval.value() + if recipe.schedule < 0.1: + recipe.schedule = 1/24. + if recipe.needs_subscription and not config['recipe_account_info_%s'%recipe.id]: error_dialog(self, _('Must set account information'), _('This recipe requires a username and password')).exec_() self.schedule.setCheckState(Qt.Unchecked) return else: if recipe in recipes: recipes.remove(recipe) - dynamic['scheduled_recipes'] = recipes + save_recipes(recipes) self.emit(SIGNAL('new_schedule(PyQt_PyObject)'), recipes) - self._model.resort() def show_recipe(self, index): recipe = self._model.data(index, Qt.UserRole) @@ -254,9 +272,9 @@ class SchedulerDialog(QDialog, Ui_Dialog): self.interval.setValue(recipe.schedule if recipe.schedule is not None else 1) self.detail_box.setVisible(True) self.account.setVisible(recipe.needs_subscription) - self.interval.setEnabled(self.schedule.checkState == Qt.Checked) + self.interval.setEnabled(self.schedule.checkState() == Qt.Checked) key = 'recipe_account_info_%s'%recipe.id - account_info = dynamic[key] + account_info = config[key] self.show_password.setChecked(False) if account_info: self.username.blockSignals(True) @@ -265,73 +283,120 @@ class SchedulerDialog(QDialog, Ui_Dialog): self.password.setText(account_info[1]) self.username.blockSignals(False) self.password.blockSignals(False) + d = datetime.utcnow() - recipe.last_downloaded + ld = '%.1f'%(d.days + d.seconds/(24*3600)) + if d < timedelta(days=366): + self.last_downloaded.setText(_('Last downloaded: %s days ago')%ld) + else: + self.last_downloaded.setText(_('Last downloaded: never')) + class Scheduler(QObject): - INTERVAL = 5 # minutes + INTERVAL = 1 # minutes def __init__(self, main): self.main = main + self.verbose = main.verbose QObject.__init__(self) - self.lock = RLock() + self.lock = QMutex(QMutex.Recursive) self.queue = set([]) - recipes = dynamic['scheduled_recipes'] - if not recipes: - recipes = [] + recipes = load_recipes() self.refresh_schedule(recipes) self.timer = QTimer() + self.dirtied = False self.connect(self.timer, SIGNAL('timeout()'), self.check) - self.timer.start(self.INTERVAL * 60000) + self.timer.start(int(self.INTERVAL * 60000)) + + def debug(self, *args): + if self.verbose: + sys.stdout.write(' '.join(map(unicode, args))+'\n') + sys.stdout.flush() def check(self): - db = self.main.library_view.model().db - now = datetime.utcnow() - needs_downloading = set([]) - for recipe in self.recipes: - delta = now - recipe.last_downloaded - if delta > timedelta(days=recipe.schedule): - needs_downloading.add(recipe) - with self.lock: + if not self.lock.tryLock(): + return + try: + if self.dirtied: + self.refresh_schedule(load_recipes()) + self.dirtied = False + needs_downloading = set([]) + self.debug('Checking...') + now = datetime.utcnow() + for recipe in self.recipes: + if recipe.schedule is None: + continue + delta = now - recipe.last_downloaded + if delta > timedelta(days=recipe.schedule): + needs_downloading.add(recipe) + + self.debug('Needs downloading:', needs_downloading) + needs_downloading = [r for r in needs_downloading if r not in self.queue] for recipe in needs_downloading: - try: - id = int(recipe.id) - script = db.get_recipe(id) - if script is None: - self.recipes.remove(recipe) - dynamic['scheduled_recipes'] = self.recipes - continue - except ValueError: - script = recipe.title - self.main.download_scheduled_recipe(recipe, script, self.recipe_downloaded) - self.queue.add(recipe) - + self.do_download(recipe) + finally: + self.lock.unlock() + + def do_download(self, recipe): + try: + id = int(recipe.id) + script = self.main.library_view.model().db.get_recipe(id) + if script is None: + self.recipes.remove(recipe) + save_recipes(self.recipes) + return + except ValueError: + script = recipe.title + self.debug('\tQueueing:', recipe) + self.main.download_scheduled_recipe(recipe, script, self.recipe_downloaded) + self.queue.add(recipe) + def recipe_downloaded(self, recipe): - with self.lock: + self.lock.lock() + try: + if recipe in self.recipes: + recipe = self.recipes[self.recipes.index(recipe)] + now = datetime.utcnow() + d = now - recipe.last_downloaded + if recipe.schedule is not None: + interval = timedelta(days=recipe.schedule) + if abs(d - interval) < timedelta(hours=1): + recipe.last_downloaded += interval + else: + recipe.last_downloaded = now + else: + recipe.last_downloaded = now + save_recipes(self.recipes) self.queue.remove(recipe) - recipe = self.recipes[self.recipes.index(recipe)] - now = datetime.utcnow() - d = now - recipe.last_downloaded - interval = timedelta(days=recipe.schedule) - if abs(d - interval) < timedelta(hours=1): - recipe.last_downloaded += interval - else: - recipe.last_downloaded = now - dynamic['scheduled_recipes'] = self.recipes - + self.dirtied = True + finally: + self.lock.unlock() + self.debug('Downloaded:', recipe) + def download(self, recipe): - if recipe in self.recipes: - recipe = self.recipes[self.recipes.index(recipe)] - raise NotImplementedError + self.lock.lock() + try: + if recipe in self.recipes: + recipe = self.recipes[self.recipes.index(recipe)] + if recipe not in self.queue: + self.do_download(recipe) + finally: + self.lock.unlock() def refresh_schedule(self, recipes): self.recipes = recipes def show_dialog(self): - d = SchedulerDialog(self.main.library_view.model().db) - self.connect(d, SIGNAL('new_schedule(PyQt_PyObject)'), self.refresh_schedule) - self.connect(d, SIGNAL('download_now(PyQt_PyObject)'), self.download) - d.exec_() + self.lock.lock() + try: + d = SchedulerDialog(self.main.library_view.model().db) + self.connect(d, SIGNAL('new_schedule(PyQt_PyObject)'), self.refresh_schedule) + self.connect(d, SIGNAL('download_now(PyQt_PyObject)'), self.download) + d.exec_() + self.recipes = load_recipes() + finally: + self.lock.unlock() def main(args=sys.argv): app = QApplication([]) diff --git a/src/calibre/gui2/dialogs/scheduler.ui b/src/calibre/gui2/dialogs/scheduler.ui index 40b5074e47..5986e2b75a 100644 --- a/src/calibre/gui2/dialogs/scheduler.ui +++ b/src/calibre/gui2/dialogs/scheduler.ui @@ -10,11 +10,11 @@ - Schedule recipes for download + Schedule news download - :/images/news.svg:/images/news.svg + :/images/scheduler.svg:/images/scheduler.svg @@ -161,6 +161,13 @@ + + + + + + + diff --git a/src/calibre/gui2/images/scheduler.svg b/src/calibre/gui2/images/scheduler.svg new file mode 100644 index 0000000000..c115a10e92 --- /dev/null +++ b/src/calibre/gui2/images/scheduler.svg @@ -0,0 +1,1295 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index 8ceb098e90..116ec4f957 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -23,6 +23,7 @@ from calibre.gui2 import APP_UID, warning_dialog, choose_files, error_dialog, \ max_available_height, config from calibre.gui2.cover_flow import CoverFlow, DatabaseImages, pictureflowerror from calibre.library.database import LibraryDatabase +from calibre.gui2.dialogs.scheduler import Scheduler from calibre.gui2.update import CheckForUpdates from calibre.gui2.main_window import MainWindow, option_parser from calibre.gui2.main_ui import Ui_MainWindow @@ -74,6 +75,7 @@ class Main(MainWindow, Ui_MainWindow): Ui_MainWindow.__init__(self) self.setupUi(self) self.setWindowTitle(__appname__) + self.verbose = opts.verbose self.read_settings() self.job_manager = JobManager() self.jobs_dialog = JobsDialog(self, self.job_manager) @@ -290,7 +292,10 @@ class Main(MainWindow, Ui_MainWindow): from calibre.library import server_config self.content_server = start_threaded_server(db, server_config().parse()) self.test_server_timer = QTimer.singleShot(10000, self.test_server) - + + self.scheduler = Scheduler(self) + self.connect(self.news_menu.scheduler, SIGNAL('triggered(bool)'), lambda x :self.scheduler.show_dialog()) + def test_server(self, *args): if self.content_server.exception is not None: error_dialog(self, _('Failed to start content server'), @@ -1294,6 +1299,8 @@ path_to_ebook to the database. ''') parser.add_option('--with-library', default=None, action='store', help=_('Use the library located at the specified path.')) + parser.add_option('-v', '--verbose', default=0, action='count', + help=_('Log debugging information to console')) opts, args = parser.parse_args(args) if opts.with_library is not None and os.path.isdir(opts.with_library): prefs.set('library_path', opts.with_library) diff --git a/src/calibre/gui2/news.py b/src/calibre/gui2/news.py index 30cba00abe..4a533975c1 100644 --- a/src/calibre/gui2/news.py +++ b/src/calibre/gui2/news.py @@ -29,18 +29,25 @@ class NewsMenu(QMenu): def __init__(self, customize_feeds_func): QMenu.__init__(self) + self.scheduler = QAction(QIcon(':/images/scheduler.svg'), _('Schedule news download'), self) + self.addAction(self.scheduler) self.cac = QAction(QIcon(':/images/user_profile.svg'), _('Add a custom news source'), self) self.connect(self.cac, SIGNAL('triggered(bool)'), customize_feeds_func) self.addAction(self.cac) + self.addSeparator() self.custom_menu = CustomNewsMenu() self.addMenu(self.custom_menu) self.connect(self.custom_menu, SIGNAL('start_news_fetch(PyQt_PyObject, PyQt_PyObject)'), self.fetch_news) - self.addSeparator() + + self.dmenu = QMenu(self) + self.dmenu.setTitle(_('Download news')) + self.dmenu.setIcon(QIcon(':/images/news.svg')) + self.addMenu(self.dmenu) for title in titles: recipe = get_builtin_recipe(title)[0] - self.addAction(NewsAction(recipe, self)) + self.dmenu.addAction(NewsAction(recipe, self)) def fetch_news(self, recipe, module): @@ -76,7 +83,7 @@ class CustomNewsMenu(QMenu): def __init__(self): QMenu.__init__(self) - self.setTitle(_('Custom news sources')) + self.setTitle(_('Download custom news')) self.connect(self, SIGNAL('triggered(QAction*)'), self.launch) def launch(self, action): diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py index 46a00098bb..76eaf0fc4b 100644 --- a/src/calibre/gui2/tools.py +++ b/src/calibre/gui2/tools.py @@ -361,12 +361,13 @@ def _fetch_news(data, fmt): def fetch_scheduled_recipe(recipe, script): + from calibre.gui2.dialogs.scheduler import config fmt = prefs['output_format'].lower() pt = PersistentTemporaryFile(suffix='_feeds2%s.%s'%(fmt.lower(), fmt.lower())) pt.close() args = ['feeds2%s'%fmt.lower(), '--output', pt.name, '--debug'] if recipe.needs_subscription: - x = dynamic['recipe_account_info_%s'%recipe.id] + x = config.get('recipe_account_info_%s'%recipe.id, False) if not x: raise ValueError(_('You must set a username and password for %s')%recipe.title) args.extend(['--username', x[0], '--password', x[1]]) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 29ac57346c..bb82ea7a8c 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -716,7 +716,7 @@ class LibraryDatabase2(LibraryDatabase): self.conn.commit() def get_recipes(self): - return self.conn.get('SELECT id, title FROM feeds') + return self.conn.get('SELECT id, script FROM feeds') def get_recipe(self, id): return self.conn.get('SELECT script FROM feeds WHERE id=?', (id,), all=False) diff --git a/src/calibre/utils/config.py b/src/calibre/utils/config.py index 190c1ff37c..1327fd415c 100644 --- a/src/calibre/utils/config.py +++ b/src/calibre/utils/config.py @@ -473,8 +473,12 @@ class DynamicConfig(dict): class for preferences that you don't intend to have the users edit directly. ''' def __init__(self, name='dynamic'): + dict.__init__(self, {}) self.name = name self.file_path = os.path.join(config_dir, name+'.pickle') + self.refresh() + + def refresh(self): d = {} if os.path.exists(self.file_path): with ExclusiveFile(self.file_path) as f: @@ -482,8 +486,11 @@ class DynamicConfig(dict): try: d = cPickle.loads(raw) if raw.strip() else {} except: + import traceback + traceback.print_exc() d = {} - dict.__init__(self, d) + self.clear() + self.update(d) def __getitem__(self, key): try: diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 9523ef6421..3337353597 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -12,6 +12,7 @@ recipe_modules = [ 'discover_magazine', 'scientific_american', 'new_york_review_of_books', 'daily_telegraph', 'guardian', 'el_pais', 'new_scientist', 'b92', 'politika', 'moscow_times', 'latimes', 'japan_times', 'san_fran_chronicle', + 'demorgen_be', 'de_standaard' ] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/de_standaard.py b/src/calibre/web/feeds/recipes/de_standaard.py new file mode 100644 index 0000000000..2681f2acd9 --- /dev/null +++ b/src/calibre/web/feeds/recipes/de_standaard.py @@ -0,0 +1,32 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Darko Miletic ' +''' +standaard.be +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class DeStandaard(BasicNewsRecipe): + title = u'De Standaard' + __author__ = u'Darko Miletic' + description = u'News from Belgium' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + + keep_only_tags = [dict(name='div' , attrs={'id':'_parts_midContainer_div'})] + remove_tags_after = dict(name='h3', attrs={'title':'Binnenland'}) + remove_tags = [ + dict(name='h3' , attrs={'title':'Binnenland' }) + ,dict(name='p' , attrs={'class':'by' }) + ,dict(name='div' , attrs={'class':'articlesright'}) + ,dict(name='a' , attrs={'class':'help' }) + ,dict(name='a' , attrs={'class':'archive' }) + ,dict(name='a' , attrs={'class':'print' }) + ,dict(name='a' , attrs={'class':'email' }) + ] + + feeds = [ + (u'De Standaard Online', u'http://feeds.feedburner.com/dso-front') + ] diff --git a/src/calibre/web/feeds/recipes/demorgen_be.py b/src/calibre/web/feeds/recipes/demorgen_be.py new file mode 100644 index 0000000000..5f0b4471fe --- /dev/null +++ b/src/calibre/web/feeds/recipes/demorgen_be.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2008, Darko Miletic ' +''' +demorgen.be +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class DeMorganBe(BasicNewsRecipe): + title = u'DeMorgen.be' + __author__ = u'Darko Miletic' + description = u'News from Belgium' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + + keep_only_tags = [dict(name='div' , attrs={'class':'art_box2'})] + + feeds = [ + (u'Nieuws' , u'http://www.demorgen.be/nieuws/rss.xml' ) + ,(u'De Gedachte' , u'http://www.demorgen.be/degedachte/rss.xml' ) + ,(u'Financiele morgen' , u'http://www.demorgen.be/financielemorgen/rss.xml') + ,(u'Financiele morgen' , u'http://www.demorgen.be/financielemorgen/rss.xml') + ,(u'Sport' , u'http://www.demorgen.be/sport/rss.xml' ) + ,(u'Bis' , u'http://www.demorgen.be/bis/rss.xml' ) + ,(u'Magazine' , u'http://www.demorgen.be/magazine/rss.xml' ) + ,(u'De stand der dingen', u'http://www.demorgen.be/standderdingen/rss.xml' ) + ] diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py index 57eec4d528..1fdeca06d9 100644 --- a/src/calibre/web/fetch/simple.py +++ b/src/calibre/web/fetch/simple.py @@ -127,10 +127,13 @@ class RecursiveFetcher(object, LoggingInterface): if self.keep_only_tags: body = Tag(soup, 'body') - for spec in self.keep_only_tags: - for tag in soup.find('body').findAll(**spec): - body.insert(len(body.contents), tag) - soup.find('body').replaceWith(body) + try: + for spec in self.keep_only_tags: + for tag in soup.find('body').findAll(**spec): + body.insert(len(body.contents), tag) + soup.find('body').replaceWith(body) + except AttributeError: # soup has no body element + pass def remove_beyond(tag, next): while tag is not None and tag.name != 'body':