mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement scheduled news download. Add recipes for De Standaard and DeMorgen.be (thanks to Darko Miletic)
This commit is contained in:
parent
145eab8acf
commit
f1db17049c
@ -8,30 +8,39 @@ Scheduler for automated recipe downloads
|
||||
'''
|
||||
|
||||
import sys, copy
|
||||
from threading import RLock
|
||||
from datetime import datetime, timedelta
|
||||
from PyQt4.Qt import QDialog, QApplication, QLineEdit, QPalette, SIGNAL, QBrush, \
|
||||
QColor, QAbstractListModel, Qt, QVariant, QFont, QIcon, \
|
||||
QFile, QObject, QTimer
|
||||
QFile, QObject, QTimer, QMutex
|
||||
|
||||
from calibre import english_sort
|
||||
from calibre.gui2.dialogs.scheduler_ui import Ui_Dialog
|
||||
from calibre.web.feeds.recipes import recipes, recipe_modules, compile_recipe
|
||||
from calibre.utils.search_query_parser import SearchQueryParser
|
||||
from calibre.utils.pyparsing import ParseException
|
||||
from calibre.gui2 import dynamic, NONE, error_dialog
|
||||
from calibre.gui2 import NONE, error_dialog
|
||||
from calibre.utils.config import DynamicConfig
|
||||
|
||||
config = DynamicConfig('scheduler')
|
||||
|
||||
class Recipe(object):
|
||||
|
||||
def __init__(self, id, recipe_class, builtin):
|
||||
self.id = id
|
||||
self.title = recipe_class.title
|
||||
self.description = recipe_class.description
|
||||
self.last_downloaded = datetime.fromordinal(1)
|
||||
self.downloading = False
|
||||
self.builtin = builtin
|
||||
self.schedule = None
|
||||
self.needs_subscription = recipe_class.needs_subscription
|
||||
def __init__(self, id=None, recipe_class=None, builtin=True):
|
||||
self.id = id
|
||||
self.title = getattr(recipe_class, 'title', None)
|
||||
self.description = getattr(recipe_class, 'description', None)
|
||||
self.last_downloaded = datetime.fromordinal(1)
|
||||
self.downloading = False
|
||||
self.builtin = builtin
|
||||
self.schedule = None
|
||||
self.needs_subscription = getattr(recipe_class, 'needs_subscription', False)
|
||||
|
||||
def pickle(self):
    '''Return a shallow copy of this recipe's attribute dictionary.'''
    return dict(self.__dict__)
|
||||
|
||||
def unpickle(self, dict):
    '''
    Overwrite this recipe's attributes with the entries of ``dict``.

    Attributes not named in ``dict`` are left as they are.

    :return: ``self``, so the call can be chained onto a constructor.
    '''
    state = vars(self)
    state.update(dict)
    return self
|
||||
|
||||
def __cmp__(self, other):
|
||||
if self.id == getattr(other, 'id', None):
|
||||
@ -53,10 +62,17 @@ class Recipe(object):
|
||||
return self.id == getattr(other, 'id', None)
|
||||
|
||||
def __repr__(self):
|
||||
return u'%s:%s'%(self.id, self.title)
|
||||
return u'%s|%s|%s|%s'%(self.id, self.title, self.last_downloaded.ctime(), self.schedule)
|
||||
|
||||
builtin_recipes = [Recipe(m, r, True) for r, m in zip(recipes, recipe_modules)]
|
||||
|
||||
def save_recipes(recipes):
    '''
    Store ``recipes`` in the scheduler config under ``'scheduled_recipes'``.

    Each recipe is converted with its ``pickle()`` method first, so the config
    receives whatever ``pickle()`` returns rather than the recipe objects.
    '''
    pickled = []
    for recipe in recipes:
        pickled.append(recipe.pickle())
    config['scheduled_recipes'] = pickled
|
||||
|
||||
def load_recipes():
    '''
    Refresh the scheduler config and rebuild the scheduled recipes from it.

    :return: A list with one :class:`Recipe` per entry stored under
             ``'scheduled_recipes'``; an empty list if the key is missing.
    '''
    config.refresh()
    stored = config.get('scheduled_recipes', [])
    loaded = []
    for state in stored:
        loaded.append(Recipe().unpickle(state))
    return loaded
|
||||
|
||||
class RecipeModel(QAbstractListModel, SearchQueryParser):
|
||||
|
||||
LOCATIONS = ['all']
|
||||
@ -70,16 +86,18 @@ class RecipeModel(QAbstractListModel, SearchQueryParser):
|
||||
for x in db.get_recipes():
|
||||
recipe = compile_recipe(x[1])
|
||||
self.recipes.append(Recipe(x[0], recipe, False))
|
||||
|
||||
sr = dynamic['scheduled_recipes']
|
||||
if not sr:
|
||||
sr = []
|
||||
self.refresh()
|
||||
self._map = list(range(len(self.recipes)))
|
||||
|
||||
def refresh(self):
    '''
    Re-apply the saved schedule to the model's recipes and re-sort them.

    For every recipe that also appears in the saved schedule, ``schedule``
    and ``last_downloaded`` are copied from the saved entry. Recipes that
    are not in the saved schedule are left untouched. Afterwards the
    recipes are sorted and the row map is reset to the identity mapping.
    '''
    sr = load_recipes()
    for recipe in self.recipes:
        # A single index() call replaces the previous membership test plus
        # two index() calls; all of them compare recipes with ==.
        try:
            saved = sr[sr.index(recipe)]
        except ValueError:
            continue  # not scheduled
        recipe.schedule = saved.schedule
        recipe.last_downloaded = saved.last_downloaded

    self.recipes.sort()
    self._map = list(range(len(self.recipes)))
|
||||
|
||||
|
||||
def universal_set(self):
|
||||
return set(self.recipes)
|
||||
@ -203,7 +221,7 @@ class SchedulerDialog(QDialog, Ui_Dialog):
|
||||
lambda state: self.interval.setEnabled(state == Qt.Checked))
|
||||
self.connect(self.show_password, SIGNAL('stateChanged(int)'),
|
||||
lambda state: self.password.setEchoMode(self.password.Normal if state == Qt.Checked else self.password.Password))
|
||||
self.connect(self.interval, SIGNAL('valueChanged(int)'), self.do_schedule)
|
||||
self.connect(self.interval, SIGNAL('valueChanged(double)'), self.do_schedule)
|
||||
self.connect(self.search, SIGNAL('search(PyQt_PyObject)'), self._model.search)
|
||||
self.connect(self._model, SIGNAL('modelReset()'), lambda : self.detail_box.setVisible(False))
|
||||
self.connect(self.download, SIGNAL('clicked()'), self.download_now)
|
||||
@ -218,32 +236,32 @@ class SchedulerDialog(QDialog, Ui_Dialog):
|
||||
username, password = username.strip(), password.strip()
|
||||
recipe = self._model.data(self.recipes.currentIndex(), Qt.UserRole)
|
||||
key = 'recipe_account_info_%s'%recipe.id
|
||||
dynamic[key] = (username, password) if username and password else None
|
||||
config[key] = (username, password) if username and password else None
|
||||
|
||||
def do_schedule(self, *args):
|
||||
recipe = self.recipes.currentIndex()
|
||||
if not recipe.isValid():
|
||||
return
|
||||
recipe = self._model.data(recipe, Qt.UserRole)
|
||||
recipes = dynamic['scheduled_recipes']
|
||||
recipes = load_recipes()
|
||||
if self.schedule.checkState() == Qt.Checked:
|
||||
if recipe in recipes:
|
||||
recipe = recipes[recipes.index(recipe)]
|
||||
else:
|
||||
recipe.last_downloaded = datetime.fromordinal(1)
|
||||
recipes.append(recipe)
|
||||
recipes.schedule = self.interval.value()
|
||||
if recipes.schedule == 0.0:
|
||||
recipes.schedule = 1/24.
|
||||
if recipe.need_subscription and not dynamic['recipe_account_info_%s'%recipe.id]:
|
||||
recipe.schedule = self.interval.value()
|
||||
if recipe.schedule < 0.1:
|
||||
recipe.schedule = 1/24.
|
||||
if recipe.needs_subscription and not config['recipe_account_info_%s'%recipe.id]:
|
||||
error_dialog(self, _('Must set account information'), _('This recipe requires a username and password')).exec_()
|
||||
self.schedule.setCheckState(Qt.Unchecked)
|
||||
return
|
||||
else:
|
||||
if recipe in recipes:
|
||||
recipes.remove(recipe)
|
||||
dynamic['scheduled_recipes'] = recipes
|
||||
save_recipes(recipes)
|
||||
self.emit(SIGNAL('new_schedule(PyQt_PyObject)'), recipes)
|
||||
self._model.resort()
|
||||
|
||||
def show_recipe(self, index):
|
||||
recipe = self._model.data(index, Qt.UserRole)
|
||||
@ -254,9 +272,9 @@ class SchedulerDialog(QDialog, Ui_Dialog):
|
||||
self.interval.setValue(recipe.schedule if recipe.schedule is not None else 1)
|
||||
self.detail_box.setVisible(True)
|
||||
self.account.setVisible(recipe.needs_subscription)
|
||||
self.interval.setEnabled(self.schedule.checkState == Qt.Checked)
|
||||
self.interval.setEnabled(self.schedule.checkState() == Qt.Checked)
|
||||
key = 'recipe_account_info_%s'%recipe.id
|
||||
account_info = dynamic[key]
|
||||
account_info = config[key]
|
||||
self.show_password.setChecked(False)
|
||||
if account_info:
|
||||
self.username.blockSignals(True)
|
||||
@ -265,73 +283,120 @@ class SchedulerDialog(QDialog, Ui_Dialog):
|
||||
self.password.setText(account_info[1])
|
||||
self.username.blockSignals(False)
|
||||
self.password.blockSignals(False)
|
||||
d = datetime.utcnow() - recipe.last_downloaded
|
||||
ld = '%.1f'%(d.days + d.seconds/(24*3600))
|
||||
if d < timedelta(days=366):
|
||||
self.last_downloaded.setText(_('Last downloaded: %s days ago')%ld)
|
||||
else:
|
||||
self.last_downloaded.setText(_('Last downloaded: never'))
|
||||
|
||||
|
||||
class Scheduler(QObject):
|
||||
|
||||
INTERVAL = 5 # minutes
|
||||
INTERVAL = 1 # minutes
|
||||
|
||||
def __init__(self, main):
|
||||
self.main = main
|
||||
self.verbose = main.verbose
|
||||
QObject.__init__(self)
|
||||
self.lock = RLock()
|
||||
self.lock = QMutex(QMutex.Recursive)
|
||||
self.queue = set([])
|
||||
recipes = dynamic['scheduled_recipes']
|
||||
if not recipes:
|
||||
recipes = []
|
||||
recipes = load_recipes()
|
||||
self.refresh_schedule(recipes)
|
||||
self.timer = QTimer()
|
||||
self.dirtied = False
|
||||
self.connect(self.timer, SIGNAL('timeout()'), self.check)
|
||||
self.timer.start(self.INTERVAL * 60000)
|
||||
self.timer.start(int(self.INTERVAL * 60000))
|
||||
|
||||
def debug(self, *args):
    '''
    Write ``args`` to stdout as one space separated line.

    Does nothing unless ``self.verbose`` is true. Output is flushed
    immediately.
    '''
    if not self.verbose:
        return
    line = ' '.join([unicode(arg) for arg in args])
    sys.stdout.write(line + '\n')
    sys.stdout.flush()
|
||||
|
||||
def check(self):
|
||||
db = self.main.library_view.model().db
|
||||
now = datetime.utcnow()
|
||||
needs_downloading = set([])
|
||||
for recipe in self.recipes:
|
||||
delta = now - recipe.last_downloaded
|
||||
if delta > timedelta(days=recipe.schedule):
|
||||
needs_downloading.add(recipe)
|
||||
with self.lock:
|
||||
if not self.lock.tryLock():
|
||||
return
|
||||
try:
|
||||
if self.dirtied:
|
||||
self.refresh_schedule(load_recipes())
|
||||
self.dirtied = False
|
||||
needs_downloading = set([])
|
||||
self.debug('Checking...')
|
||||
now = datetime.utcnow()
|
||||
for recipe in self.recipes:
|
||||
if recipe.schedule is None:
|
||||
continue
|
||||
delta = now - recipe.last_downloaded
|
||||
if delta > timedelta(days=recipe.schedule):
|
||||
needs_downloading.add(recipe)
|
||||
|
||||
self.debug('Needs downloading:', needs_downloading)
|
||||
|
||||
needs_downloading = [r for r in needs_downloading if r not in self.queue]
|
||||
for recipe in needs_downloading:
|
||||
try:
|
||||
id = int(recipe.id)
|
||||
script = db.get_recipe(id)
|
||||
if script is None:
|
||||
self.recipes.remove(recipe)
|
||||
dynamic['scheduled_recipes'] = self.recipes
|
||||
continue
|
||||
except ValueError:
|
||||
script = recipe.title
|
||||
self.main.download_scheduled_recipe(recipe, script, self.recipe_downloaded)
|
||||
self.queue.add(recipe)
|
||||
|
||||
self.do_download(recipe)
|
||||
finally:
|
||||
self.lock.unlock()
|
||||
|
||||
def do_download(self, recipe):
    '''
    Queue ``recipe`` for download through the main window.

    A recipe whose id parses as an integer is looked up in the database
    and its stored script is used. Any other id (builtin recipes use
    their module name as id) falls back to the recipe title as the
    script.

    If the database no longer has a script for an integer id, nothing is
    queued. In that case the recipe is also dropped from the schedule,
    and the schedule saved, when it was part of it.
    '''
    # Only the int() conversion is guarded: a ValueError raised further
    # down (e.g. by list.remove) must not be mistaken for "non-integer id".
    try:
        recipe_id = int(recipe.id)
    except ValueError:
        script = recipe.title
    else:
        script = self.main.library_view.model().db.get_recipe(recipe_id)
        if script is None:
            if recipe in self.recipes:
                self.recipes.remove(recipe)
                save_recipes(self.recipes)
            return
    self.debug('\tQueueing:', recipe)
    self.main.download_scheduled_recipe(recipe, script, self.recipe_downloaded)
    self.queue.add(recipe)
|
||||
|
||||
def recipe_downloaded(self, recipe):
|
||||
with self.lock:
|
||||
self.lock.lock()
|
||||
try:
|
||||
if recipe in self.recipes:
|
||||
recipe = self.recipes[self.recipes.index(recipe)]
|
||||
now = datetime.utcnow()
|
||||
d = now - recipe.last_downloaded
|
||||
if recipe.schedule is not None:
|
||||
interval = timedelta(days=recipe.schedule)
|
||||
if abs(d - interval) < timedelta(hours=1):
|
||||
recipe.last_downloaded += interval
|
||||
else:
|
||||
recipe.last_downloaded = now
|
||||
else:
|
||||
recipe.last_downloaded = now
|
||||
save_recipes(self.recipes)
|
||||
self.queue.remove(recipe)
|
||||
recipe = self.recipes[self.recipes.index(recipe)]
|
||||
now = datetime.utcnow()
|
||||
d = now - recipe.last_downloaded
|
||||
interval = timedelta(days=recipe.schedule)
|
||||
if abs(d - interval) < timedelta(hours=1):
|
||||
recipe.last_downloaded += interval
|
||||
else:
|
||||
recipe.last_downloaded = now
|
||||
dynamic['scheduled_recipes'] = self.recipes
|
||||
|
||||
self.dirtied = True
|
||||
finally:
|
||||
self.lock.unlock()
|
||||
self.debug('Downloaded:', recipe)
|
||||
|
||||
def download(self, recipe):
|
||||
if recipe in self.recipes:
|
||||
recipe = self.recipes[self.recipes.index(recipe)]
|
||||
raise NotImplementedError
|
||||
self.lock.lock()
|
||||
try:
|
||||
if recipe in self.recipes:
|
||||
recipe = self.recipes[self.recipes.index(recipe)]
|
||||
if recipe not in self.queue:
|
||||
self.do_download(recipe)
|
||||
finally:
|
||||
self.lock.unlock()
|
||||
|
||||
def refresh_schedule(self, recipes):
    '''Replace the scheduler's list of scheduled recipes with ``recipes``.'''
    self.recipes = recipes
|
||||
|
||||
def show_dialog(self):
|
||||
d = SchedulerDialog(self.main.library_view.model().db)
|
||||
self.connect(d, SIGNAL('new_schedule(PyQt_PyObject)'), self.refresh_schedule)
|
||||
self.connect(d, SIGNAL('download_now(PyQt_PyObject)'), self.download)
|
||||
d.exec_()
|
||||
self.lock.lock()
|
||||
try:
|
||||
d = SchedulerDialog(self.main.library_view.model().db)
|
||||
self.connect(d, SIGNAL('new_schedule(PyQt_PyObject)'), self.refresh_schedule)
|
||||
self.connect(d, SIGNAL('download_now(PyQt_PyObject)'), self.download)
|
||||
d.exec_()
|
||||
self.recipes = load_recipes()
|
||||
finally:
|
||||
self.lock.unlock()
|
||||
|
||||
def main(args=sys.argv):
|
||||
app = QApplication([])
|
||||
|
@ -10,11 +10,11 @@
|
||||
</rect>
|
||||
</property>
|
||||
<property name="windowTitle" >
|
||||
<string>Schedule recipes for download</string>
|
||||
<string>Schedule news download</string>
|
||||
</property>
|
||||
<property name="windowIcon" >
|
||||
<iconset resource="../images.qrc" >
|
||||
<normaloff>:/images/news.svg</normaloff>:/images/news.svg</iconset>
|
||||
<normaloff>:/images/scheduler.svg</normaloff>:/images/scheduler.svg</iconset>
|
||||
</property>
|
||||
<layout class="QGridLayout" name="gridLayout" >
|
||||
<item rowspan="2" row="0" column="0" >
|
||||
@ -161,6 +161,13 @@
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QLabel" name="last_downloaded" >
|
||||
<property name="text" >
|
||||
<string> </string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QGroupBox" name="account" >
|
||||
<property name="title" >
|
||||
|
1295
src/calibre/gui2/images/scheduler.svg
Normal file
1295
src/calibre/gui2/images/scheduler.svg
Normal file
File diff suppressed because it is too large
Load Diff
After Width: | Height: | Size: 48 KiB |
@ -23,6 +23,7 @@ from calibre.gui2 import APP_UID, warning_dialog, choose_files, error_dialog, \
|
||||
max_available_height, config
|
||||
from calibre.gui2.cover_flow import CoverFlow, DatabaseImages, pictureflowerror
|
||||
from calibre.library.database import LibraryDatabase
|
||||
from calibre.gui2.dialogs.scheduler import Scheduler
|
||||
from calibre.gui2.update import CheckForUpdates
|
||||
from calibre.gui2.main_window import MainWindow, option_parser
|
||||
from calibre.gui2.main_ui import Ui_MainWindow
|
||||
@ -74,6 +75,7 @@ class Main(MainWindow, Ui_MainWindow):
|
||||
Ui_MainWindow.__init__(self)
|
||||
self.setupUi(self)
|
||||
self.setWindowTitle(__appname__)
|
||||
self.verbose = opts.verbose
|
||||
self.read_settings()
|
||||
self.job_manager = JobManager()
|
||||
self.jobs_dialog = JobsDialog(self, self.job_manager)
|
||||
@ -290,7 +292,10 @@ class Main(MainWindow, Ui_MainWindow):
|
||||
from calibre.library import server_config
|
||||
self.content_server = start_threaded_server(db, server_config().parse())
|
||||
self.test_server_timer = QTimer.singleShot(10000, self.test_server)
|
||||
|
||||
|
||||
self.scheduler = Scheduler(self)
|
||||
self.connect(self.news_menu.scheduler, SIGNAL('triggered(bool)'), lambda x :self.scheduler.show_dialog())
|
||||
|
||||
def test_server(self, *args):
|
||||
if self.content_server.exception is not None:
|
||||
error_dialog(self, _('Failed to start content server'),
|
||||
@ -1294,6 +1299,8 @@ path_to_ebook to the database.
|
||||
''')
|
||||
parser.add_option('--with-library', default=None, action='store',
|
||||
help=_('Use the library located at the specified path.'))
|
||||
parser.add_option('-v', '--verbose', default=0, action='count',
|
||||
help=_('Log debugging information to console'))
|
||||
opts, args = parser.parse_args(args)
|
||||
if opts.with_library is not None and os.path.isdir(opts.with_library):
|
||||
prefs.set('library_path', opts.with_library)
|
||||
|
@ -29,18 +29,25 @@ class NewsMenu(QMenu):
|
||||
|
||||
def __init__(self, customize_feeds_func):
|
||||
QMenu.__init__(self)
|
||||
self.scheduler = QAction(QIcon(':/images/scheduler.svg'), _('Schedule news download'), self)
|
||||
self.addAction(self.scheduler)
|
||||
self.cac = QAction(QIcon(':/images/user_profile.svg'), _('Add a custom news source'), self)
|
||||
self.connect(self.cac, SIGNAL('triggered(bool)'), customize_feeds_func)
|
||||
self.addAction(self.cac)
|
||||
self.addSeparator()
|
||||
self.custom_menu = CustomNewsMenu()
|
||||
self.addMenu(self.custom_menu)
|
||||
self.connect(self.custom_menu, SIGNAL('start_news_fetch(PyQt_PyObject, PyQt_PyObject)'),
|
||||
self.fetch_news)
|
||||
self.addSeparator()
|
||||
|
||||
self.dmenu = QMenu(self)
|
||||
self.dmenu.setTitle(_('Download news'))
|
||||
self.dmenu.setIcon(QIcon(':/images/news.svg'))
|
||||
self.addMenu(self.dmenu)
|
||||
|
||||
for title in titles:
|
||||
recipe = get_builtin_recipe(title)[0]
|
||||
self.addAction(NewsAction(recipe, self))
|
||||
self.dmenu.addAction(NewsAction(recipe, self))
|
||||
|
||||
|
||||
def fetch_news(self, recipe, module):
|
||||
@ -76,7 +83,7 @@ class CustomNewsMenu(QMenu):
|
||||
|
||||
def __init__(self):
|
||||
QMenu.__init__(self)
|
||||
self.setTitle(_('Custom news sources'))
|
||||
self.setTitle(_('Download custom news'))
|
||||
self.connect(self, SIGNAL('triggered(QAction*)'), self.launch)
|
||||
|
||||
def launch(self, action):
|
||||
|
@ -361,12 +361,13 @@ def _fetch_news(data, fmt):
|
||||
|
||||
|
||||
def fetch_scheduled_recipe(recipe, script):
|
||||
from calibre.gui2.dialogs.scheduler import config
|
||||
fmt = prefs['output_format'].lower()
|
||||
pt = PersistentTemporaryFile(suffix='_feeds2%s.%s'%(fmt.lower(), fmt.lower()))
|
||||
pt.close()
|
||||
args = ['feeds2%s'%fmt.lower(), '--output', pt.name, '--debug']
|
||||
if recipe.needs_subscription:
|
||||
x = dynamic['recipe_account_info_%s'%recipe.id]
|
||||
x = config.get('recipe_account_info_%s'%recipe.id, False)
|
||||
if not x:
|
||||
raise ValueError(_('You must set a username and password for %s')%recipe.title)
|
||||
args.extend(['--username', x[0], '--password', x[1]])
|
||||
|
@ -716,7 +716,7 @@ class LibraryDatabase2(LibraryDatabase):
|
||||
self.conn.commit()
|
||||
|
||||
def get_recipes(self):
|
||||
return self.conn.get('SELECT id, title FROM feeds')
|
||||
return self.conn.get('SELECT id, script FROM feeds')
|
||||
|
||||
def get_recipe(self, id):
|
||||
return self.conn.get('SELECT script FROM feeds WHERE id=?', (id,), all=False)
|
||||
|
@ -473,8 +473,12 @@ class DynamicConfig(dict):
|
||||
class for preferences that you don't intend to have the users edit directly.
|
||||
'''
|
||||
def __init__(self, name='dynamic'):
|
||||
dict.__init__(self, {})
|
||||
self.name = name
|
||||
self.file_path = os.path.join(config_dir, name+'.pickle')
|
||||
self.refresh()
|
||||
|
||||
def refresh(self):
|
||||
d = {}
|
||||
if os.path.exists(self.file_path):
|
||||
with ExclusiveFile(self.file_path) as f:
|
||||
@ -482,8 +486,11 @@ class DynamicConfig(dict):
|
||||
try:
|
||||
d = cPickle.loads(raw) if raw.strip() else {}
|
||||
except:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
d = {}
|
||||
dict.__init__(self, d)
|
||||
self.clear()
|
||||
self.update(d)
|
||||
|
||||
def __getitem__(self, key):
|
||||
try:
|
||||
|
@ -12,6 +12,7 @@ recipe_modules = [
|
||||
'discover_magazine', 'scientific_american', 'new_york_review_of_books',
|
||||
'daily_telegraph', 'guardian', 'el_pais', 'new_scientist', 'b92',
|
||||
'politika', 'moscow_times', 'latimes', 'japan_times', 'san_fran_chronicle',
|
||||
'demorgen_be', 'de_standaard'
|
||||
]
|
||||
|
||||
import re, imp, inspect, time, os
|
||||
|
32
src/calibre/web/feeds/recipes/de_standaard.py
Normal file
32
src/calibre/web/feeds/recipes/de_standaard.py
Normal file
@ -0,0 +1,32 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
standaard.be
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class DeStandaard(BasicNewsRecipe):
    '''Recipe settings for De Standaard (standaard.be).'''

    title = u'De Standaard'
    __author__ = u'Darko Miletic'
    description = u'News from Belgium'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Tag specifications, each with a tag name and the attributes to match.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': '_parts_midContainer_div'}},
    ]
    remove_tags_after = {'name': 'h3', 'attrs': {'title': 'Binnenland'}}
    remove_tags = [
        {'name': 'h3', 'attrs': {'title': 'Binnenland'}},
        {'name': 'p', 'attrs': {'class': 'by'}},
        {'name': 'div', 'attrs': {'class': 'articlesright'}},
        {'name': 'a', 'attrs': {'class': 'help'}},
        {'name': 'a', 'attrs': {'class': 'archive'}},
        {'name': 'a', 'attrs': {'class': 'print'}},
        {'name': 'a', 'attrs': {'class': 'email'}},
    ]

    # (section title, feed URL) pairs
    feeds = [
        (u'De Standaard Online', u'http://feeds.feedburner.com/dso-front'),
    ]
|
31
src/calibre/web/feeds/recipes/demorgen_be.py
Normal file
31
src/calibre/web/feeds/recipes/demorgen_be.py
Normal file
@ -0,0 +1,31 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
demorgen.be
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class DeMorganBe(BasicNewsRecipe):
    '''Recipe settings for DeMorgen.be.'''

    title = u'DeMorgen.be'
    __author__ = u'Darko Miletic'
    description = u'News from Belgium'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Tag specification: tag name plus the attributes to match.
    keep_only_tags = [dict(name='div', attrs={'class': 'art_box2'})]

    # (section title, feed URL) pairs. 'Financiele morgen' used to be listed
    # twice; each feed now appears exactly once.
    feeds = [
        (u'Nieuws', u'http://www.demorgen.be/nieuws/rss.xml'),
        (u'De Gedachte', u'http://www.demorgen.be/degedachte/rss.xml'),
        (u'Financiele morgen', u'http://www.demorgen.be/financielemorgen/rss.xml'),
        (u'Sport', u'http://www.demorgen.be/sport/rss.xml'),
        (u'Bis', u'http://www.demorgen.be/bis/rss.xml'),
        (u'Magazine', u'http://www.demorgen.be/magazine/rss.xml'),
        (u'De stand der dingen', u'http://www.demorgen.be/standderdingen/rss.xml'),
    ]
|
@ -127,10 +127,13 @@ class RecursiveFetcher(object, LoggingInterface):
|
||||
|
||||
if self.keep_only_tags:
|
||||
body = Tag(soup, 'body')
|
||||
for spec in self.keep_only_tags:
|
||||
for tag in soup.find('body').findAll(**spec):
|
||||
body.insert(len(body.contents), tag)
|
||||
soup.find('body').replaceWith(body)
|
||||
try:
|
||||
for spec in self.keep_only_tags:
|
||||
for tag in soup.find('body').findAll(**spec):
|
||||
body.insert(len(body.contents), tag)
|
||||
soup.find('body').replaceWith(body)
|
||||
except AttributeError: # soup has no body element
|
||||
pass
|
||||
|
||||
def remove_beyond(tag, next):
|
||||
while tag is not None and tag.name != 'body':
|
||||
|
Loading…
x
Reference in New Issue
Block a user