Implement scheduled news download. Add recipes for De Standaard and DeMorgen.be (thanks to Darko Miletic)

This commit is contained in:
Kovid Goyal 2008-11-20 16:57:59 -08:00
parent 145eab8acf
commit f1db17049c
12 changed files with 1542 additions and 86 deletions

View File

@ -8,30 +8,39 @@ Scheduler for automated recipe downloads
''' '''
import sys, copy import sys, copy
from threading import RLock
from datetime import datetime, timedelta from datetime import datetime, timedelta
from PyQt4.Qt import QDialog, QApplication, QLineEdit, QPalette, SIGNAL, QBrush, \ from PyQt4.Qt import QDialog, QApplication, QLineEdit, QPalette, SIGNAL, QBrush, \
QColor, QAbstractListModel, Qt, QVariant, QFont, QIcon, \ QColor, QAbstractListModel, Qt, QVariant, QFont, QIcon, \
QFile, QObject, QTimer QFile, QObject, QTimer, QMutex
from calibre import english_sort from calibre import english_sort
from calibre.gui2.dialogs.scheduler_ui import Ui_Dialog from calibre.gui2.dialogs.scheduler_ui import Ui_Dialog
from calibre.web.feeds.recipes import recipes, recipe_modules, compile_recipe from calibre.web.feeds.recipes import recipes, recipe_modules, compile_recipe
from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.search_query_parser import SearchQueryParser
from calibre.utils.pyparsing import ParseException from calibre.utils.pyparsing import ParseException
from calibre.gui2 import dynamic, NONE, error_dialog from calibre.gui2 import NONE, error_dialog
from calibre.utils.config import DynamicConfig
config = DynamicConfig('scheduler')
class Recipe(object): class Recipe(object):
def __init__(self, id, recipe_class, builtin): def __init__(self, id=None, recipe_class=None, builtin=True):
self.id = id self.id = id
self.title = recipe_class.title self.title = getattr(recipe_class, 'title', None)
self.description = recipe_class.description self.description = getattr(recipe_class, 'description', None)
self.last_downloaded = datetime.fromordinal(1) self.last_downloaded = datetime.fromordinal(1)
self.downloading = False self.downloading = False
self.builtin = builtin self.builtin = builtin
self.schedule = None self.schedule = None
self.needs_subscription = recipe_class.needs_subscription self.needs_subscription = getattr(recipe_class, 'needs_subscription', False)
def pickle(self):
    '''
    Return a shallow copy of this object's attribute dictionary.

    The copy is independent of the instance: adding or replacing keys in
    the returned dict does not touch the instance itself.
    '''
    return dict(vars(self))
def unpickle(self, dict):
    '''
    Merge a previously pickled attribute mapping into this object.

    :param dict: Mapping of attribute names to values (note: the parameter
                 name shadows the builtin inside this method).
    :return: ``self``, so the call can be chained onto a constructor.
    '''
    attributes = vars(self)
    attributes.update(dict)
    return self
def __cmp__(self, other): def __cmp__(self, other):
if self.id == getattr(other, 'id', None): if self.id == getattr(other, 'id', None):
@ -53,10 +62,17 @@ class Recipe(object):
return self.id == getattr(other, 'id', None) return self.id == getattr(other, 'id', None)
def __repr__(self): def __repr__(self):
return u'%s:%s'%(self.id, self.title) return u'%s|%s|%s|%s'%(self.id, self.title, self.last_downloaded.ctime(), self.schedule)
builtin_recipes = [Recipe(m, r, True) for r, m in zip(recipes, recipe_modules)] builtin_recipes = [Recipe(m, r, True) for r, m in zip(recipes, recipe_modules)]
def save_recipes(recipes):
    '''
    Store the pickled state of every recipe in the scheduler config.

    Each recipe is converted with its ``pickle()`` method and the resulting
    list is assigned to ``config['scheduled_recipes']``.
    '''
    # NOTE(review): presumably assigning into the DynamicConfig persists it
    # to disk -- confirm against DynamicConfig.__setitem__.
    pickled = []
    for recipe in recipes:
        pickled.append(recipe.pickle())
    config['scheduled_recipes'] = pickled
def load_recipes():
    '''
    Rebuild the list of scheduled recipes from the scheduler config.

    The config is refreshed first, then every stored state mapping under
    ``'scheduled_recipes'`` is turned back into a :class:`Recipe`.

    :return: A list of :class:`Recipe` objects (empty if nothing is stored).
    '''
    config.refresh()
    stored_states = config.get('scheduled_recipes', [])
    loaded = []
    for state in stored_states:
        loaded.append(Recipe().unpickle(state))
    return loaded
class RecipeModel(QAbstractListModel, SearchQueryParser): class RecipeModel(QAbstractListModel, SearchQueryParser):
LOCATIONS = ['all'] LOCATIONS = ['all']
@ -70,16 +86,18 @@ class RecipeModel(QAbstractListModel, SearchQueryParser):
for x in db.get_recipes(): for x in db.get_recipes():
recipe = compile_recipe(x[1]) recipe = compile_recipe(x[1])
self.recipes.append(Recipe(x[0], recipe, False)) self.recipes.append(Recipe(x[0], recipe, False))
self.refresh()
self._map = list(range(len(self.recipes)))
sr = dynamic['scheduled_recipes'] def refresh(self):
if not sr: sr = load_recipes()
sr = []
for recipe in self.recipes: for recipe in self.recipes:
if recipe in sr: if recipe in sr:
recipe.schedule = sr[sr.index(recipe)].schedule recipe.schedule = sr[sr.index(recipe)].schedule
recipe.last_downloaded = sr[sr.index(recipe)].last_downloaded
self.recipes.sort() self.recipes.sort()
self._map = list(range(len(self.recipes)))
def universal_set(self): def universal_set(self):
return set(self.recipes) return set(self.recipes)
@ -203,7 +221,7 @@ class SchedulerDialog(QDialog, Ui_Dialog):
lambda state: self.interval.setEnabled(state == Qt.Checked)) lambda state: self.interval.setEnabled(state == Qt.Checked))
self.connect(self.show_password, SIGNAL('stateChanged(int)'), self.connect(self.show_password, SIGNAL('stateChanged(int)'),
lambda state: self.password.setEchoMode(self.password.Normal if state == Qt.Checked else self.password.Password)) lambda state: self.password.setEchoMode(self.password.Normal if state == Qt.Checked else self.password.Password))
self.connect(self.interval, SIGNAL('valueChanged(int)'), self.do_schedule) self.connect(self.interval, SIGNAL('valueChanged(double)'), self.do_schedule)
self.connect(self.search, SIGNAL('search(PyQt_PyObject)'), self._model.search) self.connect(self.search, SIGNAL('search(PyQt_PyObject)'), self._model.search)
self.connect(self._model, SIGNAL('modelReset()'), lambda : self.detail_box.setVisible(False)) self.connect(self._model, SIGNAL('modelReset()'), lambda : self.detail_box.setVisible(False))
self.connect(self.download, SIGNAL('clicked()'), self.download_now) self.connect(self.download, SIGNAL('clicked()'), self.download_now)
@ -218,32 +236,32 @@ class SchedulerDialog(QDialog, Ui_Dialog):
username, password = username.strip(), password.strip() username, password = username.strip(), password.strip()
recipe = self._model.data(self.recipes.currentIndex(), Qt.UserRole) recipe = self._model.data(self.recipes.currentIndex(), Qt.UserRole)
key = 'recipe_account_info_%s'%recipe.id key = 'recipe_account_info_%s'%recipe.id
dynamic[key] = (username, password) if username and password else None config[key] = (username, password) if username and password else None
def do_schedule(self, *args): def do_schedule(self, *args):
recipe = self.recipes.currentIndex() recipe = self.recipes.currentIndex()
if not recipe.isValid(): if not recipe.isValid():
return return
recipe = self._model.data(recipe, Qt.UserRole) recipe = self._model.data(recipe, Qt.UserRole)
recipes = dynamic['scheduled_recipes'] recipes = load_recipes()
if self.schedule.checkState() == Qt.Checked: if self.schedule.checkState() == Qt.Checked:
if recipe in recipes: if recipe in recipes:
recipe = recipes[recipes.index(recipe)] recipe = recipes[recipes.index(recipe)]
else: else:
recipe.last_downloaded = datetime.fromordinal(1)
recipes.append(recipe) recipes.append(recipe)
recipes.schedule = self.interval.value() recipe.schedule = self.interval.value()
if recipes.schedule == 0.0: if recipe.schedule < 0.1:
recipes.schedule = 1/24. recipe.schedule = 1/24.
if recipe.need_subscription and not dynamic['recipe_account_info_%s'%recipe.id]: if recipe.needs_subscription and not config['recipe_account_info_%s'%recipe.id]:
error_dialog(self, _('Must set account information'), _('This recipe requires a username and password')).exec_() error_dialog(self, _('Must set account information'), _('This recipe requires a username and password')).exec_()
self.schedule.setCheckState(Qt.Unchecked) self.schedule.setCheckState(Qt.Unchecked)
return return
else: else:
if recipe in recipes: if recipe in recipes:
recipes.remove(recipe) recipes.remove(recipe)
dynamic['scheduled_recipes'] = recipes save_recipes(recipes)
self.emit(SIGNAL('new_schedule(PyQt_PyObject)'), recipes) self.emit(SIGNAL('new_schedule(PyQt_PyObject)'), recipes)
self._model.resort()
def show_recipe(self, index): def show_recipe(self, index):
recipe = self._model.data(index, Qt.UserRole) recipe = self._model.data(index, Qt.UserRole)
@ -254,9 +272,9 @@ class SchedulerDialog(QDialog, Ui_Dialog):
self.interval.setValue(recipe.schedule if recipe.schedule is not None else 1) self.interval.setValue(recipe.schedule if recipe.schedule is not None else 1)
self.detail_box.setVisible(True) self.detail_box.setVisible(True)
self.account.setVisible(recipe.needs_subscription) self.account.setVisible(recipe.needs_subscription)
self.interval.setEnabled(self.schedule.checkState == Qt.Checked) self.interval.setEnabled(self.schedule.checkState() == Qt.Checked)
key = 'recipe_account_info_%s'%recipe.id key = 'recipe_account_info_%s'%recipe.id
account_info = dynamic[key] account_info = config[key]
self.show_password.setChecked(False) self.show_password.setChecked(False)
if account_info: if account_info:
self.username.blockSignals(True) self.username.blockSignals(True)
@ -265,73 +283,120 @@ class SchedulerDialog(QDialog, Ui_Dialog):
self.password.setText(account_info[1]) self.password.setText(account_info[1])
self.username.blockSignals(False) self.username.blockSignals(False)
self.password.blockSignals(False) self.password.blockSignals(False)
d = datetime.utcnow() - recipe.last_downloaded
ld = '%.1f'%(d.days + d.seconds/(24*3600))
if d < timedelta(days=366):
self.last_downloaded.setText(_('Last downloaded: %s days ago')%ld)
else:
self.last_downloaded.setText(_('Last downloaded: never'))
class Scheduler(QObject): class Scheduler(QObject):
INTERVAL = 5 # minutes INTERVAL = 1 # minutes
def __init__(self, main): def __init__(self, main):
self.main = main self.main = main
self.verbose = main.verbose
QObject.__init__(self) QObject.__init__(self)
self.lock = RLock() self.lock = QMutex(QMutex.Recursive)
self.queue = set([]) self.queue = set([])
recipes = dynamic['scheduled_recipes'] recipes = load_recipes()
if not recipes:
recipes = []
self.refresh_schedule(recipes) self.refresh_schedule(recipes)
self.timer = QTimer() self.timer = QTimer()
self.dirtied = False
self.connect(self.timer, SIGNAL('timeout()'), self.check) self.connect(self.timer, SIGNAL('timeout()'), self.check)
self.timer.start(self.INTERVAL * 60000) self.timer.start(int(self.INTERVAL * 60000))
def debug(self, *args):
if self.verbose:
sys.stdout.write(' '.join(map(unicode, args))+'\n')
sys.stdout.flush()
def check(self): def check(self):
db = self.main.library_view.model().db if not self.lock.tryLock():
now = datetime.utcnow() return
try:
if self.dirtied:
self.refresh_schedule(load_recipes())
self.dirtied = False
needs_downloading = set([]) needs_downloading = set([])
self.debug('Checking...')
now = datetime.utcnow()
for recipe in self.recipes: for recipe in self.recipes:
if recipe.schedule is None:
continue
delta = now - recipe.last_downloaded delta = now - recipe.last_downloaded
if delta > timedelta(days=recipe.schedule): if delta > timedelta(days=recipe.schedule):
needs_downloading.add(recipe) needs_downloading.add(recipe)
with self.lock:
self.debug('Needs downloading:', needs_downloading)
needs_downloading = [r for r in needs_downloading if r not in self.queue] needs_downloading = [r for r in needs_downloading if r not in self.queue]
for recipe in needs_downloading: for recipe in needs_downloading:
self.do_download(recipe)
finally:
self.lock.unlock()
def do_download(self, recipe):
try: try:
id = int(recipe.id) id = int(recipe.id)
script = db.get_recipe(id) script = self.main.library_view.model().db.get_recipe(id)
if script is None: if script is None:
self.recipes.remove(recipe) self.recipes.remove(recipe)
dynamic['scheduled_recipes'] = self.recipes save_recipes(self.recipes)
continue return
except ValueError: except ValueError:
script = recipe.title script = recipe.title
self.debug('\tQueueing:', recipe)
self.main.download_scheduled_recipe(recipe, script, self.recipe_downloaded) self.main.download_scheduled_recipe(recipe, script, self.recipe_downloaded)
self.queue.add(recipe) self.queue.add(recipe)
def recipe_downloaded(self, recipe): def recipe_downloaded(self, recipe):
with self.lock: self.lock.lock()
self.queue.remove(recipe) try:
if recipe in self.recipes:
recipe = self.recipes[self.recipes.index(recipe)] recipe = self.recipes[self.recipes.index(recipe)]
now = datetime.utcnow() now = datetime.utcnow()
d = now - recipe.last_downloaded d = now - recipe.last_downloaded
if recipe.schedule is not None:
interval = timedelta(days=recipe.schedule) interval = timedelta(days=recipe.schedule)
if abs(d - interval) < timedelta(hours=1): if abs(d - interval) < timedelta(hours=1):
recipe.last_downloaded += interval recipe.last_downloaded += interval
else: else:
recipe.last_downloaded = now recipe.last_downloaded = now
dynamic['scheduled_recipes'] = self.recipes else:
recipe.last_downloaded = now
save_recipes(self.recipes)
self.queue.remove(recipe)
self.dirtied = True
finally:
self.lock.unlock()
self.debug('Downloaded:', recipe)
def download(self, recipe): def download(self, recipe):
self.lock.lock()
try:
if recipe in self.recipes: if recipe in self.recipes:
recipe = self.recipes[self.recipes.index(recipe)] recipe = self.recipes[self.recipes.index(recipe)]
raise NotImplementedError if recipe not in self.queue:
self.do_download(recipe)
finally:
self.lock.unlock()
def refresh_schedule(self, recipes): def refresh_schedule(self, recipes):
self.recipes = recipes self.recipes = recipes
def show_dialog(self): def show_dialog(self):
self.lock.lock()
try:
d = SchedulerDialog(self.main.library_view.model().db) d = SchedulerDialog(self.main.library_view.model().db)
self.connect(d, SIGNAL('new_schedule(PyQt_PyObject)'), self.refresh_schedule) self.connect(d, SIGNAL('new_schedule(PyQt_PyObject)'), self.refresh_schedule)
self.connect(d, SIGNAL('download_now(PyQt_PyObject)'), self.download) self.connect(d, SIGNAL('download_now(PyQt_PyObject)'), self.download)
d.exec_() d.exec_()
self.recipes = load_recipes()
finally:
self.lock.unlock()
def main(args=sys.argv): def main(args=sys.argv):
app = QApplication([]) app = QApplication([])

View File

@ -10,11 +10,11 @@
</rect> </rect>
</property> </property>
<property name="windowTitle" > <property name="windowTitle" >
<string>Schedule recipes for download</string> <string>Schedule news download</string>
</property> </property>
<property name="windowIcon" > <property name="windowIcon" >
<iconset resource="../images.qrc" > <iconset resource="../images.qrc" >
<normaloff>:/images/news.svg</normaloff>:/images/news.svg</iconset> <normaloff>:/images/scheduler.svg</normaloff>:/images/scheduler.svg</iconset>
</property> </property>
<layout class="QGridLayout" name="gridLayout" > <layout class="QGridLayout" name="gridLayout" >
<item rowspan="2" row="0" column="0" > <item rowspan="2" row="0" column="0" >
@ -161,6 +161,13 @@
</item> </item>
</layout> </layout>
</item> </item>
<item>
<widget class="QLabel" name="last_downloaded" >
<property name="text" >
<string> </string>
</property>
</widget>
</item>
<item> <item>
<widget class="QGroupBox" name="account" > <widget class="QGroupBox" name="account" >
<property name="title" > <property name="title" >

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 48 KiB

View File

@ -23,6 +23,7 @@ from calibre.gui2 import APP_UID, warning_dialog, choose_files, error_dialog, \
max_available_height, config max_available_height, config
from calibre.gui2.cover_flow import CoverFlow, DatabaseImages, pictureflowerror from calibre.gui2.cover_flow import CoverFlow, DatabaseImages, pictureflowerror
from calibre.library.database import LibraryDatabase from calibre.library.database import LibraryDatabase
from calibre.gui2.dialogs.scheduler import Scheduler
from calibre.gui2.update import CheckForUpdates from calibre.gui2.update import CheckForUpdates
from calibre.gui2.main_window import MainWindow, option_parser from calibre.gui2.main_window import MainWindow, option_parser
from calibre.gui2.main_ui import Ui_MainWindow from calibre.gui2.main_ui import Ui_MainWindow
@ -74,6 +75,7 @@ class Main(MainWindow, Ui_MainWindow):
Ui_MainWindow.__init__(self) Ui_MainWindow.__init__(self)
self.setupUi(self) self.setupUi(self)
self.setWindowTitle(__appname__) self.setWindowTitle(__appname__)
self.verbose = opts.verbose
self.read_settings() self.read_settings()
self.job_manager = JobManager() self.job_manager = JobManager()
self.jobs_dialog = JobsDialog(self, self.job_manager) self.jobs_dialog = JobsDialog(self, self.job_manager)
@ -291,6 +293,9 @@ class Main(MainWindow, Ui_MainWindow):
self.content_server = start_threaded_server(db, server_config().parse()) self.content_server = start_threaded_server(db, server_config().parse())
self.test_server_timer = QTimer.singleShot(10000, self.test_server) self.test_server_timer = QTimer.singleShot(10000, self.test_server)
self.scheduler = Scheduler(self)
self.connect(self.news_menu.scheduler, SIGNAL('triggered(bool)'), lambda x :self.scheduler.show_dialog())
def test_server(self, *args): def test_server(self, *args):
if self.content_server.exception is not None: if self.content_server.exception is not None:
error_dialog(self, _('Failed to start content server'), error_dialog(self, _('Failed to start content server'),
@ -1294,6 +1299,8 @@ path_to_ebook to the database.
''') ''')
parser.add_option('--with-library', default=None, action='store', parser.add_option('--with-library', default=None, action='store',
help=_('Use the library located at the specified path.')) help=_('Use the library located at the specified path.'))
parser.add_option('-v', '--verbose', default=0, action='count',
help=_('Log debugging information to console'))
opts, args = parser.parse_args(args) opts, args = parser.parse_args(args)
if opts.with_library is not None and os.path.isdir(opts.with_library): if opts.with_library is not None and os.path.isdir(opts.with_library):
prefs.set('library_path', opts.with_library) prefs.set('library_path', opts.with_library)

View File

@ -29,18 +29,25 @@ class NewsMenu(QMenu):
def __init__(self, customize_feeds_func): def __init__(self, customize_feeds_func):
QMenu.__init__(self) QMenu.__init__(self)
self.scheduler = QAction(QIcon(':/images/scheduler.svg'), _('Schedule news download'), self)
self.addAction(self.scheduler)
self.cac = QAction(QIcon(':/images/user_profile.svg'), _('Add a custom news source'), self) self.cac = QAction(QIcon(':/images/user_profile.svg'), _('Add a custom news source'), self)
self.connect(self.cac, SIGNAL('triggered(bool)'), customize_feeds_func) self.connect(self.cac, SIGNAL('triggered(bool)'), customize_feeds_func)
self.addAction(self.cac) self.addAction(self.cac)
self.addSeparator()
self.custom_menu = CustomNewsMenu() self.custom_menu = CustomNewsMenu()
self.addMenu(self.custom_menu) self.addMenu(self.custom_menu)
self.connect(self.custom_menu, SIGNAL('start_news_fetch(PyQt_PyObject, PyQt_PyObject)'), self.connect(self.custom_menu, SIGNAL('start_news_fetch(PyQt_PyObject, PyQt_PyObject)'),
self.fetch_news) self.fetch_news)
self.addSeparator()
self.dmenu = QMenu(self)
self.dmenu.setTitle(_('Download news'))
self.dmenu.setIcon(QIcon(':/images/news.svg'))
self.addMenu(self.dmenu)
for title in titles: for title in titles:
recipe = get_builtin_recipe(title)[0] recipe = get_builtin_recipe(title)[0]
self.addAction(NewsAction(recipe, self)) self.dmenu.addAction(NewsAction(recipe, self))
def fetch_news(self, recipe, module): def fetch_news(self, recipe, module):
@ -76,7 +83,7 @@ class CustomNewsMenu(QMenu):
def __init__(self): def __init__(self):
QMenu.__init__(self) QMenu.__init__(self)
self.setTitle(_('Custom news sources')) self.setTitle(_('Download custom news'))
self.connect(self, SIGNAL('triggered(QAction*)'), self.launch) self.connect(self, SIGNAL('triggered(QAction*)'), self.launch)
def launch(self, action): def launch(self, action):

View File

@ -361,12 +361,13 @@ def _fetch_news(data, fmt):
def fetch_scheduled_recipe(recipe, script): def fetch_scheduled_recipe(recipe, script):
from calibre.gui2.dialogs.scheduler import config
fmt = prefs['output_format'].lower() fmt = prefs['output_format'].lower()
pt = PersistentTemporaryFile(suffix='_feeds2%s.%s'%(fmt.lower(), fmt.lower())) pt = PersistentTemporaryFile(suffix='_feeds2%s.%s'%(fmt.lower(), fmt.lower()))
pt.close() pt.close()
args = ['feeds2%s'%fmt.lower(), '--output', pt.name, '--debug'] args = ['feeds2%s'%fmt.lower(), '--output', pt.name, '--debug']
if recipe.needs_subscription: if recipe.needs_subscription:
x = dynamic['recipe_account_info_%s'%recipe.id] x = config.get('recipe_account_info_%s'%recipe.id, False)
if not x: if not x:
raise ValueError(_('You must set a username and password for %s')%recipe.title) raise ValueError(_('You must set a username and password for %s')%recipe.title)
args.extend(['--username', x[0], '--password', x[1]]) args.extend(['--username', x[0], '--password', x[1]])

View File

@ -716,7 +716,7 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.commit() self.conn.commit()
def get_recipes(self): def get_recipes(self):
return self.conn.get('SELECT id, title FROM feeds') return self.conn.get('SELECT id, script FROM feeds')
def get_recipe(self, id): def get_recipe(self, id):
return self.conn.get('SELECT script FROM feeds WHERE id=?', (id,), all=False) return self.conn.get('SELECT script FROM feeds WHERE id=?', (id,), all=False)

View File

@ -473,8 +473,12 @@ class DynamicConfig(dict):
class for preferences that you don't intend to have the users edit directly. class for preferences that you don't intend to have the users edit directly.
''' '''
def __init__(self, name='dynamic'): def __init__(self, name='dynamic'):
dict.__init__(self, {})
self.name = name self.name = name
self.file_path = os.path.join(config_dir, name+'.pickle') self.file_path = os.path.join(config_dir, name+'.pickle')
self.refresh()
def refresh(self):
d = {} d = {}
if os.path.exists(self.file_path): if os.path.exists(self.file_path):
with ExclusiveFile(self.file_path) as f: with ExclusiveFile(self.file_path) as f:
@ -482,8 +486,11 @@ class DynamicConfig(dict):
try: try:
d = cPickle.loads(raw) if raw.strip() else {} d = cPickle.loads(raw) if raw.strip() else {}
except: except:
import traceback
traceback.print_exc()
d = {} d = {}
dict.__init__(self, d) self.clear()
self.update(d)
def __getitem__(self, key): def __getitem__(self, key):
try: try:

View File

@ -12,6 +12,7 @@ recipe_modules = [
'discover_magazine', 'scientific_american', 'new_york_review_of_books', 'discover_magazine', 'scientific_american', 'new_york_review_of_books',
'daily_telegraph', 'guardian', 'el_pais', 'new_scientist', 'b92', 'daily_telegraph', 'guardian', 'el_pais', 'new_scientist', 'b92',
'politika', 'moscow_times', 'latimes', 'japan_times', 'san_fran_chronicle', 'politika', 'moscow_times', 'latimes', 'japan_times', 'san_fran_chronicle',
'demorgen_be', 'de_standaard'
] ]
import re, imp, inspect, time, os import re, imp, inspect, time, os

View File

@ -0,0 +1,32 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
standaard.be
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DeStandaard(BasicNewsRecipe):
    '''Recipe for the De Standaard Online (standaard.be) front-page feed.'''

    # Recipe metadata.
    title = u'De Standaard'
    __author__ = u'Darko Miletic'
    description = u'News from Belgium'

    # Download limits and page handling switches.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Only the contents of the main article container are kept.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': '_parts_midContainer_div'}},
    ]

    # The "Binnenland" heading marks the cut-off point and is itself removed.
    remove_tags_after = {'name': 'h3', 'attrs': {'title': 'Binnenland'}}

    # Byline, sidebar and the help/archive/print/email links are stripped.
    remove_tags = [
        {'name': 'h3', 'attrs': {'title': 'Binnenland'}},
        {'name': 'p', 'attrs': {'class': 'by'}},
        {'name': 'div', 'attrs': {'class': 'articlesright'}},
        {'name': 'a', 'attrs': {'class': 'help'}},
        {'name': 'a', 'attrs': {'class': 'archive'}},
        {'name': 'a', 'attrs': {'class': 'print'}},
        {'name': 'a', 'attrs': {'class': 'email'}},
    ]

    # (section title, feed URL) pairs.
    feeds = [
        (u'De Standaard Online', u'http://feeds.feedburner.com/dso-front'),
    ]

View File

@ -0,0 +1,31 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
demorgen.be
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DeMorganBe(BasicNewsRecipe):
    '''Recipe for the DeMorgen.be RSS feeds.'''

    # Recipe metadata.
    title = u'DeMorgen.be'
    __author__ = u'Darko Miletic'
    description = u'News from Belgium'

    # Download limits and page handling switches.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Only the article box is kept from each downloaded page.
    keep_only_tags = [dict(name='div', attrs={'class': 'art_box2'})]

    # (section title, feed URL) pairs. Each feed is listed exactly once:
    # the 'Financiele morgen' entry used to appear twice, which would have
    # made the same feed show up as two identical sections.
    feeds = [
        (u'Nieuws', u'http://www.demorgen.be/nieuws/rss.xml'),
        (u'De Gedachte', u'http://www.demorgen.be/degedachte/rss.xml'),
        (u'Financiele morgen', u'http://www.demorgen.be/financielemorgen/rss.xml'),
        (u'Sport', u'http://www.demorgen.be/sport/rss.xml'),
        (u'Bis', u'http://www.demorgen.be/bis/rss.xml'),
        (u'Magazine', u'http://www.demorgen.be/magazine/rss.xml'),
        (u'De stand der dingen', u'http://www.demorgen.be/standderdingen/rss.xml'),
    ]

View File

@ -127,10 +127,13 @@ class RecursiveFetcher(object, LoggingInterface):
if self.keep_only_tags: if self.keep_only_tags:
body = Tag(soup, 'body') body = Tag(soup, 'body')
try:
for spec in self.keep_only_tags: for spec in self.keep_only_tags:
for tag in soup.find('body').findAll(**spec): for tag in soup.find('body').findAll(**spec):
body.insert(len(body.contents), tag) body.insert(len(body.contents), tag)
soup.find('body').replaceWith(body) soup.find('body').replaceWith(body)
except AttributeError: # soup has no body element
pass
def remove_beyond(tag, next): def remove_beyond(tag, next):
while tag is not None and tag.name != 'body': while tag is not None and tag.name != 'body':