Recipe for Cyberpresse (thanks to balok)

This commit is contained in:
Kovid Goyal 2008-11-28 19:34:17 -08:00
parent d35585b579
commit 3e17eaaf2c
2 changed files with 23 additions and 1 deletions

View File

@ -15,7 +15,7 @@ recipe_modules = [
'demorgen_be', 'de_standaard', 'ap', 'barrons', 'chr_mon', 'cnn', 'faznet',
'jpost', 'jutarnji', 'nasa', 'reuters', 'spiegelde', 'wash_post', 'zeitde',
'blic', 'novosti', 'danas', 'vreme', 'times_online', 'the_scotsman',
'nytimes_sub', 'security_watch'
'nytimes_sub', 'security_watch', 'cyberpresse',
]
import re, imp, inspect, time, os

View File

@ -0,0 +1,22 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Cyberpresse(BasicNewsRecipe):
title = u'Cyberpresse'
__author__ = 'balok'
description = 'Canadian news in French'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
html2lrf_options = ['--left-margin=0','--right-margin=0','--top-margin=0','--bottom-margin=0']
preprocess_regexps = [
(re.compile(r'<body.*?<!-- END .centerbar -->', re.IGNORECASE | re.DOTALL), lambda match : '<BODY>'),
(re.compile(r'<!-- END .entry -->.*?</body>', re.IGNORECASE | re.DOTALL), lambda match : '</BODY>'),
(re.compile(r'<strong>Agrandir.*?</strong>', re.IGNORECASE | re.DOTALL), lambda match : '<br>'),
]
feeds = [(u'Manchettes', u'http://www.cyberpresse.ca/rss/225.xml'),(u'Capitale nationale', u'http://www.cyberpresse.ca/rss/501.xml'),(u'Opinions', u'http://www.cyberpresse.ca/rss/977.xml'),(u'Insolite', u'http://www.cyberpresse.ca/rss/279.xml')]