mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Improved recipe for Cyberpresse
This commit is contained in:
parent
2055b3bdcc
commit
4ab7c54c67
@ -7,18 +7,51 @@ class Cyberpresse(BasicNewsRecipe):
|
||||
# Recipe metadata.
__author__ = 'balok'
description = 'Canadian news in French'
language = 'fr'

# Fetch and clean-up settings. The hunk header shows the enclosing class
# derives from BasicNewsRecipe, so these are presumably the standard calibre
# recipe options of the same names -- confirm against the calibre recipe API.
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
remove_javascript = True

# Zero all four page margins for the html2lrf conversion step.
html2lrf_options = [
    '--left-margin=0',
    '--right-margin=0',
    '--top-margin=0',
    '--bottom-margin=0',
]
encoding = 'utf-8'
|
||||
|
||||
# (compiled pattern, replacement callable) pairs. Every pattern is
# case-insensitive and uses DOTALL so '.*?' can span line breaks.
preprocess_regexps = [
    # Collapse everything from the opening <body> tag through the
    # "END .centerbar" marker comment into a bare <BODY> tag.
    (re.compile(r'<body.*?<!-- END .centerbar -->', re.IGNORECASE | re.DOTALL),
     lambda match: '<BODY>'),
    # Collapse everything from the "END .entry" marker comment through the
    # closing body tag into a bare </BODY> tag.
    (re.compile(r'<!-- END .entry -->.*?</body>', re.IGNORECASE | re.DOTALL),
     lambda match: '</BODY>'),
    # Replace any <strong> element whose text starts with "Agrandir" by a
    # line break.
    (re.compile(r'<strong>Agrandir.*?</strong>', re.IGNORECASE | re.DOTALL),
     lambda match: '<br>'),
]
|
||||
|
||||
|
||||
# RSS sections as (title, URL) pairs, one entry per line.
feeds = [
    (u'Manchettes', u'http://www.cyberpresse.ca/rss/225.xml'),
    (u'Capitale nationale', u'http://www.cyberpresse.ca/rss/501.xml'),
    (u'Opinions', u'http://www.cyberpresse.ca/rss/977.xml'),
    (u'Insolite', u'http://www.cyberpresse.ca/rss/279.xml'),
]
|
||||
|
||||
# Tag selectors for the article container: a <div> with class
# 'article-page' or a <div> with id 'articlePage'. Going by the attribute
# name, these are presumably the only elements retained from each page --
# confirm against the calibre recipe API.
keep_only_tags = [
    dict(name='div', attrs={'class': 'article-page'}),
    dict(name='div', attrs={'id': 'articlePage'}),
]
|
||||
|
||||
# Extra stylesheet rules for the generated pages, one rule per line.
# NOTE(review): .auteur falls back to the generic 'sans-serif' family although
# Georgia/Times are serif faces (the h1 rule uses 'serif') -- confirm whether
# that is intended.
extra_css = '''
    .photodata{font-family:Arial,Helvetica,Verdana,sans-serif;color: #999999; font-size: 90%; }
    h1{font-family:Georgia,Times,serif ; font-size: large; }
    .amorce{font-family:Arial,Helvetica,Verdana,sans-serif; font-weight:bold;}
    .article-page{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
    #articlePage{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
    .auteur{font-family:Georgia,Times,sans-serif; font-size: 90%; color:#006699 ;}
    .bodyText{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
    .byLine{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: 90%;}
    .entry{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
    .minithumb-auteurs{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: 90%; }
    a{color:#003399; font-weight:bold; }
'''
|
||||
|
||||
# Tag selectors matched by element name plus a list of class values. Going by
# the attribute name, matching elements are presumably stripped from the
# article -- confirm against the calibre recipe API.
remove_tags = [
    dict(name='div', attrs={'class': ['centerbar', 'colspan', 'share-module']}),
    dict(name='p', attrs={'class': ['zoom']}),
    dict(name='ul', attrs={'class': ['stories']}),
    dict(name='h4', attrs={'class': ['general-cat']}),
]
|
||||
|
||||
# RSS sections to download, as (title, URL) pairs.
feeds = [
    (u'Manchettes', u'http://www.cyberpresse.ca/rss/225.xml'),
    (u'Capitale nationale', u'http://www.cyberpresse.ca/rss/501.xml'),
    (u'Opinions', u'http://www.cyberpresse.ca/rss/977.xml'),
    (u'Insolite', u'http://www.cyberpresse.ca/rss/279.xml'),
]
|
||||
|
||||
def postprocess_html(self, soup, first):
    """Turn every <i> and <strong> element of *soup* into a <div>.

    The tags are renamed in place; their contents and attributes are not
    touched.

    soup  -- parsed document exposing ``findAll`` (presumably a
             BeautifulSoup tree supplied by calibre -- confirm).
    first -- accepted as part of the signature but not used here.

    Returns the same *soup* object that was passed in.
    """
    inline_names = ['i', 'strong']
    for element in soup.findAll(name=inline_names):
        element.name = 'div'
    return soup
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user