mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-31 14:33:54 -04:00
European Voice by malfi and Fix #7487 (Updated recipe for Pagina12)
This commit is contained in:
parent
4ecd8d2c6d
commit
e8928c8046
51
resources/recipes/european_voice.recipe
Normal file
51
resources/recipes/european_voice.recipe
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class EuropeanVoice(BasicNewsRecipe):
    """Recipe that downloads the European Voice RSS feeds.

    Articles are fetched through the site's print view (see
    ``print_version``) and any page containing the 'Subscribers' marker is
    rejected in ``preprocess_html``.
    """

    # --- Recipe metadata -------------------------------------------------
    title = u'European Voice'
    __author__ = 'malfi'
    language = 'en'
    cover_url = 'http://www.europeanvoice.com/Css/images/logo.gif'

    # --- Download limits ---------------------------------------------------
    oldest_article = 14
    max_articles_per_feed = 100

    # --- Markup clean-up ---------------------------------------------------
    no_stylesheets = True
    # Keep only the article column and drop the breadcrumb block inside it.
    keep_only_tags = [dict(name='div', attrs={'id':'articleLeftColumn'})]
    remove_tags = [dict(name='div', attrs={'id':'BreadCrump'})]

    # --- Feeds: (title, RSS url) pairs ---------------------------------------
    feeds = [
        (u'Whole site ',u'http://www.europeanvoice.com/Rss/2.xml'),
        (u'News and analysis',u'http://www.europeanvoice.com/Rss/6.xml'),
        (u'Comment',u'http://www.europeanvoice.com/Rss/7.xml'),
        (u'Special reports',u'http://www.europeanvoice.com/Rss/5.xml'),
        (u'People',u'http://www.europeanvoice.com/Rss/8.xml'),
        (u'Career',u'http://www.europeanvoice.com/Rss/11.xml'),
        (u'Policies',u'http://www.europeanvoice.com/Rss/4.xml'),
        (u'EVents',u'http://www.europeanvoice.com/Rss/10.xml'),
        (u'Policies - Economics',u'http://www.europeanvoice.com/Rss/31.xml'),
        (u'Policies - Business',u'http://www.europeanvoice.com/Rss/19.xml'),
        (u'Policies - Trade',u'http://www.europeanvoice.com/Rss/25.xml'),
        (u'Policies - Information society',u'http://www.europeanvoice.com/Rss/20.xml'),
        (u'Policies - Energy',u'http://www.europeanvoice.com/Rss/15.xml'),
        (u'Policies - Transport',u'http://www.europeanvoice.com/Rss/18.xml'),
        (u'Policies - Climate change',u'http://www.europeanvoice.com/Rss/16.xml'),
        (u'Policies - Environment',u'http://www.europeanvoice.com/Rss/17.xml'),
        (u'Policies - Farming & food',u'http://www.europeanvoice.com/Rss/23.xml'),
        (u'Policies - Health & society',u'http://www.europeanvoice.com/Rss/24.xml'),
        (u'Policies - Justice',u'http://www.europeanvoice.com/Rss/29.xml'),
        (u'Policies - Foreign affairs',u'http://www.europeanvoice.com/Rss/27.xml')
    ]

    # --- Extra styling applied to the downloaded articles --------------------
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

    def print_version(self, url):
        """Return the print-view address for *url*.

        The print view is selected by appending the ``?bPrint=1`` query
        string to the article address.
        """
        return url + '?bPrint=1'

    def preprocess_html(self, soup):
        """Reject subscriber-only pages; otherwise return *soup* untouched.

        Raises:
            Exception: when the page contains a 'Subscribers' text node.
                NOTE(review): presumably the download framework catches
                this and skips the article -- confirm against calibre.
        """
        # A 'Subscribers' text node is treated as the paywall marker.
        denied = soup.findAll(True,text='Subscribers')
        if denied:
            raise Exception('Article skipped, because content can only be seen with subscription')
        return soup
|
||||||
|
|
@ -21,8 +21,16 @@ class Pagina12(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es'
|
language = 'es'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
|
publication_type = 'newspaper'
|
||||||
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
|
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
|
||||||
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} #autor{font-weight: bold} #fecha,#epigrafe{font-size: 0.9em; margin: 5px} #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px } '
|
extra_css = """
|
||||||
|
body{font-family: Arial,Helvetica,sans-serif }
|
||||||
|
img{margin-bottom: 0.4em; display:block}
|
||||||
|
#autor{font-weight: bold}
|
||||||
|
#fecha,#epigrafe{font-size: 0.9em; margin: 5px}
|
||||||
|
#imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }
|
||||||
|
.fgprincipal{font-size: large; font-weight: bold}
|
||||||
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
@ -31,7 +39,11 @@ class Pagina12(BasicNewsRecipe):
|
|||||||
, 'language' : language
|
, 'language' : language
|
||||||
}
|
}
|
||||||
|
|
||||||
remove_tags = [dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})]
|
remove_tags = [
|
||||||
|
dict(name=['meta','link'])
|
||||||
|
,dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})
|
||||||
|
]
|
||||||
|
remove_attributes=['lang']
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
@ -65,4 +77,13 @@ class Pagina12(BasicNewsRecipe):
|
|||||||
def preprocess_html(self, soup):
    """Normalise article markup and return the modified *soup*.

    Three passes are made over the parsed page:

    1. every inline ``style`` attribute is removed;
    2. the link inside each ``<span id="seccion">`` is turned into a plain
       ``<span>`` and loses its ``href`` and ``title`` attributes;
    3. the first ``<h3>`` found inside each ``<p>`` is renamed to ``<span>``.
    """
    for item in soup.findAll(style=True):
        del item['style']

    for item in soup.findAll('span', attrs={'id':'seccion'}):
        it = item.a
        # A section span is not guaranteed to contain a link; without this
        # guard a missing <a> would raise AttributeError on None.
        if it is not None:
            it.name = 'span'
            del it['href']
            del it['title']

    for item in soup.findAll('p'):
        it = item.find('h3')
        if it is not None:
            it.name = 'span'

    return soup
|
Loading…
x
Reference in New Issue
Block a user