mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-31 14:33:54 -04:00
European Voice by malfi and Fix #7487 (Updated recipe for Pagina12)
This commit is contained in:
parent
4ecd8d2c6d
commit
e8928c8046
51
resources/recipes/european_voice.recipe
Normal file
51
resources/recipes/european_voice.recipe
Normal file
@ -0,0 +1,51 @@
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class EuropeanVoice(BasicNewsRecipe):
    """Recipe that builds a European Voice issue from the site's RSS feeds.

    Articles are fetched through their print view (see ``print_version``) and
    pages that carry the 'Subscribers' marker are rejected in
    ``preprocess_html``.
    """

    title = u'European Voice'
    __author__ = 'malfi'
    # Article age limit; the BasicNewsRecipe documentation gives the unit as days.
    oldest_article = 14
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.europeanvoice.com/Css/images/logo.gif'
    language = 'en'

    # Keep only the article column and drop the breadcrumb block.
    # NOTE(review): 'BreadCrump' is kept verbatim; presumably it mirrors the
    # id used by the site's own markup -- confirm before "correcting" it.
    keep_only_tags = [dict(name='div', attrs={'id':'articleLeftColumn'})]
    remove_tags = [dict(name='div', attrs={'id':'BreadCrump'})]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Whole site ',u'http://www.europeanvoice.com/Rss/2.xml'),
        (u'News and analysis',u'http://www.europeanvoice.com/Rss/6.xml'),
        (u'Comment',u'http://www.europeanvoice.com/Rss/7.xml'),
        (u'Special reports',u'http://www.europeanvoice.com/Rss/5.xml'),
        (u'People',u'http://www.europeanvoice.com/Rss/8.xml'),
        (u'Career',u'http://www.europeanvoice.com/Rss/11.xml'),
        (u'Policies',u'http://www.europeanvoice.com/Rss/4.xml'),
        (u'EVents',u'http://www.europeanvoice.com/Rss/10.xml'),
        (u'Policies - Economics',u'http://www.europeanvoice.com/Rss/31.xml'),
        (u'Policies - Business',u'http://www.europeanvoice.com/Rss/19.xml'),
        (u'Policies - Trade',u'http://www.europeanvoice.com/Rss/25.xml'),
        (u'Policies - Information society',u'http://www.europeanvoice.com/Rss/20.xml'),
        (u'Policies - Energy',u'http://www.europeanvoice.com/Rss/15.xml'),
        (u'Policies - Transport',u'http://www.europeanvoice.com/Rss/18.xml'),
        (u'Policies - Climate change',u'http://www.europeanvoice.com/Rss/16.xml'),
        (u'Policies - Environment',u'http://www.europeanvoice.com/Rss/17.xml'),
        (u'Policies - Farming & food',u'http://www.europeanvoice.com/Rss/23.xml'),
        (u'Policies - Health & society',u'http://www.europeanvoice.com/Rss/24.xml'),
        (u'Policies - Justice',u'http://www.europeanvoice.com/Rss/29.xml'),
        (u'Policies - Foreign affairs',u'http://www.europeanvoice.com/Rss/27.xml')
    ]

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

    def print_version(self, url):
        """Return the URL of the print view for the article at ``url``.

        The print view is requested with the ``bPrint=1`` query parameter.
        It is appended with ``&`` when ``url`` already contains a query
        string and with ``?`` otherwise, so the result stays a valid URL in
        both cases.
        """
        separator = '&' if '?' in url else '?'
        return url + separator + 'bPrint=1'

    def preprocess_html(self, soup):
        """Reject pages that contain the 'Subscribers' marker text.

        Returns ``soup`` unchanged when the marker is absent.

        Raises:
            Exception: when the search for the text 'Subscribers' finds a
                match; per the message, the article is meant to be skipped
                because its content requires a subscription.
        """
        denied = soup.findAll(True,text='Subscribers')
        if denied:
            raise Exception('Article skipped, because content can only be seen with subscription')
        return soup
|
||||
|
@ -21,8 +21,16 @@ class Pagina12(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newspaper'
|
||||
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
|
||||
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} #autor{font-weight: bold} #fecha,#epigrafe{font-size: 0.9em; margin: 5px} #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px } '
|
||||
extra_css = """
|
||||
body{font-family: Arial,Helvetica,sans-serif }
|
||||
img{margin-bottom: 0.4em; display:block}
|
||||
#autor{font-weight: bold}
|
||||
#fecha,#epigrafe{font-size: 0.9em; margin: 5px}
|
||||
#imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }
|
||||
.fgprincipal{font-size: large; font-weight: bold}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
@ -31,7 +39,11 @@ class Pagina12(BasicNewsRecipe):
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})]
|
||||
remove_tags = [
|
||||
dict(name=['meta','link'])
|
||||
,dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})
|
||||
]
|
||||
remove_attributes=['lang']
|
||||
|
||||
|
||||
feeds = [
|
||||
@ -65,4 +77,13 @@ class Pagina12(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
    """Clean up a downloaded page in place and return it.

    Steps, in order:
      1. delete the ``style`` attribute from every element that has one;
      2. for each ``<span id="seccion">``, turn its first link into a plain
         ``<span>``: rename the tag and delete ``href`` and ``title``;
      3. for each ``<p>``, rename the first ``<h3>`` found inside it to
         ``<span>``.

    Returns the same ``soup`` object that was passed in.
    """
    for item in soup.findAll(style=True):
        del item['style']
    for item in soup.findAll('span', attrs={'id':'seccion'}):
        it = item.a
        if it is None:
            # Section label without a link: there is nothing to unlink, and
            # setting ``name`` on None would raise AttributeError.
            continue
        it.name = 'span'
        del it['href']
        del it['title']
    for item in soup.findAll('p'):
        it = item.find('h3')
        if it:
            it.name = 'span'
    return soup
|
Loading…
x
Reference in New Issue
Block a user