diff --git a/resources/recipes/european_voice.recipe b/resources/recipes/european_voice.recipe
new file mode 100644
index 0000000000..caaca6d306
--- /dev/null
+++ b/resources/recipes/european_voice.recipe
@@ -0,0 +1,55 @@
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class EuropeanVoice(BasicNewsRecipe):
+    """Recipe that downloads the European Voice RSS feeds listed in ``feeds``."""
+
+    title = u'European Voice'
+    __author__ = 'malfi'
+    oldest_article = 14
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    cover_url = 'http://www.europeanvoice.com/Css/images/logo.gif'
+    language = 'en'
+    keep_only_tags = [dict(name='div', attrs={'id':'articleLeftColumn'})]
+    remove_tags = [dict(name='div', attrs={'id':'BreadCrump'})]
+    feeds = [
+        (u'Whole site ',u'http://www.europeanvoice.com/Rss/2.xml'),
+        (u'News and analysis',u'http://www.europeanvoice.com/Rss/6.xml'),
+        (u'Comment',u'http://www.europeanvoice.com/Rss/7.xml'),
+        (u'Special reports',u'http://www.europeanvoice.com/Rss/5.xml'),
+        (u'People',u'http://www.europeanvoice.com/Rss/8.xml'),
+        (u'Career',u'http://www.europeanvoice.com/Rss/11.xml'),
+        (u'Policies',u'http://www.europeanvoice.com/Rss/4.xml'),
+        (u'EVents',u'http://www.europeanvoice.com/Rss/10.xml'),
+        (u'Policies - Economics',u'http://www.europeanvoice.com/Rss/31.xml'),
+        (u'Policies - Business',u'http://www.europeanvoice.com/Rss/19.xml'),
+        (u'Policies - Trade',u'http://www.europeanvoice.com/Rss/25.xml'),
+        (u'Policies - Information society',u'http://www.europeanvoice.com/Rss/20.xml'),
+        (u'Policies - Energy',u'http://www.europeanvoice.com/Rss/15.xml'),
+        (u'Policies - Transport',u'http://www.europeanvoice.com/Rss/18.xml'),
+        (u'Policies - Climate change',u'http://www.europeanvoice.com/Rss/16.xml'),
+        (u'Policies - Environment',u'http://www.europeanvoice.com/Rss/17.xml'),
+        (u'Policies - Farming & food',u'http://www.europeanvoice.com/Rss/23.xml'),
+        (u'Policies - Health & society',u'http://www.europeanvoice.com/Rss/24.xml'),
+        (u'Policies - Justice',u'http://www.europeanvoice.com/Rss/29.xml'),
+        (u'Policies - Foreign affairs',u'http://www.europeanvoice.com/Rss/27.xml')
+    ]
+    extra_css = '''
+        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+    '''
+
+    def print_version(self, url):
+        """Return ``url`` with the ``?bPrint=1`` query string appended."""
+        return url + '?bPrint=1'
+
+    def preprocess_html(self, soup):
+        """Return ``soup``; raise Exception if the text 'Subscribers' is found in it."""
+        denied = soup.findAll(True,text='Subscribers')
+        if denied:
+            raise Exception('Article skipped, because content can only be seen with subscription')
+        return soup
+
diff --git a/resources/recipes/pagina12.recipe b/resources/recipes/pagina12.recipe
index da16c1697b..2809e87e2a 100644
--- a/resources/recipes/pagina12.recipe
+++ b/resources/recipes/pagina12.recipe
@@ -21,8 +21,16 @@ class Pagina12(BasicNewsRecipe):
     use_embedded_content = False
     language = 'es'
     remove_empty_feeds = True
+    publication_type = 'newspaper'
     masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
-    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} #autor{font-weight: bold} #fecha,#epigrafe{font-size: 0.9em; margin: 5px} #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px } '
+    extra_css = """
+        body{font-family: Arial,Helvetica,sans-serif }
+        img{margin-bottom: 0.4em; display:block}
+        #autor{font-weight: bold}
+        #fecha,#epigrafe{font-size: 0.9em; margin: 5px}
+        #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }
+        .fgprincipal{font-size: large; font-weight: bold}
+        """
 
     conversion_options = {
         'comment' : description
@@ -31,7 +39,11 @@ class Pagina12(BasicNewsRecipe):
         , 'language' : language
         }
 
-    remove_tags = [dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})]
+    remove_tags = [
+        dict(name=['meta','link'])
+        ,dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})
+    ]
+    remove_attributes=['lang']
 
 
     feeds = [
@@ -65,4 +77,19 @@ class Pagina12(BasicNewsRecipe):
     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
             del item['style']
+        # Turn the link inside each span#seccion into a plain span.
+        for item in soup.findAll('span', attrs={'id':'seccion'}):
+            it = item.a
+            # item.a is None when the span has no <a> child; skip it rather
+            # than raise AttributeError on the rename below.
+            if it is None:
+                continue
+            it.name='span'
+            del it['href']
+            del it['title']
+        # Rename the first <h3> found inside each <p> to a span.
+        for item in soup.findAll('p'):
+            it = item.find('h3')
+            if it is not None:
+                it.name='span'
         return soup
\ No newline at end of file