diff --git a/resources/recipes/evz.ro.recipe b/resources/recipes/evz.ro.recipe index bce151d1fc..841dc80429 100644 --- a/resources/recipes/evz.ro.recipe +++ b/resources/recipes/evz.ro.recipe @@ -1,52 +1,54 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + __license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' +__copyright__ = u'2011, Silviu Cotoar\u0103' ''' evz.ro ''' -import re from calibre.web.feeds.news import BasicNewsRecipe -class EVZ_Ro(BasicNewsRecipe): - title = 'evz.ro' - __author__ = 'Darko Miletic' - description = 'News from Romania' - publisher = 'evz.ro' - category = 'news, politics, Romania' - oldest_article = 2 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False +class EvenimentulZilei(BasicNewsRecipe): + title = u'Evenimentul Zilei' + __author__ = u'Silviu Cotoar\u0103' + description = '' + publisher = u'Evenimentul Zilei' + oldest_article = 5 language = 'ro' - masthead_url = 'http://www.evz.ro/fileadmin/images/logo.gif' - extra_css = ' body{font-family: Georgia,Arial,Helvetica,sans-serif } .firstP{font-size: 1.125em} .author,.articleInfo{font-size: small} ' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Stiri' + encoding = 'utf-8' + cover_url = 'http://www.evz.ro/fileadmin/images/evzLogo.png' conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : language - } + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } - preprocess_regexps = [ - (re.compile(r'.*?', re.DOTALL|re.IGNORECASE),lambda match: '<head><title>') - ,(re.compile(r'.*?', re.DOTALL|re.IGNORECASE),lambda match: '') - ] + keep_only_tags = [ + dict(name='div', attrs={'class':'single'}) + , dict(name='img', attrs={'id':'placeholder'}) + , dict(name='a', attrs={'id':'holderlink'}) + ] - remove_tags = [ - dict(name=['form','embed','iframe','object','base','link','script','noscript']) - ,dict(attrs={'class':['section','statsInfo','email il']}) - ,dict(attrs={'id' :'gallery'}) - ] + remove_tags = [ + dict(name='p', attrs={'class':['articleInfo']}) + , dict(name='div', attrs={'id':['bannerAddoceansArticleJos']}) + , dict(name='div', attrs={'id':['bannerAddoceansArticle']}) + ] - remove_tags_after = dict(attrs={'class':'section'}) - keep_only_tags = [dict(attrs={'class':'single'})] - remove_attributes = ['height','width'] + remove_tags_after = [ + dict(name='div', attrs={'id':['bannerAddoceansArticleJos']}) + ] - feeds = [(u'Articles', u'http://www.evz.ro/rss.xml')] + feeds = [ + (u'Feeds', u'http://www.evz.ro/rss.xml') + ] def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return soup + return self.adeify_images(soup) diff --git a/resources/recipes/nationalgeoro.recipe b/resources/recipes/nationalgeoro.recipe index a3c5727d38..8f989be74d 100644 --- a/resources/recipes/nationalgeoro.recipe +++ b/resources/recipes/nationalgeoro.recipe @@ -14,7 +14,7 @@ class NationalGeoRo(BasicNewsRecipe): __author__ = u'Silviu Cotoar\u0103' description = u'S\u0103 avem grij\u0103 de planet\u0103' publisher = 'National Geographic' - oldest_article = 5 + oldest_article = 35 language = 'ro' max_articles_per_feed = 100 no_stylesheets = True