__license__ = 'GPL v3'
__copyright__ = '2010-2011, Darko Miletic '
''' www.eluniversal.com '''

from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe


class ElUniversal(BasicNewsRecipe):
    """Calibre news recipe for the Venezuelan newspaper El Universal.

    The recipe is purely declarative apart from two small hooks: articles
    are located through the ``guid`` of each feed item, and the address
    that is actually downloaded is that URL with ``-imp`` appended.
    """

    # --- Descriptive metadata -------------------------------------------
    title = 'El Universal'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Venezuela'
    publisher = 'El Universal'
    category = 'news, Caracas, Venezuela, world'
    language = 'es_VE'
    publication_type = 'newspaper'

    # --- Download behaviour ---------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    encoding = 'cp1252'

    # The date placeholders are expanded once, when the class body is
    # executed, so the cover address reflects the day the recipe is loaded.
    cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg')

    # Styling for the site's own text classes. Assembled from adjacent
    # literals, one rule per literal; the resulting value is a single line
    # with rules separated by one space.
    extra_css = (
        ' .txt60{font-family: Tahoma,Geneva,sans-serif; font-size: small}'
        ' .txt29{font-family: Tahoma,Geneva,sans-serif; font-size: small; color: gray}'
        ' .txt38{font-family: Georgia,"Times New Roman",Times,serif; font-size: xx-large}'
        ' .txt35{font-family: Georgia,"Times New Roman",Times,serif; font-size: large}'
        ' body{font-family: Verdana,Arial,Helvetica,sans-serif}'
        ' '
    )

    # Metadata handed to the conversion step; it reuses the class-level
    # values defined above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # --- Page clean-up ----------------------------------------------------
    # Content is trimmed to the region between the 'header-print MB10'
    # element and the 'SizeText' element; the header element itself is
    # also listed in remove_tags below.
    remove_tags_before = {'attrs': {'class': 'header-print MB10'}}
    remove_tags_after = {'attrs': {'id': 'SizeText'}}
    remove_tags = [
        {'name': ['object', 'link', 'script', 'iframe', 'meta']},
        {'attrs': {'class': 'header-print MB10'}},
    ]

    # --- Feeds: (section title, RSS address) ------------------------------
    feeds = [
        (u'Ultimas Noticias', u'http://www.eluniversal.com/rss/avances.xml'),
        (u'Economia', u'http://www.eluniversal.com/rss/eco_avances.xml'),
        (u'Internacionales', u'http://www.eluniversal.com/rss/int_avances.xml'),
        (u'Deportes', u'http://www.eluniversal.com/rss/dep_avances.xml'),
        (u'Cultura', u'http://www.eluniversal.com/rss/cul_avances.xml'),
        (u'Nacional y politica', u'http://www.eluniversal.com/rss/pol_avances.xml'),
        (u'Ciencia y tecnologia', u'http://www.eluniversal.com/rss/cyt_avances.xml'),
        (u'Universo empresarial', u'http://www.eluniversal.com/rss/uni_avances.xml'),
        (u'Caracas', u'http://www.eluniversal.com/rss/ccs_avances.xml'),
    ]

    def print_version(self, url):
        """Return *url* with the ``-imp`` suffix appended.

        NOTE(review): presumably ``-imp`` selects the site's print layout;
        confirm against the live site.
        """
        suffix = '-imp'
        return url + suffix

    def get_article_url(self, article):
        """Return the ``guid`` of the feed item, or ``None`` when absent."""
        guid = article.get('guid')
        return guid