diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index 4021b258f9..31ce75356c 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -44,7 +44,7 @@ recipe_modules = ['recipe_' + r for r in (
            'stackoverflow', 'telepolis_artikel', 'zaobao', 'usnews',
            'straitstimes', 'index_hu', 'pcworld_hu', 'hrt', 'rts',
            'h1', 'h2', 'h3', 'phd_comics', 'woz_die', 'elektrolese',
-           'climate_progress', 'carta', 'slashdot',
+           'climate_progress', 'carta', 'slashdot', 'publico',
           )]
 
 import re, imp, inspect, time, os
diff --git a/src/calibre/web/feeds/recipes/recipe_publico.py b/src/calibre/web/feeds/recipes/recipe_publico.py
new file mode 100644
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_publico.py
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+"""
+publico.py - v1.0
+
+Copyright (c) 2009, David Rodrigues - http://sixhat.net
+All rights reserved.
+"""
+
+__license__ = 'GPL 3'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class Publico(BasicNewsRecipe):
+    """Calibre news recipe for the Público feeds listed in ``feeds``."""
+
+    # A unicode literal must hold the code point itself (u'\xfa' == 'ú');
+    # u'P\xc3\xbablico' holds the UTF-8 *bytes* and displays as "PÃºblico".
+    title = u'Público'
+    __author__ = 'David Rodrigues'
+    oldest_article = 1
+    max_articles_per_feed = 30
+    encoding = 'utf-8'
+    no_stylesheets = True
+    language = _('Portuguese')
+    # Strip U+FFFD (the Unicode replacement character) from the page source.
+    preprocess_regexps = [(re.compile(u"\uFFFD", re.DOTALL|re.IGNORECASE), lambda match: ''),]
+
+    feeds = [
+        (u'Geral', u'http://feeds.feedburner.com/PublicoUltimaHora'),
+        (u'Internacional', u'http://www.publico.clix.pt/rss.ashx?idCanal=11'),
+        (u'Política', u'http://www.publico.clix.pt/rss.ashx?idCanal=12'),
+        (u'Ciências', u'http://www.publico.clix.pt/rss.ashx?idCanal=13'),
+        (u'Desporto', u'http://desporto.publico.pt/rss.ashx'),
+        (u'Economia', u'http://www.publico.clix.pt/rss.ashx?idCanal=57'),
+        (u'Educação', u'http://www.publico.clix.pt/rss.ashx?idCanal=58'),
+        (u'Local', u'http://www.publico.clix.pt/rss.ashx?idCanal=59'),
+        (u'Media e Tecnologia', u'http://www.publico.clix.pt/rss.ashx?idCanal=61'),
+        (u'Sociedade', u'http://www.publico.clix.pt/rss.ashx?idCanal=62')
+    ]
+    remove_tags = [dict(name='script'), dict(id='linhaTitulosHeader')]
+    keep_only_tags = [dict(name='div')]
+
+    def print_version(self, url):
+        """Return the print-view URL for the article at ``url``.
+
+        The first ``id=<digits>`` token found in ``url`` is appended to the
+        print.aspx endpoint.  When ``url`` carries no such token it is
+        returned unchanged instead of raising IndexError.
+        """
+        match = re.search(r"id=[0-9]+", url)
+        if match is None:
+            return url
+        return "http://ww2.publico.clix.pt/print.aspx?" + match.group(0)