from calibre.web.feeds.news import BasicNewsRecipe


class Cicero(BasicNewsRecipe):
    """Calibre news recipe for the German-language magazine Cicero.

    Articles are discovered through the RSS feeds listed in ``feeds``;
    ``print_version`` maps each article URL to the site's
    ``page_print.php`` URL.
    """

    timefmt = ' [%Y-%m-%d]'
    title = u'Cicero'
    __author__ = 'mad@sharktooth.de'
    description = u'Magazin f\xfcr politische Kultur'
    oldest_article = 7
    language = 'de'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    publisher = 'Ringier Publishing'
    category = 'news, politics, Germany'
    encoding = 'iso-8859-1'
    publication_type = 'magazine'
    masthead_url = 'http://www.cicero.de/img2/cicero_logo_rss.gif'

    # Only the two feeds with an empty ``ress_id`` are enabled. The
    # section-specific feeds are kept commented out so that they can be
    # switched on individually.
    feeds = [
        (u'Das gesamte Portfolio', u'http://www.cicero.de/rss/rss.php?ress_id='),
        # (u'Alle Heft-Inhalte', u'http://www.cicero.de/rss/rss.php?ress_id=heft'),
        # (u'Alle Online-Inhalte', u'http://www.cicero.de/rss/rss.php?ress_id=online'),
        # (u'Berliner Republik', u'http://www.cicero.de/rss/rss.php?ress_id=4'),
        # (u'Weltb\xfchne', u'http://www.cicero.de/rss/rss.php?ress_id=1'),
        # (u'Salon', u'http://www.cicero.de/rss/rss.php?ress_id=7'),
        # (u'Kapital', u'http://www.cicero.de/rss/rss.php?ress_id=6'),
        # (u'Netzst\xfccke', u'http://www.cicero.de/rss/rss.php?ress_id=9'),
        # (u'Leinwand', u'http://www.cicero.de/rss/rss.php?ress_id=12'),
        # (u'Bibliothek', u'http://www.cicero.de/rss/rss.php?ress_id=15'),
        (u'Kolumne - Alle Kolumnen', u'http://www.cicero.de/rss/rss2.php?ress_id='),
        # (u'Kolumne - Schreiber, Berlin', u'http://www.cicero.de/rss/rss2.php?ress_id=35'),
        # (u'Kolumne - TV Kritik', u'http://www.cicero.de/rss/rss2.php?ress_id=34'),
    ]

    def print_version(self, url):
        """Return the print-page URL for the article at *url*.

        Everything after the last ``?`` in *url* is appended to the
        ``page_print.php`` endpoint. A URL that contains no ``?`` is
        returned unchanged, since it carries no query string to forward.
        """
        _, separator, query = url.rpartition('?')
        if not separator:
            # Without this guard rpartition() would hand back the whole URL
            # as the "query" and produce a malformed print URL.
            return url
        return 'http://www.cicero.de/page_print.php?' + query
def get_cover_url(self):
    """Return the URL to use as the cover image.

    A dated PDF URL of the form ``.../pdf/DDMMYYYY-viz.pdf`` is built from
    the current local date. If that URL cannot be opened, a message is
    logged and the URL of a static logo image is returned instead.
    """
    # Example of the resulting URL:
    #   http://info.elcorreo.com/pdf/06012011-viz.pdf
    # An alternative source noted by the original author:
    #   http://img.kiosko.net/2011/01/02/es/elcorreo.750.jpg
    cover = 'http://info.elcorreo.com/pdf/' + time.strftime('%d%m%Y') + '-viz.pdf'

    # Called on the instance so that it works whether get_browser is
    # defined as a classmethod or as an ordinary method.
    browser = self.get_browser()
    try:
        browser.open(cover)
    except Exception:
        # Best effort: any failure to fetch today's PDF falls back to the
        # logo. KeyboardInterrupt/SystemExit are deliberately not caught.
        self.log("\nPortada no disponible")
        cover = 'http://www.elcorreo.com/vizcaya/noticias/201002/02/Media/logo-elcorreo-nuevo.png'
    return cover
{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:16px;} + h5 {font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:16px;} + h6 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:16px;} + .date,.byline, .photo {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;} + img{margin-bottom: 0.4em} + ''' + + + + preprocess_regexps = [ + + # To present the image of the embedded video + (re.compile(r'var RUTA_IMAGEN', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(r'var SITIO = "elcorreo";', re.DOTALL|re.IGNORECASE), lambda match: '