diff --git a/recipes/7dias.recipe b/recipes/7dias.recipe deleted file mode 100644 index f3b59200a0..0000000000 --- a/recipes/7dias.recipe +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -elargentino.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag - - -class SieteDias(BasicNewsRecipe): - title = '7 dias' - __author__ = 'Darko Miletic' - description = 'Revista Argentina' - publisher = 'ElArgentino.com' - category = 'news, politics, show, Argentina' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf-8' - language = 'es_AR' - - lang = 'es-AR' - direction = 'ltr' - INDEX = 'http://www.elargentino.com/medios/125/7-Dias.html' - extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} ' - - keep_only_tags = [dict(name='div', attrs={'class': 'ContainerPop'})] - - remove_tags = [dict(name='link')] - - feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=125&Content-Type=text/xml&ChannelDesc=7%20D%C3%ADas')] - - def print_version(self, url): - main, sep, article_part = url.partition('/nota-') - article_id, rsep, rrest = article_part.partition('-') - return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - soup.html['lang'] = self.lang - soup.html['dir'] = self.direction - mlang = Tag(soup, 'meta', [ - ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ - ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) - soup.head.insert(0, mlang) - soup.head.insert(1, mcharset) - return soup - - def get_cover_url(self): - cover_url = None - soup = self.index_to_soup(self.INDEX) - cover_item = soup.find('div', attrs={'class': 'colder'}) - if cover_item: - clean_url = self.image_url_processor( - None, cover_item.div.img['src']) - cover_url = 'http://www.elargentino.com' + clean_url + '&height=600' - return cover_url - - def image_url_processor(self, baseurl, url): - base, sep, rest = url.rpartition('?Id=') - img, sep2, rrest = rest.partition('&') - return base + sep + img diff --git a/recipes/argnoticias.recipe b/recipes/argnoticias.recipe deleted file mode 100644 index e46d618706..0000000000 --- a/recipes/argnoticias.recipe +++ /dev/null @@ -1,94 +0,0 @@ - -__license__ = 'GPL v3' -__copyright__ = '2013, Darko Miletic ' - -''' -www.argnoticias.com -''' - -import time -from calibre import strftime -from calibre.web.feeds.recipes import BasicNewsRecipe - - -class ArgNoticias(BasicNewsRecipe): - title = 'ARG Noticias' - __author__ = 'Darko Miletic' - description = 'Ultimas noticias de Argentina' - publisher = 'ARG Noticias' - category = 'news, politics, Argentina' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - encoding = 'utf-8' - use_embedded_content = False - masthead_url = 'http://www.argnoticias.com/images/arg-logo-footer.png' - language = 'es_AR' - publication_type = 'newsportal' - INDEX = 'http://www.argnoticias.com' - extra_css = '' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - keep_only_tags = [ - dict(name='div', attrs={'class': ['itemHeader', 'itemBody', 'itemAuthorBlock']})] - - remove_tags = [ - dict(name=['object', 'link', 'base', 'iframe']), - dict(name='div', attrs={ - 'class': ['b2jsocial_parent', 'itemSocialSharing']}) - ] - - feeds = [ - - (u'Politica', u'http://www.argnoticias.com/index.php/politica'), - (u'Economia', u'http://www.argnoticias.com/index.php/economia'), - (u'Sociedad', u'http://www.argnoticias.com/index.php/sociedad'), - (u'Mundo', u'http://www.argnoticias.com/index.php/mundo'), - (u'Deportes', u'http://www.argnoticias.com/index.php/deportes'), - (u'Espectaculos', u'http://www.argnoticias.com/index.php/espectaculos'), - (u'Tendencias', u'http://www.argnoticias.com/index.php/tendencias') - ] - - def parse_index(self): - totalfeeds = [] - lfeeds = self.get_feeds() - checker = [] - for feedobj in lfeeds: - feedtitle, feedurl = feedobj - self.report_progress(0, _('Fetching feed') + ' %s...' % - (feedtitle if feedtitle else feedurl)) - articles = [] - soup = self.index_to_soup(feedurl) - for item in soup.findAll('div', attrs={'class': 'Nota'}): - atag = item.find('a', attrs={'class': 'moduleItemTitle'}) - ptag = item.find('div', attrs={'class': 'moduleItemIntrotext'}) - url = self.INDEX + atag['href'] - title = self.tag_to_string(atag) - description = self.tag_to_string(ptag) - date = strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) - if url not in checker: - checker.append(url) - articles.append({ - 'title': title, 'date': date, 'url': url, 'description': description - }) - - for item in soup.findAll('li'): - atag = item.find('a', attrs={'class': 'moduleItemTitle'}) - if atag: - ptag = item.find( - 'div', attrs={'class': 'moduleItemIntrotext'}) - url = self.INDEX + atag['href'] - title = self.tag_to_string(atag) - description = self.tag_to_string(ptag) - date = strftime( - "%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) - if url not in checker: - checker.append(url) - articles.append({ - 'title': title, 'date': date, 'url': url, 'description': description - }) - totalfeeds.append((feedtitle, articles)) - return totalfeeds diff --git a/recipes/axxon_magazine.recipe b/recipes/axxon_magazine.recipe deleted file mode 100644 index 1ecf01e276..0000000000 --- a/recipes/axxon_magazine.recipe +++ /dev/null @@ -1,57 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -axxon.com.ar -''' -from calibre import strftime -from calibre.web.feeds.news import BasicNewsRecipe - - -class Axxon_news(BasicNewsRecipe): - title = 'Revista Axxon' - __author__ = 'Darko Miletic' - description = 'Axxon, Ciencia Ficcion en Bits' - publisher = 'Revista Axxon - Ciencia Ficcion' - category = 'SF, Argentina' - oldest_article = 31 - delay = 1 - max_articles_per_feed = 100 - no_stylesheets = False - use_embedded_content = False - language = 'es_AR' - encoding = 'utf-8' - publication_type = 'magazine' - INDEX = 'http://axxon.com.ar/rev/' - extra_css = ' body{font-family: Verdana,Arial,sans-serif} .editorial{font-family: serif} .posttitle{font-family: "Trebuchet MS","Lucida Grande",Verdana,Arial,sans-serif} .cuento{font-family: "Times New Roman", serif} .biografia{color: red; font-weight: bold; font-family: Verdana,Geneva,Arial,Helvetica,sans-serif} ' # noqa - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - keep_only_tags = [dict(name='div', attrs={'class': 'post'})] - remove_tags = [dict(name=['object', 'link', 'iframe', 'embed', 'img'])] - remove_tags_after = [ - dict(attrs={'class': ['editorial', 'correo', 'biografia', 'articulo']})] - remove_attributes = ['width', 'height', 'font', 'border', 'align'] - - def parse_index(self): - articles = [] - soup = self.index_to_soup(self.INDEX) - - for item in soup.findAll('strong'): - description = '' - title_prefix = '' - feed_link = item.find('a') - if feed_link and feed_link.has_key('href') and feed_link['href'].startswith('?p='): # noqa - url = self.INDEX + feed_link['href'] - title = title_prefix + self.tag_to_string(feed_link) - date = strftime(self.timefmt) - articles.append({ - 'title': title, 'date': date, 'url': url, 'description': description - }) - return [(soup.head.title.string, articles)] - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return self.adeify_images(soup) diff --git a/recipes/axxon_news.recipe b/recipes/axxon_news.recipe deleted file mode 100644 index 1dcb1a2337..0000000000 --- a/recipes/axxon_news.recipe +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -axxon.com.ar -''' -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag - - -class Axxon_news(BasicNewsRecipe): - title = 'Axxon noticias' - __author__ = 'Darko Miletic' - description = 'Axxon, Ciencia Ficcion en Bits' - publisher = 'Axxon' - category = 'news, SF, Argentina, science, movies' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = False - use_embedded_content = False - language = 'es_AR' - - lang = 'es-AR' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': lang, 'pretty_print': True - } - - keep_only_tags = [dict(name='div', attrs={'class': 'post'})] - - remove_tags = [dict(name=['object', 'link', 'iframe', 'embed'])] - - feeds = [(u'Noticias', u'http://axxon.com.ar/noticias/feed/')] - - remove_attributes = ['style', 'width', 'height', 'font', 'border', 'align'] - - def adeify_images2(cls, soup): - for item in soup.findAll('img'): - for attrib in ['height', 'width', 'border', 'align', 'style']: - if item.has_key(attrib): # noqa - del item[attrib] - oldParent = item.parent - if oldParent.name == 'a': - oldParent.name == 'p' - myIndex = oldParent.contents.index(item) - brtag = Tag(soup, 'br') - oldParent.insert(myIndex + 1, brtag) - return soup - - def preprocess_html(self, soup): - soup.html['xml:lang'] = self.lang - soup.html['lang'] = self.lang - mlang = Tag(soup, 'meta', [ - ("http-equiv", "Content-Language"), ("content", self.lang)]) - soup.html.insert(0, mlang) - return self.adeify_images2(soup) diff --git a/recipes/diagonales.recipe b/recipes/diagonales.recipe deleted file mode 100644 index 3ce2c5220d..0000000000 --- a/recipes/diagonales.recipe +++ /dev/null @@ -1,49 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2009-2013, Darko Miletic ' -''' -diagonales.infonews.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Diagonales(BasicNewsRecipe): - title = 'Diagonales' - __author__ = 'Darko Miletic' - description = 'Para estar bien informado sobre los temas de actualidad. Conoce sobre pais, economia, deportes, mundo, espectaculos, sociedad, entrevistas y tecnologia.' # noqa - publisher = 'INFOFIN S.A.' - category = 'news, politics, Argentina, La Plata' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf-8' - language = 'es_AR' - publication_type = 'newspaper' - delay = 1 - remove_empty_feeds = True - extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} ' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - keep_only_tags = [dict(name='div', attrs={'class': 'ContainerPop'})] - remove_tags = [dict(name='link')] - - feeds = [ - - (u'Pais', u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=112&Content-Type=text/xml&ChannelDesc=Pa%C3%ADs'), - (u'Deportes', u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=106&Content-Type=text/xml&ChannelDesc=Deportes'), - (u'Economia', u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=107&Content-Type=text/xml&ChannelDesc=Econom%C3%ADa'), - (u'Sociedad', u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=109&Content-Type=text/xml&ChannelDesc=Sociedad'), - (u'Mundo', u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=113&Content-Type=text/xml&ChannelDesc=Mundo'), - (u'Espectaculos', u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=114&Content-Type=text/xml&ChannelDesc=Espect%C3%A1culos'), - (u'Entrevistas', u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=115&Content-Type=text/xml&ChannelDesc=Entrevistas'), - (u'Tecnologia', u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=118&Content-Type=text/xml&ChannelDesc=Tecnolog%C3%ADa') - ] - - def print_version(self, url): - main, sep, article_part = url.partition('/nota-') - article_id, rsep, rrest = article_part.partition('-') - return u'http://diagonales.infonews.com/Impresion.aspx?Id=' + article_id diff --git a/recipes/elargentino.recipe b/recipes/elargentino.recipe deleted file mode 100644 index c3e57b9ad1..0000000000 --- a/recipes/elargentino.recipe +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' -''' -elargentino.com -''' -from calibre.web.feeds.news import BasicNewsRecipe - - -class ElArgentino(BasicNewsRecipe): - title = 'ElArgentino.com' - __author__ = 'Darko Miletic' - description = 'Informacion Libre las 24 horas' - publisher = 'ElArgentino.com' - category = 'news, politics, Argentina' - oldest_article = 2 - max_articles_per_feed = 100 - remove_javascript = True - no_stylesheets = True - use_embedded_content = False - encoding = 'utf8' - cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png' - language = 'es_AR' - - html2lrf_options = [ - '--comment', description, '--category', category, '--publisher', publisher - ] - - html2epub_options = 'publisher="' + publisher + \ - '"\ncomments="' + description + '"\ntags="' + category + '"' - - remove_tags = [ - dict(name='div', attrs={'id': 'noprint'}), dict(name='div', attrs={ - 'class': 'encabezadoImprimir'}), dict(name='a', attrs={'target': '_blank'}) - ] - - feeds = [ - - (u'Portada', u'http://www.elargentino.com/Highlights.aspx?Content-Type=text/xml&ChannelDesc=Home'), - (u'Pais', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=112&Content-Type=text/xml&ChannelDesc=Pa%C3%ADs'), - (u'Economia', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=107&Content-Type=text/xml&ChannelDesc=Econom%C3%ADa'), - (u'Mundo', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=113&Content-Type=text/xml&ChannelDesc=Mundo'), - (u'Tecnologia', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=118&Content-Type=text/xml&ChannelDesc=Tecnolog%C3%ADa'), - (u'Espectaculos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=114&Content-Type=text/xml&ChannelDesc=Espect%C3%A1culos'), - (u'Deportes', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=106&Content-Type=text/xml&ChannelDesc=Deportes'), - (u'Sociedad', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=109&Content-Type=text/xml&ChannelDesc=Sociedad'), - (u'Entrevistas', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=115&Content-Type=text/xml&ChannelDesc=Entrevistas') - ] - - def print_version(self, url): - main, sep, article_part = url.partition('/nota-') - article_id, rsep, rrest = article_part.partition('-') - return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id - - def preprocess_html(self, soup): - mtag = '\n\n' - soup.head.insert(0, mtag) - for item in soup.findAll(style=True): - del item['style'] - return soup diff --git a/recipes/eltribuno_jujuy_impreso.recipe b/recipes/eltribuno_jujuy_impreso.recipe deleted file mode 100644 index a986229bf6..0000000000 --- a/recipes/eltribuno_jujuy_impreso.recipe +++ /dev/null @@ -1,128 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2013, Darko Miletic ' -''' -http://www.eltribuno.info/jujuy/edicion_impresa.aspx -''' - -import urllib -from calibre.ptempfile import PersistentTemporaryFile -from calibre.web.feeds.news import BasicNewsRecipe -from collections import OrderedDict - - -class ElTribunoJujuyImpreso(BasicNewsRecipe): - title = 'El Tribuno Jujuy (Edición Impresa)' - __author__ = 'Darko Miletic' - description = "Diario principal de Jujuy" - publisher = 'Horizontes S.A.' - category = 'news, politics, Jujuy, Argentina, World' - oldest_article = 2 - language = 'es_AR' - max_articles_per_feed = 250 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf8' - publication_type = 'newspaper' - delay = 1 - articles_are_obfuscated = True - temp_files = [] - PREFIX = 'http://www.eltribuno.info/jujuy/' - INDEX = PREFIX + 'edicion_impresa.aspx' - PRINTURL = PREFIX + 'nota_print.aspx?%s' - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True - } - - keep_only_tags = [ - dict(name='div', attrs={'class': ['notaHead', 'notaContent']})] - remove_tags = [ - dict(name=['meta', 'iframe', 'base', - 'object', 'embed', 'link', 'img']), - dict(name='ul', attrs={'class': 'Tabs'}) - ] - - extra_css = """ - body{font-family: Arial,Helvetica,sans-serif} - .notaHead h4{text-transform: uppercase; color: gray} - img{margin-top: 0.8em; display: block} - """ - - def parse_index(self): - feeds = OrderedDict() - soup = None - count = 0 - while (count < 5): - try: - soup = self.index_to_soup(self.INDEX) - count = 5 - except: - print "Retrying download..." - count += 1 - if not soup: - return [] - alink = soup.find('a', href=True, attrs={'class': 'ZoomTapa'}) - if alink and 'href' in alink: - self.cover_url = alink['href'] - sections = soup.findAll( - 'div', attrs={'id': lambda x: x and x.startswith('Ediciones')}) - for section in sections: - section_title = 'Sin titulo' - sectiont = section.find('h3', attrs={'class': 'NombreSeccion'}) - if sectiont: - section_title = self.tag_to_string(sectiont.span) - - arts = section.findAll( - 'div', attrs={'class': 'Noticia NoticiaAB1'}) - for article in arts: - articles = [] - title = self.tag_to_string(article.div.h3.a) - url = article.div.h3.a['href'] - description = self.tag_to_string(article.p) - articles.append({'title': title, 'url': url, - 'description': description, 'date': ''}) - - if articles: - if section_title not in feeds: - feeds[section_title] = [] - feeds[section_title] += articles - - ans = [(key, val) for key, val in feeds.iteritems()] - return ans - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - for item in soup.findAll('a'): - if item.string is not None: - str = item.string - item.replaceWith(str) - else: - str = self.tag_to_string(item) - item.replaceWith(str) - return soup - - def get_masthead_title(self): - return 'El Tribuno' - - def get_obfuscated_article(self, url): - count = 0 - while (count < 10): - try: - response = self.browser.open(url) - html = response.read() - count = 10 - except: - print "Retrying download..." - count += 1 - tfile = PersistentTemporaryFile('_fa.html') - tfile.write(html) - tfile.close() - self.temp_files.append(tfile) - return tfile.name - - def print_version(self, url): - right = url.rpartition('/')[2] - artid = right.partition('-')[0] - params = {'Note': artid} - return (self.PRINTURL % urllib.urlencode(params)) diff --git a/recipes/eltribuno_salta_impreso.recipe b/recipes/eltribuno_salta_impreso.recipe deleted file mode 100644 index 5530b552dd..0000000000 --- a/recipes/eltribuno_salta_impreso.recipe +++ /dev/null @@ -1,46 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2013 - 2016, Darko Miletic ' -''' -http://www.eltribuno.info/salta/edicion_impresa.aspx -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class ElTribunoSaltaImpreso(BasicNewsRecipe): - title = 'El Tribuno Salta' - __author__ = 'Darko Miletic' - description = "Diario principal de Salta" - publisher = 'Horizontes S.A.' - category = 'news, politics, Salta, Argentina, World' - oldest_article = 2 - language = 'es_AR' - max_articles_per_feed = 250 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf8' - publication_type = 'newspaper' - remove_javascript = True - auto_cleanup = True - - conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher': publisher - , 'language' : language - } - - extra_css = """ - body{font-family: Arial,Helvetica,sans-serif} - img{margin-top: 0.8em; display: block} - """ - - feeds = [ - (u'Mas leidas', u'http://www.eltribuno.info/rss/salta/masleidas.xml') - ,(u'El Tribuno', u'http://www.eltribuno.info/rss/salta/home.xml') - ,(u'Salta' , u'http://www.eltribuno.info/rss/salta/salta.xml') - ,(u'Deportes' , u'http://www.eltribuno.info/rss/salta/deportes.xml') - ] - - def get_masthead_title(self): - return 'El Tribuno' diff --git a/recipes/icons/argnoticias.png b/recipes/icons/argnoticias.png deleted file mode 100644 index 75e8839340..0000000000 Binary files a/recipes/icons/argnoticias.png and /dev/null differ diff --git a/recipes/icons/axxon_news.png b/recipes/icons/axxon_news.png deleted file mode 100644 index 85c593367d..0000000000 Binary files a/recipes/icons/axxon_news.png and /dev/null differ diff --git a/recipes/icons/diagonales.png b/recipes/icons/diagonales.png deleted file mode 100644 index 68a661f52f..0000000000 Binary files a/recipes/icons/diagonales.png and /dev/null differ diff --git a/recipes/icons/elargentino.png b/recipes/icons/elargentino.png deleted file mode 100644 index 84b2f37ff9..0000000000 Binary files a/recipes/icons/elargentino.png and /dev/null differ diff --git a/recipes/icons/eltribuno_jujuy_impreso.png b/recipes/icons/eltribuno_jujuy_impreso.png deleted file mode 100644 index 6609b46adb..0000000000 Binary files a/recipes/icons/eltribuno_jujuy_impreso.png and /dev/null differ diff --git a/recipes/icons/eltribuno_salta_impreso.png b/recipes/icons/eltribuno_salta_impreso.png deleted file mode 100644 index 6609b46adb..0000000000 Binary files a/recipes/icons/eltribuno_salta_impreso.png and /dev/null differ diff --git a/recipes/icons/losandes.png b/recipes/icons/losandes.png deleted file mode 100644 index 01ef05e74a..0000000000 Binary files a/recipes/icons/losandes.png and /dev/null differ diff --git a/recipes/icons/miradasalsur.png b/recipes/icons/miradasalsur.png deleted file mode 100644 index 6a5c419e0a..0000000000 Binary files a/recipes/icons/miradasalsur.png and /dev/null differ diff --git a/recipes/icons/perfil.png b/recipes/icons/perfil.png deleted file mode 100644 index a3a990fed4..0000000000 Binary files a/recipes/icons/perfil.png and /dev/null differ diff --git a/recipes/lamujerdemivida.recipe b/recipes/lamujerdemivida.recipe deleted file mode 100644 index e8a9614814..0000000000 --- a/recipes/lamujerdemivida.recipe +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -lamujerdemivida.com.ar -''' -from calibre import strftime -from calibre.web.feeds.news import BasicNewsRecipe - - -class LaMujerDeMiVida(BasicNewsRecipe): - title = 'La Mujer de mi Vida' - __author__ = 'Darko Miletic' - description = 'Cultura de otra manera' - oldest_article = 90 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'cp1252' - publisher = 'La Mujer de mi Vida' - category = 'literatura, critica, arte, ensayos' - language = 'es_AR' - - INDEX = 'http://www.lamujerdemivida.com.ar/' - html2lrf_options = [ - '--comment', description, '--category', category, '--publisher', publisher, '--ignore-tables' - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + \ - description + '"\ntags="' + category + '"\nlinearize_tables=True' - - keep_only_tags = [dict(name='table', attrs={'width': '570'})] - - feeds = [(u'Articulos', u'http://www.lamujerdemivida.com.ar/index.php')] - - def preprocess_html(self, soup): - soup.html['xml:lang'] = 'es-AR' - soup.html['lang'] = 'es-AR' - mtag = '' - soup.head.insert(0, mtag) - for item in soup.findAll(style=True): - del item['style'] - return soup - - def get_cover_url(self): - cover_url = None - soup = self.index_to_soup(self.INDEX) - cover_item = soup.find('img', attrs={'alt': 'Lamujerdemivida.'}) - if cover_item: - cover_url = self.INDEX + cover_item['src'] - return cover_url - - def parse_index(self): - totalfeeds = [] - lfeeds = self.get_feeds() - for feedobj in lfeeds: - feedtitle, feedurl = feedobj - self.report_progress(0, _('Fetching feed') + ' %s...' % - (feedtitle if feedtitle else feedurl)) - articles = [] - soup = self.index_to_soup(feedurl) - for item in soup.findAll('td', attrs={'width': '390'}): - atag = item.find('a', href=True) - if atag: - url = atag['href'] - title = self.tag_to_string(atag) - date = strftime(self.timefmt) - articles.append({ - 'title': title, 'date': date, 'url': url, 'description': '' - }) - totalfeeds.append((feedtitle, articles)) - return totalfeeds diff --git a/recipes/losandes.recipe b/recipes/losandes.recipe deleted file mode 100644 index 34ebf81262..0000000000 --- a/recipes/losandes.recipe +++ /dev/null @@ -1,75 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2011, Darko Miletic ' -''' -www.losandes.com.ar -''' - -from calibre import strftime -from calibre.web.feeds.news import BasicNewsRecipe - - -class LosAndes(BasicNewsRecipe): - title = 'Los Andes' - __author__ = 'Darko Miletic' - description = 'Noticias de Mendoza, Argentina y el resto del mundo' - publisher = 'Los Andes' - category = 'news, politics, Argentina' - oldest_article = 2 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'cp1252' - use_embedded_content = False - language = 'es_AR' - remove_empty_feeds = True - publication_type = 'newspaper' - masthead_url = 'http://www.losandes.com.ar/graficos/losandes.png' - extra_css = """ - body{font-family: Arial,Helvetica,sans-serif } - h1,h2{font-family: "Times New Roman",Times,serif} - .fechaNota{font-weight: bold; color: gray} - """ - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - remove_tags = [ - dict(name=['meta', 'link']), dict(attrs={'class': ['cabecera', 'url']}) - ] - remove_tags_before = dict(attrs={'class': 'cabecera'}) - remove_tags_after = dict(attrs={'class': 'url'}) - - feeds = [ - - (u'Ultimas Noticias', u'http://www.losandes.com.ar/servicios/rss.asp?r=78'), - (u'Politica', u'http://www.losandes.com.ar/servicios/rss.asp?r=68'), - (u'Economia nacional', u'http://www.losandes.com.ar/servicios/rss.asp?r=65'), - (u'Economia internacional', u'http://www.losandes.com.ar/servicios/rss.asp?r=505'), - (u'Internacionales', u'http://www.losandes.com.ar/servicios/rss.asp?r=66'), - (u'Turismo', u'http://www.losandes.com.ar/servicios/rss.asp?r=502'), - (u'Fincas', u'http://www.losandes.com.ar/servicios/rss.asp?r=504'), - (u'Isha nos habla', u'http://www.losandes.com.ar/servicios/rss.asp?r=562'), - (u'Estilo', u'http://www.losandes.com.ar/servicios/rss.asp?r=81'), - (u'Cultura', u'http://www.losandes.com.ar/servicios/rss.asp?r=503'), - (u'Policiales', u'http://www.losandes.com.ar/servicios/rss.asp?r=70'), - (u'Deportes', u'http://www.losandes.com.ar/servicios/rss.asp?r=69'), - (u'Sociedad', u'http://www.losandes.com.ar/servicios/rss.asp?r=67'), - (u'Opinion', u'http://www.losandes.com.ar/servicios/rss.asp?r=80'), - (u'Editorial', u'http://www.losandes.com.ar/servicios/rss.asp?r=76'), - (u'Mirador', u'http://www.losandes.com.ar/servicios/rss.asp?r=79') - ] - - def print_version(self, url): - artid = url.rpartition('.')[0].rpartition('-')[2] - return "http://www.losandes.com.ar/includes/modulos/imprimir.asp?tipo=noticia&id=" + artid - - def get_cover_url(self): - month = strftime("%m").lstrip('0') - day = strftime("%d").lstrip('0') - year = strftime("%Y") - return "http://www.losandes.com.ar/fotografias/fotosnoticias/" + year + "/" + month + "/" + day + "/th_tapa.jpg" - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return soup diff --git a/recipes/miradasalsur.recipe b/recipes/miradasalsur.recipe deleted file mode 100644 index 342a2250c5..0000000000 --- a/recipes/miradasalsur.recipe +++ /dev/null @@ -1,67 +0,0 @@ -__copyright__ = '2009-2013, Darko Miletic ' -''' -sur.infonews.com -''' - -import datetime -from calibre.web.feeds.news import BasicNewsRecipe - - -class MiradasAlSur(BasicNewsRecipe): - title = 'Miradas al Sur' - __author__ = 'Darko Miletic' - description = 'Semanario Argentino' - publisher = 'ElArgentino.com' - category = 'news, politics, Argentina' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf-8' - language = 'es_AR' - remove_empty_feeds = True - masthead_url = 'http://sur.infonews.com/sites/default/files/www_miradas_al_sur_com_logo.gif' - extra_css = """ - body{font-family: Arial,Helvetica,sans-serif} - h1{font-family: Georgia,Times,serif} - .field-field-story-author{color: gray; font-size: small} - """ - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'series': title - } - - keep_only_tags = [ - dict(name='div', attrs={'id': ['content-header', 'content-area']})] - remove_tags = [ - dict(name=['link', 'meta', 'iframe', 'embed', 'object']), - dict(name='form', attrs={'class': 'fivestar-widget'}), - dict(attrs={'class': lambda x: x and 'terms-inline' in x.split()}) - ] - - feeds = [ - (u'Politica', u'http://sur.infonews.com/taxonomy/term/1/0/feed'), - (u'Internacional', u'http://sur.infonews.com/taxonomy/term/2/0/feed'), - (u'Informe Especial', u'http://sur.infonews.com/taxonomy/term/14/0/feed'), - (u'Delitos y pesquisas', u'http://sur.infonews.com/taxonomy/term/6/0/feed'), - (u'Lesa Humanidad', u'http://sur.infonews.com/taxonomy/term/7/0/feed'), - (u'Cultura', u'http://sur.infonews.com/taxonomy/term/8/0/feed'), - (u'Deportes', u'http://sur.infonews.com/taxonomy/term/9/0/feed'), - (u'Contratapa', u'http://sur.infonews.com/taxonomy/term/10/0/feed'), - ] - - def get_cover_url(self): - # determine the series number, unfortunately not gonna happen now - # self.conversion_options.update({'series_index':seriesnr}) - cover_url = None - cdate = datetime.date.today() - todayweekday = cdate.isoweekday() - if (todayweekday != 7): - cdate -= datetime.timedelta(days=todayweekday) - cover_page_url = cdate.strftime( - 'http://sur.infonews.com/ediciones/%Y-%m-%d/tapa') - soup = self.index_to_soup(cover_page_url) - cover_item = soup.find('img', attrs={ - 'class': lambda x: x and 'imagecache-tapa_edicion_full' in x.split()}) - if cover_item: - cover_url = cover_item['src'] - return cover_url diff --git a/recipes/newsweek_argentina.recipe b/recipes/newsweek_argentina.recipe deleted file mode 100644 index 85b15040d3..0000000000 --- a/recipes/newsweek_argentina.recipe +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -elargentino.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag - - -class Newsweek_Argentina(BasicNewsRecipe): - title = 'NewsWeek Argentina' - __author__ = 'Darko Miletic' - description = 'Revista dedicada a politica' - publisher = 'ElArgentino.com' - category = 'news, politics, world, Argentina' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf-8' - language = 'es_AR' - - lang = 'es-AR' - direction = 'ltr' - INDEX = 'http://www.elargentino.com/medios/126/Newsweek.html' - extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} ' - - html2lrf_options = [ - '--comment', description, '--category', category, '--publisher', publisher - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + \ - category + \ - '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "' - - keep_only_tags = [dict(name='div', attrs={'class': 'ContainerPop'})] - - remove_tags = [dict(name='link')] - - feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=126&Content-Type=text/xml&ChannelDesc=Newsweek')] - - def print_version(self, url): - main, sep, article_part = url.partition('/nota-') - article_id, rsep, rrest = article_part.partition('-') - return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - soup.html['lang'] = self.lang - soup.html['dir'] = self.direction - mlang = Tag(soup, 'meta', [ - ("http-equiv", "Content-Language"), ("content", self.lang)]) - mcharset = Tag(soup, 'meta', [ - ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")]) - soup.head.insert(0, mlang) - soup.head.insert(1, mcharset) - return soup - - def get_cover_url(self): - cover_url = None - soup = self.index_to_soup(self.INDEX) - cover_item = soup.find('div', attrs={'class': 'colder'}) - if cover_item: - clean_url = self.image_url_processor( - None, cover_item.div.img['src']) - cover_url = 'http://www.elargentino.com' + clean_url + '&height=600' - return cover_url - - def image_url_processor(self, baseurl, url): - base, sep, rest = url.rpartition('?Id=') - img, sep2, rrest = rest.partition('&') - return base + sep + img diff --git a/recipes/perfil.recipe b/recipes/perfil.recipe deleted file mode 100644 index a5de7a429e..0000000000 --- a/recipes/perfil.recipe +++ /dev/null @@ -1,59 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2010-2015, Darko Miletic ' -''' -perfil.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Perfil(BasicNewsRecipe): - title = 'Perfil' - __author__ = 'Darko Miletic' - description = 'Noticias de Argentina y el resto del mundo' - publisher = 'perfil.com' - category = 'news, politics, Argentina' - oldest_article = 2 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False - language = 'es_AR' - remove_empty_feeds = True - masthead_url = 'http://www.perfil.com/__export/1330013400000/system/modules/com.tfsla.perfil.diario/resources/v1/images/perfilcom-logo-secciones.png' - extra_css = """ - body{font-family: Arial,Helvetica,sans-serif } - .seccion{border-bottom: 1px dotted #666666; text-transform: uppercase; font-size: x-large} - .foto1 h1{font-size: x-small} - h1{font-family: Georgia,"Times New Roman",serif} - img{margin-bottom: 0.4em} - .hora{font-size: x-small; color: red} - """ - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - remove_tags = [ - dict(name=['iframe', 'embed', 'object', 'base', 'meta', 'link']), dict(attrs={ - 'id': ['social-articulo', 'relacionadas']}), dict(attrs={'class': 'destacadoNota'}) - ] - keep_only_tags = [ - dict(attrs={'id': 'header-noticia'}), dict(attrs={'class': 'cuerpo'})] - - feeds = [ - - (u'Ultimo momento', u'http://www.perfil.com/rss/ultimomomento.xml'), - (u'Politica', u'http://www.perfil.com/rss/politica.xml'), - (u'Policia', u'http://www.perfil.com/rss/policia.xml'), - (u'Internacionales', u'http://www.perfil.com/rss/internacional.xml'), - (u'Economia', u'http://www.perfil.com/rss/economia.xml'), - (u'Deportes', u'http://www.perfil.com/rss/deportes.xml'), - (u'Opinion', u'http://www.perfil.com/rss/columnistas.xml'), - (u'Sociedad', u'http://www.perfil.com/rss/sociedad.xml'), - (u'Cultura', u'http://www.perfil.com/rss/cultura.xml'), - (u'Espectaculos', u'http://www.perfil.com/rss/espectaculos.xml'), - (u'Ciencia', u'http://www.perfil.com/rss/ciencia.xml'), - (u'Salud', u'http://www.perfil.com/rss/salud.xml'), - (u'Tecnologia', u'http://www.perfil.com/rss/tecnologia.xml') - ] diff --git a/recipes/reptantes.recipe b/recipes/reptantes.recipe deleted file mode 100644 index 03929717bf..0000000000 --- a/recipes/reptantes.recipe +++ /dev/null @@ -1,38 +0,0 @@ - -__license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' -''' -www.reptantes.com.ar -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Reptantes(BasicNewsRecipe): - title = 'Reptantes' - __author__ = 'Darko Miletic' - description = u"cada vez que te haces acupuntura, tu muñeco vudú sufre en algún lado" - oldest_article = 130 - max_articles_per_feed = 100 - language = 'es_AR' - encoding = 'utf-8' - no_stylesheets = True - use_embedded_content = False - publication_type = 'blog' - extra_css = ' body{font-family: "Palatino Linotype",serif} h2{text-align: center; color:#BE7F8D} img{margin-bottom: 2em} ' - - conversion_options = { - 'comment': description, 'tags': 'literatura', 'publisher': 'Hernan Racnati', 'language': language - } - - feeds = [(u'Posts', u'http://www.reptantes.com.ar/?feed=rss2')] - - keep_only_tags = [dict(attrs={'id': 'content'})] - remove_tags = [dict(attrs={'class': 'iLikeThis'})] - remove_tags_before = dict(name='h2') - remove_tags_after = dict(attrs={'class': 'iLikeThis'}) - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return self.adeify_images(soup) diff --git a/recipes/rionegro.recipe b/recipes/rionegro.recipe deleted file mode 100644 index dd59cc2212..0000000000 --- a/recipes/rionegro.recipe +++ /dev/null @@ -1,63 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2012, Darko Miletic ' -''' -www.rionegro.com.ar -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class RioNegro(BasicNewsRecipe): - title = 'Diario Rio Negro' - __author__ = 'Darko Miletic' - description = 'Noticias desde la Patagonia Argentina y el resto del mundo' - publisher = 'Editorial Rio Negro SA.' - category = 'news, politics, Argentina' - oldest_article = 2 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False - language = 'es_AR' - remove_empty_feeds = True - publication_type = 'newspaper' - masthead_url = 'http://www.rionegro.com.ar/diario/imagenes/logorn.gif' - extra_css = """ - body{font-family: Arial,Helvetica,sans-serif } - img{display:block} - h1 {font-size: 0.89em; color: red} - h2 {font-family: Georgia,"Times New Roman",Times,serif; font-size: 1.8em} - h3 {font-family: Georgia,"Times New Roman",Times,serif; border-bottom: 2px solid gray} - """ - - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } - - remove_tags = [ - dict(name=['meta', 'link', 'iframe', 'object', 'embed']), dict( - name='div', attrs={'class': 'logo'}) - ] - keep_only_tags = [dict(attrs={'class': 'nota'})] - remove_attributes = ['lang'] - - feeds = [ - - (u'Argentina', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9532'), - (u'El Mundo', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9533'), - (u'Carta de lectores', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9538'), - (u'Columnistas', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9539'), - (u'Domingo a Domingo', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9541'), - (u'Editorial', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9542'), - (u'Deportes', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9522'), - (u'Espectaculos', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9523'), - (u'Sociedad', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9574'), - (u'Policiales', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9525'), - (u'Municipales', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9862'), - (u'Region', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9701') - ] - - def print_version(self, url): - idart_raw = url.rpartition('idart=')[2] - idart = idart_raw.rpartition('&')[0] - return 'http://www.rionegro.com.ar/diario/rn/print.aspx?idArt=' + idart + '&tipo=2'