# Recipe source path (per the diff header): resources/recipes/levante.recipe
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag


class LevanteRecipe(BasicNewsRecipe):
    """News recipe for Levante (El Mercantil Valenciano), driven by its RSS feeds."""

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    version = 1
    language = 'es'
    description = u'El Mercantil Valenciano'
    title = u'Levante'

    oldest_article = 2
    max_articles_per_feed = 100
    encoding = 'latin1'
    no_stylesheets = True
    remove_javascript = True

    # Feeds taken from http://www.levante-emv.com/servicios/rss/rss.jsp?pServicio=rss
    # Feed titles are written without accented characters for now; the hope
    # is to resolve this in the future.
    feeds = [
        (u'Portada Valencia', u'http://www.levante-emv.com/elementosInt/rss/1'),
        (u'Portada Castello', u'http://www.levante-emv.com/elementosInt/rss/2'),
        (u'Portada Alacant', u'http://www.levante-emv.com/elementosInt/rss/3'),
        (u'Lo Mas Leido', u'http://www.levante-emv.com/elementosInt/rss/LoMas'),
        (u'Seccion al minuto', u'http://www.levante-emv.com/elementosInt/rss/AlMinuto'),
        (u'Comunidad Valenciana', u'http://www.levante-emv.com/elementosInt/rss/19'),
        (u'Valencia', u'http://www.levante-emv.com/elementosInt/rss/16'),
        (u'Castello', u'http://www.levante-emv.com/elementosInt/rss/4'),
        (u'Alacant', u'http://www.levante-emv.com/elementosInt/rss/17'),
        (u'Comarcas', u'http://www.levante-emv.com/elementosInt/rss/12'),
        (u'Espana', u'http://www.levante-emv.com/elementosInt/rss/6'),
        (u'Internacional', u'http://www.levante-emv.com/elementosInt/rss/7'),
        (u'Opinion', u'http://www.levante-emv.com/elementosInt/rss/5'),
        (u'Economia', u'http://www.levante-emv.com/elementosInt/rss/8'),
        (u'Sociedad', u'http://www.levante-emv.com/elementosInt/rss/9'),
        (u'Sucesos', u'http://www.levante-emv.com/elementosInt/rss/10'),
        (u'Deportes', u'http://www.levante-emv.com/elementosInt/rss/11'),
        (u'Motor', u'http://www.levante-emv.com/elementosInt/rss/31'),
        (u'Panorama', u'http://www.levante-emv.com/elementosInt/rss/18'),
        (u'Salud y Vida', u'http://www.levante-emv.com/elementosInt/rss/20'),
        (u'Ciencia y Salud', u'http://www.levante-emv.com/elementosInt/rss/44'),
        (u'Ciencia e Investigacion', u'http://www.levante-emv.com/elementosInt/rss/23'),
        (u'Ensenanza', u'http://www.levante-emv.com/elementosInt/rss/22'),
        (u'Fiestas y Tradiciones', u'http://www.levante-emv.com/elementosInt/rss/24'),
        (u'Club Diario', u'http://www.levante-emv.com/elementosInt/rss/26'),
        (u'Juntos', u'http://www.levante-emv.com/elementosInt/rss/33'),
        (u'Integrados', u'http://www.levante-emv.com/elementosInt/rss/35'),
        (u'Agenda', u'http://www.levante-emv.com/elementosInt/rss/36'),
        (u'Cultura', u'http://www.levante-emv.com/elementosInt/rss/39'),
        (u'Tecnologia', u'http://www.levante-emv.com/elementosInt/rss/40'),
        (u'Gente', u'http://www.levante-emv.com/elementosInt/rss/41'),
        (u'Television', u'http://www.levante-emv.com/elementosInt/rss/42'),
        (u'Participa', u'http://www.levante-emv.com/elementosInt/rss/45'),
    ]

    # Only the headline, the subtitle and the article text containers are kept.
    keep_only_tags = [
        dict(name='div', attrs={'class' : 'noticia_titular'}),
        dict(name='div', attrs={'class' : 'subtitulo'}),
        dict(name='div', attrs={'id' : 'noticia_texto', 'class' : 'noticia_texto'}),
    ]

    def preprocess_html(self, soup):
        """Swap the page's ``<head>`` for a fresh, empty one.

        The site's head markup is poor quality, so it is discarded wholesale
        rather than cleaned up.

        :param soup: parsed article page; it is modified in place.
        :return: the same ``soup`` object, now carrying an empty ``<head>``
            as its first child.
        """
        their_head = soup.head
        # A page without any <head> yields None here; calling extract() on
        # it would raise AttributeError, so only remove a head that exists.
        if their_head is not None:
            their_head.extract()

        soup.insert(0, Tag(soup, 'head'))

        return soup