New recipe for Levante by kwetal

This commit is contained in:
Kovid Goyal 2009-11-25 13:57:20 -07:00
parent b56df51b2a
commit d51e3eff2d

View File

@ -0,0 +1,68 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class LevanteRecipe(BasicNewsRecipe):
    """Calibre news recipe for Levante - El Mercantil Valenciano.

    Pulls articles from the RSS feeds published by levante-emv.com and
    keeps only the headline, subtitle and body ``div`` elements of each
    article page.
    """

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    version = 1
    language = 'es'
    description = u'El Mercantil Valenciano'

    title = u'Levante'

    oldest_article = 2
    max_articles_per_feed = 100

    encoding = 'latin1'
    no_stylesheets = True
    remove_javascript = True

    # Feeds taken from
    # http://www.levante-emv.com/servicios/rss/rss.jsp?pServicio=rss
    # Feed titles are without accented characters for now. Hope to resolve
    # this in the future.
    feeds = [
        (u'Portada Valencia', u'http://www.levante-emv.com/elementosInt/rss/1'),
        (u'Portada Castello', u'http://www.levante-emv.com/elementosInt/rss/2'),
        (u'Portada Alacant', u'http://www.levante-emv.com/elementosInt/rss/3'),
        (u'Lo Mas Leido', u'http://www.levante-emv.com/elementosInt/rss/LoMas'),
        (u'Seccion al minuto', u'http://www.levante-emv.com/elementosInt/rss/AlMinuto'),
        (u'Comunidad Valenciana', u'http://www.levante-emv.com/elementosInt/rss/19'),
        (u'Valencia', u'http://www.levante-emv.com/elementosInt/rss/16'),
        (u'Castello', u'http://www.levante-emv.com/elementosInt/rss/4'),
        (u'Alacant', u'http://www.levante-emv.com/elementosInt/rss/17'),
        (u'Comarcas', u'http://www.levante-emv.com/elementosInt/rss/12'),
        (u'Espana', u'http://www.levante-emv.com/elementosInt/rss/6'),
        (u'Internacional', u'http://www.levante-emv.com/elementosInt/rss/7'),
        (u'Opinion', u'http://www.levante-emv.com/elementosInt/rss/5'),
        (u'Economia', u'http://www.levante-emv.com/elementosInt/rss/8'),
        (u'Sociedad', u'http://www.levante-emv.com/elementosInt/rss/9'),
        (u'Sucesos', u'http://www.levante-emv.com/elementosInt/rss/10'),
        (u'Deportes', u'http://www.levante-emv.com/elementosInt/rss/11'),
        (u'Motor', u'http://www.levante-emv.com/elementosInt/rss/31'),
        (u'Panorama', u'http://www.levante-emv.com/elementosInt/rss/18'),
        (u'Salud y Vida', u'http://www.levante-emv.com/elementosInt/rss/20'),
        (u'Ciencia y Salud', u'http://www.levante-emv.com/elementosInt/rss/44'),
        (u'Ciencia e Investigacion', u'http://www.levante-emv.com/elementosInt/rss/23'),
        (u'Ensenanza', u'http://www.levante-emv.com/elementosInt/rss/22'),
        (u'Fiestas y Tradiciones', u'http://www.levante-emv.com/elementosInt/rss/24'),
        (u'Club Diario', u'http://www.levante-emv.com/elementosInt/rss/26'),
        (u'Juntos', u'http://www.levante-emv.com/elementosInt/rss/33'),
        (u'Integrados', u'http://www.levante-emv.com/elementosInt/rss/35'),
        (u'Agenda', u'http://www.levante-emv.com/elementosInt/rss/36'),
        (u'Cultura', u'http://www.levante-emv.com/elementosInt/rss/39'),
        (u'Tecnologia', u'http://www.levante-emv.com/elementosInt/rss/40'),
        (u'Gente', u'http://www.levante-emv.com/elementosInt/rss/41'),
        (u'Television', u'http://www.levante-emv.com/elementosInt/rss/42'),
        (u'Participa', u'http://www.levante-emv.com/elementosInt/rss/45'),
    ]

    # Only the headline, the subtitle and the article body are retained.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'noticia_titular'}),
        dict(name='div', attrs={'class': 'subtitulo'}),
        dict(name='div', attrs={'id': 'noticia_texto', 'class': 'noticia_texto'}),
    ]

    def preprocess_html(self, soup):
        """Replace the page's ``<head>`` with an empty one.

        The site's own head contains badly formed markup, so it is removed
        and a fresh, empty ``<head>`` tag is inserted as the first child of
        the document.

        :param soup: parsed article page (BeautifulSoup tree); modified in
            place.
        :return: the same ``soup`` object.
        """
        original_head = soup.head
        # Pages without a <head> yield None here; skip the removal instead
        # of failing with AttributeError and losing the article.
        if original_head is not None:
            original_head.extract()

        soup.insert(0, Tag(soup, 'head'))

        return soup