calibre/recipes/levante.recipe

106 lines
4.3 KiB
Plaintext

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name* that belongs to *soup*.

    When *soup* exposes a usable ``new_tag`` attribute, that factory is
    called with *name* and *attrs* converted to a dict. Otherwise a
    ``Tag`` is constructed directly from *soup*, *name* and *attrs*
    (with an empty *attrs* passed as ``None``).

    NOTE(review): presumably this bridges two BeautifulSoup API
    generations -- confirm against the bundled BeautifulSoup module.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on the soup object: build the tag by hand.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
class LevanteRecipe(BasicNewsRecipe):
    """News recipe for Levante ("El Mercantil Valenciano"), in Spanish.

    Declares the RSS feeds to download, the page elements to keep from
    each article, and a ``preprocess_html`` hook that swaps the page's
    ``<head>`` for an empty one.
    """

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    version = 1
    language = 'es'
    description = u'El Mercantil Valenciano'

    title = u'Levante'
    oldest_article = 2
    max_articles_per_feed = 100
    encoding = 'latin1'

    no_stylesheets = True
    remove_javascript = True

    # Feeds taken from http://www.levante-emv.com/servicios/rss/rss.jsp?pServicio=rss
    # Feed titles are without accented characters for now. Hope to resolve
    # this in the future.
    # Each entry is a (feed title, feed URL) pair.
    feeds = [
        (u'Portada Valencia', u'http://www.levante-emv.com/elementosInt/rss/1'),
        (u'Portada Castello', u'http://www.levante-emv.com/elementosInt/rss/2'),
        (u'Portada Alacant', u'http://www.levante-emv.com/elementosInt/rss/3'),
        (u'Lo Mas Leido', u'http://www.levante-emv.com/elementosInt/rss/LoMas'),
        (u'Seccion al minuto', u'http://www.levante-emv.com/elementosInt/rss/AlMinuto'),
        (u'Comunidad Valenciana', u'http://www.levante-emv.com/elementosInt/rss/19'),
        (u'Valencia', u'http://www.levante-emv.com/elementosInt/rss/16'),
        (u'Castello', u'http://www.levante-emv.com/elementosInt/rss/4'),
        (u'Alacant', u'http://www.levante-emv.com/elementosInt/rss/17'),
        (u'Comarcas', u'http://www.levante-emv.com/elementosInt/rss/12'),
        (u'Espana', u'http://www.levante-emv.com/elementosInt/rss/6'),
        (u'Internacional', u'http://www.levante-emv.com/elementosInt/rss/7'),
        (u'Opinion', u'http://www.levante-emv.com/elementosInt/rss/5'),
        (u'Economia', u'http://www.levante-emv.com/elementosInt/rss/8'),
        (u'Sociedad', u'http://www.levante-emv.com/elementosInt/rss/9'),
        (u'Sucesos', u'http://www.levante-emv.com/elementosInt/rss/10'),
        (u'Deportes', u'http://www.levante-emv.com/elementosInt/rss/11'),
        (u'Motor', u'http://www.levante-emv.com/elementosInt/rss/31'),
        (u'Panorama', u'http://www.levante-emv.com/elementosInt/rss/18'),
        (u'Salud y Vida', u'http://www.levante-emv.com/elementosInt/rss/20'),
        (u'Ciencia y Salud', u'http://www.levante-emv.com/elementosInt/rss/44'),
        (u'Ciencia e Investigacion', u'http://www.levante-emv.com/elementosInt/rss/23'),
        (u'Ensenanza', u'http://www.levante-emv.com/elementosInt/rss/22'),
        (u'Fiestas y Tradiciones', u'http://www.levante-emv.com/elementosInt/rss/24'),
        (u'Club Diario', u'http://www.levante-emv.com/elementosInt/rss/26'),
        (u'Juntos', u'http://www.levante-emv.com/elementosInt/rss/33'),
        (u'Integrados', u'http://www.levante-emv.com/elementosInt/rss/35'),
        (u'Agenda', u'http://www.levante-emv.com/elementosInt/rss/36'),
        (u'Cultura', u'http://www.levante-emv.com/elementosInt/rss/39'),
        (u'Tecnologia', u'http://www.levante-emv.com/elementosInt/rss/40'),
        (u'Gente', u'http://www.levante-emv.com/elementosInt/rss/41'),
        (u'Television', u'http://www.levante-emv.com/elementosInt/rss/42'),
        (u'Participa', u'http://www.levante-emv.com/elementosInt/rss/45'),
    ]

    # Only these <div> elements are kept from each article page; going by
    # their class names they are presumably the headline, the subtitle and
    # the article text.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'noticia_titular'}),
        dict(name='div', attrs={'class': 'subtitulo'}),
        dict(name='div', attrs={'id': 'noticia_texto', 'class': 'noticia_texto'}),
    ]

    def preprocess_html(self, soup):
        """Replace the document's ``<head>`` with a fresh, empty one.

        The existing head, if any, is detached from *soup*, and a new empty
        ``head`` tag is inserted as the first child of *soup*. A document
        without a ``<head>`` simply gains the empty one instead of failing.

        Returns the same *soup* object, modified in place.
        """
        # Nuke some real crappy html
        their_head = soup.head
        if their_head is not None:
            # soup.head is None when the page has no <head>; calling
            # extract() on it would raise AttributeError.
            their_head.extract()

        soup.insert(0, new_tag(soup, 'head'))
        return soup