# Mirror of https://github.com/kovidgoyal/calibre.git
# Synced 2025-06-23 15:30:45 -04:00 · 106 lines · 4.3 KiB · Plaintext
from calibre.web.feeds.news import BasicNewsRecipe
|
|
from calibre.ebooks.BeautifulSoup import Tag
|
|
|
|
|
|
def new_tag(soup, name, attrs=()):
    """Create a new tag called *name* that belongs to *soup*.

    When *soup* exposes a ``new_tag`` attribute that is not ``None``, that
    callable is used and *attrs* is handed over as a ``dict``. Otherwise the
    tag is built directly with the imported ``Tag`` class, passing ``None``
    in place of an empty *attrs*.

    :param soup: the parsed document the tag is created for
    :param name: tag name, e.g. ``'head'``
    :param attrs: attributes as a mapping or an iterable of ``(key, value)``
        pairs; empty by default
    :return: the newly created tag object
    """
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    # No usable factory on the soup object: construct the tag ourselves.
    return Tag(soup, name, attrs=attrs or None)
|
|
|
|
|
|
class LevanteRecipe(BasicNewsRecipe):
    """News recipe for Levante (El Mercantil Valenciano), levante-emv.com.

    Declares the RSS feeds to download, the page elements to keep and a
    ``preprocess_html`` hook that swaps the page's ``<head>`` for an empty one.
    """

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    version = 1
    language = 'es'
    description = u'El Mercantil Valenciano'
    title = u'Levante'

    oldest_article = 2
    max_articles_per_feed = 100
    encoding = 'latin1'
    no_stylesheets = True
    remove_javascript = True

    # Feeds taken from http://www.levante-emv.com/servicios/rss/rss.jsp?pServicio=rss
    # Feed titles are without accented characters for now. Hope to resolve
    # this in the future.
    # Each entry is a (feed title, feed URL) pair.
    feeds = [
        (u'Portada Valencia', u'http://www.levante-emv.com/elementosInt/rss/1'),
        (u'Portada Castello', u'http://www.levante-emv.com/elementosInt/rss/2'),
        (u'Portada Alacant', u'http://www.levante-emv.com/elementosInt/rss/3'),
        (u'Lo Mas Leido', u'http://www.levante-emv.com/elementosInt/rss/LoMas'),
        (u'Seccion al minuto', u'http://www.levante-emv.com/elementosInt/rss/AlMinuto'),
        (u'Comunidad Valenciana', u'http://www.levante-emv.com/elementosInt/rss/19'),
        (u'Valencia', u'http://www.levante-emv.com/elementosInt/rss/16'),
        (u'Castello', u'http://www.levante-emv.com/elementosInt/rss/4'),
        (u'Alacant', u'http://www.levante-emv.com/elementosInt/rss/17'),
        (u'Comarcas', u'http://www.levante-emv.com/elementosInt/rss/12'),
        (u'Espana', u'http://www.levante-emv.com/elementosInt/rss/6'),
        (u'Internacional', u'http://www.levante-emv.com/elementosInt/rss/7'),
        (u'Opinion', u'http://www.levante-emv.com/elementosInt/rss/5'),
        (u'Economia', u'http://www.levante-emv.com/elementosInt/rss/8'),
        (u'Sociedad', u'http://www.levante-emv.com/elementosInt/rss/9'),
        (u'Sucesos', u'http://www.levante-emv.com/elementosInt/rss/10'),
        (u'Deportes', u'http://www.levante-emv.com/elementosInt/rss/11'),
        (u'Motor', u'http://www.levante-emv.com/elementosInt/rss/31'),
        (u'Panorama', u'http://www.levante-emv.com/elementosInt/rss/18'),
        (u'Salud y Vida', u'http://www.levante-emv.com/elementosInt/rss/20'),
        (u'Ciencia y Salud', u'http://www.levante-emv.com/elementosInt/rss/44'),
        (u'Ciencia e Investigacion', u'http://www.levante-emv.com/elementosInt/rss/23'),
        (u'Ensenanza', u'http://www.levante-emv.com/elementosInt/rss/22'),
        (u'Fiestas y Tradiciones', u'http://www.levante-emv.com/elementosInt/rss/24'),
        (u'Club Diario', u'http://www.levante-emv.com/elementosInt/rss/26'),
        (u'Juntos', u'http://www.levante-emv.com/elementosInt/rss/33'),
        (u'Integrados', u'http://www.levante-emv.com/elementosInt/rss/35'),
        (u'Agenda', u'http://www.levante-emv.com/elementosInt/rss/36'),
        (u'Cultura', u'http://www.levante-emv.com/elementosInt/rss/39'),
        (u'Tecnologia', u'http://www.levante-emv.com/elementosInt/rss/40'),
        (u'Gente', u'http://www.levante-emv.com/elementosInt/rss/41'),
        (u'Television', u'http://www.levante-emv.com/elementosInt/rss/42'),
        (u'Participa', u'http://www.levante-emv.com/elementosInt/rss/45'),
    ]

    # Tag matchers for three <div> elements, selected by class and/or id.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'noticia_titular'}),
        dict(name='div', attrs={'class': 'subtitulo'}),
        dict(name='div', attrs={'id': 'noticia_texto', 'class': 'noticia_texto'}),
    ]

    def preprocess_html(self, soup):
        """Replace the document's ``<head>`` with an empty one.

        :param soup: the parsed page
        :return: the same *soup* object, modified in place
        """
        # Nuke some real crappy html
        their_head = soup.head
        # A page without a <head> yields None here; calling extract() on it
        # would raise AttributeError, so only detach a head that exists.
        if their_head is not None:
            their_head.extract()

        # Put a fresh, empty <head> at the very start of the document.
        soup.insert(0, new_tag(soup, 'head'))

        return soup
|