mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Fix El Periodico de Aragon and Weblogs SL
This commit is contained in:
parent
d4b0a5b711
commit
24ff60d36c
@ -5,12 +5,11 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '04 December 2010, desUBIKado'
|
||||
__author__ = 'desUBIKado'
|
||||
__description__ = 'Daily newspaper from Aragon'
|
||||
__version__ = 'v0.07'
|
||||
__date__ = '06, February 2011'
|
||||
__version__ = 'v0.08'
|
||||
__date__ = '13, November 2011'
|
||||
'''
|
||||
elperiodicodearagon.com
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
@ -20,13 +19,13 @@ class elperiodicodearagon(BasicNewsRecipe):
|
||||
description = u'Noticias desde Aragon'
|
||||
publisher = u'elperiodicodearagon.com'
|
||||
category = u'news, politics, Spain, Aragon'
|
||||
oldest_article = 2
|
||||
oldest_article = 1
|
||||
delay = 0
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
encoding = 'utf8'
|
||||
encoding = 'iso-8859-1'
|
||||
remove_empty_feeds = True
|
||||
remove_javascript = True
|
||||
|
||||
@ -39,61 +38,30 @@ class elperiodicodearagon(BasicNewsRecipe):
|
||||
}
|
||||
|
||||
feeds = [
|
||||
(u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
|
||||
(u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
|
||||
(u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
|
||||
(u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
|
||||
(u'Deportes', u'http://elperiodicodearagon.com/RSS/7.xml'),
|
||||
(u'Real Zaragoza', u'http://elperiodicodearagon.com/RSS/10.xml'),
|
||||
(u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
|
||||
(u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
|
||||
(u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
|
||||
(u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')
|
||||
(u'Portada', u'http://zetaestaticos.com/aragon/rss/portada_es.xml'),
|
||||
(u'Arag\xf3n', u'http://zetaestaticos.com/aragon/rss/2_es.xml'),
|
||||
(u'Internacional', u'http://zetaestaticos.com/aragon/rss/4_es.xml'),
|
||||
(u'Espa\xf1a', u'http://zetaestaticos.com/aragon/rss/3_es.xml'),
|
||||
(u'Econom\xeda', u'http://zetaestaticos.com/aragon/rss/5_es.xml'),
|
||||
(u'Deportes', u'http://zetaestaticos.com/aragon/rss/7_es.xml'),
|
||||
(u'Real Zaragoza', u'http://zetaestaticos.com/aragon/rss/10_es.xml'),
|
||||
(u'CAI Zaragoza', u'http://zetaestaticos.com/aragon/rss/91_es.xml'),
|
||||
(u'Monta\xf1ismo', u'http://zetaestaticos.com/aragon/rss/354_es.xml'),
|
||||
(u'Opini\xf3n', u'http://zetaestaticos.com/aragon/rss/103_es.xml'),
|
||||
(u'Tema del d\xeda', u'http://zetaestaticos.com/aragon/rss/102_es.xml'),
|
||||
(u'Escenarios', u'http://zetaestaticos.com/aragon/rss/105_es.xml'),
|
||||
(u'Sociedad', u'http://zetaestaticos.com/aragon/rss/104_es.xml'),
|
||||
(u'Gente', u'http://zetaestaticos.com/aragon/rss/330_es.xml'),
|
||||
(u'Espacio 3', u'http://zetaestaticos.com/aragon/rss/328_es.xml'),
|
||||
(u'Fiestas del Pilar', u'http://zetaestaticos.com/aragon/rss/107_es.xml')
|
||||
]
|
||||
|
||||
|
||||
extra_css = '''
|
||||
h3 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
|
||||
h2 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:18px;}
|
||||
h4 {font-family:Arial,Helvetica,sans-serif; font-style:italic; font-weight:normal;font-size:20px;}
|
||||
.columnaDeRecursosRelacionados {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
|
||||
img{margin-bottom: 0.4em}
|
||||
'''
|
||||
|
||||
remove_attributes = ['height','width']
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'contenidos'})]
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'Noticia'})]
|
||||
|
||||
|
||||
# Quitar toda la morralla
|
||||
|
||||
remove_tags = [dict(name='ul', attrs={'class':'herramientasDeNoticia'}),
|
||||
dict(name='span', attrs={'class':'MasInformacion '}),
|
||||
dict(name='span', attrs={'class':'MasInformacion'}),
|
||||
dict(name='div', attrs={'class':'Middle'}),
|
||||
dict(name='div', attrs={'class':'MenuCabeceraRZaragoza'}),
|
||||
dict(name='div', attrs={'id':'MenuCabeceraRZaragoza'}),
|
||||
dict(name='div', attrs={'class':'MenuEquipo'}),
|
||||
dict(name='div', attrs={'class':'TemasRelacionados'}),
|
||||
dict(name='div', attrs={'class':'GaleriaEnNoticia'}),
|
||||
dict(name='div', attrs={'class':'Recorte'}),
|
||||
dict(name='div', attrs={'id':'NoticiasenRecursos'}),
|
||||
dict(name='div', attrs={'id':'NoticiaEnPapel'}),
|
||||
dict(name='p', attrs={'class':'RecorteEnNoticias'}),
|
||||
dict(name='div', attrs={'id':'Comparte'}),
|
||||
dict(name='div', attrs={'id':'CajaComparte'}),
|
||||
dict(name='a', attrs={'class':'EscribirComentario'}),
|
||||
dict(name='a', attrs={'class':'AvisoComentario'}),
|
||||
dict(name='div', attrs={'class':'CajaAvisoComentario'}),
|
||||
dict(name='div', attrs={'class':'navegaNoticias'}),
|
||||
dict(name='div', attrs={'class':'Mensaje'}),
|
||||
dict(name='div', attrs={'id':'PaginadorDiCom'}),
|
||||
dict(name='div', attrs={'id':'CajaAccesoCuentaUsuario'}),
|
||||
dict(name='div', attrs={'id':'CintilloComentario'}),
|
||||
dict(name='div', attrs={'id':'EscribeComentario'}),
|
||||
dict(name='div', attrs={'id':'FormularioComentario'}),
|
||||
dict(name='div', attrs={'id':'FormularioNormas'})]
|
||||
|
||||
# Recuperamos la portada de papel (la imagen format=1 tiene mayor resolucion)
|
||||
|
||||
def get_cover_url(self):
|
||||
@ -104,23 +72,7 @@ class elperiodicodearagon(BasicNewsRecipe):
|
||||
return image['src'].rstrip('format=2') + 'format=1'
|
||||
return None
|
||||
|
||||
# Para quitar espacios entre la noticia y los comentarios (lineas 1 y 2)
|
||||
# El indice no apuntaba correctamente al empiece de la noticia (linea 3)
|
||||
# Usamos la versión para móviles
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
||||
(re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
||||
(re.compile(r'<p id="">', re.DOTALL|re.IGNORECASE), lambda match: '<p>')
|
||||
]
|
||||
|
||||
# Para sustituir el video incrustado de YouTube por una imagen
|
||||
|
||||
def preprocess_html(self, soup):
    """Swap embedded YouTube players for a still image of the video.

    Every ``iframe`` whose title is "YouTube video player" is renamed to
    an ``img`` tag, and its ``src`` is rewritten from the YouTube embed
    address to the matching ``img.youtube.com`` ``0.jpg`` address.

    :param soup: parsed article document; it is modified in place.
    :return: the same ``soup`` object.
    """
    embed_prefix = 'http://www.youtube.com/embed/'
    image_prefix = 'http://img.youtube.com/vi/'
    for player in soup.findAll('iframe', {'title': 'YouTube video player'}):
        if not player:
            continue
        # Rename the tag first, then point it at the still image.
        player.name = 'img'
        player['src'] = player['src'].replace(embed_prefix, image_prefix) + '/0.jpg'
    return soup
|
||||
def print_version(self, url):
    """Return the mobile-site (``/m/``) address for an article URL.

    URLs that do not contain the desktop site root are returned unchanged.
    """
    desktop_root = 'http://www.elperiodicodearagon.com/'
    mobile_root = 'http://www.elperiodicodearagon.com/m/'
    # Splitting on the desktop root and re-joining with the mobile root
    # substitutes every occurrence, exactly like str.replace.
    return mobile_root.join(url.split(desktop_root))
|
||||
|
@ -2,8 +2,8 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '4 February 2011, desUBIKado'
|
||||
__author__ = 'desUBIKado'
|
||||
__version__ = 'v0.05'
|
||||
__date__ = '13, April 2011'
|
||||
__version__ = 'v0.07'
|
||||
__date__ = '13, November 2011'
|
||||
'''
|
||||
http://www.weblogssl.com/
|
||||
'''
|
||||
@ -33,6 +33,7 @@ class weblogssl(BasicNewsRecipe):
|
||||
|
||||
feeds = [
|
||||
(u'Xataka', u'http://feeds.weblogssl.com/xataka2')
|
||||
,(u'Xataka Mexico', u'http://feeds.weblogssl.com/xatakamx')
|
||||
,(u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil')
|
||||
,(u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid')
|
||||
,(u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto')
|
||||
@ -40,6 +41,7 @@ class weblogssl(BasicNewsRecipe):
|
||||
,(u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia')
|
||||
,(u'Genbeta', u'http://feeds.weblogssl.com/genbeta')
|
||||
,(u'Genbeta Dev', u'http://feeds.weblogssl.com/genbetadev')
|
||||
,(u'Genbeta Social Media', u'http://feeds.weblogssl.com/genbetasocialmedia')
|
||||
,(u'Applesfera', u'http://feeds.weblogssl.com/applesfera')
|
||||
,(u'Vida Extra', u'http://feeds.weblogssl.com/vidaextra')
|
||||
,(u'Naci\xf3n Red', u'http://feeds.weblogssl.com/nacionred')
|
||||
@ -51,7 +53,6 @@ class weblogssl(BasicNewsRecipe):
|
||||
,(u'Pop rosa', u'http://feeds.weblogssl.com/poprosa')
|
||||
,(u'Zona FandoM', u'http://feeds.weblogssl.com/zonafandom')
|
||||
,(u'Fandemia', u'http://feeds.weblogssl.com/fandemia')
|
||||
,(u'Noctamina', u'http://feeds.weblogssl.com/noctamina')
|
||||
,(u'Tendencias', u'http://feeds.weblogssl.com/trendencias')
|
||||
,(u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas')
|
||||
,(u'Directo al paladar', u'http://feeds.weblogssl.com/directoalpaladar')
|
||||
@ -60,8 +61,8 @@ class weblogssl(BasicNewsRecipe):
|
||||
,(u'Embelezzia', u'http://feeds.weblogssl.com/embelezzia')
|
||||
,(u'Vit\xf3nica', u'http://feeds.weblogssl.com/vitonica')
|
||||
,(u'Ambiente G', u'http://feeds.weblogssl.com/ambienteg')
|
||||
,(u'Arrebatadora', u'http://feeds.weblogssl.com/arrebatadora')
|
||||
,(u'Mensencia', u'http://feeds.weblogssl.com/mensencia')
|
||||
,(u'Tendencias Belleza', u'http://feeds.weblogssl.com/trendenciasbelleza')
|
||||
,(u'Tendencias Hombre', u'http://feeds.weblogssl.com/trendenciashombre')
|
||||
,(u'Peques y m\xe1s', u'http://feeds.weblogssl.com/pequesymas')
|
||||
,(u'Motorpasi\xf3n', u'http://feeds.weblogssl.com/motorpasion')
|
||||
,(u'Motorpasi\xf3n F1', u'http://feeds.weblogssl.com/motorpasionf1')
|
||||
@ -69,7 +70,6 @@ class weblogssl(BasicNewsRecipe):
|
||||
,(u'Motorpasi\xf3n Futuro', u'http://feeds.weblogssl.com/motorpasionfuturo')
|
||||
,(u'Notas de futbol', u'http://feeds.weblogssl.com/notasdefutbol')
|
||||
,(u'Fuera de l\xedmites', u'http://feeds.weblogssl.com/fueradelimites')
|
||||
,(u'Salir a ganar', u'http://feeds.weblogssl.com/saliraganar')
|
||||
,(u'El blog salm\xf3n', u'http://feeds.weblogssl.com/elblogsalmon2')
|
||||
,(u'Pymes y aut\xf3nomos', u'http://feeds.weblogssl.com/pymesyautonomos')
|
||||
,(u'Tecnolog\xeda Pyme', u'http://feeds.weblogssl.com/tecnologiapyme')
|
||||
@ -105,3 +105,22 @@ class weblogssl(BasicNewsRecipe):
|
||||
|
||||
return soup
|
||||
|
||||
# Para obtener la url original del articulo a partir de la de "feedsportal"
|
||||
# El siguiente código es gracias al usuario "bosplans" de www.mobileread.com
|
||||
# http://www.mobileread.com/forums/showthread.php?t=130297
|
||||
|
||||
def get_article_url(self, article):
    """Return the original article URL for a feed entry.

    Links that end in ``story01.htm`` carry the real address encoded in
    the preceding path segment using two-character ``0X`` escape codes.
    Those links are decoded and returned as an ``http://`` URL; any other
    link is returned unchanged.

    :param article: feed entry exposing ``get('link', default)``.
    :return: the article URL as a string, or ``None`` when the entry has
        no link (previously the entry object itself was returned, which
        is not a URL).
    """
    link = article.get('link', None)
    if link is None:
        return None

    segments = link.split('/')
    # Require a segment before "story01.htm"; a bare "story01.htm" has
    # nothing to decode and is returned as-is instead of raising IndexError.
    if len(segments) < 2 or segments[-1] != "story01.htm":
        return link

    # (code, replacement) pairs, applied strictly in this order. '0A'
    # stands for a literal '0' and is handled last so the zeros it
    # produces are not re-read as the start of another escape code.
    escapes = (
        ('0B', '.'),
        ('0C', '/'),
        ('0D', '?'),
        ('0E', '-'),
        ('0F', '='),
        ('0G', '&'),
        ('0N', '.com'),
        ('0L0S', 'www.'),
        ('0A', '0'),
    )
    decoded = segments[-2]
    for code, replacement in escapes:
        decoded = decoded.replace(code, replacement)
    return "http://" + decoded
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user