mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Updated La Jornada
This commit is contained in:
parent
2d6009b45f
commit
a3bb2c06fb
BIN
resources/images/news/la_jornada.png
Normal file
BIN
resources/images/news/la_jornada.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 943 B |
@ -18,7 +18,7 @@ class Clarin(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
language = 'es_AR'
|
||||
language = 'es'
|
||||
publication_type = 'newspaper'
|
||||
INDEX = 'http://www.clarin.com'
|
||||
masthead_url = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg'
|
||||
|
@ -20,7 +20,7 @@ class Europasur(BasicNewsRecipe):
|
||||
delay = 2
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
language = 'es_ES'
|
||||
language = 'es'
|
||||
publication_type = 'newspaper'
|
||||
extra_css = """ body{font-family: Verdana,Arial,Helvetica,sans-serif}
|
||||
h2{font-family: Georgia,Times New Roman,Times,serif}
|
||||
|
@ -1,120 +1,64 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Rogelio Dominguez <rogelio.dominguez at gmail.com>'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.jornada.unam.mx
|
||||
'''
|
||||
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
|
||||
import re
|
||||
|
||||
class LaJornada(BasicNewsRecipe):
|
||||
title = u'La Jornada'
|
||||
language = 'es'
|
||||
oldest_article = 1
|
||||
__author__ = 'rogeliodh'
|
||||
max_articles_per_feed = 100
|
||||
remove_tags = [dict(name='div', attrs={'class':['go gui','go gui top','comment-cont',]})]
|
||||
remove_tags_before = dict(id='article-cont')
|
||||
remove_tags_after = dict(id='article-cont')
|
||||
class LaJornada_mx(BasicNewsRecipe):
|
||||
title = 'La Jornada (Mexico)'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Noticias del diario mexicano La Jornada'
|
||||
publisher = 'DEMOS, Desarrollo de Medios, S.A. de C.V.'
|
||||
category = 'news, Mexico'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
extra_css = ' .series{ \
|
||||
border-bottom: 1px solid #626366; \
|
||||
font-weight: bold; \
|
||||
} \
|
||||
.sumario{ \
|
||||
font-weight: bold; \
|
||||
margin-top: 2em; \
|
||||
text-align: center \
|
||||
} \
|
||||
p.sumario{ \
|
||||
text-align: center \
|
||||
} \
|
||||
.sumarios{font-weight: bold} \
|
||||
.cabeza{ font-size: 1.5em} \
|
||||
.pie-foto { \
|
||||
text-align: justify; \
|
||||
font-size: 0.8em; \
|
||||
text-align: justify; \
|
||||
} \
|
||||
.pie-foto .credito { \
|
||||
font-weight: bold; \
|
||||
display: block \
|
||||
} \
|
||||
.credito-autor{ \
|
||||
margin-top: 1.5em; \
|
||||
padding-left: 0.6em; \
|
||||
border-bottom: 1px solid #626366; \
|
||||
font-variant: small-caps; \
|
||||
font-weight: bold \
|
||||
} \
|
||||
.credito-articulo{ \
|
||||
margin-top: 1.5em; \
|
||||
padding-left: 0.6em; \
|
||||
border-bottom: 1px solid #626366; \
|
||||
font-variant: small-caps; \
|
||||
font-weight: bold \
|
||||
} \
|
||||
.credito-titulo{text-align: right} \
|
||||
.hemero { \
|
||||
text-align: right; \
|
||||
font-size: 0.9em; \
|
||||
margin-bottom: 8px; \
|
||||
} \
|
||||
.loc { \
|
||||
font-weight: bold; \
|
||||
} \
|
||||
.carton { \
|
||||
text-align: center; \
|
||||
} \
|
||||
.credit { \
|
||||
font-weight: bold; \
|
||||
} \
|
||||
'
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
remove_empty_feeds = True
|
||||
cover_url = strftime("http://www.jornada.unam.mx/%Y/%m/%d/planitas/portadita.jpg")
|
||||
masthead_url = 'http://www.jornada.unam.mx/v7.0/imagenes/la-jornada-trans.png'
|
||||
extra_css = """
|
||||
body{font-family: "Times New Roman",serif }
|
||||
.cabeza{font-size: xx-large; font-weight: bold }
|
||||
.credito-articulo{font-size: 1.3em}
|
||||
"""
|
||||
|
||||
preprocess_regexps = [
|
||||
# Remove capitalized initial letter on some articles (editorial)
|
||||
(re.compile(r'<div class="inicial">(.*)</div><p class="s-s">', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: match.group(1)),
|
||||
# Cartons section uses a class instead of a div to identify the main content. Change it.
|
||||
(re.compile(r'class="carton"', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: 'id="article-cont" class="carton"'),
|
||||
# Remove <link rel="alternate"> as calibre has a bug (to report)
|
||||
(re.compile(r'<link rel="alternate".*?/>', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: ''),
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':['documentContent','cabeza','sumarios','text']})
|
||||
,dict(name='div', attrs={'id':'renderComments'})
|
||||
]
|
||||
remove_tags = [dict(name='div', attrs={'class':'buttonbar'})]
|
||||
|
||||
INDEX = 'http://www.jornada.unam.mx/rss/edicion.xml'
|
||||
feeds = [
|
||||
(u'Opinion','http://www.jornada.unam.mx/rss/opinion.xml'),
|
||||
(u'Cartones','http://www.jornada.unam.mx/rss/cartones.xml'),
|
||||
(u'Política','http://www.jornada.unam.mx/rss/politica.xml'),
|
||||
(u'Economía','http://www.jornada.unam.mx/rss/economia.xml'),
|
||||
(u'Mundo','http://www.jornada.unam.mx/rss/mundo.xml'),
|
||||
(u'Estados','http://www.jornada.unam.mx/rss/estados.xml'),
|
||||
(u'Capital','http://www.jornada.unam.mx/rss/capital.xml'),
|
||||
(u'Sociedad','http://www.jornada.unam.mx/rss/sociedad.xml'),
|
||||
(u'Ciencias','http://www.jornada.unam.mx/rss/ciencias.xml'),
|
||||
(u'Cultura','http://www.jornada.unam.mx/rss/cultura.xml'),
|
||||
(u'Gastronomia','http://www.jornada.unam.mx/rss/gastronomia.xml'),
|
||||
(u'Espectáculos','http://www.jornada.unam.mx/rss/espectaculos.xml'),
|
||||
(u'Deportes','http://www.jornada.unam.mx/rss/deportes.xml'),
|
||||
(u'Ultimas noticias' , u'http://www.jornada.unam.mx/ultimas/news/RSS' )
|
||||
,(u'Opinion' , u'http://www.jornada.unam.mx/rss/opinion.xml' )
|
||||
,(u'Politica' , u'http://www.jornada.unam.mx/rss/politica.xml' )
|
||||
,(u'Economia' , u'http://www.jornada.unam.mx/rss/economia.xml' )
|
||||
,(u'Mundo' , u'http://www.jornada.unam.mx/rss/mundo.xml' )
|
||||
,(u'Estados' , u'http://www.jornada.unam.mx/rss/estados.xml' )
|
||||
,(u'Capital' , u'http://www.jornada.unam.mx/rss/capital.xml' )
|
||||
,(u'Sociedad y justicia' , u'http://www.jornada.unam.mx/rss/sociedad.xml' )
|
||||
,(u'Ciencias' , u'http://www.jornada.unam.mx/rss/ciencias.xml' )
|
||||
,(u'Cultura' , u'http://www.jornada.unam.mx/rss/cultura.xml' )
|
||||
,(u'Gastronomia' , u'http://www.jornada.unam.mx/rss/gastronomia.xml' )
|
||||
,(u'Espectaculos' , u'http://www.jornada.unam.mx/rss/espectaculos.xml' )
|
||||
,(u'Deportes' , u'http://www.jornada.unam.mx/rss/deportes.xml' )
|
||||
]
|
||||
|
||||
def get_cover_url(self):
|
||||
'''
|
||||
Cover URL is http://www.jornada.unam.mx/YYYY/MM/DD/portada.pdf
|
||||
'''
|
||||
cover_url = None
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
soupstone = BeautifulStoneSoup(str(soup))
|
||||
urlbase = str(soupstone('link')[0])
|
||||
r= re.compile(r'.*http://www.jornada.unam.mx/([0-9]{4})/([0-9]{2})/([0-9]{2})', re.DOTALL|re.IGNORECASE)
|
||||
m = r.match(urlbase)
|
||||
if m:
|
||||
cover_url = 'http://www.jornada.unam.mx/' + m.groups()[0] + '/' + m.groups()[1] + '/' + m.groups()[2] + '/portada.pdf'
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
return cover_url
|
||||
|
@ -166,7 +166,7 @@ Search & Sort
|
||||
|
||||
The Search & Sort section allows you to perform several powerful actions on your book collections.
|
||||
|
||||
* You can sort them by title, author, date, rating etc. by clicking on the column titles.
|
||||
* You can sort them by title, author, date, rating etc. by clicking on the column titles. You can also sub-sort (i.e. sort on multiple columns). For example, if you click on the title column and then the author column, the books will be sorted by author and then all the entries for the same author will be sorted by title.
|
||||
|
||||
* You can search for a particular book or set of books using the search bar. More on that below.
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user