Update Diagonales

Fixes #1239235 [Updated recipe for Diagonales](https://bugs.launchpad.net/calibre/+bug/1239235)
This commit is contained in:
Kovid Goyal 2013-10-13 07:50:48 +05:30
parent 22289f95d4
commit fac708b267
2 changed files with 26 additions and 48 deletions

View File

@ -1,72 +1,50 @@
#!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2009-2013, Darko Miletic <darko.miletic at gmail.com>'
''' '''
elargentino.com diagonales.infonews.com
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class Diagonales(BasicNewsRecipe): class Diagonales(BasicNewsRecipe):
title = 'Diagonales' title = 'Diagonales'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'El nuevo diario de La Plata' description = 'Para estar bien informado sobre los temas de actualidad. Conoce sobre pais, economia, deportes, mundo, espectaculos, sociedad, entrevistas y tecnologia.'
publisher = 'ElArgentino.com' publisher = 'INFOFIN S.A.'
category = 'news, politics, Argentina, La Plata' category = 'news, politics, Argentina, La Plata'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf-8' encoding = 'utf-8'
language = 'es_AR' language = 'es_AR'
publication_type = 'newspaper'
lang = 'es-AR' delay = 1
direction = 'ltr' remove_empty_feeds = True
INDEX = 'http://www.elargentino.com/medios/122/Diagonales.html'
extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} ' extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '
html2lrf_options = [ conversion_options = {
'--comment' , description 'comment' : description
, '--category' , category , 'tags' : category
, '--publisher', publisher , 'publisher' : publisher
] , 'language' : language
}
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})] keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]
remove_tags = [dict(name='link')]
remove_tags = [dict(name='link')] feeds = [
(u'Pais' , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=112&Content-Type=text/xml&ChannelDesc=Pa%C3%ADs')
feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=122&Content-Type=text/xml&ChannelDesc=Diagonales')] ,(u'Deportes' , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=106&Content-Type=text/xml&ChannelDesc=Deportes')
,(u'Economia' , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=107&Content-Type=text/xml&ChannelDesc=Econom%C3%ADa')
,(u'Sociedad' , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=109&Content-Type=text/xml&ChannelDesc=Sociedad')
,(u'Mundo' , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=113&Content-Type=text/xml&ChannelDesc=Mundo')
,(u'Espectaculos', u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=114&Content-Type=text/xml&ChannelDesc=Espect%C3%A1culos')
,(u'Entrevistas' , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=115&Content-Type=text/xml&ChannelDesc=Entrevistas')
,(u'Tecnologia' , u'http://diagonales.infonews.com/Highlights.aspx?ParentType=Section&ParentId=118&Content-Type=text/xml&ChannelDesc=Tecnolog%C3%ADa')
]
def print_version(self, url):
    """Return the print-view URL for an article URL.

    The article id is the text between the ``/nota-`` marker and the next
    ``-`` in *url*; it is appended to the site's ``Impresion.aspx?Id=``
    endpoint. When *url* has no ``/nota-`` marker the id is empty and the
    bare endpoint URL is returned.
    """
    # partition() yields '' for the tail when the separator is missing,
    # so a URL without '/nota-' produces an empty article id.
    article_part = url.partition('/nota-')[2]
    article_id = article_part.partition('-')[0]
    return u'http://diagonales.infonews.com/Impresion.aspx?Id=' + article_id
def preprocess_html(self, soup):
    """Normalise a downloaded page before conversion and return it.

    Steps, all applied in place on *soup*:

    * drop the ``style`` attribute from every element that has one;
    * set ``lang`` / ``dir`` on the ``<html>`` element from ``self.lang``
      and ``self.direction``;
    * insert Content-Language and Content-Type (UTF-8) ``<meta>`` tags as
      the first two children of ``<head>``.

    The ``<html>`` and ``<head>`` steps are skipped when the document has
    no such element, so a fragment without them no longer raises.
    """
    for item in soup.findAll(style=True):
        del item['style']

    root = soup.html
    if root is not None:
        root['lang'] = self.lang
        root['dir'] = self.direction

    head = soup.head
    if head is not None:
        mlang = Tag(soup, 'meta', [("http-equiv", "Content-Language"), ("content", self.lang)])
        mcharset = Tag(soup, 'meta', [("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
        # Language first, then charset, ahead of any existing head content.
        head.insert(0, mlang)
        head.insert(1, mcharset)
    return soup
def get_cover_url(self):
    """Return the cover image URL scraped from the index page, or None.

    Loads ``self.INDEX`` through ``self.index_to_soup``, finds the
    ``div`` with class ``colder`` and reads the ``src`` of the image
    reached through ``cover_item.div.img``. That source is passed through
    ``self.image_url_processor`` and wrapped with the site prefix and a
    fixed ``&height=600`` suffix.

    Returns None when the container, the nested image, or its ``src``
    attribute is missing, instead of raising.
    """
    soup = self.index_to_soup(self.INDEX)
    cover_item = soup.find('div', attrs={'class': 'colder'})
    if not cover_item:
        return None

    # Walk the div > img chain defensively: any missing link means there
    # is no usable cover on the page.
    inner = cover_item.div
    img = inner.img if inner is not None else None
    src = img.get('src') if img is not None else None
    if not src:
        return None

    clean_url = self.image_url_processor(None, src)
    return 'http://www.elargentino.com' + clean_url + '&height=600'
def image_url_processor(self, baseurl, url):
    """Strip trailing query parameters that follow the image id in *url*.

    Everything up to and including the last ``?Id=`` is kept, followed by
    the id value up to (not including) the next ``&``. *baseurl* is not
    used. A URL that contains no ``?Id=`` marker is returned unchanged
    rather than being cut at its first ``&``.
    """
    base, sep, rest = url.rpartition('?Id=')
    if not sep:
        # No id marker: nothing to normalise, leave the URL intact.
        return url
    return base + sep + rest.partition('&')[0]

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.8 KiB