New Honduran recipes by Darko Miletic

This commit is contained in:
Kovid Goyal 2009-07-06 17:16:10 -06:00
parent 4f1c599581
commit 3bb5d82da6
7 changed files with 174 additions and 1 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 306 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 553 B

View File

@ -51,9 +51,11 @@ recipe_modules = ['recipe_' + r for r in (
'theeconomictimes_india', '7dias', 'buenosaireseconomico', 'theeconomictimes_india', '7dias', 'buenosaireseconomico',
'diagonales', 'miradasalsur', 'newsweek_argentina', 'veintitres', 'diagonales', 'miradasalsur', 'newsweek_argentina', 'veintitres',
'gva_be', 'hln', 'tijd', 'degentenaar', 'inquirer_net', 'uncrate', 'gva_be', 'hln', 'tijd', 'degentenaar', 'inquirer_net', 'uncrate',
'fastcompany', 'accountancyage', 'fastcompany', 'accountancyage', 'laprensa_hn', 'latribuna',
'eltiempo_hn',
)] )]
import re, imp, inspect, time, os import re, imp, inspect, time, os
from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, AutomaticNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, AutomaticNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup

View File

@ -0,0 +1,52 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.tiempo.hn
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class ElTiempoHn(BasicNewsRecipe):
title = 'El Tiempo - Honduras'
__author__ = 'Darko Miletic'
description = 'Noticias de Honduras y mundo'
publisher = 'El Tiempo'
category = 'news, politics, Honduras'
oldest_article = 2
max_articles_per_feed = 100
use_embedded_content = False
no_stylesheets = True
remove_javascript = True
encoding = 'utf-8'
language = _('Spanish')
lang = 'es-HN'
direction = 'ltr'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} img {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em}"'
remove_tags = [dict(name=['form','object','embed','base'])]
keep_only_tags = [dict(name='td' , attrs={'id':'mainbodycont'})]
feeds = [(u'Noticias', u'http://www.tiempo.hn/index.php?format=feed&type=rss')]
def preprocess_html(self, soup):
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
for item in soup.findAll(style=True):
del item['style']
return self.adeify_images(soup)

View File

@ -0,0 +1,54 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.laprensahn.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class LaPrensaHn(BasicNewsRecipe):
title = 'La Prensa - Honduras'
__author__ = 'Darko Miletic'
description = 'Noticias de Honduras y mundo'
publisher = 'La Prensa'
category = 'news, politics, Honduras'
oldest_article = 2
max_articles_per_feed = 100
use_embedded_content = False
no_stylesheets = True
remove_javascript = True
encoding = 'utf-8'
language = _('Spanish')
lang = 'es-HN'
direction = 'ltr'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
remove_tags = [dict(name=['form','object','embed'])]
keep_only_tags = [
dict(name='h1' , attrs={'class':'titulo1'})
,dict(name='div', attrs={'class':['sumario11','hora','texto']})
]
feeds = [(u'Noticias', u'http://feeds.feedburner.com/laprensa_titulares')]
def preprocess_html(self, soup):
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -0,0 +1,65 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.latribuna.hn
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class LaTribuna(BasicNewsRecipe):
title = 'La Tribuna - Honduras'
__author__ = 'Darko Miletic'
description = 'Noticias de Honduras y mundo'
publisher = 'La Tribuna'
category = 'news, politics, Honduras'
oldest_article = 2
max_articles_per_feed = 100
use_embedded_content = False
no_stylesheets = True
remove_javascript = True
encoding = 'utf-8'
language = _('Spanish')
lang = 'es-HN'
direction = 'ltr'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
remove_tags = [dict(name=['form','object','embed'])]
keep_only_tags = [
dict(name='p', attrs={'id':['BlogTitle','BlogDate']})
,dict(name='div', attrs={'id':'BlogContent'})
]
feeds = [(u'Noticias', u'http://www.latribuna.hn/web2.0/?feed=rss')]
def print_version(self, url):
return url + '&print=1'
def preprocess_html(self, soup):
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
for item in soup.findAll(style=True):
del item['style']
return soup
def get_cover_url(self):
cover_url = None
soup = self.index_to_soup('http://www.latribuna.hn/web2.0/')
cover_item = soup.find('div',attrs={'class':'portada_impresa'})
if cover_item:
cover_url = cover_item.a.img['src']
return cover_url