Italian newspaper by Lorenzo Vigentini

This commit is contained in:
Kovid Goyal 2010-01-31 23:46:57 -07:00
parent 876c63aa2a
commit 8bc1415d94
6 changed files with 228 additions and 4 deletions

View File

@ -5,7 +5,7 @@
# Also, each release can have new and improved recipes.
- version: 0.6.37
date: 2010-01-31
date: 2010-02-01
new features:
- title: "E-book viewer: Add support for viewing SVG images"
@ -94,6 +94,9 @@
- title: NY Times Sunday Book Review
author: Krittika Goyal
- title: Various Italian newspapers
author: Lorenzo Vigentini
improved recipes:
- The Irish Times

View File

@ -0,0 +1,67 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini & Edwin van Maastrigt'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com> and Edwin van Maastrigt <evanmaastrigt at gmail.com>'
__description__ = 'Financial news daily paper - v1.02 (30, January 2010)'
'''
http://www.ilsole24ore.com/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ilsole(BasicNewsRecipe):
    """Calibre news recipe for the Italian financial daily il Sole 24 Ore.

    Articles are listed through the RSS feeds in ``feeds``.  Each article URL
    is taken from the feed entry (see ``get_article_url``) and rewritten to
    its ``_PRN.shtml`` variant (see ``print_version``).
    """

    # --- Publication metadata -------------------------------------------
    author = 'Lorenzo Vigentini & Edwin van Maastrigt'
    description = 'Financial news daily paper'
    cover_url = 'http://www.ilsole24ore.com/img2009/header/t_logosole.gif'
    title = u'il Sole 24 Ore '
    publisher = 'italiaNews'
    category = 'News, finance, economy, politics'
    language = 'it'
    timefmt = '[%a, %d %b, %Y]'

    # --- Download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 50
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True

    def get_article_url(self, article):
        """Return the URL of a feed entry.

        The entry's ``id`` is preferred; ``guid`` is the fallback.  Returns
        ``None`` when the entry carries neither key.
        """
        return article.get('id', article.get('guid', None))

    def print_version(self, url):
        """Return ``url`` without its query string, pointing at the
        ``_PRN.shtml`` page instead of the ``.shtml`` page.

        Everything from the last ``?`` onwards is dropped.  A URL that has no
        query string is used as-is.
        """
        link, sep, _params = url.rpartition('?')
        if not sep:
            # str.rpartition returns ('', '', url) when the separator is
            # absent; without this guard the result would be an empty URL.
            link = url
        return link.replace('.shtml', '_PRN.shtml')

    # --- Page clean-up --------------------------------------------------
    # Only the <div class="txt"> element is kept, and <br> tags are removed.
    keep_only_tags = [
        dict(name='div', attrs={'class':'txt'})
    ]
    remove_tags = [dict(name='br')]

    # --- Sections: (title, RSS feed URL) --------------------------------
    feeds = [
        (u'Prima pagina', u'http://www.ilsole24ore.com/rss/primapagina.xml'),
        (u'Norme e tributi', u'http://www.ilsole24ore.com/rss/norme-tributi.xml'),
        (u'Finanza e mercati', u'http://www.ilsole24ore.com/rss/finanza-mercati.xml'),
        (u'Economia e lavoro', u'http://www.ilsole24ore.com/rss/economia-lavoro.xml'),
        (u'Italia', u'http://www.ilsole24ore.com/rss/italia.xml'),
        (u'Mondo', u'http://www.ilsole24ore.com/rss/mondo.xml'),
        (u'Tecnologia e business', u'http://www.ilsole24ore.com/rss/tecnologia-business.xml'),
        (u'Cultura e tempo libero', u'http://www.ilsole24ore.com/rss/tempolibero-cultura.xml'),
        (u'Sport', u'http://www.ilsole24ore.com/rss/sport.xml'),
        (u'Professionisti 24', u'http://www.ilsole24ore.com/rss/prof_home.xml')
    ]

    # --- Stylesheet applied to the generated e-book ---------------------
    extra_css = '''
html, body, table, tr, td, h1, h2, h3, h4, h5, h6, p, a, span, br, img {margin:0;padding:0;border:0;font-size:12px;font-family:Arial;}
.linkHighlight {color:#0292c6;}
.txt {border-bottom:1px solid #7c7c7c;padding-bottom:20px;text-align:justify;}
.txt p {line-height:18px;}
.txt span {line-height:22px;}
.title h3 {color:#7b7b7b;}
.title h4 {color:#08526e;font-size:26px;font-family:"Times New Roman";font-weight:normal;}
'''

View File

@ -0,0 +1,89 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.01'
__date__ = '10, January 2010'
__description__ = 'Monthly Italian edition of Scientific American'
'''
http://lescienze.espresso.repubblica.it/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class leScienze(BasicNewsRecipe):
    """Calibre news recipe for le Scienze, the monthly Italian edition of
    Scientific American.

    The recipe is purely declarative: it lists the RSS feeds to read, the
    HTML elements to keep or strip, and the stylesheet for the output.
    """

    # --- Publication metadata -------------------------------------------
    title = 'le Scienze'
    author = 'Lorenzo Vigentini'
    description = 'Monthly Italian edition of Scientific American'
    publisher = 'Gruppo editoriale lEspresso'
    category = 'Science, general interest'
    language = 'it'
    cover_url = 'http://lescienze.espresso.repubblica.it/images/logo_lescienze.gif'
    timefmt = '[%a, %d %b, %Y]'

    # --- Download settings ----------------------------------------------
    encoding = 'cp1252'
    oldest_article = 31
    max_articles_per_feed = 20
    recursion = 10
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    # --- Page clean-up --------------------------------------------------
    # Keep the <div class="bigbox"> element only.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'bigbox'}},
    ]
    # Elements stripped from the kept content.
    remove_tags = [
        {'name': 'span', 'attrs': {'class': 'linkindice'}},
        {'name': 'div', 'attrs': {'class': 'box-commenti'}},
        {'name': 'div', 'attrs': {'id': ['rssdiv', 'blocco']}},
    ]
    # Everything following the comments box is dropped as well.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': 'box-commenti'}},
    ]

    # --- Sections: (title, RSS feed URL) --------------------------------
    feeds = [
        (u'Antropologia', u'http://data.kataweb.it/rss/scienze/antropologia'),
        (u'Archeologia', u'http://data.kataweb.it/rss/scienze/archeologia'),
        (u'Arte e Musica', u'http://data.kataweb.it/rss/scienze/arte_e_musica'),
        (u'Astrofisica', u'http://data.kataweb.it/rss/scienze/astrofisica'),
        (u'Astronautica', u'http://data.kataweb.it/rss/scienze/astronautica'),
        (u'Astronomia', u'http://data.kataweb.it/rss/scienze/astronomia_e_cosmologia'),
        (u'Biologia', u'http://data.kataweb.it/rss/scienze/biologia'),
        (u'Chimica', u'http://data.kataweb.it/rss/scienze/chimica'),
        (u'Ecologia & ambiente', u'http://data.kataweb.it/rss/scienze/ecologia_e_ambiente'),
        (u'Economia', u'http://data.kataweb.it/rss/scienze/Economia'),
        (u'Fisica', u'http://data.kataweb.it/rss/scienze/Fisica'),
        (u'Informatica', u'http://data.kataweb.it/rss/scienze/informatica_e_telecomunicazioni'),
        (u'Ingegneria', u'http://data.kataweb.it/rss/scienze/ingegneria_e_tecnologia'),
        (u'Matematica', u'http://data.kataweb.it/rss/scienze/Matematica'),
        (u'Medicina', u'http://data.kataweb.it/rss/scienze/Medicina'),
        (u'Paleontologia', u'http://data.kataweb.it/rss/scienze/Paleontologia'),
        (u'Recensioni', u'http://data.kataweb.it/rss/scienze/Recensioni'),
        (u'Psicologia', u'http://data.kataweb.it/rss/scienze/psicologie_e_scienze_cognitive'),
        (u'Scienze della Terra', u'http://data.kataweb.it/rss/scienze/scienze_della_terra'),
        (u'Scienze dello spazio', u'http://data.kataweb.it/rss/scienze/scienze_dello_spazio'),
        (u'Scienze naturali', u'http://data.kataweb.it/rss/scienze/scienze_naturali'),
        (u'Scienze sociali', u'http://data.kataweb.it/rss/scienze/scienze_sociali'),
        (u'Statistica', u'http://data.kataweb.it/rss/scienze/statistica'),
        (u'Storia della scienza', u'http://data.kataweb.it/rss/scienze/storia_della_scienza')
    ]

    # --- Stylesheet applied to the generated e-book ---------------------
    extra_css = '''
h1 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
h2 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
h3 {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
h4 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
h5 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
.occhiello {color:#666666;display:block;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:13px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:15px;}
.titolo {font-weight:bold;}
.label {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;height:15px;line-height:15px;text-transform:uppercase;}
.firma {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;}
.testo {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;}
'''

View File

@ -72,9 +72,8 @@ class Nin(BasicNewsRecipe):
section = self.tag_to_string(item)
feedlink = self.PREFIX + item['href']
feedpage = self.index_to_soup(feedlink)
self.report_progress(0, _('Fetching feed')+' %s...'%(section))
self.report_progress(0, _('Fetching feed')+' %s...'%(section))
inarts = []
count2 = 0
for art in feedpage.findAll('span',attrs={'class':'artTitle'}):
alink = art.parent
url = self.PREFIX + alink['href']

View File

@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
nytimes.com
'''
import re
import re, time
from calibre import entity_to_unicode
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment

View File

@ -0,0 +1,66 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.01'
__date__ = '30, January 2010'
__description__ = 'Sport daily news from Italy'
'''www.tuttosport.com'''
from calibre.web.feeds.news import BasicNewsRecipe
class tuttosport(BasicNewsRecipe):
    """Calibre news recipe for Tuttosport, an Italian sports daily.

    Articles are listed through the RSS feeds in ``feeds`` and each article
    URL is rewritten by ``print_version`` before download.
    """

    # --- Publication metadata -------------------------------------------
    author = 'Lorenzo Vigentini'
    description = 'Sport daily news from Italy'
    cover_url = 'http://www.tuttosport.com/res/imgs/logo_TuttoSport.png'
    title = 'Tuttosport'
    publisher = 'Nuova Editoriale Sportiva S.r.l'
    category = 'Sport News'
    language = 'it'
    timefmt = '[%a, %d %b, %Y]'

    # --- Download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 20
    use_embedded_content = False
    recursion = 10
    remove_javascript = True
    no_stylesheets = True

    def print_version(self,url):
        """Return the print URL for ``url``.

        The URL is cut down to its first ten ``/``-separated segments and
        ``?print`` is appended.  URLs with fewer segments are kept whole.
        """
        segments = url.split('/')
        printURL = '/'.join(segments[0:10]) + '?print'
        return printURL

    # --- Page clean-up --------------------------------------------------
    # Only the article title and the listed content <div> classes are kept.
    keep_only_tags = [
        dict(name='h2', attrs={'class':'tit_Article'}),
        dict(name='div', attrs={'class':['box_Img img_L ','txt_ArticleAbstract','txt_Article txtBox_cms']})
    ]

    # --- Sections: (title, RSS feed URL) --------------------------------
    # Fixed: the 'Formula 1' and 'Moto' URLs used the invalid scheme
    # 'hhttp://', and the 'Cronaca' section title was misspelled.
    feeds = [
        (u'Primo piano',u'http://www.tuttosport.com/rss/primo_piano.xml'),
        (u'Cronaca',u'http://www.tuttosport.com/rss/Cronaca-205.xml'),
        (u'Lettere al direttore',u'http://blog.tuttosport.com/direttore/feed'),
        (u'Calcio',u'http://www.tuttosport.com/rss/Calcio-3.xml'),
        (u'Speciale Derby',u'http://www.tuttosport.com/rss/Speciale-derby-310.xml'),
        (u'Formula 1',u'http://www.tuttosport.com/rss/Formula-1-7.xml'),
        (u'Moto',u'http://www.tuttosport.com/rss/Moto-8.xml'),
        (u'Basket',u'http://www.tuttosport.com/rss/Basket-9.xml'),
        (u'Altri Sport',u'http://www.tuttosport.com/rss/Altri-Sport-2.xml'),
        (u'Tuttosport League',u'http://www.tuttosport.com/rss/Tuttosport-League-245.xml'),
        (u'Scommesse',u'http://www.tuttosport.com/rss/Scommesse-286.xml')
    ]

    # --- Stylesheet applied to the generated e-book ---------------------
    extra_css = '''
body {font-family: Arial, Verdana, sans-serif; margin-bottom: 3em;}
h1 {color:#9C3A0B;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;}
h3 {color:#9C3A0B;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:15px;}
h2.tit_Article {color:#9C3A0B;margin: 15px 8px 0; margin-bottom: 1px; border-bottom: 3px solid;}
.txt_ArticleAbstract {color:#4080AE;clear: both; margin: 3px 8px;}
.txt_Article {clear: both; margin: 8px 8px 12px;}
.txt_Author {float: right;}
.txt_ArticleAuthor {clear: both; margin: 8px;}
'''