diff --git a/Changelog.yaml b/Changelog.yaml index 6ca5b37926..1ec61fa0b5 100644 --- a/Changelog.yaml +++ b/Changelog.yaml @@ -5,7 +5,7 @@ # Also, each release can have new and improved recipes. - version: 0.6.37 - date: 2010-01-31 + date: 2010-02-01 new features: - title: "E-book viewer: Add support for viewing SVG images" @@ -94,6 +94,9 @@ - title: NY Time Sunday Book Review author: Krittika Goyal + - title: Various Italian newspapers + author: Lorenzo Vigentini + improved recipes: - The Irish Times diff --git a/resources/recipes/ilsole24ore.recipe b/resources/recipes/ilsole24ore.recipe new file mode 100644 index 0000000000..8258bb563d --- /dev/null +++ b/resources/recipes/ilsole24ore.recipe @@ -0,0 +1,67 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__author__ = 'Lorenzo Vigentini & Edwin van Maastrigt' +__copyright__ = '2009, Lorenzo Vigentini and Edwin van Maastrigt ' +__description__ = 'Financial news daily paper - v1.02 (30, January 2010)' + +''' +http://www.ilsole24ore.com/ +''' + +from calibre.web.feeds.news import BasicNewsRecipe + + +class ilsole(BasicNewsRecipe): + author = 'Lorenzo Vigentini & Edwin van Maastrigt' + description = 'Financial news daily paper' + + cover_url = 'http://www.ilsole24ore.com/img2009/header/t_logosole.gif' + title = u'il Sole 24 Ore ' + publisher = 'italiaNews' + category = 'News, finance, economy, politics' + + language = 'it' + timefmt = '[%a, %d %b, %Y]' + + oldest_article = 2 + max_articles_per_feed = 50 + use_embedded_content = False + + remove_javascript = True + no_stylesheets = True + + def get_article_url(self, article): + return article.get('id', article.get('guid', None)) + + def print_version(self, url): + link, sep, params = url.partition('?') + return link.replace('.shtml', '_PRN.shtml') + + keep_only_tags = [ + dict(name='div', attrs={'class':'txt'}) + ] + remove_tags = [dict(name='br')] + + feeds = [ + (u'Prima pagina', u'http://www.ilsole24ore.com/rss/primapagina.xml'), + (u'Norme e tributi', 
u'http://www.ilsole24ore.com/rss/norme-tributi.xml'), + (u'Finanza e mercati', u'http://www.ilsole24ore.com/rss/finanza-mercati.xml'), + (u'Economia e lavoro', u'http://www.ilsole24ore.com/rss/economia-lavoro.xml'), + (u'Italia', u'http://www.ilsole24ore.com/rss/italia.xml'), + (u'Mondo', u'http://www.ilsole24ore.com/rss/mondo.xml'), + (u'Tecnologia e business', u'http://www.ilsole24ore.com/rss/tecnologia-business.xml'), + (u'Cultura e tempo libero', u'http://www.ilsole24ore.com/rss/tempolibero-cultura.xml'), + (u'Sport', u'http://www.ilsole24ore.com/rss/sport.xml'), + (u'Professionisti 24', u'http://www.ilsole24ore.com/rss/prof_home.xml') + ] + + extra_css = ''' + html, body, table, tr, td, h1, h2, h3, h4, h5, h6, p, a, span, br, img {margin:0;padding:0;border:0;font-size:12px;font-family:Arial;} + .linkHighlight {color:#0292c6;} + .txt {border-bottom:1px solid #7c7c7c;padding-bottom:20px;text-align:justify;} + .txt p {line-height:18px;} + .txt span {line-height:22px;} + .title h3 {color:#7b7b7b;} + .title h4 {color:#08526e;font-size:26px;font-family:"Times New Roman";font-weight:normal;} + ''' + diff --git a/resources/recipes/lescienze.recipe b/resources/recipes/lescienze.recipe new file mode 100644 index 0000000000..13d7ea8ea2 --- /dev/null +++ b/resources/recipes/lescienze.recipe @@ -0,0 +1,89 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__author__ = 'Lorenzo Vigentini' +__copyright__ = '2009, Lorenzo Vigentini ' +__version__ = 'v1.01' +__date__ = '10, January 2010' +__description__ = 'Monthly Italian edition of Scientific American' + +''' +http://lescienze.espresso.repubblica.it/ +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class leScienze(BasicNewsRecipe): + author = 'Lorenzo Vigentini' + description = 'Monthly Italian edition of Scientific American' + + cover_url = 'http://lescienze.espresso.repubblica.it/images/logo_lescienze.gif' + title = 'le Scienze' + publisher = 'Gruppo editoriale lEspresso' + category = 'Science, general 
interest' + + language = 'it' + encoding = 'cp1252' + timefmt = '[%a, %d %b, %Y]' + + oldest_article = 31 + max_articles_per_feed = 20 + use_embedded_content = False + recursion = 10 + + remove_javascript = True + no_stylesheets = True + + + keep_only_tags = [ + dict(name='div', attrs={'class':'bigbox'}) + ] + + remove_tags = [ + dict(name='span',attrs={'class':'linkindice'}), + dict(name='div',attrs={'class':'box-commenti'}), + dict(name='div',attrs={'id':['rssdiv','blocco']}) + ] + remove_tags_after = [dict(name='div',attrs={'class':'box-commenti'})] + + feeds = [ + (u'Antropologia', u'http://data.kataweb.it/rss/scienze/antropologia'), + (u'Archeologia', u'http://data.kataweb.it/rss/scienze/archeologia'), + (u'Arte e Musica', u'http://data.kataweb.it/rss/scienze/arte_e_musica'), + (u'Astrofisica', u'http://data.kataweb.it/rss/scienze/astrofisica'), + (u'Astronautica', u'http://data.kataweb.it/rss/scienze/astronautica'), + (u'Astronomia', u'http://data.kataweb.it/rss/scienze/astronomia_e_cosmologia'), + (u'Biologia', u'http://data.kataweb.it/rss/scienze/biologia'), + (u'Chimica', u'http://data.kataweb.it/rss/scienze/chimica'), + (u'Ecologia & ambiente', u'http://data.kataweb.it/rss/scienze/ecologia_e_ambiente'), + (u'Economia', u'http://data.kataweb.it/rss/scienze/Economia'), + (u'Fisica', u'http://data.kataweb.it/rss/scienze/Fisica'), + (u'Informatica', u'http://data.kataweb.it/rss/scienze/informatica_e_telecomunicazioni'), + (u'Ingegneria', u'http://data.kataweb.it/rss/scienze/ingegneria_e_tecnologia'), + (u'Matematica', u'http://data.kataweb.it/rss/scienze/Matematica'), + (u'Medicina', u'http://data.kataweb.it/rss/scienze/Medicina'), + (u'Paleontologia', u'http://data.kataweb.it/rss/scienze/Paleontologia'), + (u'Recensioni', u'http://data.kataweb.it/rss/scienze/Recensioni'), + (u'Psicologia', u'http://data.kataweb.it/rss/scienze/psicologie_e_scienze_cognitive'), + (u'Scienze della Terra', u'http://data.kataweb.it/rss/scienze/scienze_della_terra'), + (u'Scienze 
dello spazio', u'http://data.kataweb.it/rss/scienze/scienze_dello_spazio'), + (u'Scienze naturali', u'http://data.kataweb.it/rss/scienze/scienze_naturali'), + (u'Scienze sociali', u'http://data.kataweb.it/rss/scienze/scienze_sociali'), + (u'Statistica', u'http://data.kataweb.it/rss/scienze/statistica'), + (u'Storia della scienza', u'http://data.kataweb.it/rss/scienze/storia_della_scienza') + ] + + extra_css = ''' + h1 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;} + h2 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; } + h3 {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;} + h4 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; } + h5 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;} + .occhiello {color:#666666;display:block;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:13px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:15px;} + .titolo {font-weight:bold;} + .label {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;height:15px;line-height:15px;text-transform:uppercase;} + .firma 
{color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;} + .testo {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;} + ''' + + + diff --git a/resources/recipes/nin.recipe b/resources/recipes/nin.recipe index 0872467d2f..a349f0e11f 100644 --- a/resources/recipes/nin.recipe +++ b/resources/recipes/nin.recipe @@ -72,9 +72,8 @@ class Nin(BasicNewsRecipe): section = self.tag_to_string(item) feedlink = self.PREFIX + item['href'] feedpage = self.index_to_soup(feedlink) - self.report_progress(0, _('Fetching feed')+' %s...'%(section)) + self.report_progress(0, _('Fetching feed')+' %s...'%(section)) inarts = [] - count2 = 0 for art in feedpage.findAll('span',attrs={'class':'artTitle'}): alink = art.parent url = self.PREFIX + alink['href'] diff --git a/resources/recipes/nytimes.recipe b/resources/recipes/nytimes.recipe index 8b9283a0af..32e5a4825e 100644 --- a/resources/recipes/nytimes.recipe +++ b/resources/recipes/nytimes.recipe @@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal ' ''' nytimes.com ''' -import re +import re, time from calibre import entity_to_unicode from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment diff --git a/resources/recipes/tuttosport.recipe b/resources/recipes/tuttosport.recipe new file mode 100644 index 0000000000..cc1f27e73a --- /dev/null +++ b/resources/recipes/tuttosport.recipe @@ -0,0 +1,66 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__author__ = 'Lorenzo Vigentini' +__copyright__ = '2009, Lorenzo Vigentini ' +__version__ = 'v1.01' +__date__ = '30, January 2010' +__description__ = 'Sport daily news from Italy' + +'''www.tuttosport.com''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class tuttosport(BasicNewsRecipe): + author = 'Lorenzo Vigentini' + 
description = 'Sport daily news from Italy' + + cover_url = 'http://www.tuttosport.com/res/imgs/logo_TuttoSport.png' + title = 'Tuttosport' + publisher = 'Nuova Editoriale Sportiva S.r.l' + category = 'Sport News' + + language = 'it' + timefmt = '[%a, %d %b, %Y]' + + oldest_article = 2 + max_articles_per_feed = 20 + use_embedded_content = False + recursion = 10 + + remove_javascript = True + no_stylesheets = True + + def print_version(self,url): + segments = url.split('/') + printURL = '/'.join(segments[0:10]) + '?print' + return printURL + + keep_only_tags = [ + dict(name='h2', attrs={'class':'tit_Article'}), + dict(name='div', attrs={'class':['box_Img img_L ','txt_ArticleAbstract','txt_Article txtBox_cms']}) + ] + + feeds = [ + (u'Primo piano',u'http://www.tuttosport.com/rss/primo_piano.xml'), + (u'Cronaca',u'http://www.tuttosport.com/rss/Cronaca-205.xml'), + (u'Lettere al direttore',u'http://blog.tuttosport.com/direttore/feed'), + (u'Calcio',u'http://www.tuttosport.com/rss/Calcio-3.xml'), + (u'Speciale Derby',u'http://www.tuttosport.com/rss/Speciale-derby-310.xml'), + (u'Formula 1',u'http://www.tuttosport.com/rss/Formula-1-7.xml'), + (u'Moto',u'http://www.tuttosport.com/rss/Moto-8.xml'), + (u'Basket',u'http://www.tuttosport.com/rss/Basket-9.xml'), + (u'Altri Sport',u'http://www.tuttosport.com/rss/Altri-Sport-2.xml'), + (u'Tuttosport League',u'http://www.tuttosport.com/rss/Tuttosport-League-245.xml'), + (u'Scommesse',u'http://www.tuttosport.com/rss/Scommesse-286.xml') + ] + + extra_css = ''' + body {font-family: Arial, Verdana, sans-serif; margin-bottom: 3em;} + h1 {color:#9C3A0B;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;} + h3 {color:#9C3A0B;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:15px;} + 
h2.tit_Article {color:#9C3A0B;margin: 15px 8px 0; margin-bottom: 1px; border-bottom: 3px solid;} + .txt_ArticleAbstract {color:#4080AE;clear: both; margin: 3px 8px;} + .txt_Article {clear: both; margin: 8px 8px 12px;} + .txt_Author {float: right;} + .txt_ArticleAuthor {clear: both; margin: 8px;} + '''