diff --git a/resources/recipes/corriere_della_sera_en.recipe b/resources/recipes/corriere_della_sera_en.recipe index 15fd4c7c5d..0a15d77b9a 100644 --- a/resources/recipes/corriere_della_sera_en.recipe +++ b/resources/recipes/corriere_della_sera_en.recipe @@ -1,27 +1,35 @@ #!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__author__ = 'Lorenzo Vigentini, based on Darko Miletic' +__copyright__ = '2009, Darko Miletic , Lorenzo Vigentini ' +__version__ = 'v1.01' +__date__ = '10, January 2010' +__description__ = 'Italian daily newspaper (english version)' ''' -www.corriere.it/english +http://www.corriere.it/ ''' from calibre.web.feeds.news import BasicNewsRecipe -class Corriere_en(BasicNewsRecipe): - title = 'Corriere della Sera in English' - __author__ = 'Darko Miletic' - description = 'News from Milan and Italy' - oldest_article = 15 - publisher = 'Corriere della Sera' - category = 'news, politics, Italy' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'cp1252' - remove_javascript = True - language = 'en' +class ilCorriere(BasicNewsRecipe): + __author__ = 'Lorenzo Vigentini, based on Darko Miletic' + description = 'Italian daily newspaper (english version)' + cover_url = 'http://images.corriereobjects.it/images/static/common/logo_home.gif?v=200709121520' + title = u'Il Corriere della sera (english) ' + publisher = 'RCS Digital' + category = 'News, politics, culture, economy, general interest' + + language = 'en' + timefmt = '[%a, %d %b, %Y]' + + oldest_article = 1 + max_articles_per_feed = 100 + use_embedded_content = False + recursion = 10 + + remove_javascript = True + no_stylesheets = True html2lrf_options = [ '--comment', description @@ -35,12 +43,13 @@ class Corriere_en(BasicNewsRecipe): keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})] remove_tags = [ - dict(name=['base','object','link','embed','img']) - ,dict(name='div', attrs={'class':'news-goback'}) - ,dict(name='ul', attrs={'class':'toolbar'}) + dict(name=['base','object','link','embed']), + dict(name='div', attrs={'class':'news-goback'}), + dict(name='ul', attrs={'class':'toolbar'}) ] remove_tags_after = dict(name='p', attrs={'class':'footnotes'}) - feeds = [(u'Italian Life', u'http://www.corriere.it/rss/english.xml')] - + feeds = [ + (u'News' , u'http://www.corriere.it/rss/english.xml' ) + ] diff --git a/resources/recipes/corriere_della_sera_it.recipe b/resources/recipes/corriere_della_sera_it.recipe index 0eaa6818af..15d0bac928 100644 --- a/resources/recipes/corriere_della_sera_it.recipe +++ b/resources/recipes/corriere_della_sera_it.recipe @@ -1,26 +1,36 @@ #!/usr/bin/env python +__license__ = 'GPL v3' +__author__ = 'Lorenzo Vigentini, based on Darko Miletic' +__copyright__ = '2009, Darko Miletic , Lorenzo Vigentini ' +__version__ = 'v1.01' +__date__ = '10, January 2010' +__description__ = 'Italian daily newspaper' -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' ''' -www.corriere.it +http://www.corriere.it/ ''' from calibre.web.feeds.news import BasicNewsRecipe -class Corriere_it(BasicNewsRecipe): - title = 'Corriere della Sera' - __author__ = 'Darko Miletic' - description = 'News from Milan and Italy' - oldest_article = 7 - publisher = 'Corriere della Sera' - category = 'news, politics, Italy' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'cp1252' - remove_javascript = True - language = 'it' +class ilCorriere(BasicNewsRecipe): + __author__ = 'Lorenzo Vigentini, based on Darko Miletic' + description = 'Italian daily newspaper' + + cover_url = 'http://images.corriereobjects.it/images/static/common/logo_home.gif?v=200709121520' + title = u'Il Corriere della sera ' + publisher = 'RCS Digital' + category = 'News, politics, culture, economy, general interest' + + language = 'it' + timefmt = '[%a, %d %b, %Y]' + + oldest_article = 1 + max_articles_per_feed = 100 + use_embedded_content = False + recursion = 10 + + remove_javascript = True + no_stylesheets = True html2lrf_options = [ '--comment', description @@ -28,29 +38,30 @@ class Corriere_it(BasicNewsRecipe): , '--publisher', publisher , '--ignore-tables' ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})] remove_tags = [ - dict(name=['base','object','link','embed','img']) - ,dict(name='div', attrs={'class':'news-goback'}) - ,dict(name='ul', attrs={'class':'toolbar'}) + dict(name=['base','object','link','embed']), + dict(name='div', attrs={'class':'news-goback'}), + dict(name='ul', attrs={'class':'toolbar'}) ] remove_tags_after = dict(name='p', attrs={'class':'footnotes'}) - - feeds = [ - (u'Ultimora' , u'http://www.corriere.it/rss/ultimora.xml' ) - ,(u'Cronache' , u'http://www.corriere.it/rss/cronache.xml' ) - ,(u'Economia' , u'http://www.corriere.it/rss/economia.xml' ) - ,(u'Editoriali', u'http://www.corriere.it/rss/editoriali.xml') - ,(u'Esteri' , u'http://www.corriere.it/rss/esteri.xml' ) - ,(u'Politica' , u'http://www.corriere.it/rss/politica.xml' ) - ,(u'Salute' , u'http://www.corriere.it/rss/salute.xml' ) - ,(u'Scienze' , u'http://www.corriere.it/rss/scienze.xml' ) - ,(u'Spettacolo', u'http://www.corriere.it/rss/spettacoli.xml') - ,(u'Sport' , u'http://www.corriere.it/rss/sport.xml' ) - ] + feeds = [ + (u'Ultimora' , u'http://www.corriere.it/rss/ultimora.xml' ), + (u'Editoriali', u'http://www.corriere.it/rss/editoriali.xml'), + (u'Cronache' , u'http://www.corriere.it/rss/cronache.xml' ), + (u'Politica' , u'http://www.corriere.it/rss/politica.xml' ), + (u'Esteri' , u'http://www.corriere.it/rss/esteri.xml' ), + (u'Economia' , u'http://www.corriere.it/rss/economia.xml' ), + (u'Cultura' , u'http://www.corriere.it/rss/cultura.xml' ), + (u'Scienze' , u'http://www.corriere.it/rss/scienze.xml' ), + (u'Salute' , u'http://www.corriere.it/rss/salute.xml' ), + (u'Spettacolo', u'http://www.corriere.it/rss/spettacoli.xml'), + (u'Cinema e TV', u'http://www.corriere.it/rss/cinema.xml' ), + (u'Sport' , u'http://www.corriere.it/rss/sport.xml' ) + ] diff --git a/resources/recipes/l_espresso.recipe b/resources/recipes/l_espresso.recipe new file mode 100644 index 0000000000..945f0bf31a --- /dev/null +++ b/resources/recipes/l_espresso.recipe @@ -0,0 +1,67 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__author__ = 'Lorenzo Vigentini' +__copyright__ = '2009, Lorenzo Vigentini ' +__version__ = 'v1.02' +__date__ = '10, January 2010' +__description__ = 'Italian weekly magazine' + +'''espresso.repubblica.it''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class laGazzetta(BasicNewsRecipe): + __author__ = 'Lorenzo Vigentini' + description = 'Italian weekly magazine' + + cover_url = 'http://espresso.repubblica.it/images/logo_espresso.gif' + title = 'l Espresso ' + publisher = 'Gruppo editoriale lEspresso' + category = 'News, politics, culture, economy, general interest' + + language = 'it' + encoding = 'cp1252' + timefmt = '[%a, %d %b, %Y]' + + oldest_article = 16 + max_articles_per_feed = 100 + use_embedded_content = False + recursion = 10 + + remove_javascript = True + no_stylesheets = True + + + feeds = [ + (u'Espresso Homepage', u'http://kpm.data.kataweb.it/kpm3eolx/rss/home'), + (u'Espresso Local', u'http://kpm.data.kataweb.it/kpm3eolx/rss/local'), + (u'Espresso Style & Design', u'http://kpm.data.kataweb.it/kpm3eolx/rss/style_design'), + (u'Espresso Opinioni', u'http://kpm.data.kataweb.it/kpm3eolx/rss/opinioni'), + (u'Espresso Rubriche', u'http://kpm.data.kataweb.it/kpm3eolx/rss/rubriche'), + (u'Espresso Limes', u'http://temi.repubblica.it/limes/feed/') + ] + + def print_version(self,url): + return url + '/&print=true' + + keep_only_tags = [ + dict(name='div', attrs={'class':['testo','copertina','occhiello','firma','didascalia','content-second-right','detail-articles','titolo-local','generic-articles']}), + dict(name='div', attrs={'class':['generic-articles','summary','detail-articles']}), + dict(name='div', attrs={'id':'content-second-right'}) + ] + + remove_tags = [ + dict(name='div',attrs={'class':['servizi','aggiungi','label-web','bottom-mobile','box-abbonamenti','box-cerca','big','little','stampaweb']}), + dict(name='div',attrs={'id':['topheader','header','navigation-new','navigation','content-second-left']}), + dict(name=['script','noscript','iframe']) + ] + extra_css = ''' + h1 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;} + h2 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; } + h3 {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;} + h4 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; } + h5 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;} + .firma {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;} + .testo {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;} + ''' + diff --git a/resources/recipes/la_gazzeta_dello_sport.recipe b/resources/recipes/la_gazzeta_dello_sport.recipe new file mode 100644 index 0000000000..5fb61407ea --- /dev/null +++ b/resources/recipes/la_gazzeta_dello_sport.recipe @@ -0,0 +1,79 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__author__ = 'Lorenzo Vigentini' +__copyright__ = '2009, Lorenzo Vigentini ' +__version__ = 'v1.02' +__date__ = '10, January 2010' +__description__ = 'Sport news from the most read sport newspaper in Italy' + +'''www.gazzetta.it''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class laGazzetta(BasicNewsRecipe): + __author__ = 'Lorenzo Vigentini' + description = 'Sport news from the most read sport newspaper in Italy' + + cover_url = 'http://www.gazzetta.it/primapagina/images/prima_pagina_grande.png' + title = 'La Gazzetta dello Sport ' + publisher = 'RCS Digital' + category = 'Sport News' + + language = 'it' + encoding = 'cp1252' + timefmt = '[%a, %d %b, %Y]' + + oldest_article = 2 + max_articles_per_feed = 20 + use_embedded_content = False + recursion = 10 + + remove_javascript = True + no_stylesheets = True + + keep_only_tags = [ dict(name='div', attrs={'id':'articolo'})] + + remove_tags = [ + dict(name='ul',attrs={'id':['service-toolbar','sections-menu']}), + dict(name='div',attrs={'id':['header','rightcol','sponsored','vxFlashPlayer','footer','print-box']}), + dict(name='iframe',attrs={'id':'mirago-feed'}), + dict(name='a',attrs={'id':'commenta-up'}), + dict(name='cite',attrs={'class':['signature','parag-title']}), + dict(name='a',attrs={'class':['last-comment','button-bold2']}), + dict(name=['base','object','link','a','script','noscript']) + ] + + extra_css = ''' + h1 {font: sans-serif large;} + h2 {font: sans-serif medium;} + h3 {font: sans-serif small;} + h4 {font: sans-serif bold small;} + p {font:10pt helvetica} + dd {font:8pt helvetica} + ''' + + feeds = [ + (u'Calcio',u'http://www.gazzetta.it/rss/Calcio.xml'), + (u'Formula 1',u'http://www.gazzetta.it/rss/Formula1.xml'), + (u'Motomodiale',u'http://www.gazzetta.it/rss/Motomondiale.xml'), + (u'Motori',u'http://www.gazzetta.it/rss/Motori.xml'), + (u'Ciclismo',u'http://www.gazzetta.it/rss/Ciclismo.xml'), + (u'Basket',u'http://www.gazzetta.it/rss/Basket.xml'), + (u'Tennis',u'http://www.gazzetta.it/rss/Tennis.xml'), + (u'Pallavolo',u'http://www.gazzetta.it/rss/Pallavolo.xml'), + (u'Vela',u'http://www.gazzetta.it/rss/Vela.xml'), + (u'Atletica',u'http://www.gazzetta.it/rss/Atletica.xml'), + (u'Altri Sport',u'http://www.gazzetta.it/rss/Sport_Vari.xml') + ] + + def print_version(self,url): + segments = url.split('/') + basename = '/'.join(segments[:3])+'/' + subPath= '/'.join(segments[3:7])+'/' + articleURL=(segments[len(segments)-1])[:-6] + myArticleSegs=articleURL.split('.') + myArticle=myArticleSegs[0] + printVerString=myArticle+ '_print.html' + myURL = basename + subPath + printVerString + print 'this is the url: ' + myURL + return basename + subPath + printVerString diff --git a/resources/recipes/la_republica.recipe b/resources/recipes/la_republica.recipe index 773def9e1a..3bc1fa5ece 100644 --- a/resources/recipes/la_republica.recipe +++ b/resources/recipes/la_republica.recipe @@ -1,29 +1,55 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__author__ = 'Lorenzo Vigentini, based on Darko Miletic' +__copyright__ = '2009, Darko Miletic , Lorenzo Vigentini ' +description = 'Italian daily newspaper - v1.01 (04, January 2010)' + +''' +http://www.repubblica.it/ +''' + from calibre.web.feeds.news import BasicNewsRecipe class LaRepublica(BasicNewsRecipe): - title = u'la Repubblica' - oldest_article = 1 - language = 'it' + author = 'Lorenzo Vigentini, based on Darko Miletic' + description = 'Italian daily newspaper' - author = 'Darko Miletic' + cover_url = 'http://www.repubblica.it/images/homepage/la_repubblica_logo.gif' + title = u'La Repubblica' + publisher = 'Gruppo editoriale L\'Espresso' + category = 'News, politics, culture, economy, general interest' + + language = 'it' + timefmt = '[%a, %d %b, %Y]' + + oldest_article = 1 max_articles_per_feed = 100 + use_embedded_content = False + recursion = 10 + remove_javascript = True - no_stylesheets = True - + keep_only_tags = [dict(name='div', attrs={'class':'articolo'})] - remove_tags = [ - dict(name=['object','link']) - ,dict(name='span',attrs={'class':'linkindice'}) - ,dict(name='div',attrs={'class':'bottom-mobile'}) - ,dict(name='div',attrs={'id':['rssdiv','blocco']}) + dict(name=['object','link']), + dict(name='span',attrs={'class':'linkindice'}), + dict(name='div',attrs={'class':'bottom-mobile'}), + dict(name='div',attrs={'id':['rssdiv','blocco']}) ] - + feeds = [ - (u'Repubblica homepage', u'http://www.repubblica.it/rss/homepage/rss2.0.xml'), + (u'Repubblica Rilievo', u'http://www.repubblica.it/rss/homepage/rss2.0.xml'), + (u'Repubblica Cronaca', u'http://www.repubblica.it/rss/cronaca/rss2.0.xml'), + (u'Repubblica Esteri', u'http://www.repubblica.it/rss/esteri/rss2.0.xml'), + (u'Repubblica Economia', u'http://www.repubblica.it/rss/economia/rss2.0.xml'), + (u'Repubblica Politica', u'http://www.repubblica.it/rss/politica/rss2.0.xml'), (u'Repubblica Scienze', u'http://www.repubblica.it/rss/scienze/rss2.0.xml'), (u'Repubblica Tecnologia', u'http://www.repubblica.it/rss/tecnologia/rss2.0.xml'), - (u'Repubblica Esteri', u'http://www.repubblica.it/rss/esteri/rss2.0.xml') + (u'Repubblica Scuola e Universita', u'http://www.repubblica.it/rss/scuola_e_universita/rss2.0.xml'), + (u'Repubblica Ambiente', u'http://www.repubblica.it/rss/ambiente/rss2.0.xml'), + (u'Repubblica Cultura', u'http://www.repubblica.it/rss/spettacoli_e_cultura/rss2.0.xml'), + (u'Repubblica Persone', u'http://www.repubblica.it/rss/persone/rss2.0.xml'), + (u'Repubblica Sport', u'http://www.repubblica.it/rss/sport/rss2.0.xml'), + (u'Repubblica Calcio', u'http://www.repubblica.it/rss/sport/calcio/rss2.0.xml') ] - diff --git a/resources/recipes/panorama.recipe b/resources/recipes/panorama.recipe new file mode 100644 index 0000000000..ed1202fe00 --- /dev/null +++ b/resources/recipes/panorama.recipe @@ -0,0 +1,51 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__author__ = 'Lorenzo Vigentini' +__copyright__ = '2009, Lorenzo Vigentini ' +__version__ = 'v1.01' +__date__ = '10, January 2010' +__description__ = 'Italian weekly magazine' + +''' +http://www.panorama.it/ +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class panorama(BasicNewsRecipe): + __author__ = 'Lorenzo Vigentini, based on Darko Miletic' + description = 'Italian weekly magazine' + + cover_url = 'http://www.panorama.it/panorama/images/panorama_large.gif' + title = u'Panorama ' + publisher = 'Mondadori' + category = 'News, politics, culture, economy, general interest' + + language = 'it' + timefmt = '[%a, %d %b, %Y]' + + oldest_article = 7 + max_articles_per_feed = 100 + use_embedded_content = False + recursion = 10 + + remove_javascript = True + + keep_only_tags = [dict(name='div', attrs={'class':['post','article']})] + + remove_tags = [ + dict(name=['object','link']), + dict(name='div',attrs={'class':['post-meta','sharing-tools','related','comments','prev-next']}), + dict(name='div',attrs={'id':['related-posts','footer']}) + ] + + feeds = [ + (u'Panorama Italia', u'http://blog.panorama.it/italia/feed'), + (u'Panorama Mondo', u'http://blog.panorama.it/mondo/feed'), + (u'Panorama Cultura e societa', u'http://blog.panorama.it/culturaesocieta/feed'), + (u'Panorama Hitech e scienza', u'http://blog.panorama.it/hitechescienza/feed'), + (u'Panorama Motori', u'http://blog.panorama.it/autoemoto/feed'), + (u'Panorama libri', u'http://blog.panorama.it/libri/feed'), + (u'Panorama Opinioni', u'http://blog.panorama.it/opinioni/feed'), + + ] diff --git a/resources/recipes/quotidiano.recipe b/resources/recipes/quotidiano.recipe new file mode 100644 index 0000000000..f697564836 --- /dev/null +++ b/resources/recipes/quotidiano.recipe @@ -0,0 +1,52 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__author__ = 'Lorenzo Vigentini' +__copyright__ = '2009, Lorenzo Vigentini ' +__version__ = 'v1.01' +__date__ = '10, January 2010' +__description__ = 'Italian News Agency' + +''' +http://www.quotidianonet.ilsole24ore.com/ +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class panorama(BasicNewsRecipe): + __author__ = 'Lorenzo Vigentini, based on Darko Miletic' + description = 'Italian News Agency' + + cover_url = 'http://quotidianonet.ilsole24ore.com/file_generali/img/logo_quotidianonet-top.gif' + title = u'Quotidiano Net ' + publisher = 'italiaNews' + category = 'News, politics, culture, economy, general interest' + + language = 'it' + timefmt = '[%a, %d %b, %Y]' + + oldest_article = 7 + max_articles_per_feed = 100 + use_embedded_content = False + recursion = 10 + + remove_javascript = True + + keep_only_tags = [dict(name='div', attrs={'class':'box_contenuto articolo'})] + + remove_tags = [ + dict(name=['object','link']), + dict(name='div',attrs={'class':['post-meta','sharing-tools','related','comments','prev-next','box_contenuto adsense']}), + dict(name='div',attrs={'id':['strumenti','related-posts','footer','inline_boxes','inline_boxes_header','inline_boxes_body','bottom']}), + dict(name='span',attrs={'class':'titolosezione default'}) + ] + + feeds = [ + (u'Prima pagina', u'http://quotidianonet.ilsole24ore.com/rss/home.xml'), + (u'Cronaca', u'http://quotidianonet.ilsole24ore.com/rss/cronaca.xml'), + (u'Economia', u'http://quotidianonet.ilsole24ore.com/rss/economia.xml'), + (u'Esteri', u'http://quotidianonet.ilsole24ore.com/rss/esteri.xml'), + (u'Politica', u'http://quotidianonet.ilsole24ore.com/rss/politica.xml'), + (u'Salute', u'http://quotidianonet.ilsole24ore.com/rss/salute.xml'), + (u'Tecnologia', u'http://quotidianonet.ilsole24ore.com/rss/tecnologia.xml'), + + ]