From 5cdf23b5e4968f7ca596423dc990b10d2520ec4c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 16 May 2010 09:11:53 -0600 Subject: [PATCH] Libero by Gabriele Marini --- resources/recipes/l_espresso.recipe | 56 ++++++++++++++---------- resources/recipes/la_republica.recipe | 15 +++---- resources/recipes/lescienze.recipe | 31 ++++--------- resources/recipes/libero.recipe | 56 ++++++++++++++++++++++++ src/calibre/ebooks/conversion/plumber.py | 2 + 5 files changed, 107 insertions(+), 53 deletions(-) create mode 100644 resources/recipes/libero.recipe diff --git a/resources/recipes/l_espresso.recipe b/resources/recipes/l_espresso.recipe index 945f0bf31a..f7ae4db159 100644 --- a/resources/recipes/l_espresso.recipe +++ b/resources/recipes/l_espresso.recipe @@ -1,6 +1,6 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__author__ = 'Lorenzo Vigentini' +__author__ = 'Lorenzo Vigentini, Gabriele Marini' __copyright__ = '2009, Lorenzo Vigentini ' __version__ = 'v1.02' __date__ = '10, January 2010' @@ -10,17 +10,17 @@ __description__ = 'Italian weekly magazine' from calibre.web.feeds.news import BasicNewsRecipe -class laGazzetta(BasicNewsRecipe): - __author__ = 'Lorenzo Vigentini' +class Espresso(BasicNewsRecipe): + __author__ = 'Lorenzo Vigentini, Gabriele Marini' description = 'Italian weekly magazine' cover_url = 'http://espresso.repubblica.it/images/logo_espresso.gif' - title = 'l Espresso ' - publisher = 'Gruppo editoriale lEspresso' + title = 'L\'Espresso ' + publisher = 'Gruppo editoriale L\'Espresso' category = 'News, politics, culture, economy, general interest' language = 'it' - encoding = 'cp1252' +# encoding = 'cp1252' timefmt = '[%a, %d %b, %Y]' oldest_article = 16 @@ -33,35 +33,45 @@ class laGazzetta(BasicNewsRecipe): feeds = [ - (u'Espresso Homepage', u'http://kpm.data.kataweb.it/kpm3eolx/rss/home'), - (u'Espresso Local', u'http://kpm.data.kataweb.it/kpm3eolx/rss/local'), - (u'Espresso Style & Design', u'http://kpm.data.kataweb.it/kpm3eolx/rss/style_design'), - (u'Espresso Opinioni', u'http://kpm.data.kataweb.it/kpm3eolx/rss/opinioni'), - (u'Espresso Rubriche', u'http://kpm.data.kataweb.it/kpm3eolx/rss/rubriche'), - (u'Espresso Limes', u'http://temi.repubblica.it/limes/feed/') + (u'Homepage', u'http://kpm.data.kataweb.it/kpm3eolx/rss/home'), + (u'Local', u'http://kpm.data.kataweb.it/kpm3eolx/rss/local'), + (u'Style & Design', u'http://kpm.data.kataweb.it/kpm3eolx/rss/style_design'), + (u'Opinioni', u'http://kpm.data.kataweb.it/kpm3eolx/rss/opinioni'), + (u'Rubriche', u'http://kpm.data.kataweb.it/kpm3eolx/rss/rubriche'), + (u'Limes', u'http://temi.repubblica.it/limes/feed/'), + (u'Chiesa: HomePage', u'http://data.kataweb.it/rss/chiesa/homepage/it'), + (u'Chiesa: Speciali e Focus', u'http://data.kataweb.it/rss/chiesa/speciali_e_focus/it') ] + def print_version(self,url): + print url[7:25] + if url[7:25] == 'temi.repubblica.it': + return url + '/?printpage=undefined' + elif url[7:25] == 'www.chiesa.espress': + return url return url + '/&print=true' + keep_only_tags = [ dict(name='div', attrs={'class':['testo','copertina','occhiello','firma','didascalia','content-second-right','detail-articles','titolo-local','generic-articles']}), dict(name='div', attrs={'class':['generic-articles','summary','detail-articles']}), - dict(name='div', attrs={'id':'content-second-right'}) + dict(name='div', attrs={'id':['content-second-right','content2']}) ] remove_tags = [ dict(name='div',attrs={'class':['servizi','aggiungi','label-web','bottom-mobile','box-abbonamenti','box-cerca','big','little','stampaweb']}), - dict(name='div',attrs={'id':['topheader','header','navigation-new','navigation','content-second-left']}), + dict(name='div',attrs={'id':['topheader','header','navigation-new','navigation','content-second-left','menutext']}), + dict(name='ul',attrs={'id':'user-utility'}), dict(name=['script','noscript','iframe']) ] - extra_css = ''' - h1 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;} - h2 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; } - h3 {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;} - h4 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; } - h5 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;} - .firma {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;} - .testo {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;} - ''' +# extra_css = ''' +# h1 {font-family:Times New Roman,"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:24px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;} +# h2 {font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; } +# h3 {color:#333333;font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;} +# h4 {color:#333333; font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; } +# h5 {color:#333333; font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;} +# .firma {color:#333333;font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;} +# .testo {font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;} +# ''' diff --git a/resources/recipes/la_republica.recipe b/resources/recipes/la_republica.recipe index 75374d9834..c74f2d7b05 100644 --- a/resources/recipes/la_republica.recipe +++ b/resources/recipes/la_republica.recipe @@ -1,8 +1,8 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__author__ = 'Lorenzo Vigentini, based on Darko Miletic' +__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini' __copyright__ = '2009, Darko Miletic , Lorenzo Vigentini ' -description = 'Italian daily newspaper - v1.01 (04, January 2010)' +description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version' ''' http://www.repubblica.it/ @@ -11,7 +11,7 @@ http://www.repubblica.it/ from calibre.web.feeds.news import BasicNewsRecipe class LaRepubblica(BasicNewsRecipe): - author = 'Lorenzo Vigentini, based on Darko Miletic' + __author__ = 'Lorenzo Vigentini, Gabriele Marini' description = 'Italian daily newspaper' cover_url = 'http://www.repubblica.it/images/homepage/la_repubblica_logo.gif' @@ -36,7 +36,8 @@ class LaRepubblica(BasicNewsRecipe): keep_only_tags = [dict(name='div', attrs={'class':'articolo'}), dict(name='div', attrs={'class':'body-text'}), - dict(name='div', attrs={'class':'page-content'}), +# dict(name='div', attrs={'class':'page-content'}), + dict(name='p', attrs={'class':'disclaimer clearfix'}), dict(name='div', attrs={'id':'contA'}) ] @@ -47,11 +48,9 @@ class LaRepubblica(BasicNewsRecipe): dict(name='div', attrs={'class':'bottom-mobile'}), dict(name='div', attrs={'id':['rssdiv','blocco']}), dict(name='div', attrs={'class':'utility'}), - dict(name='div', attrs={'class':'generalbox'}) + dict(name='div', attrs={'class':'generalbox'}), + dict(name='ul', attrs={'id':'hystory'}) ] - remove_tags_after = [ - dict(name='div',attrs={'id':'ugc_linkUpload'}) - ] feeds = [ (u'Rilievo', u'http://www.repubblica.it/rss/homepage/rss2.0.xml'), diff --git a/resources/recipes/lescienze.recipe b/resources/recipes/lescienze.recipe index 13d7ea8ea2..b924844987 100644 --- a/resources/recipes/lescienze.recipe +++ b/resources/recipes/lescienze.recipe @@ -4,7 +4,7 @@ __author__ = 'Lorenzo Vigentini' __copyright__ = '2009, Lorenzo Vigentini ' __version__ = 'v1.01' __date__ = '10, January 2010' -__description__ = 'Monthly Italian edition of Scientific American' +__description__ = 'Monthly Italian edition of Scientific American, 16.05.2010 new version' ''' http://lescienze.espresso.repubblica.it/ @@ -13,22 +13,22 @@ http://lescienze.espresso.repubblica.it/ from calibre.web.feeds.news import BasicNewsRecipe class leScienze(BasicNewsRecipe): - author = 'Lorenzo Vigentini' + __author__ = 'Lorenzo Vigentini, Gabriele Marini' description = 'Monthly Italian edition of Scientific American' cover_url = 'http://lescienze.espresso.repubblica.it/images/logo_lescienze.gif' title = 'le Scienze' - publisher = 'Gruppo editoriale lEspresso' + publisher = 'Gruppo editoriale L\'Espresso' category = 'Science, general interest' language = 'it' - encoding = 'cp1252' +# encoding = 'cp1252' timefmt = '[%a, %d %b, %Y]' - oldest_article = 31 - max_articles_per_feed = 20 + oldest_article = 100 + max_articles_per_feed = 100 use_embedded_content = False - recursion = 10 + recursion = 20 remove_javascript = True no_stylesheets = True @@ -46,6 +46,8 @@ class leScienze(BasicNewsRecipe): remove_tags_after = [dict(name='div',attrs={'class':'box-commenti'})] feeds = [ + + (u'Home', u'http://data.kataweb.it/rss/scienze'), (u'Antropologia', u'http://data.kataweb.it/rss/scienze/antropologia'), (u'Archeologia', u'http://data.kataweb.it/rss/scienze/archeologia'), (u'Arte e Musica', u'http://data.kataweb.it/rss/scienze/arte_e_musica'), @@ -72,18 +74,3 @@ class leScienze(BasicNewsRecipe): (u'Storia della scienza', u'http://data.kataweb.it/rss/scienze/storia_della_scienza') ] - extra_css = ''' - h1 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;} - h2 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; } - h3 {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;} - h4 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; } - h5 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;} - .occhiello {color:#666666;display:block;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:13px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:15px;} - .titolo {font-weight:bold;} - .label {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;height:15px;line-height:15px;text-transform:uppercase;} - .firma {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;} - .testo {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;} - ''' - - - diff --git a/resources/recipes/libero.recipe b/resources/recipes/libero.recipe new file mode 100644 index 0000000000..76663f7fe0 --- /dev/null +++ b/resources/recipes/libero.recipe @@ -0,0 +1,56 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__author__ = 'Gabriele Marini, based on Darko Miletic' +__copyright__ = '2009-2010, Darko Miletic ' +description = 'Italian daily newspaper - 13-05-2010' + +''' +http://www.libero-news.it/ +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class LiberoNews(BasicNewsRecipe): + __author__ = 'Marini Gabriele' + description = 'Italian daily newspaper' + + cover_url = 'http://www.ilgiornale.it/img_v1/logo.gif' + title = u'Libero ' + publisher = 'EDITORIALE LIBERO s.r.l 2006' + category = 'News, politics, culture, economy, general interest' + + language = 'it' + timefmt = '[%a, %d %b, %Y]' + + oldest_article = 7 + max_articles_per_feed = 50 + use_embedded_content = False + recursion = 100 + + no_stylesheets = True + conversion_options = {'linearize_tables':True} + remove_javascript = True + + keep_only_tags = [ + dict(name='div', attrs={'class':'Articolo'}) + ] + remove_tags = [ + dict(name='div', attrs={'class':['CommentaFoto','Priva2']}), + dict(name='div', attrs={'id':['commentigenerale']}) + ] + feeds = [ + (u'Politica', u'http://www.libero-news.it/rss.jsp?sezione=14'), + (u'Italia', u'http://www.libero-news.it/rss.jsp?sezione=15'), + (u'Esteri', u'http://www.libero-news.it/rss.jsp?sezione=16'), + (u'Economia', u'http://www.libero-news.it/rss.jsp?sezione=17'), + (u'Cultura', u'http://www.libero-news.it/rss.jsp?sezione=18'), + (u'Scienze', u'http://www.libero-news.it/rss.jsp?sezione=19'), + (u'Tecnologia', u'http://www.libero-news.it/rss.jsp?sezione=20'), + (u'LifeStyle', u'http://www.libero-news.it/rss.jsp?sezione=22'), + (u'Sport', u'http://www.libero-news.it/rss.jsp?sezione=23'), + (u'Costume e Società', u' http://www.libero-news.it/rss.jsp?sezione=24'), + (u'Milano', u'http://www.libero-news.it/rss.jsp?sezione=26'), + (u'Roma', u'http://www.libero-news.it/rss.jsp?sezione=27'), + (u'Alimentazione', u'http://www.libero-news.it/rss.jsp?sezione=29') + ] + diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 55a853f1d2..1034511016 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -15,6 +15,7 @@ from calibre.ptempfile import PersistentTemporaryDirectory from calibre.utils.date import parse_date from calibre.utils.zipfile import ZipFile from calibre import extract, walk +from calibre.constants import __version__ DEBUG_README=u''' This debug directory contains snapshots of the e-book as it passes through the @@ -711,6 +712,7 @@ OptionRecommendation(name='timestamp', if self.opts.verbose > 1: self.log.debug('Resolved conversion options') try: + self.log.debug('calibre version:', __version__) self.log.debug(pprint.pformat(self.opts.__dict__)) except: self.log.exception('Failed to get resolved conversion options')