From cdd8c443968a733d15ff74ba41b8bd2f5a01700e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 31 Mar 2011 18:47:39 -0600 Subject: [PATCH] Improved Irish Times --- recipes/irish_times.recipe | 39 +++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/recipes/irish_times.recipe b/recipes/irish_times.recipe index 83ea496b2c..1434a25725 100644 --- a/recipes/irish_times.recipe +++ b/recipes/irish_times.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan" +__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns" ''' irishtimes.com ''' @@ -9,17 +9,20 @@ from calibre.web.feeds.news import BasicNewsRecipe class IrishTimes(BasicNewsRecipe): title = u'The Irish Times' - __author__ = "Derry FitzGerald, Ray Kinsella and David O'Callaghan" + encoding = 'ISO-8859-15' + __author__ = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns" language = 'en_IE' timefmt = ' (%A, %B %d, %Y)' - oldest_article = 3 + + oldest_article = 1.0 + max_articles_per_feed = 100 no_stylesheets = True - simultaneous_downloads= 1 + simultaneous_downloads= 5 r = re.compile('.*(?P<url>http:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*') remove_tags = [dict(name='div', attrs={'class':'footer'})] - extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }' + extra_css = 'p, div { margin: 0pt; border: 0pt; text-indent: 0.5em } .headline {font-size: large;} \n .fact { padding-top: 10pt }' feeds = [ ('Frontpage', 'http://www.irishtimes.com/feeds/rss/newspaper/index.rss'), @@ -30,15 +33,29 @@ class IrishTimes(BasicNewsRecipe): ('Sport', 'http://www.irishtimes.com/feeds/rss/newspaper/sport.rss'), ('Opinion', 'http://www.irishtimes.com/feeds/rss/newspaper/opinion.rss'), ('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'), + ('Magazine', 
'http://www.irishtimes.com/feeds/rss/newspaper/magazine.rss'), + ('Health', 'http://www.irishtimes.com/feeds/rss/newspaper/health.rss'), + ('Education & Parenting', 'http://www.irishtimes.com/feeds/rss/newspaper/education.rss'), + ('Motors', 'http://www.irishtimes.com/feeds/rss/newspaper/motors.rss'), + ('An Teanga Bheo', 'http://www.irishtimes.com/feeds/rss/newspaper/anteangabheo.rss'), + ('Commercial Property', 'http://www.irishtimes.com/feeds/rss/newspaper/commercialproperty.rss'), + ('Science Today', 'http://www.irishtimes.com/feeds/rss/newspaper/sciencetoday.rss'), + ('Property', 'http://www.irishtimes.com/feeds/rss/newspaper/property.rss'), + ('The Tickets', 'http://www.irishtimes.com/feeds/rss/newspaper/theticket.rss'), + ('Weekend', 'http://www.irishtimes.com/feeds/rss/newspaper/weekend.rss'), + ('News features', 'http://www.irishtimes.com/feeds/rss/newspaper/newsfeatures.rss'), + ('Obituaries', 'http://www.irishtimes.com/feeds/rss/newspaper/obituaries.rss'), ] + def print_version(self, url): - if url.count('rss.feedsportal.com'): - u = 'http://www.irishtimes.com' + \ - (((url[70:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01.htm','_pf.html') - else: - u = url.replace('.html','_pf.html') - return u + if url.count('rss.feedsportal.com'): + u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm') + else: + u = url.replace('.html','_pf.html') + return u def get_article_url(self, article): return article.link + +