Improved Irish Times

2025-11-28 17:25:01 -05:00 · 2011-03-31 18:47:39 -06:00 · 2011-03-31 18:47:39 -06:00 · cdd8c44396
commit cdd8c44396
parent 191d30c27e
1 changed file with 28 additions and 11 deletions
--- a/recipes/irish_times.recipe
+++ b/recipes/irish_times.recipe
@@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan"
+__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns"
 '''
 irishtimes.com
 '''
@@ -9,17 +9,20 @@ from calibre.web.feeds.news import BasicNewsRecipe

 class IrishTimes(BasicNewsRecipe):
    title          = u'The Irish Times'
-    __author__     = "Derry FitzGerald, Ray Kinsella and David O'Callaghan"
+    encoding  = 'ISO-8859-15'
+    __author__     = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns"
    language = 'en_IE'
    timefmt = ' (%A, %B %d, %Y)'

-    oldest_article = 3
+
+    oldest_article = 1.0
+    max_articles_per_feed  = 100
    no_stylesheets = True
-    simultaneous_downloads= 1
+    simultaneous_downloads= 5

    r = re.compile('.*(?P<url>http:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*')
    remove_tags    = [dict(name='div', attrs={'class':'footer'})]
-    extra_css      = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt  }'
+    extra_css      = 'p, div { margin: 0pt; border: 0pt; text-indent: 0.5em } .headline {font-size: large;} \n .fact { padding-top: 10pt  }'

    feeds          = [
                      ('Frontpage', 'http://www.irishtimes.com/feeds/rss/newspaper/index.rss'),
@@ -30,15 +33,29 @@ class IrishTimes(BasicNewsRecipe):
                      ('Sport', 'http://www.irishtimes.com/feeds/rss/newspaper/sport.rss'),
                      ('Opinion', 'http://www.irishtimes.com/feeds/rss/newspaper/opinion.rss'),
                      ('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
+                      ('Magazine', 'http://www.irishtimes.com/feeds/rss/newspaper/magazine.rss'),
+                      ('Health', 'http://www.irishtimes.com/feeds/rss/newspaper/health.rss'),
+                      ('Education & Parenting', 'http://www.irishtimes.com/feeds/rss/newspaper/education.rss'),
+                      ('Motors', 'http://www.irishtimes.com/feeds/rss/newspaper/motors.rss'),
+                      ('An Teanga Bheo', 'http://www.irishtimes.com/feeds/rss/newspaper/anteangabheo.rss'),
+                      ('Commercial Property', 'http://www.irishtimes.com/feeds/rss/newspaper/commercialproperty.rss'),
+                      ('Science Today', 'http://www.irishtimes.com/feeds/rss/newspaper/sciencetoday.rss'),
+                      ('Property', 'http://www.irishtimes.com/feeds/rss/newspaper/property.rss'),
+                      ('The Tickets', 'http://www.irishtimes.com/feeds/rss/newspaper/theticket.rss'),
+                      ('Weekend', 'http://www.irishtimes.com/feeds/rss/newspaper/weekend.rss'),
+                      ('News features', 'http://www.irishtimes.com/feeds/rss/newspaper/newsfeatures.rss'),
+                      ('Obituaries', 'http://www.irishtimes.com/feeds/rss/newspaper/obituaries.rss'),
                    ]

+
    def print_version(self, url):
        if url.count('rss.feedsportal.com'):
-            u = 'http://www.irishtimes.com' + \
-                     (((url[70:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01.htm','_pf.html')
+            u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
        else:
            u = url.replace('.html','_pf.html')
        return u

    def get_article_url(self, article):
        return article.link
+
+