Recipe for The Age by Matthew Briggs

2025-07-09 03:04:10 -04:00 · 2009-01-19 15:51:46 -08:00 · 2009-01-19 15:51:46 -08:00 · 2ab9c0229b
commit 2ab9c0229b
parent e3115f03cf
2 changed files with 56 additions and 1 deletions
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -22,7 +22,7 @@ recipe_modules = ['recipe_' + r for r in (
           'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik',
           'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet',
           'joelonsoftware', 'telepolis', 'common_dreams', 'nin', 'tomshardware_de',
-           'pagina12', 'infobae', 'ambito', 'elargentino', 'sueddeutsche', 
+           'pagina12', 'infobae', 'ambito', 'elargentino', 'sueddeutsche', 'the_age', 
          )]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_the_age.py
+++ b/src/calibre/web/feeds/recipes/recipe_the_age.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+__copyright__ = '2009, Matthew Briggs <hal.sulphur@gmail.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+theage.com.au
+'''
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+
+
+class TheAge(BasicNewsRecipe):
+    
+    title = 'The Age'
+    description = 'Business News, World News and Breaking News in Melbourne, Australia'
+    __author__ = 'Matthew Briggs'
+    
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+        br.set_handle_refresh(False)
+        return br
+    
+    def parse_index(self):
+        
+        soup = BeautifulSoup(self.browser.open('http://www.theage.com.au/text/').read())
+        
+        feeds, articles = [], []
+        feed = None
+        
+        
+        for tag in soup.findAll(['h3', 'a']):
+            if tag.name == 'h3':
+                if articles:
+                    feeds.append((feed, articles))
+                    articles = []
+                feed = self.tag_to_string(tag)
+            elif feed is not None and tag.has_key('href') and tag['href'].strip():
+                url = tag['href'].strip()
+                if url.startswith('/'):
+                    url   = 'http://www.theage.com.au' + url 
+                title = self.tag_to_string(tag)
+                articles.append({
+                                 'title': title,
+                                 'url'  : url,
+                                 'date' : strftime('%a, %d %b'),
+                                 'description' : '',
+                                 'content'     : '',
+                                 }) 
+                
+        return feeds
+                
+
+