#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Matthew Briggs '
__docformat__ = 'restructuredtext en'

'''
theage.com.au
'''
# Recipe for The Age by Matthew Briggs.
# To be picked up, this module is listed as 'the_age' in ``recipe_modules``
# in src/calibre/web/feeds/recipes/__init__.py.
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup


class TheAge(BasicNewsRecipe):
    """News recipe that builds its index from the text edition of The Age.

    The index page at http://www.theage.com.au/text/ is scanned for ``<h3>``
    headings (used as feed titles) and the ``<a>`` links that follow each
    heading (used as that feed's articles).
    """

    title = 'The Age'
    description = 'Business News, World News and Breaking News in Melbourne, Australia'
    __author__ = 'Matthew Briggs'

    def get_browser(self):
        """Return the base recipe's browser with refresh handling disabled."""
        # NOTE(review): get_browser is invoked on the class without an
        # instance; this relies on BasicNewsRecipe.get_browser being callable
        # that way (e.g. a classmethod) -- confirm against the base class.
        br = BasicNewsRecipe.get_browser()
        br.set_handle_refresh(False)
        return br

    def parse_index(self):
        """Build the list of feeds from the text-edition index page.

        :return: A list of ``(feed_title, articles)`` tuples, where
                 ``articles`` is a list of dicts with the keys ``title``,
                 ``url``, ``date``, ``description`` and ``content``.
                 Links that appear before the first ``<h3>`` heading and
                 links without a non-blank ``href`` are ignored; headings
                 with no articles produce no feed.
        """
        index_page = self.browser.open('http://www.theage.com.au/text/').read()
        soup = BeautifulSoup(index_page)

        feeds, articles = [], []
        feed = None
        # Every article gets the same date stamp, so compute it once.
        date = strftime('%a, %d %b')

        for tag in soup.findAll(['h3', 'a']):
            if tag.name == 'h3':
                # A new heading closes the section collected so far.
                if articles:
                    feeds.append((feed, articles))
                    articles = []
                feed = self.tag_to_string(tag)
                continue

            if feed is None:
                # Link appears before any section heading.
                continue

            url = (tag.get('href') or '').strip()
            if not url:
                continue
            if url.startswith('/'):
                # Site-relative link: make it absolute.
                url = 'http://www.theage.com.au' + url

            articles.append({
                'title': self.tag_to_string(tag),
                'url': url,
                'date': date,
                'description': '',
                'content': '',
            })

        # Flush the final section: no further <h3> follows it, so the loop
        # above never appends it (previously the last feed was dropped).
        if articles:
            feeds.append((feed, articles))

        return feeds