From b3052188d2571ef0d679fbcc1c02907aba0e9afe Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 16 May 2008 07:58:45 -0700
Subject: [PATCH] Add recipe for the Sydney Morning Herald

---
 src/calibre/web/feeds/recipes/__init__.py |  2 +-
 src/calibre/web/feeds/recipes/smh.py      | 69 +++++++++++++++++++++++
 src/calibre/web/fetch/simple.py           |  2 +-
 3 files changed, 71 insertions(+), 2 deletions(-)
 create mode 100644 src/calibre/web/feeds/recipes/smh.py

diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index 5dd143e846..f77cf65a53 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -7,7 +7,7 @@ Builtin recipes.
 recipes = [
            'newsweek', 'atlantic', 'economist', 'portfolio',
            'nytimes', 'usatoday', 'outlook_india', 'bbc', 'greader', 'wsj',
-           'wired', 'globe_and_mail',
+           'wired', 'globe_and_mail', 'smh',
            ]
 
 import re, imp, inspect, time
diff --git a/src/calibre/web/feeds/recipes/smh.py b/src/calibre/web/feeds/recipes/smh.py
new file mode 100644
index 0000000000..09c3347282
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/smh.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+'''
+smh.com.au
+'''
+import time
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+
+
+class SMH(BasicNewsRecipe):
+
+    title = 'Sydney Morning Herald'
+    description = 'Business News, World News and Breaking News in Australia'
+    __author__ = 'Kovid Goyal'
+
+    def get_browser(self):
+        '''
+        Return the default recipe browser with refresh handling disabled.
+        '''
+        # NOTE(review): called on the class without an instance -- confirm
+        # that BasicNewsRecipe.get_browser accepts being called this way.
+        br = BasicNewsRecipe.get_browser()
+        br.set_handle_refresh(False)
+        return br
+
+    def parse_index(self):
+        '''
+        Return a list of ``(section title, articles)`` tuples.
+
+        Each ``h3`` on the index page starts a new section and every link
+        that follows it, up to the next ``h3``, becomes an article of that
+        section. Links that appear before the first ``h3`` are ignored.
+        '''
+        soup = BeautifulSoup(self.browser.open('http://www.smh.com.au/text/').read())
+
+        feeds, articles = [], []
+        feed = None
+
+        for tag in soup.findAll(['h3', 'a']):
+            if tag.name == 'h3':
+                if articles:
+                    feeds.append((feed, articles))
+                    articles = []
+                feed = self.tag_to_string(tag)
+            elif feed is not None and tag.has_key('href') and tag['href'].strip():
+                url = tag['href'].strip()
+                if url.startswith('/'):
+                    url = 'http://www.smh.com.au' + url
+                title = self.tag_to_string(tag)
+                articles.append({
+                    'title': title,
+                    'url' : url,
+                    'date' : time.strftime('%a, %d %b'),
+                    'description' : '',
+                    'content' : '',
+                })
+
+        # The loop only flushes a section when the next h3 is seen, so the
+        # articles of the final section have to be added here.
+        if articles:
+            feeds.append((feed, articles))
+
+        return feeds
+
+
diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py
index 85c32f23ac..6308eab3a0 100644
--- a/src/calibre/web/fetch/simple.py
+++ b/src/calibre/web/fetch/simple.py
@@ -140,7 +140,7 @@ class RecursiveFetcher(object, LoggingInterface):
         except urllib2.URLError, err:
             if hasattr(err, 'code') and responses.has_key(err.code):
                 raise FetchError, responses[err.code]
-            if err.reason[0] == 104: # Connection reset by peer
+            if getattr(err, 'reason', [0])[0] == 104: # Connection reset by peer
                 self.log_debug('Connection reset by peer retrying in 1 second.')
                 time.sleep(1)
                 f = self.browser.open(url)