From 449bc10862f27bffbf2522abcb1017d97b328524 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 26 Jan 2008 23:04:50 +0000 Subject: [PATCH] Don't die when print_version raises an exception --- .../ebooks/lrf/web/profiles/__init__.py | 17 +++++++++++++++-- src/libprs500/ebooks/lrf/web/profiles/wsj.py | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/libprs500/ebooks/lrf/web/profiles/__init__.py b/src/libprs500/ebooks/lrf/web/profiles/__init__.py index 34848a1616..c5b7104ba0 100644 --- a/src/libprs500/ebooks/lrf/web/profiles/__init__.py +++ b/src/libprs500/ebooks/lrf/web/profiles/__init__.py @@ -214,10 +214,15 @@ class DefaultProfile(object): content = self.process_html_description(content, strip_links=False) else: content = '' - + purl = url + try: + purl = self.print_version(url) + except Exception, err: + self.logger.debug('Skipping %s as could not find URL for print version. Error:\n%s'%(url, err)) + continue d = { 'title' : item.find('title').string, - 'url' : self.print_version(url), + 'url' : purl, 'timestamp': self.strptime(pubdate) if self.use_pubdate else time.time(), 'date' : pubdate if self.use_pubdate else time.ctime(), 'content' : content, @@ -412,4 +417,12 @@ def cutoff(src, pos, fuzz=50): if npos < 0: npos = pos return src[:npos+1] + +def create_class(src): + environment = {'FullContentProfile':FullContentProfile, 'DefaultProfile':DefaultProfile} + exec src in environment + for item in environment.values(): + if hasattr(item, 'build_index'): + if item.__name__ not in ['DefaultProfile', 'FullContentProfile']: + return item \ No newline at end of file diff --git a/src/libprs500/ebooks/lrf/web/profiles/wsj.py b/src/libprs500/ebooks/lrf/web/profiles/wsj.py index 5294130d93..2f99ccc984 100644 --- a/src/libprs500/ebooks/lrf/web/profiles/wsj.py +++ b/src/libprs500/ebooks/lrf/web/profiles/wsj.py @@ -15,7 +15,7 @@ class WallStreetJournal(DefaultProfile): title = 'Wall Street Journal' max_recursions = 2 needs_subscription = True - 
max_articles_per_feed = 50 + max_articles_per_feed = 10 timefmt = ' [%a, %b %d, %Y]' html_description = True no_stylesheets = False