From a1389cfd57d21d89c10f768c71a7ae7e89fe6504 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 13 Feb 2008 04:52:28 +0000 Subject: [PATCH] Fix #521 --- .../ebooks/lrf/web/profiles/__init__.py | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/libprs500/ebooks/lrf/web/profiles/__init__.py b/src/libprs500/ebooks/lrf/web/profiles/__init__.py index 9e6e1312e0..ade97c6505 100644 --- a/src/libprs500/ebooks/lrf/web/profiles/__init__.py +++ b/src/libprs500/ebooks/lrf/web/profiles/__init__.py @@ -232,12 +232,16 @@ class DefaultProfile(object): pubdate = pubdate.replace('+0000', 'GMT') url = self.get_article_url(item) - - url = self.tag_to_string(url) if require_url and not url: self.logger.debug('Skipping article as it does not have a link url') continue + purl = url + try: + purl = self.print_version(url) + except Exception, err: + self.logger.debug('Skipping %s as could not find URL for print version. Error:\n%s'%(url, err)) + continue content = item.find('content:encoded') if not content: @@ -246,12 +250,7 @@ class DefaultProfile(object): content = self.process_html_description(content, strip_links=False) else: content = '' - purl = url - try: - purl = self.print_version(url) - except Exception, err: - self.logger.debug('Skipping %s as could not find URL for print version. Error:\n%s'%(url, err)) - continue + d = { 'title' : self.tag_to_string(item.find('title')), 'url' : purl, @@ -266,18 +265,23 @@ class DefaultProfile(object): added_articles[title].append(d['title']) if delta > self.oldest_article*3600*24: continue + except Exception, err: if self.verbose: self.logger.exception('Error parsing article:\n%s'%(item,)) continue try: - desc = item.find('description') + desc = '' + for c in item.findAll('description'): + desc = self.tag_to_string(c) + if desc: + break d['description'] = self.process_html_description(desc) if self.html_description else desc.string except: d['description'] = '' articles[title].append(d) articles[title].sort(key=operator.itemgetter('timestamp'), reverse=True) - articles[title][self.max_articles_per_feed:] = [] + articles[title] = articles[title][:self.max_articles_per_feed+1] for item in articles[title]: item.pop('timestamp') if not articles[title]: