From fb5634ab4a122eab47ca7cc9d0661dc8a353b302 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 11 Nov 2009 11:11:04 -0700 Subject: [PATCH] IGN:News download: Fix logging during feed parsing --- resources/recipes/infobae.recipe | 8 +++++--- resources/recipes/zaobao.recipe | 3 ++- src/calibre/web/feeds/__init__.py | 19 ++++++++++++------- src/calibre/web/feeds/news.py | 4 +++- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/resources/recipes/infobae.recipe b/resources/recipes/infobae.recipe index 79937ce4f7..cda9bf83d2 100644 --- a/resources/recipes/infobae.recipe +++ b/resources/recipes/infobae.recipe @@ -6,6 +6,8 @@ __copyright__ = '2008-2009, Darko Miletic ' infobae.com ''' import re +import urllib, urlparse + from calibre.web.feeds.news import BasicNewsRecipe class Infobae(BasicNewsRecipe): @@ -61,11 +63,11 @@ class Infobae(BasicNewsRecipe): # return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id def get_article_url(self, article): - import urllib, urlparse - parts = list(urlparse.urlparse(article.get('link'))) + ans = article.get('link').encode('utf-8') + parts = list(urlparse.urlparse(ans)) parts[2] = urllib.quote(parts[2]) ans = urlparse.urlunparse(parts) - return ans + return ans.decode('utf-8') def preprocess_html(self, soup): diff --git a/resources/recipes/zaobao.recipe b/resources/recipes/zaobao.recipe index ef4221e896..bce594bafa 100644 --- a/resources/recipes/zaobao.recipe +++ b/resources/recipes/zaobao.recipe @@ -97,7 +97,8 @@ class ZAOBAO(BasicNewsRecipe): }) pfeeds = feeds_from_index([(title, articles)], oldest_article=self.oldest_article, - max_articles_per_feed=self.max_articles_per_feed) + max_articles_per_feed=self.max_articles_per_feed, + log=self.log) self.log.debug('adding %s to feed'%(title)) for feed in pfeeds: diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py index c76ae7168e..886a825846 100644 --- a/src/calibre/web/feeds/__init__.py +++ b/src/calibre/web/feeds/__init__.py @@ -5,10 +5,11 @@ __copyright__ = '2008, Kovid Goyal ' ''' Contains the logic for parsing feeds. ''' -import time, logging, traceback, copy, re +import time, traceback, copy, re from datetime import datetime from calibre.web.feeds.feedparser import parse +from calibre.utils.logging import default_log from calibre import entity_to_unicode from lxml import html @@ -87,11 +88,12 @@ Has content : %s class Feed(object): - def __init__(self, get_article_url=lambda item: item.get('link', None)): + def __init__(self, get_article_url=lambda item: item.get('link', None), + log=default_log): ''' Parse a feed into articles. ''' - self.logger = logging.getLogger('feeds2disk') + self.logger = log self.get_article_url = get_article_url def populate_from_feed(self, feed, title=None, oldest_article=7, @@ -288,15 +290,18 @@ class FeedCollection(list): def feed_from_xml(raw_xml, title=None, oldest_article=7, - max_articles_per_feed=100, get_article_url=lambda item: item.get('link', None)): + max_articles_per_feed=100, + get_article_url=lambda item: item.get('link', None), + log=default_log): feed = parse(raw_xml) - pfeed = Feed(get_article_url=get_article_url) + pfeed = Feed(get_article_url=get_article_url, log=log) pfeed.populate_from_feed(feed, title=title, oldest_article=oldest_article, max_articles_per_feed=max_articles_per_feed) return pfeed -def feeds_from_index(index, oldest_article=7, max_articles_per_feed=100): +def feeds_from_index(index, oldest_article=7, max_articles_per_feed=100, + log=default_log): ''' @param index: A parsed index as returned by L{BasicNewsRecipe.parse_index}. @return: A list of L{Feed} objects. @@ -304,7 +309,7 @@ def feeds_from_index(index, oldest_article=7, max_articles_per_feed=100): ''' feeds = [] for title, articles in index: - pfeed = Feed() + pfeed = Feed(log=log) pfeed.populate_from_preparsed_feed(title, articles, oldest_article=oldest_article, max_articles_per_feed=max_articles_per_feed) feeds.append(pfeed) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 43c1dfd5f2..08ccb1f708 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -704,7 +704,8 @@ class BasicNewsRecipe(Recipe): self.report_progress(0, _('Fetching feeds...')) try: feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article, - max_articles_per_feed=self.max_articles_per_feed) + max_articles_per_feed=self.max_articles_per_feed, + log=self.log) self.report_progress(0, _('Got feeds from index page')) except NotImplementedError: feeds = self.parse_feeds() @@ -1028,6 +1029,7 @@ class BasicNewsRecipe(Recipe): with closing(self.browser.open(url)) as f: parsed_feeds.append(feed_from_xml(f.read(), title=title, + log=self.log, oldest_article=self.oldest_article, max_articles_per_feed=self.max_articles_per_feed, get_article_url=self.get_article_url))