mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
IGN:News download: Fix logging during feed parsing
This commit is contained in:
parent
14156737ce
commit
fb5634ab4a
@ -6,6 +6,8 @@ __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
infobae.com
|
||||
'''
|
||||
import re
|
||||
import urllib, urlparse
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Infobae(BasicNewsRecipe):
|
||||
@ -61,11 +63,11 @@ class Infobae(BasicNewsRecipe):
|
||||
# return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id
|
||||
|
||||
def get_article_url(self, article):
|
||||
import urllib, urlparse
|
||||
parts = list(urlparse.urlparse(article.get('link')))
|
||||
ans = article.get('link').encode('utf-8')
|
||||
parts = list(urlparse.urlparse(ans))
|
||||
parts[2] = urllib.quote(parts[2])
|
||||
ans = urlparse.urlunparse(parts)
|
||||
return ans
|
||||
return ans.decode('utf-8')
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
@ -97,7 +97,8 @@ class ZAOBAO(BasicNewsRecipe):
|
||||
})
|
||||
|
||||
pfeeds = feeds_from_index([(title, articles)], oldest_article=self.oldest_article,
|
||||
max_articles_per_feed=self.max_articles_per_feed)
|
||||
max_articles_per_feed=self.max_articles_per_feed,
|
||||
log=self.log)
|
||||
|
||||
self.log.debug('adding %s to feed'%(title))
|
||||
for feed in pfeeds:
|
||||
|
@ -5,10 +5,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
'''
|
||||
Contains the logic for parsing feeds.
|
||||
'''
|
||||
import time, logging, traceback, copy, re
|
||||
import time, traceback, copy, re
|
||||
from datetime import datetime
|
||||
|
||||
from calibre.web.feeds.feedparser import parse
|
||||
from calibre.utils.logging import default_log
|
||||
from calibre import entity_to_unicode
|
||||
from lxml import html
|
||||
|
||||
@ -87,11 +88,12 @@ Has content : %s
|
||||
|
||||
class Feed(object):
|
||||
|
||||
def __init__(self, get_article_url=lambda item: item.get('link', None)):
|
||||
def __init__(self, get_article_url=lambda item: item.get('link', None),
|
||||
log=default_log):
|
||||
'''
|
||||
Parse a feed into articles.
|
||||
'''
|
||||
self.logger = logging.getLogger('feeds2disk')
|
||||
self.logger = log
|
||||
self.get_article_url = get_article_url
|
||||
|
||||
def populate_from_feed(self, feed, title=None, oldest_article=7,
|
||||
@ -288,15 +290,18 @@ class FeedCollection(list):
|
||||
|
||||
|
||||
def feed_from_xml(raw_xml, title=None, oldest_article=7,
|
||||
max_articles_per_feed=100, get_article_url=lambda item: item.get('link', None)):
|
||||
max_articles_per_feed=100,
|
||||
get_article_url=lambda item: item.get('link', None),
|
||||
log=default_log):
|
||||
feed = parse(raw_xml)
|
||||
pfeed = Feed(get_article_url=get_article_url)
|
||||
pfeed = Feed(get_article_url=get_article_url, log=log)
|
||||
pfeed.populate_from_feed(feed, title=title,
|
||||
oldest_article=oldest_article,
|
||||
max_articles_per_feed=max_articles_per_feed)
|
||||
return pfeed
|
||||
|
||||
def feeds_from_index(index, oldest_article=7, max_articles_per_feed=100):
|
||||
def feeds_from_index(index, oldest_article=7, max_articles_per_feed=100,
|
||||
log=default_log):
|
||||
'''
|
||||
@param index: A parsed index as returned by L{BasicNewsRecipe.parse_index}.
|
||||
@return: A list of L{Feed} objects.
|
||||
@ -304,7 +309,7 @@ def feeds_from_index(index, oldest_article=7, max_articles_per_feed=100):
|
||||
'''
|
||||
feeds = []
|
||||
for title, articles in index:
|
||||
pfeed = Feed()
|
||||
pfeed = Feed(log=log)
|
||||
pfeed.populate_from_preparsed_feed(title, articles, oldest_article=oldest_article,
|
||||
max_articles_per_feed=max_articles_per_feed)
|
||||
feeds.append(pfeed)
|
||||
|
@ -704,7 +704,8 @@ class BasicNewsRecipe(Recipe):
|
||||
self.report_progress(0, _('Fetching feeds...'))
|
||||
try:
|
||||
feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article,
|
||||
max_articles_per_feed=self.max_articles_per_feed)
|
||||
max_articles_per_feed=self.max_articles_per_feed,
|
||||
log=self.log)
|
||||
self.report_progress(0, _('Got feeds from index page'))
|
||||
except NotImplementedError:
|
||||
feeds = self.parse_feeds()
|
||||
@ -1028,6 +1029,7 @@ class BasicNewsRecipe(Recipe):
|
||||
with closing(self.browser.open(url)) as f:
|
||||
parsed_feeds.append(feed_from_xml(f.read(),
|
||||
title=title,
|
||||
log=self.log,
|
||||
oldest_article=self.oldest_article,
|
||||
max_articles_per_feed=self.max_articles_per_feed,
|
||||
get_article_url=self.get_article_url))
|
||||
|
Loading…
x
Reference in New Issue
Block a user