IGN:News download: Fix logging during feed parsing

commit fb5634ab4a
parent 14156737ce
@@ -6,6 +6,8 @@ __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
 infobae.com
 '''
 import re
+import urllib, urlparse
+
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class Infobae(BasicNewsRecipe):
@@ -61,11 +63,11 @@ class Infobae(BasicNewsRecipe):
     # return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id
 
     def get_article_url(self, article):
-        import urllib, urlparse
-        parts = list(urlparse.urlparse(article.get('link')))
+        ans = article.get('link').encode('utf-8')
+        parts = list(urlparse.urlparse(ans))
         parts[2] = urllib.quote(parts[2])
         ans = urlparse.urlunparse(parts)
-        return ans
+        return ans.decode('utf-8')
 
 
     def preprocess_html(self, soup):

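For reference, the rewritten get_article_url simply percent-encodes the path component of the article link. A minimal illustration with a made-up URL, using the same Python 2 urllib/urlparse calls as the recipe above:

    import urllib, urlparse

    link = u'http://www.infobae.com/notas/una nota.html'   # hypothetical link with a space in the path
    ans = link.encode('utf-8')                             # work on bytes, as the recipe does
    parts = list(urlparse.urlparse(ans))
    parts[2] = urllib.quote(parts[2])                      # quote only the path component
    print urlparse.urlunparse(parts).decode('utf-8')
    # prints: http://www.infobae.com/notas/una%20nota.html
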
@@ -97,7 +97,8 @@ class ZAOBAO(BasicNewsRecipe):
                 })
 
         pfeeds = feeds_from_index([(title, articles)], oldest_article=self.oldest_article,
-                    max_articles_per_feed=self.max_articles_per_feed)
+                    max_articles_per_feed=self.max_articles_per_feed,
+                    log=self.log)
 
         self.log.debug('adding %s to feed'%(title))
         for feed in pfeeds:

@@ -5,10 +5,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 Contains the logic for parsing feeds.
 '''
-import time, logging, traceback, copy, re
+import time, traceback, copy, re
 from datetime import datetime
 
 from calibre.web.feeds.feedparser import parse
+from calibre.utils.logging import default_log
 from calibre import entity_to_unicode
 from lxml import html
 
@@ -87,11 +88,12 @@ Has content : %s
 
 class Feed(object):
 
-    def __init__(self, get_article_url=lambda item: item.get('link', None)):
+    def __init__(self, get_article_url=lambda item: item.get('link', None),
+                 log=default_log):
         '''
         Parse a feed into articles.
         '''
-        self.logger = logging.getLogger('feeds2disk')
+        self.logger = log
         self.get_article_url = get_article_url
 
     def populate_from_feed(self, feed, title=None, oldest_article=7,

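A minimal sketch of what this buys a caller (not part of the commit): the Feed constructor now accepts any calibre log object and uses it for all of its messages, with calibre.utils.logging.default_log as the fallback when nothing is passed:

    from calibre.web.feeds import Feed
    from calibre.utils.logging import default_log

    feed = Feed(log=default_log)           # explicit here; same as omitting the argument
    feed.logger.debug('parsing feed')      # goes through the injected log, not logging.getLogger
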
@@ -288,15 +290,18 @@ class FeedCollection(list):
 
 
 def feed_from_xml(raw_xml, title=None, oldest_article=7,
-                  max_articles_per_feed=100, get_article_url=lambda item: item.get('link', None)):
+                  max_articles_per_feed=100,
+                  get_article_url=lambda item: item.get('link', None),
+                  log=default_log):
     feed = parse(raw_xml)
-    pfeed = Feed(get_article_url=get_article_url)
+    pfeed = Feed(get_article_url=get_article_url, log=log)
     pfeed.populate_from_feed(feed, title=title,
                              oldest_article=oldest_article,
                              max_articles_per_feed=max_articles_per_feed)
     return pfeed
 
-def feeds_from_index(index, oldest_article=7, max_articles_per_feed=100):
+def feeds_from_index(index, oldest_article=7, max_articles_per_feed=100,
+                     log=default_log):
     '''
     @param index: A parsed index as returned by L{BasicNewsRecipe.parse_index}.
     @return: A list of L{Feed} objects.
@@ -304,7 +309,7 @@ def feeds_from_index(index, oldest_article=7, max_articles_per_feed=100):
     '''
     feeds = []
     for title, articles in index:
-        pfeed = Feed()
+        pfeed = Feed(log=log)
        pfeed.populate_from_preparsed_feed(title, articles, oldest_article=oldest_article,
                                           max_articles_per_feed=max_articles_per_feed)
        feeds.append(pfeed)

@@ -704,7 +704,8 @@ class BasicNewsRecipe(Recipe):
         self.report_progress(0, _('Fetching feeds...'))
         try:
             feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article,
-                                     max_articles_per_feed=self.max_articles_per_feed)
+                                     max_articles_per_feed=self.max_articles_per_feed,
+                                     log=self.log)
             self.report_progress(0, _('Got feeds from index page'))
         except NotImplementedError:
             feeds = self.parse_feeds()

@@ -1028,6 +1029,7 @@ class BasicNewsRecipe(Recipe):
                 with closing(self.browser.open(url)) as f:
                     parsed_feeds.append(feed_from_xml(f.read(),
                                           title=title,
+                                          log=self.log,
                                           oldest_article=self.oldest_article,
                                           max_articles_per_feed=self.max_articles_per_feed,
                                           get_article_url=self.get_article_url))
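As a usage sketch, a hypothetical recipe needs no changes to benefit: feed-parsing messages now flow through the recipe's own self.log, and recipe code can log alongside them, as the ZAOBAO recipe above already does:

    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleRecipe(BasicNewsRecipe):              # hypothetical recipe, placeholder feed URL
        title = 'Example'
        feeds = [('News', 'http://example.com/rss')]

        def get_article_url(self, article):
            url = BasicNewsRecipe.get_article_url(self, article)
            self.log.debug('resolved article url: %s' % url)
            return url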