This commit is contained in:
Kovid Goyal 2012-04-06 09:00:37 +05:30
parent 768928a5d3
commit 921769bd6a
3 changed files with 50 additions and 56 deletions

View File

@ -4,10 +4,8 @@ __copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
www.buenosairesherald.com
'''
import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class BuenosAiresHerald(BasicNewsRecipe):
title = 'Buenos Aires Herald'
@ -62,7 +60,7 @@ class BuenosAiresHerald(BasicNewsRecipe):
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
for item in soup.findAll('div', attrs={'class':'nota_texto_seccion'}):

View File

@ -1,4 +1,3 @@
import string, re
import time
import traceback
# above for debugging via stack
@ -6,13 +5,10 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
# Allows the Python soup converter, which makes parsing easier.
from calibre.ebooks.BeautifulSoup import BeautifulSoup
import os, time, traceback, re, urlparse, sys, cStringIO
from collections import defaultdict
from functools import partial
from contextlib import nested, closing
import os
from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
from calibre.web.feeds import feeds_from_index
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
@ -214,12 +210,12 @@ class OReillyPremium(BasicNewsRecipe):
def build_index(self):
print("In OReilly build_index()\n\n")
feedsRSS = []
self.report_progress(0, _('Fetching feeds...'))
self.report_progress(0, ('Fetching feeds...'))
#try:
feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article,
max_articles_per_feed=self.max_articles_per_feed,
log=self.log)
self.report_progress(0, _('Got feeds from index page'))
self.report_progress(0, ('Got feeds from index page'))
#except NotImplementedError:
# feeds = self.parse_feeds()
# Now add regular feeds.
@ -235,9 +231,9 @@ class OReillyPremium(BasicNewsRecipe):
#feeds = FeedCollection(feeds)
self.report_progress(0, _('Trying to download cover...'))
self.report_progress(0, ('Trying to download cover...'))
self.download_cover()
self.report_progress(0, _('Generating masthead...'))
self.report_progress(0, ('Generating masthead...'))
self.masthead_path = None
try:
@ -317,7 +313,7 @@ class OReillyPremium(BasicNewsRecipe):
tp.putRequest(req, block=True, timeout=0)
self.report_progress(0, _('Starting download [%d thread(s)]...')%self.simultaneous_downloads)
self.report_progress(0, ('Starting download [%d thread(s)]...')%self.simultaneous_downloads)
while True:
try:
tp.poll()
@ -331,7 +327,7 @@ class OReillyPremium(BasicNewsRecipe):
with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
fi.write(html)
self.create_opf(feeds)
self.report_progress(1, _('Feeds downloaded to %s')%index)
self.report_progress(1, ('Feeds downloaded to %s')%index)
return index

View File

@ -1,9 +1,9 @@
# Test with "\Program Files\Calibre2\ebook-convert.exe" RealClear.recipe .epub --test -vv --debug-pipeline debug
import string, re
import re
import time
from urlparse import urlparse
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
from calibre.ebooks.BeautifulSoup import NavigableString
class RealClear(BasicNewsRecipe):
title = u'Real Clear'
@ -190,7 +190,7 @@ class RealClear(BasicNewsRecipe):
def parse_index(self):
# Parse the page into Python Soup
articleList = []
#articleList = []
ans = []
feedsCount = len(self.feedsets)
for x in range(0,feedsCount): # should be ,4