This commit is contained in:
Kovid Goyal 2012-04-06 09:00:37 +05:30
parent 768928a5d3
commit 921769bd6a
3 changed files with 50 additions and 56 deletions

View File

@@ -4,10 +4,8 @@ __copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
www.buenosairesherald.com www.buenosairesherald.com
''' '''
import re
from calibre import strftime from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class BuenosAiresHerald(BasicNewsRecipe): class BuenosAiresHerald(BasicNewsRecipe):
title = 'Buenos Aires Herald' title = 'Buenos Aires Herald'
@@ -62,7 +60,7 @@ class BuenosAiresHerald(BasicNewsRecipe):
lfeeds = self.get_feeds() lfeeds = self.get_feeds()
for feedobj in lfeeds: for feedobj in lfeeds:
feedtitle, feedurl = feedobj feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = [] articles = []
soup = self.index_to_soup(feedurl) soup = self.index_to_soup(feedurl)
for item in soup.findAll('div', attrs={'class':'nota_texto_seccion'}): for item in soup.findAll('div', attrs={'class':'nota_texto_seccion'}):

View File

@@ -1,4 +1,3 @@
import string, re
import time import time
import traceback import traceback
# above for debugging via stack # above for debugging via stack
@@ -6,13 +5,10 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
# Allows the Python soup converter, which makes parsing easier. # Allows the Python soup converter, which makes parsing easier.
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
import os, time, traceback, re, urlparse, sys, cStringIO import os
from collections import defaultdict
from functools import partial
from contextlib import nested, closing
from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed from calibre.web.feeds import feeds_from_index
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
@@ -214,12 +210,12 @@ class OReillyPremium(BasicNewsRecipe):
def build_index(self): def build_index(self):
print("In OReilly build_index()\n\n") print("In OReilly build_index()\n\n")
feedsRSS = [] feedsRSS = []
self.report_progress(0, _('Fetching feeds...')) self.report_progress(0, ('Fetching feeds...'))
#try: #try:
feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article, feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article,
max_articles_per_feed=self.max_articles_per_feed, max_articles_per_feed=self.max_articles_per_feed,
log=self.log) log=self.log)
self.report_progress(0, _('Got feeds from index page')) self.report_progress(0, ('Got feeds from index page'))
#except NotImplementedError: #except NotImplementedError:
# feeds = self.parse_feeds() # feeds = self.parse_feeds()
# Now add regular feeds. # Now add regular feeds.
@@ -235,9 +231,9 @@ class OReillyPremium(BasicNewsRecipe):
#feeds = FeedCollection(feeds) #feeds = FeedCollection(feeds)
self.report_progress(0, _('Trying to download cover...')) self.report_progress(0, ('Trying to download cover...'))
self.download_cover() self.download_cover()
self.report_progress(0, _('Generating masthead...')) self.report_progress(0, ('Generating masthead...'))
self.masthead_path = None self.masthead_path = None
try: try:
@@ -317,7 +313,7 @@ class OReillyPremium(BasicNewsRecipe):
tp.putRequest(req, block=True, timeout=0) tp.putRequest(req, block=True, timeout=0)
self.report_progress(0, _('Starting download [%d thread(s)]...')%self.simultaneous_downloads) self.report_progress(0, ('Starting download [%d thread(s)]...')%self.simultaneous_downloads)
while True: while True:
try: try:
tp.poll() tp.poll()
@@ -331,7 +327,7 @@ class OReillyPremium(BasicNewsRecipe):
with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi: with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
fi.write(html) fi.write(html)
self.create_opf(feeds) self.create_opf(feeds)
self.report_progress(1, _('Feeds downloaded to %s')%index) self.report_progress(1, ('Feeds downloaded to %s')%index)
return index return index

View File

@@ -1,9 +1,9 @@
# Test with "\Program Files\Calibre2\ebook-convert.exe" RealClear.recipe .epub --test -vv --debug-pipeline debug # Test with "\Program Files\Calibre2\ebook-convert.exe" RealClear.recipe .epub --test -vv --debug-pipeline debug
import string, re import re
import time import time
from urlparse import urlparse from urlparse import urlparse
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString from calibre.ebooks.BeautifulSoup import NavigableString
class RealClear(BasicNewsRecipe): class RealClear(BasicNewsRecipe):
title = u'Real Clear' title = u'Real Clear'
@@ -190,7 +190,7 @@ class RealClear(BasicNewsRecipe):
def parse_index(self): def parse_index(self):
# Parse the page into Python Soup # Parse the page into Python Soup
articleList = [] #articleList = []
ans = [] ans = []
feedsCount = len(self.feedsets) feedsCount = len(self.feedsets)
for x in range(0,feedsCount): # should be ,4 for x in range(0,feedsCount): # should be ,4