Add recipe for ESPN

This commit is contained in:
Kovid Goyal 2008-05-21 12:37:24 -07:00
parent 8d3b8a595e
commit 3773631cea
6 changed files with 94 additions and 10 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.8 KiB

View File

@ -82,6 +82,9 @@ class BasicNewsRecipe(object, LoggingInterface):
#: @type: boolean
needs_subscription = False
#: If True, the navigation bar is center-aligned; otherwise it is left-aligned.
center_navbar = True
#: Specify an override encoding for sites that have an incorrect
#: charset specification. The most common case is a site that declares
#: ``latin1`` but actually uses ``cp1252``. If None, try to detect the encoding.
@ -451,7 +454,7 @@ class BasicNewsRecipe(object, LoggingInterface):
if body is not None:
templ = self.navbar.generate(False, f, a, feed_len,
not self.has_single_feed,
url, __appname__)
url, __appname__, center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(0, elem)
@ -522,10 +525,13 @@ class BasicNewsRecipe(object, LoggingInterface):
bn = bn.rpartition('/')[-1]
if bn:
img = os.path.join(imgdir, 'feed_image_%d%s'%(self.image_counter, os.path.splitext(bn)))
open(img, 'wb').write(self.browser.open(feed.image_url).read())
self.image_counter += 1
feed.image_url = img
self.image_map[feed.image_url] = img
try:
open(img, 'wb').write(self.browser.open(feed.image_url).read())
self.image_counter += 1
feed.image_url = img
self.image_map[feed.image_url] = img
except:
pass
templ = templates.FeedTemplate()
return templ.generate(feed, self.description_limiter).render(doctype='xhtml')
@ -606,6 +612,8 @@ class BasicNewsRecipe(object, LoggingInterface):
url = self.print_version(article.url)
except NotImplementedError:
url = article.url
if not url:
continue
func, arg = (self.fetch_embedded_article, article) if self.use_embedded_content else \
(self.fetch_article, url)
@ -703,7 +711,8 @@ class BasicNewsRecipe(object, LoggingInterface):
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, __appname__, prefix=prefix)
a.orig_url, __appname__, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
open(last, 'wb').write(unicode(soup).encode('utf-8'))

View File

@ -7,7 +7,7 @@ Builtin recipes.
recipes = [
'newsweek', 'atlantic', 'economist', 'portfolio',
'nytimes', 'usatoday', 'outlook_india', 'bbc', 'greader', 'wsj',
'wired', 'globe_and_mail', 'smh',
'wired', 'globe_and_mail', 'smh', 'espn',
]
import re, imp, inspect, time

View File

@ -0,0 +1,74 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
espn.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class ESPN(BasicNewsRecipe):
    '''
    Recipe that collects the ESPN RSS news feeds.

    Articles are fetched through their print-friendly URLs (see
    :meth:`print_version`), and the downloaded HTML has inline ``div``
    styles adjusted before and after processing.
    '''

    title = 'ESPN'
    description = 'Sports news'
    __author__ = 'Kovid Goyal'
    # NOTE(review): presumably tells the framework to ask for a
    # username/password; get_browser() below tolerates both being None.
    needs_subscription = True

    # Tag specifications consumed by the base class.
    # NOTE(review): exact matching semantics live in BasicNewsRecipe, which
    # is not visible here -- confirm before changing the dict shapes.
    remove_tags = [
        {'name': 'font', 'attrs': {'class': 'footer'}},
        {'name': 'hr', 'noshade': 'noshade'},
    ]
    remove_tags_before = {'name': 'font', 'attrs': {'class': 'date'}}

    # Left-align the navigation bar instead of centering it.
    center_navbar = False
    # NOTE(review): passed through to html2lrf; meaning of a base font size
    # of '0' is defined by that tool -- verify there.
    html2lrf_options = ['--base-font-size', '0']

    # The first entry carries an explicit title; the rest are bare feed URLs.
    feeds = [
        ('Top Headlines', 'http://sports.espn.go.com/espn/rss/news'),
        'http://sports.espn.go.com/espn/rss/nfl/news',
        'http://sports.espn.go.com/espn/rss/nba/news',
        'http://sports.espn.go.com/espn/rss/mlb/news',
        'http://sports.espn.go.com/espn/rss/nhl/news',
        'http://sports.espn.go.com/espn/rss/golf/news',
        'http://sports.espn.go.com/espn/rss/rpm/news',
        'http://sports.espn.go.com/espn/rss/tennis/news',
        'http://sports.espn.go.com/espn/rss/boxing/news',
        'http://soccernet.espn.go.com/rss/news',
        'http://sports.espn.go.com/espn/rss/ncb/news',
        'http://sports.espn.go.com/espn/rss/ncf/news',
        'http://sports.espn.go.com/espn/rss/ncaa/news',
        'http://sports.espn.go.com/espn/rss/outdoors/news',
        'http://sports.espn.go.com/espn/rss/bassmaster/news',
        'http://sports.espn.go.com/espn/rss/oly/news',
        'http://sports.espn.go.com/espn/rss/horse/news',
    ]

    def get_browser(self):
        '''
        Return the base recipe's browser, logged in to espn.com when both
        ``self.username`` and ``self.password`` are set.

        Refresh handling is switched off while the login form is submitted
        and switched back on before the browser is returned.
        '''
        br = BasicNewsRecipe.get_browser()
        br.set_handle_refresh(False)
        have_credentials = (self.username is not None and
                            self.password is not None)
        if have_credentials:
            br.open('http://espn.com')
            # The login form is taken to be the first form on the page.
            br.select_form(nr=0)
            user_field = br.form.find_control(name='username', type='text')
            user_field.value = self.username
            br.form['password'] = self.password
            br.submit()
        br.set_handle_refresh(True)
        return br

    def print_version(self, url):
        '''
        Map an article URL to its print-friendly URL.

        :param url: Article URL taken from a feed.
        :return: The print URL, or ``None`` when `url` is neither an
                 ``eticket`` link nor contains ``story?id=<digits>``.
        '''
        if 'eticket' in url:
            # Keep only the part before the first '&' and switch the page.
            head = url.partition('&')[0]
            return head.replace('story?', 'print?')
        found = re.search(r'story\?(id=\d+)', url)
        if found is None:
            return None
        story_id = found.group(1)
        return 'http://sports.espn.go.com/espn/print?' + story_id + '&type=story'

    def preprocess_html(self, soup):
        '''
        Blank the ``style`` attribute of every ``div`` whose style
        mentions ``px``, then return `soup`.
        '''
        for div in soup.findAll('div'):
            if not div.has_key('style'):
                continue
            if 'px' in div['style']:
                div['style'] = ''
        return soup

    def postprocess_html(self, soup):
        '''
        Replace ``center`` with ``left`` in the ``style`` attribute of
        every styled ``div``, then return `soup`.
        '''
        for div in soup.findAll('div', style=True):
            current_style = div['style']
            div['style'] = current_style.replace('center', 'left')
        return soup

View File

@ -53,7 +53,7 @@ class NavBarTemplate(Template):
>
<body>
<div class="navbar" style="text-align:center; font-family:monospace; font-size:8pt">
<div class="navbar" style="text-align:${'center' if center else 'left'}; font-family:monospace; font-size:8pt">
<hr py:if="bottom" />
<p py:if="bottom" style="text-align:left">
This article was downloaded by <b>${__appname__}</b> from <a href="${url}">${url}</a>
@ -80,13 +80,14 @@ class NavBarTemplate(Template):
''')
def generate(self, bottom, feed, art, number_of_articles_in_feed,
two_levels, url, __appname__, prefix=''):
two_levels, url, __appname__, prefix='', center=True):
if prefix and not prefix.endswith('/'):
prefix += '/'
return Template.generate(self, bottom=bottom, art=art, feed=feed,
num=number_of_articles_in_feed,
two_levels=two_levels, url=url,
__appname__=__appname__, prefix=prefix)
__appname__=__appname__, prefix=prefix,
center=center)
class IndexTemplate(Template):