#!/usr/bin/env python2

__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'

'''
espn.com
'''
import re

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import TemporaryFile


class ESPN(BasicNewsRecipe):
    """Recipe that builds an ESPN sports-news download from ESPN RSS feeds.

    Articles are fetched through their "print" URLs where one can be derived
    (see ``print_version``). If both a username and a password are configured,
    ``get_browser`` logs in to espn.go.com before fetching.
    """

    title = 'ESPN'
    description = 'Sports news'
    __author__ = 'Kovid Goyal and Sujata Raman'
    language = 'en'
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    # Credentials are optional: get_browser() only logs in when both are set.
    needs_subscription = 'optional'
    encoding = 'ISO-8859-1'

    remove_tags_before = dict(name='font', attrs={'class': 'date'})
    center_navbar = False
    remove_tags = [
        dict(name='font', attrs={'class': 'footer'}),
        dict(name='hr', noshade='noshade'),
        dict(name='img', src='/winnercomm/horseracing/DRF.jpg'),
    ]

    extra_css = '''
        body{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:x-small; font-weight:normal;}
        .subhead{color:#666666;font-family:Verdana,sans-serif; font-size:x-small; font-weight:bold;}
        .clearfix{font-family:Verdana,sans-serif; font-size:xx-small; }
        .date{ font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:xx-small;color:#7A7A7A;}
        .byline{ font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:xx-small;color:#666666;}
        .headline{font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:large; font-weight:bold;}
        '''

    # Only the first entry carries an explicit title; the remaining entries
    # are bare feed URLs.
    feeds = [
        ('Top Headlines', 'http://sports.espn.go.com/espn/rss/news'),
        'http://sports.espn.go.com/espn/rss/nfl/news',
        'http://sports.espn.go.com/espn/rss/nba/news',
        'http://sports.espn.go.com/espn/rss/mlb/news',
        'http://sports.espn.go.com/espn/rss/nhl/news',
        'http://sports.espn.go.com/espn/rss/golf/news',
        'http://sports.espn.go.com/espn/rss/rpm/news',
        'http://sports.espn.go.com/espn/rss/tennis/news',
        'http://sports.espn.go.com/espn/rss/boxing/news',
        'http://soccernet.espn.go.com/rss/news',
        'http://sports.espn.go.com/espn/rss/ncb/news',
        'http://sports.espn.go.com/espn/rss/ncf/news',
        'http://sports.espn.go.com/espn/rss/ncaa/news',
        'http://sports.espn.go.com/espn/rss/outdoors/news',
        # 'http://sports.espn.go.com/espn/rss/bassmaster/news',
        'http://sports.espn.go.com/espn/rss/oly/news',
        'http://sports.espn.go.com/espn/rss/horse/news'
    ]

    def preprocess_html(self, soup):
        """Blank the inline style of every ``<div>`` whose style mentions 'px'.

        :param soup: parsed article document; modified in place.
        :return: the same ``soup`` object.
        """
        # style=True selects only divs that actually carry a style attribute,
        # the same filter postprocess_html uses (replaces the legacy has_key()).
        for div in soup.findAll('div', style=True):
            if 'px' in div['style']:
                div['style'] = ''
        return soup

    def postprocess_html(self, soup, first_fetch):
        """Replace 'center' with 'left' in the inline style of every ``<div>``.

        :param soup: parsed article document; modified in place.
        :param first_fetch: unused by this implementation.
        :return: the same ``soup`` object.
        """
        for div in soup.findAll('div', style=True):
            div['style'] = div['style'].replace('center', 'left')
        return soup

    def get_browser(self):
        """Return the recipe's browser, logged in when credentials are set.

        Without both ``self.username`` and ``self.password`` the browser from
        ``BasicNewsRecipe.get_browser`` is returned untouched. Otherwise the
        login page is downloaded, rewritten, saved to a temporary file and
        re-opened locally so that its first form can be filled in and
        submitted.

        :return: the browser object.
        """
        br = BasicNewsRecipe.get_browser(self)
        if self.username and self.password:
            # Refresh handling is switched off for the duration of the login
            # and re-enabled afterwards.
            br.set_handle_refresh(False)
            url = ('https://r.espn.go.com/members/v3_1/login')
            raw = br.open(url).read()
            # NOTE(review): this pattern looks truncated in this copy of the
            # file (the raw string was split across a line break with nothing
            # after '(?s)'). As written it only strips newlines. Since the
            # first form of the rewritten page is selected below, presumably
            # the original pattern removed markup preceding the login form --
            # restore it from the upstream recipe before relying on login.
            raw = re.sub(r'(?s)\n', '', raw)
            with TemporaryFile(suffix='.htm') as fname:
                with open(fname, 'wb') as f:
                    f.write(raw)
                br.open_local_file(fname)

            # next(iter(...)) works whether forms() yields an iterator or a
            # list, unlike the Python-2-only ``.next()`` method.
            br.form = next(iter(br.forms()))
            br.form.find_control(
                name='username', type='text').value = self.username
            br.form['password'] = self.password
            br.submit().read()
            br.open('http://espn.go.com').read()
            br.set_handle_refresh(True)
        return br

    def get_article_url(self, article):
        """Return the article's ``guid`` entry, or ``None`` if it has none.

        :param article: mapping-like feed entry supporting ``.get``.
        """
        return article.get('guid', None)

    def print_version(self, url):
        """Map an article URL to its print-friendly URL.

        * URLs containing 'eticket': everything from the first '&' onwards is
          dropped and 'story?' becomes 'print?'.
        * ``story?id=<digits>`` URLs that mention neither 'soccernet' nor
          'bassmaster': rewritten to the sports.espn.go.com print endpoint.
        * 'soccernet' URLs containing ``/id/<digits>/``: rewritten to the
          soccernet print endpoint.

        :param url: article URL.
        :return: the print URL, or ``None`` when none of the rules apply.
        """
        if 'eticket' in url:
            return url.partition('&')[0].replace('story?', 'print?')

        match = re.search(r'story\?(id=\d+)', url)
        if match and 'soccernet' not in url and 'bassmaster' not in url:
            return 'http://sports.espn.go.com/espn/print?' + match.group(1) + '&type=story'

        if 'soccernet' in url:
            match = re.search(r'/id/(\d+)/', url)
            if match:
                return 'http://soccernet.espn.go.com/print?id=%s&type=story' % match.group(1)

        # No rule matched. 'bassmaster' URLs deliberately end up here too:
        # they are excluded above and have no rewrite of their own.
        return None