mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #4649 (Sports Illustrated Recipe)
This commit is contained in:
parent
839b5618cb
commit
abf95b3511
@ -1,6 +1,5 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
#from random import randint
|
||||
from urllib import quote
|
||||
|
||||
class SportsIllustratedRecipe(BasicNewsRecipe) :
|
||||
@ -9,12 +8,11 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
|
||||
__license__ = 'GPL v3'
|
||||
language = 'en'
|
||||
description = 'Sports Illustrated'
|
||||
version = 1
|
||||
version = 3
|
||||
title = u'Sports Illustrated'
|
||||
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
#template_css = ''
|
||||
use_embedded_content = False
|
||||
|
||||
INDEX = 'http://sportsillustrated.cnn.com/'
|
||||
@ -22,13 +20,39 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
|
||||
def parse_index(self):
|
||||
answer = []
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
# Find the link to the current issue on the front page.
|
||||
# Find the link to the current issue on the front page. SI Cover
|
||||
cover = soup.find('img', attrs = {'alt' : 'Read All Articles', 'style' : 'vertical-align:bottom;'})
|
||||
if cover:
|
||||
currentIssue = cover.parent['href']
|
||||
if currentIssue:
|
||||
# Open the index of current issue
|
||||
|
||||
index = self.index_to_soup(currentIssue)
|
||||
self.log('\tLooking for current issue in: ' + currentIssue)
|
||||
# Now let us see if they updated their frontpage
|
||||
nav = index.find('div', attrs = {'class': 'siv_trav_top'})
|
||||
if nav:
|
||||
img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_next_v2.jpg'})
|
||||
if img:
|
||||
parent = img.parent
|
||||
if parent.name == 'a':
|
||||
# They didn't update their frontpage; Load the next issue from here
|
||||
href = self.INDEX + parent['href']
|
||||
index = self.index_to_soup(href)
|
||||
self.log('\tLooking for current issue in: ' + href)
|
||||
|
||||
if index.find('div', 'siv_noArticleMessage'):
|
||||
nav = index.find('div', attrs = {'class': 'siv_trav_top'})
|
||||
if nav:
|
||||
# Their frontpage points to an issue without any articles; Use the previous issue
|
||||
img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_previous_v2.jpg'})
|
||||
if img:
|
||||
parent = img.parent
|
||||
if parent.name == 'a':
|
||||
href = self.INDEX + parent['href']
|
||||
index = self.index_to_soup(href)
|
||||
self.log('\tLooking for current issue in: ' + href)
|
||||
|
||||
|
||||
# Find all articles.
|
||||
list = index.find('div', attrs = {'class' : 'siv_artList'})
|
||||
@ -69,31 +93,26 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
header = soup.find('div', attrs = {'class' : 'siv_artheader'})
|
||||
if header:
|
||||
# It's an article, prepare a container for the content
|
||||
homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
|
||||
body = homeMadeSoup.find('body')
|
||||
homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
|
||||
body = homeMadeSoup.body
|
||||
|
||||
# Find the date, title and byline
|
||||
temp = header.find('td', attrs = {'class' : 'title'})
|
||||
if temp :
|
||||
date = temp.find('div', attrs = {'class' : 'date'})
|
||||
if date:
|
||||
body.append(date)
|
||||
if temp.h1:
|
||||
body.append(temp.h1)
|
||||
if temp.h2 :
|
||||
body.append(temp.h2)
|
||||
byline = temp.find('div', attrs = {'class' : 'byline'})
|
||||
if byline:
|
||||
body.append(byline)
|
||||
# Find the date, title and byline
|
||||
temp = header.find('td', attrs = {'class' : 'title'})
|
||||
if temp :
|
||||
date = temp.find('div', attrs = {'class' : 'date'})
|
||||
if date:
|
||||
body.append(date)
|
||||
if temp.h1:
|
||||
body.append(temp.h1)
|
||||
if temp.h2 :
|
||||
body.append(temp.h2)
|
||||
byline = temp.find('div', attrs = {'class' : 'byline'})
|
||||
if byline:
|
||||
body.append(byline)
|
||||
|
||||
# Find the content
|
||||
for para in soup.findAll('div', attrs = {'class' : 'siv_artpara'}) :
|
||||
body.append(para)
|
||||
# Find the content
|
||||
for para in soup.findAll('div', attrs = {'class' : 'siv_artpara'}) :
|
||||
body.append(para)
|
||||
|
||||
return homeMadeSoup
|
||||
else :
|
||||
# It's a TOC, just return the whole lot
|
||||
return soup
|
||||
return homeMadeSoup
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user