mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #4649 (Sports Illustrated Recipe)
This commit is contained in:
parent
839b5618cb
commit
abf95b3511
@@ -1,6 +1,5 @@
|
|||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
#from random import randint
|
|
||||||
from urllib import quote
|
from urllib import quote
|
||||||
|
|
||||||
class SportsIllustratedRecipe(BasicNewsRecipe) :
|
class SportsIllustratedRecipe(BasicNewsRecipe) :
|
||||||
@@ -9,12 +8,11 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
description = 'Sports Illustrated'
|
description = 'Sports Illustrated'
|
||||||
version = 1
|
version = 3
|
||||||
title = u'Sports Illustrated'
|
title = u'Sports Illustrated'
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
#template_css = ''
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
|
||||||
INDEX = 'http://sportsillustrated.cnn.com/'
|
INDEX = 'http://sportsillustrated.cnn.com/'
|
||||||
@@ -22,13 +20,39 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
|
|||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
answer = []
|
answer = []
|
||||||
soup = self.index_to_soup(self.INDEX)
|
soup = self.index_to_soup(self.INDEX)
|
||||||
# Find the link to the current issue on the front page.
|
# Find the link to the current issue on the front page. SI Cover
|
||||||
cover = soup.find('img', attrs = {'alt' : 'Read All Articles', 'style' : 'vertical-align:bottom;'})
|
cover = soup.find('img', attrs = {'alt' : 'Read All Articles', 'style' : 'vertical-align:bottom;'})
|
||||||
if cover:
|
if cover:
|
||||||
currentIssue = cover.parent['href']
|
currentIssue = cover.parent['href']
|
||||||
if currentIssue:
|
if currentIssue:
|
||||||
# Open the index of current issue
|
# Open the index of current issue
|
||||||
|
|
||||||
index = self.index_to_soup(currentIssue)
|
index = self.index_to_soup(currentIssue)
|
||||||
|
self.log('\tLooking for current issue in: ' + currentIssue)
|
||||||
|
# Now let us see if they updated their frontpage
|
||||||
|
nav = index.find('div', attrs = {'class': 'siv_trav_top'})
|
||||||
|
if nav:
|
||||||
|
img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_next_v2.jpg'})
|
||||||
|
if img:
|
||||||
|
parent = img.parent
|
||||||
|
if parent.name == 'a':
|
||||||
|
# They didn't update their frontpage; Load the next issue from here
|
||||||
|
href = self.INDEX + parent['href']
|
||||||
|
index = self.index_to_soup(href)
|
||||||
|
self.log('\tLooking for current issue in: ' + href)
|
||||||
|
|
||||||
|
if index.find('div', 'siv_noArticleMessage'):
|
||||||
|
nav = index.find('div', attrs = {'class': 'siv_trav_top'})
|
||||||
|
if nav:
|
||||||
|
# Their frontpage points to an issue without any articles; Use the previous issue
|
||||||
|
img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_previous_v2.jpg'})
|
||||||
|
if img:
|
||||||
|
parent = img.parent
|
||||||
|
if parent.name == 'a':
|
||||||
|
href = self.INDEX + parent['href']
|
||||||
|
index = self.index_to_soup(href)
|
||||||
|
self.log('\tLooking for current issue in: ' + href)
|
||||||
|
|
||||||
|
|
||||||
# Find all articles.
|
# Find all articles.
|
||||||
list = index.find('div', attrs = {'class' : 'siv_artList'})
|
list = index.find('div', attrs = {'class' : 'siv_artList'})
|
||||||
@@ -69,10 +93,8 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
|
|||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
header = soup.find('div', attrs = {'class' : 'siv_artheader'})
|
header = soup.find('div', attrs = {'class' : 'siv_artheader'})
|
||||||
if header:
|
|
||||||
# It's an article, prepare a container for the content
|
|
||||||
homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
|
homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
|
||||||
body = homeMadeSoup.find('body')
|
body = homeMadeSoup.body
|
||||||
|
|
||||||
# Find the date, title and byline
|
# Find the date, title and byline
|
||||||
temp = header.find('td', attrs = {'class' : 'title'})
|
temp = header.find('td', attrs = {'class' : 'title'})
|
||||||
@@ -93,7 +115,4 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
|
|||||||
body.append(para)
|
body.append(para)
|
||||||
|
|
||||||
return homeMadeSoup
|
return homeMadeSoup
|
||||||
else :
|
|
||||||
# It's a TOC, just return the whole lot
|
|
||||||
return soup
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user