mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update chronicle of higher education
This commit is contained in:
parent
952f5709b0
commit
984d2b8b76
@ -1,3 +1,4 @@
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from collections import OrderedDict
|
||||
|
||||
@ -14,7 +15,8 @@ class Chronicle(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class':'article'}),
|
||||
]
|
||||
remove_tags = [dict(name='div',attrs={'class':['related module1','maintitle']}),
|
||||
dict(name='div', attrs={'id':['section-nav','icon-row']})]
|
||||
dict(name='div', attrs={'id':['section-nav','icon-row', 'enlarge-popup']}),
|
||||
dict(name='a', attrs={'class':'show-enlarge enlarge'})]
|
||||
no_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
@ -31,7 +33,6 @@ class Chronicle(BasicNewsRecipe):
|
||||
return br
|
||||
|
||||
def parse_index(self):
|
||||
|
||||
#Go to the issue
|
||||
soup0 = self.index_to_soup('http://chronicle.com/section/Archives/39/')
|
||||
issue = soup0.find('ul',attrs={'class':'feature-promo-list'}).li
|
||||
@ -42,9 +43,12 @@ class Chronicle(BasicNewsRecipe):
|
||||
self.timefmt = u' [%s]'%dates
|
||||
|
||||
#Find cover
|
||||
cover=soup0.find('div',attrs={'class':'promo'}).findNext('div')
|
||||
self.cover_url="http://chronicle.com"+cover.find('img')['src']
|
||||
|
||||
cover=soup0.find('div',attrs={'class':'side-content'}).find(attrs={'src':re.compile("photos/biz/Current")})
|
||||
if cover is not None:
|
||||
if "chronicle.com" in cover['src']:
|
||||
self.cover_url=cover['src']
|
||||
else:
|
||||
self.cover_url="http://chronicle.com" + cover['src']
|
||||
#Go to the main body
|
||||
soup = self.index_to_soup(issueurl)
|
||||
div = soup.find ('div', attrs={'id':'article-body'})
|
||||
@ -74,8 +78,10 @@ class Chronicle(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
    """Replace Tableau placeholders with their <noscript> link and drop the credit text.

    Every ``div`` with class ``tableauPlaceholder`` is replaced by the ``<a>``
    element found inside its ``<noscript>`` child. A placeholder that has no
    ``<noscript>`` child, or whose ``<noscript>`` holds no ``<a>``, is left in
    place instead of aborting the whole article.

    Matches for the text ``Powered by Tableau`` are then removed.

    :param soup: parsed article document (BeautifulSoup-style tree).
    :return: the same ``soup`` object, modified in place.
    """
    # Process all the images
    for placeholder in soup.findAll('div', attrs={'class': 'tableauPlaceholder'}):
        noscript = placeholder.find('noscript')
        fallback = noscript.a if noscript is not None else None
        if fallback is None:
            # Without this guard a missing <noscript> raised AttributeError
            # on ``.a``, and a missing <a> passed None to replaceWith().
            continue
        placeholder.replaceWith(fallback)
    for credit in soup.findAll('div', text='Powered by Tableau'):
        credit.extract()
    return soup
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user