mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
MIT Tech Review update
This commit is contained in:
parent
018ce6dd93
commit
b89cd3566e
@ -24,7 +24,7 @@ class EpochTimes(BasicNewsRecipe):
|
||||
]
|
||||
remove_tags = [
|
||||
classes('print:hidden h-header shortcode aspect-square'),
|
||||
dict(name='button'),
|
||||
dict(name=['button', 'svg']),
|
||||
dict(name='img', attrs={'src':lambda x: x and x.endswith('svg')})
|
||||
]
|
||||
|
||||
|
@ -50,6 +50,7 @@ class MitTechnologyReview(BasicNewsRecipe):
|
||||
#cre-d{font-size:xx-small; text-align:center; color:gray;}
|
||||
#cap-d{font-size:small; text-align:center;}
|
||||
blockquote{text-align:center; color:#404040;}
|
||||
em { color:#202020;}
|
||||
'''
|
||||
keep_only_tags = [
|
||||
prefixed_classes('contentHeader contentArticleHeader contentBody')
|
||||
@ -65,14 +66,15 @@ class MitTechnologyReview(BasicNewsRecipe):
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('https://www.technologyreview.com/')
|
||||
if script := soup.find('script', id='preload'):
|
||||
link = re.findall('https\S+?front_cover\S+?.png', self.tag_to_string(script))
|
||||
return link[-1] + '?fit=572,786'
|
||||
link = re.search('(https\S+?front_cover\S+?(jpg|png))', self.tag_to_string(script))
|
||||
return link.group(1) + '?fit=572,786'
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
issue = soup.find('h1', attrs={'class':lambda x: x and x.startswith('magazineHero__title')})
|
||||
issue = soup.find(attrs={'class':lambda x: x and x.startswith('magazineHero__title')})
|
||||
time = soup.find(attrs={'class': lambda x: x and x.startswith('magazineHero__date')})
|
||||
self.timefmt = ' (' + self.tag_to_string(issue) + ') [' + self.tag_to_string(time) + ']'
|
||||
self.title = 'MIT Tech Review ' + self.tag_to_string(issue)
|
||||
self.timefmt = ' [' + self.tag_to_string(time) + ']'
|
||||
self.log('Downloading issue: ', self.timefmt)
|
||||
|
||||
# parse articles
|
||||
|
@ -21,16 +21,24 @@ class PhilosophyNow(BasicNewsRecipe):
|
||||
remove_attributes = ['height', 'width', 'style']
|
||||
encoding = 'utf-8'
|
||||
ignore_duplicate_articles = {'url'}
|
||||
masthead_url = 'https://philosophynow.org/media/images/regulars/logoStructuredData.png'
|
||||
|
||||
keep_only_tags = [classes('article_page')]
|
||||
remove_tags = [dict(name='div', attrs={'id':'welcome_box'})]
|
||||
extra_css = '''
|
||||
img {display:block; margin:0 auto;}
|
||||
.articleImage { font-size:small; text-align:center; }
|
||||
em, blockquote { color:#202020; }
|
||||
'''
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup('https://philosophynow.org/')
|
||||
div = soup.find('div', attrs={'id': 'aside_issue_cover'})
|
||||
url = div.find('a', href=True)['href']
|
||||
for issue in div.findAll('div', attrs={'id':'aside_issue_text'}):
|
||||
if issue := div.find('div', attrs={'id':'aside_issue_text'}):
|
||||
self.log('Downloading issue:', self.tag_to_string(issue).strip())
|
||||
self.timefmt = ' [' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_date'})) + ']'
|
||||
self.title = 'Philosophy Now ' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_number'}))
|
||||
cov_url = div.find('img', src=True)['src']
|
||||
self.cover_url = 'https://philosophynow.org' + cov_url
|
||||
soup = self.index_to_soup('https://philosophynow.org' + url)
|
||||
|
Loading…
x
Reference in New Issue
Block a user