mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
MIT Tech Review update
This commit is contained in:
parent
018ce6dd93
commit
b89cd3566e
@ -24,7 +24,7 @@ class EpochTimes(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
classes('print:hidden h-header shortcode aspect-square'),
|
classes('print:hidden h-header shortcode aspect-square'),
|
||||||
dict(name='button'),
|
dict(name=['button', 'svg']),
|
||||||
dict(name='img', attrs={'src':lambda x: x and x.endswith('svg')})
|
dict(name='img', attrs={'src':lambda x: x and x.endswith('svg')})
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -50,6 +50,7 @@ class MitTechnologyReview(BasicNewsRecipe):
|
|||||||
#cre-d{font-size:xx-small; text-align:center; color:gray;}
|
#cre-d{font-size:xx-small; text-align:center; color:gray;}
|
||||||
#cap-d{font-size:small; text-align:center;}
|
#cap-d{font-size:small; text-align:center;}
|
||||||
blockquote{text-align:center; color:#404040;}
|
blockquote{text-align:center; color:#404040;}
|
||||||
|
em { color:#202020;}
|
||||||
'''
|
'''
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
prefixed_classes('contentHeader contentArticleHeader contentBody')
|
prefixed_classes('contentHeader contentArticleHeader contentBody')
|
||||||
@ -65,14 +66,15 @@ class MitTechnologyReview(BasicNewsRecipe):
|
|||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
soup = self.index_to_soup('https://www.technologyreview.com/')
|
soup = self.index_to_soup('https://www.technologyreview.com/')
|
||||||
if script := soup.find('script', id='preload'):
|
if script := soup.find('script', id='preload'):
|
||||||
link = re.findall('https\S+?front_cover\S+?.png', self.tag_to_string(script))
|
link = re.search('(https\S+?front_cover\S+?(jpg|png))', self.tag_to_string(script))
|
||||||
return link[-1] + '?fit=572,786'
|
return link.group(1) + '?fit=572,786'
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.index_to_soup(self.INDEX)
|
soup = self.index_to_soup(self.INDEX)
|
||||||
issue = soup.find('h1', attrs={'class':lambda x: x and x.startswith('magazineHero__title')})
|
issue = soup.find(attrs={'class':lambda x: x and x.startswith('magazineHero__title')})
|
||||||
time = soup.find(attrs={'class': lambda x: x and x.startswith('magazineHero__date')})
|
time = soup.find(attrs={'class': lambda x: x and x.startswith('magazineHero__date')})
|
||||||
self.timefmt = ' (' + self.tag_to_string(issue) + ') [' + self.tag_to_string(time) + ']'
|
self.title = 'MIT Tech Review ' + self.tag_to_string(issue)
|
||||||
|
self.timefmt = ' [' + self.tag_to_string(time) + ']'
|
||||||
self.log('Downloading issue: ', self.timefmt)
|
self.log('Downloading issue: ', self.timefmt)
|
||||||
|
|
||||||
# parse articles
|
# parse articles
|
||||||
|
@ -21,16 +21,24 @@ class PhilosophyNow(BasicNewsRecipe):
|
|||||||
remove_attributes = ['height', 'width', 'style']
|
remove_attributes = ['height', 'width', 'style']
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
ignore_duplicate_articles = {'url'}
|
ignore_duplicate_articles = {'url'}
|
||||||
|
masthead_url = 'https://philosophynow.org/media/images/regulars/logoStructuredData.png'
|
||||||
|
|
||||||
keep_only_tags = [classes('article_page')]
|
keep_only_tags = [classes('article_page')]
|
||||||
remove_tags = [dict(name='div', attrs={'id':'welcome_box'})]
|
remove_tags = [dict(name='div', attrs={'id':'welcome_box'})]
|
||||||
|
extra_css = '''
|
||||||
|
img {display:block; margin:0 auto;}
|
||||||
|
.articleImage { font-size:small; text-align:center; }
|
||||||
|
em, blockquote { color:#202020; }
|
||||||
|
'''
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.index_to_soup('https://philosophynow.org/')
|
soup = self.index_to_soup('https://philosophynow.org/')
|
||||||
div = soup.find('div', attrs={'id': 'aside_issue_cover'})
|
div = soup.find('div', attrs={'id': 'aside_issue_cover'})
|
||||||
url = div.find('a', href=True)['href']
|
url = div.find('a', href=True)['href']
|
||||||
for issue in div.findAll('div', attrs={'id':'aside_issue_text'}):
|
if issue := div.find('div', attrs={'id':'aside_issue_text'}):
|
||||||
self.log('Downloading issue:', self.tag_to_string(issue).strip())
|
self.log('Downloading issue:', self.tag_to_string(issue).strip())
|
||||||
|
self.timefmt = ' [' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_date'})) + ']'
|
||||||
|
self.title = 'Philosophy Now ' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_number'}))
|
||||||
cov_url = div.find('img', src=True)['src']
|
cov_url = div.find('img', src=True)['src']
|
||||||
self.cover_url = 'https://philosophynow.org' + cov_url
|
self.cover_url = 'https://philosophynow.org' + cov_url
|
||||||
soup = self.index_to_soup('https://philosophynow.org' + url)
|
soup = self.index_to_soup('https://philosophynow.org' + url)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user