mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-04 03:27:00 -05:00 
			
		
		
		
	Update MIT Tech Review
This commit is contained in:
		
							parent
							
								
									4565069f61
								
							
						
					
					
						commit
						dd684bc70a
					
				@ -4,11 +4,12 @@ from __future__ import unicode_literals
 | 
				
			|||||||
__license__ = 'GPL v3'
 | 
					__license__ = 'GPL v3'
 | 
				
			||||||
__copyright__ = '2015 Michael Marotta <mikefm at gmail.net>'
 | 
					__copyright__ = '2015 Michael Marotta <mikefm at gmail.net>'
 | 
				
			||||||
# Written April 2015
 | 
					# Written April 2015
 | 
				
			||||||
# Last edited 08/2022
 | 
					# Last edited 07/2024
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
technologyreview.com
 | 
					technologyreview.com
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
from collections import OrderedDict
 | 
					from collections import OrderedDict
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.web.feeds.news import BasicNewsRecipe, prefixed_classes
 | 
					from calibre.web.feeds.news import BasicNewsRecipe, prefixed_classes
 | 
				
			||||||
@ -38,7 +39,6 @@ class MitTechnologyReview(BasicNewsRecipe):
 | 
				
			|||||||
        ' This is different than the recipe named simply "Technology Review"'
 | 
					        ' This is different than the recipe named simply "Technology Review"'
 | 
				
			||||||
        ' which downloads the rss feed with daily articles from the website.'
 | 
					        ' which downloads the rss feed with daily articles from the website.'
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
    INDEX = 'http://www.technologyreview.com/magazine/'
 | 
					 | 
				
			||||||
    language = 'en'
 | 
					    language = 'en'
 | 
				
			||||||
    encoding = 'utf-8'
 | 
					    encoding = 'utf-8'
 | 
				
			||||||
    tags = 'news, technology, science'
 | 
					    tags = 'news, technology, science'
 | 
				
			||||||
@ -65,22 +65,19 @@ class MitTechnologyReview(BasicNewsRecipe):
 | 
				
			|||||||
        ),
 | 
					        ),
 | 
				
			||||||
    ]
 | 
					    ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_cover_url(self):
 | 
					 | 
				
			||||||
        soup = self.index_to_soup('https://www.technologyreview.com/')
 | 
					 | 
				
			||||||
        if script := soup.find('script', id='preload'):
 | 
					 | 
				
			||||||
            JSON = script.contents[0].split('magazineCover\":')[1].strip()
 | 
					 | 
				
			||||||
            data = json.JSONDecoder().raw_decode(JSON)[0]
 | 
					 | 
				
			||||||
            return data['config']['src']
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def parse_index(self):
 | 
					    def parse_index(self):
 | 
				
			||||||
        soup = self.index_to_soup(self.INDEX)
 | 
					        # for past editions, change the issue link below 
 | 
				
			||||||
        issue = soup.find(attrs={'class':lambda x: x and x.startswith('magazineHero__title')})
 | 
					        issue = 'http://www.technologyreview.com/magazine/'
 | 
				
			||||||
        time = soup.find(attrs={'class': lambda x: x and x.startswith('magazineHero__date')})
 | 
					        soup = self.index_to_soup(issue)
 | 
				
			||||||
        desc = soup.find(attrs={'class': lambda x: x and x.startswith('magazineHero__description')})
 | 
					        if script := soup.find('script', id='preload'):
 | 
				
			||||||
        self.title = 'MIT Tech Review ' + self.tag_to_string(issue)
 | 
					            raw = script.contents[0]
 | 
				
			||||||
        self.description = self.tag_to_string(desc)
 | 
					            m = re.search(r'\"children\":\[{\"name\":\"magazine-hero\"', raw)
 | 
				
			||||||
        self.timefmt = ' [' + self.tag_to_string(time) + ']'
 | 
					            spl = re.split(r"(?=\{)", raw[m.start():], 1)[1]
 | 
				
			||||||
        self.log('Downloading issue: ', self.timefmt)
 | 
					            data = json.JSONDecoder().raw_decode(spl)[0]
 | 
				
			||||||
 | 
					            self.cover_url = data['children'][0]['config']['src'] + '?fit=572,786'
 | 
				
			||||||
 | 
					            self.timefmt = ' [' + data['config']['issueDate'] + ']'
 | 
				
			||||||
 | 
					            self.description = data['config']['description']
 | 
				
			||||||
 | 
					            self.title = 'MIT TR: ' + data['config']['title']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # parse articles
 | 
					        # parse articles
 | 
				
			||||||
        feeds = OrderedDict()
 | 
					        feeds = OrderedDict()
 | 
				
			||||||
 | 
				
			|||||||
@ -50,7 +50,7 @@ class times(BasicNewsRecipe):
 | 
				
			|||||||
    ]
 | 
					    ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    remove_tags = [
 | 
					    remove_tags = [
 | 
				
			||||||
        dict(name=['svg']),
 | 
					        dict(name=['svg', 'times-datawrapper']),
 | 
				
			||||||
        dict(attrs={'id':'iframe-wrapper'}),
 | 
					        dict(attrs={'id':'iframe-wrapper'}),
 | 
				
			||||||
        dict(attrs={'old-position':'sticky'}),
 | 
					        dict(attrs={'old-position':'sticky'}),
 | 
				
			||||||
        prefixed_classes(
 | 
					        prefixed_classes(
 | 
				
			||||||
 | 
				
			|||||||
@ -156,7 +156,7 @@ class WSJ(BasicNewsRecipe):
 | 
				
			|||||||
                if '-pages_' in k:
 | 
					                if '-pages_' in k:
 | 
				
			||||||
                    section = k.split('-pages_')[0].replace('_', ' ')
 | 
					                    section = k.split('-pages_')[0].replace('_', ' ')
 | 
				
			||||||
                    if 'MAGAZINE' in section:
 | 
					                    if 'MAGAZINE' in section:
 | 
				
			||||||
                        if not dt.strftime('%d') == 1:
 | 
					                        if not dt.day == 1:
 | 
				
			||||||
                            continue
 | 
					                            continue
 | 
				
			||||||
                        self.log('Loading Magazine section')
 | 
					                        self.log('Loading Magazine section')
 | 
				
			||||||
                    self.log(section)
 | 
					                    self.log(section)
 | 
				
			||||||
 | 
				
			|||||||
@ -111,6 +111,7 @@ class WSJ(BasicNewsRecipe):
 | 
				
			|||||||
                date = itm['date']
 | 
					                date = itm['date']
 | 
				
			||||||
                key = itm['key']
 | 
					                key = itm['key']
 | 
				
			||||||
                manifest = itm['manifest']
 | 
					                manifest = itm['manifest']
 | 
				
			||||||
 | 
					                self.title = itm['label']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        dt = datetime.fromisoformat(date[:-1]) + timedelta(seconds=time.timezone)
 | 
					        dt = datetime.fromisoformat(date[:-1]) + timedelta(seconds=time.timezone)
 | 
				
			||||||
        dt = dt.strftime('%b, %Y')
 | 
					        dt = dt.strftime('%b, %Y')
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user