Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-10-26 08:12:25 -04:00)
			
		
		
		
	Update New York Magazine
This commit is contained in:
		
Parent: b54a0957e3
Commit: f31b11aa07
					
				| @ -23,19 +23,31 @@ class NewYorkMagazine(BasicNewsRecipe): | |||||||
|     no_stylesheets = True |     no_stylesheets = True | ||||||
|     remove_javascript = True |     remove_javascript = True | ||||||
|     encoding = 'utf-8' |     encoding = 'utf-8' | ||||||
|  |     extra_css = ''' | ||||||
|  |         .nym-image-figcaption, | ||||||
|  |         .bylines, .rubric, | ||||||
|  |         .clay-paragraph_prologue,  | ||||||
|  |         .secondary-area-caption-credits { | ||||||
|  |             font-size: small; | ||||||
|  |         } | ||||||
|  |     ''' | ||||||
|     keep_only_tags = [ |     keep_only_tags = [ | ||||||
|         dict(name='article', attrs={'class': lambda x: x and 'article' in x.split()}) |         dict(name='article', attrs={'class': lambda x: x and 'article' in x.split()}) | ||||||
|     ] |     ] | ||||||
|     remove_tags = [ |     remove_tags = [ | ||||||
|         classes('related-stories start-discussion newsletter-flex-text comments-link tags related secondary-area'), |         dict(name=['svg', 'iframe']), | ||||||
|         dict(id=['minibrowserbox', 'article-related', 'article-tools']) |         classes( | ||||||
|  |             'related-stories start-discussion newsletter-flex-text package-toc ' | ||||||
|  |             'comments-link tags related secondary-area author-photo error-pop-up' | ||||||
|  |         ), | ||||||
|  |         dict(id=['minibrowserbox', 'article-related', 'article-tools']), | ||||||
|     ] |     ] | ||||||
|     remove_attributes = ['srcset'] |     remove_attributes = ['style', 'height', 'width', 'srcset'] | ||||||
| 
 | 
 | ||||||
|     recipe_specific_options = { |     recipe_specific_options = { | ||||||
|         'date': { |         'date': { | ||||||
|             'short': 'The date of the edition to download (YYYY-MM-DD format)', |             'short': 'The date of the edition to download (YYYY-MM-DD format)', | ||||||
|             'long': 'For example, 2024-07-01' |             'long': 'For example, 2024-07-01', | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| @ -55,6 +67,20 @@ class NewYorkMagazine(BasicNewsRecipe): | |||||||
|                 self.log('Cover:', self.cover_url) |                 self.log('Cover:', self.cover_url) | ||||||
|                 break |                 break | ||||||
|         feeds = [] |         feeds = [] | ||||||
|  |         if cover_art := soup.find(**classes('magazine-toc-cover-text')): | ||||||
|  |             a = cover_art.find('a', **classes('headline-link')) | ||||||
|  |             c_url = a['href'] | ||||||
|  |             c_title = self.tag_to_string( | ||||||
|  |                 a.find(**classes('magazine-toc-cover-headline')) | ||||||
|  |             ).strip() | ||||||
|  |             c_desc = self.tag_to_string( | ||||||
|  |                 a.find(**classes('magazine-toc-cover-teaser')) | ||||||
|  |             ).strip() | ||||||
|  |             self.log('Cover Story', '\n\t', c_title, c_url) | ||||||
|  |             feeds.append(( | ||||||
|  |                 'Cover Story', | ||||||
|  |                 [{'title': c_title, 'url': c_url, 'description': c_desc}], | ||||||
|  |             )) | ||||||
|         for div in soup.findAll(attrs={'data-editable': 'settingTitle'}): |         for div in soup.findAll(attrs={'data-editable': 'settingTitle'}): | ||||||
|             section = self.tag_to_string(div).strip().capitalize() |             section = self.tag_to_string(div).strip().capitalize() | ||||||
|             articles = [] |             articles = [] | ||||||
| @ -79,9 +105,13 @@ class NewYorkMagazine(BasicNewsRecipe): | |||||||
|         return feeds |         return feeds | ||||||
| 
 | 
 | ||||||
|     def preprocess_html(self, soup): |     def preprocess_html(self, soup): | ||||||
|         if lede := soup.findAll('div', attrs={'class':lambda x: x and 'lede-image-wrapper' in x.split()}): |         if lede := soup.findAll( | ||||||
|  |             'div', attrs={'class': lambda x: x and 'lede-image-wrapper' in x.split()} | ||||||
|  |         ): | ||||||
|             if len(lede) > 1: |             if len(lede) > 1: | ||||||
|                 lede[1].extract() |                 lede[1].extract() | ||||||
|         for img in soup.findAll('img', attrs={'data-src': True}): |         for img in soup.findAll('img', attrs={'data-src': True}): | ||||||
|             img['src'] = img['data-src'] |             img['src'] = img['data-src'] | ||||||
|  |         for h2 in soup.findAll(['h2', 'h3']): | ||||||
|  |             h2.name = 'h4' | ||||||
|         return soup |         return soup | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user