mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-30 18:22:25 -04:00 
			
		
		
		
	TOI Print update
This commit is contained in:
		
							parent
							
								
									cb1f70e816
								
							
						
					
					
						commit
						8f1573b628
					
				| @ -79,11 +79,10 @@ class TheEconomicTimes(BasicNewsRecipe): | ||||
|             for h3 in section.findAll(("h1", "h3", "h4", "h5")): | ||||
|                 span = h3.find( | ||||
|                     'span', | ||||
|                     href=lambda x: x and x.startswith('/epaper/'), | ||||
|                     href=lambda x: x and x.startswith('https://economictimes.indiatimes.com/epaper/'), | ||||
|                     attrs={'class': 'banner'} | ||||
|                 ) | ||||
|                 url = span['href'] | ||||
|                 url = 'https://economictimes.indiatimes.com' + url | ||||
|                 title = self.tag_to_string(span) | ||||
|                 div = h3.find_next_sibling('div', attrs={'class': 'dsc'}) | ||||
|                 if div is not None: | ||||
|  | ||||
| @ -1,6 +1,7 @@ | ||||
| from calibre.web.feeds.news import BasicNewsRecipe | ||||
| import json | ||||
| from datetime import date | ||||
| from collections import defaultdict | ||||
| 
 | ||||
| 
 | ||||
| # default edition is Delhi i.e., 'cap' | ||||
| @ -54,33 +55,32 @@ class toiprint(BasicNewsRecipe): | ||||
|         url = index + '/DayIndex/' + date_ + '_' + le + '.json' | ||||
|         raw = self.index_to_soup(url, raw=True) | ||||
|         data = json.loads(raw) | ||||
|         if 'DigitalIndex' not in data: | ||||
|         if 'DayIndex' not in data: | ||||
|             raise ValueError( | ||||
|                     'The Times of India Newspaper is not published today.' | ||||
|                 ) | ||||
|         data = data['DigitalIndex'] | ||||
|         feeds = [] | ||||
|         data = data['DayIndex'] | ||||
|         feeds_dict = defaultdict(list) | ||||
|         for link in data: | ||||
|             sec_name = link['PageTitle'] | ||||
|             if sec_name == 'Advertisement': | ||||
|                 continue | ||||
|             self.log(sec_name) | ||||
|             articles = [] | ||||
|             if 'Views' in link: | ||||
|                 for sec in link['Views']: | ||||
|                     if 'Articles' in sec: | ||||
|                         for art in sec['Articles']: | ||||
|                             if 'ArticleName' not in art: | ||||
|                                 continue | ||||
|                             url = art['ArticleName'] | ||||
|                             title = art.get('ArticleTitle', 'unknown').replace('<br>', '') | ||||
|                             if art.get('ColumnTitle', '') == '': | ||||
|                                 desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '') | ||||
|                             else: | ||||
|                                 desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ColumnTitle', '') | ||||
|                             self.log('\t', title, '\n\t', desc, '\n\t\t', url) | ||||
|                             articles.append({'title': title, 'description':desc, 'url': url}) | ||||
|             if articles: | ||||
|                 feeds.append((sec_name, articles)) | ||||
|         return feeds | ||||
|             if 'Articles' in link: | ||||
|                 for art in link['Articles']: | ||||
|                     section = sec_name | ||||
|                     if 'ArticleName' not in art: | ||||
|                         continue | ||||
|                     url = art['ArticleName'] | ||||
|                     title = art.get('ArticleTitle', 'unknown').replace('<br>', '') | ||||
|                     if art.get('ColumnTitle', '') == '': | ||||
|                         desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '') | ||||
|                     else: | ||||
|                         desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ColumnTitle', '') | ||||
|                     self.log('\t', title, '\n\t', desc.replace('\n', '')) | ||||
|                     feeds_dict[section].append({"title": title, "url": url, "description": desc}) | ||||
|         return [(section, articles) for section, articles in feeds_dict.items()] | ||||
| 
 | ||||
|     def preprocess_raw_html(self, raw, *a): | ||||
|         data = json.loads(raw) | ||||
| @ -107,8 +107,11 @@ class toiprint(BasicNewsRecipe): | ||||
|             elif 'ZoneText' in x: | ||||
|                 body += '<p><i>' + x['ZoneText'] + '</i></p>' | ||||
|         return '<html><body><div>' \ | ||||
|                     + body.replace('<br>', '<p>').replace('<br/>', '<p>').replace('<br>', '<p>').replace('\n', '<div>') \ | ||||
|                     + body.replace('<br>', '<p>').replace('<br/>', '<p>').replace('<br>', '<p>').replace('\n', '<br>') \ | ||||
|                         + '</div></body></html>' | ||||
| 
 | ||||
|     def print_version(self, url): | ||||
|         return index + '/ArticleZoneJson/' + url.split('_')[-3] + '/' + url + '.json' | ||||
| 
 | ||||
|     def populate_article_metadata(self, article, soup, first): | ||||
|         article.url = '***' | ||||
|  | ||||
| @ -35,6 +35,7 @@ class WSJ(BasicNewsRecipe): | ||||
|     __author__ = 'Kovid Goyal' | ||||
|     description = 'News and current affairs' | ||||
|     language = 'en' | ||||
|     masthead_url = 'https://s.wsj.net/media/wsj_amp_masthead_lg.png' | ||||
| 
 | ||||
|     compress_news_images = True | ||||
|     compress_news_images_auto_size = 7 | ||||
|  | ||||
| @ -35,6 +35,7 @@ class WSJ(BasicNewsRecipe): | ||||
|     __author__ = 'Kovid Goyal' | ||||
|     description = 'News and current affairs' | ||||
|     language = 'en' | ||||
|     masthead_url = 'https://s.wsj.net/media/wsj_amp_masthead_lg.png' | ||||
| 
 | ||||
|     compress_news_images = True | ||||
|     compress_news_images_auto_size = 7 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user