mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-03 19:17:02 -05:00 
			
		
		
		
	TOI Print update
This commit is contained in:
		
							parent
							
								
									cb1f70e816
								
							
						
					
					
						commit
						8f1573b628
					
				@ -79,11 +79,10 @@ class TheEconomicTimes(BasicNewsRecipe):
 | 
				
			|||||||
            for h3 in section.findAll(("h1", "h3", "h4", "h5")):
 | 
					            for h3 in section.findAll(("h1", "h3", "h4", "h5")):
 | 
				
			||||||
                span = h3.find(
 | 
					                span = h3.find(
 | 
				
			||||||
                    'span',
 | 
					                    'span',
 | 
				
			||||||
                    href=lambda x: x and x.startswith('/epaper/'),
 | 
					                    href=lambda x: x and x.startswith('https://economictimes.indiatimes.com/epaper/'),
 | 
				
			||||||
                    attrs={'class': 'banner'}
 | 
					                    attrs={'class': 'banner'}
 | 
				
			||||||
                )
 | 
					                )
 | 
				
			||||||
                url = span['href']
 | 
					                url = span['href']
 | 
				
			||||||
                url = 'https://economictimes.indiatimes.com' + url
 | 
					 | 
				
			||||||
                title = self.tag_to_string(span)
 | 
					                title = self.tag_to_string(span)
 | 
				
			||||||
                div = h3.find_next_sibling('div', attrs={'class': 'dsc'})
 | 
					                div = h3.find_next_sibling('div', attrs={'class': 'dsc'})
 | 
				
			||||||
                if div is not None:
 | 
					                if div is not None:
 | 
				
			||||||
 | 
				
			|||||||
@ -1,6 +1,7 @@
 | 
				
			|||||||
from calibre.web.feeds.news import BasicNewsRecipe
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
from datetime import date
 | 
					from datetime import date
 | 
				
			||||||
 | 
					from collections import defaultdict
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# default edition is Delhi i.e., 'cap'
 | 
					# default edition is Delhi i.e., 'cap'
 | 
				
			||||||
@ -54,20 +55,21 @@ class toiprint(BasicNewsRecipe):
 | 
				
			|||||||
        url = index + '/DayIndex/' + date_ + '_' + le + '.json'
 | 
					        url = index + '/DayIndex/' + date_ + '_' + le + '.json'
 | 
				
			||||||
        raw = self.index_to_soup(url, raw=True)
 | 
					        raw = self.index_to_soup(url, raw=True)
 | 
				
			||||||
        data = json.loads(raw)
 | 
					        data = json.loads(raw)
 | 
				
			||||||
        if 'DigitalIndex' not in data:
 | 
					        if 'DayIndex' not in data:
 | 
				
			||||||
            raise ValueError(
 | 
					            raise ValueError(
 | 
				
			||||||
                    'The Times of India Newspaper is not published today.'
 | 
					                    'The Times of India Newspaper is not published today.'
 | 
				
			||||||
                )
 | 
					                )
 | 
				
			||||||
        data = data['DigitalIndex']
 | 
					        data = data['DayIndex']
 | 
				
			||||||
        feeds = []
 | 
					        feeds_dict = defaultdict(list)
 | 
				
			||||||
        for link in data:
 | 
					        for link in data:
 | 
				
			||||||
            sec_name = link['PageTitle']
 | 
					            sec_name = link['PageTitle']
 | 
				
			||||||
 | 
					            if sec_name == 'Advertisement':
 | 
				
			||||||
 | 
					                continue
 | 
				
			||||||
            self.log(sec_name)
 | 
					            self.log(sec_name)
 | 
				
			||||||
            articles = []
 | 
					            articles = []
 | 
				
			||||||
            if 'Views' in link:
 | 
					            if 'Articles' in link:
 | 
				
			||||||
                for sec in link['Views']:
 | 
					                for art in link['Articles']:
 | 
				
			||||||
                    if 'Articles' in sec:
 | 
					                    section = sec_name
 | 
				
			||||||
                        for art in sec['Articles']:
 | 
					 | 
				
			||||||
                    if 'ArticleName' not in art:
 | 
					                    if 'ArticleName' not in art:
 | 
				
			||||||
                        continue
 | 
					                        continue
 | 
				
			||||||
                    url = art['ArticleName']
 | 
					                    url = art['ArticleName']
 | 
				
			||||||
@ -76,11 +78,9 @@ class toiprint(BasicNewsRecipe):
 | 
				
			|||||||
                        desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '')
 | 
					                        desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '')
 | 
				
			||||||
                    else:
 | 
					                    else:
 | 
				
			||||||
                        desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ColumnTitle', '')
 | 
					                        desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ColumnTitle', '')
 | 
				
			||||||
                            self.log('\t', title, '\n\t', desc, '\n\t\t', url)
 | 
					                    self.log('\t', title, '\n\t', desc.replace('\n', ''))
 | 
				
			||||||
                            articles.append({'title': title, 'description':desc, 'url': url})
 | 
					                    feeds_dict[section].append({"title": title, "url": url, "description": desc})
 | 
				
			||||||
            if articles:
 | 
					        return [(section, articles) for section, articles in feeds_dict.items()]
 | 
				
			||||||
                feeds.append((sec_name, articles))
 | 
					 | 
				
			||||||
        return feeds
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def preprocess_raw_html(self, raw, *a):
 | 
					    def preprocess_raw_html(self, raw, *a):
 | 
				
			||||||
        data = json.loads(raw)
 | 
					        data = json.loads(raw)
 | 
				
			||||||
@ -107,8 +107,11 @@ class toiprint(BasicNewsRecipe):
 | 
				
			|||||||
            elif 'ZoneText' in x:
 | 
					            elif 'ZoneText' in x:
 | 
				
			||||||
                body += '<p><i>' + x['ZoneText'] + '</i></p>'
 | 
					                body += '<p><i>' + x['ZoneText'] + '</i></p>'
 | 
				
			||||||
        return '<html><body><div>' \
 | 
					        return '<html><body><div>' \
 | 
				
			||||||
                    + body.replace('<br>', '<p>').replace('<br/>', '<p>').replace('<br>', '<p>').replace('\n', '<div>') \
 | 
					                    + body.replace('<br>', '<p>').replace('<br/>', '<p>').replace('<br>', '<p>').replace('\n', '<br>') \
 | 
				
			||||||
                        + '</div></body></html>'
 | 
					                        + '</div></body></html>'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def print_version(self, url):
 | 
					    def print_version(self, url):
 | 
				
			||||||
        return index + '/ArticleZoneJson/' + url.split('_')[-3] + '/' + url + '.json'
 | 
					        return index + '/ArticleZoneJson/' + url.split('_')[-3] + '/' + url + '.json'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def populate_article_metadata(self, article, soup, first):
 | 
				
			||||||
 | 
					        article.url = '***'
 | 
				
			||||||
 | 
				
			|||||||
@ -35,6 +35,7 @@ class WSJ(BasicNewsRecipe):
 | 
				
			|||||||
    __author__ = 'Kovid Goyal'
 | 
					    __author__ = 'Kovid Goyal'
 | 
				
			||||||
    description = 'News and current affairs'
 | 
					    description = 'News and current affairs'
 | 
				
			||||||
    language = 'en'
 | 
					    language = 'en'
 | 
				
			||||||
 | 
					    masthead_url = 'https://s.wsj.net/media/wsj_amp_masthead_lg.png'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    compress_news_images = True
 | 
					    compress_news_images = True
 | 
				
			||||||
    compress_news_images_auto_size = 7
 | 
					    compress_news_images_auto_size = 7
 | 
				
			||||||
 | 
				
			|||||||
@ -35,6 +35,7 @@ class WSJ(BasicNewsRecipe):
 | 
				
			|||||||
    __author__ = 'Kovid Goyal'
 | 
					    __author__ = 'Kovid Goyal'
 | 
				
			||||||
    description = 'News and current affairs'
 | 
					    description = 'News and current affairs'
 | 
				
			||||||
    language = 'en'
 | 
					    language = 'en'
 | 
				
			||||||
 | 
					    masthead_url = 'https://s.wsj.net/media/wsj_amp_masthead_lg.png'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    compress_news_images = True
 | 
					    compress_news_images = True
 | 
				
			||||||
    compress_news_images_auto_size = 7
 | 
					    compress_news_images_auto_size = 7
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user