diff --git a/recipes/indian_express.recipe b/recipes/indian_express.recipe index 60fc14c3f8..efbd434694 100644 --- a/recipes/indian_express.recipe +++ b/recipes/indian_express.recipe @@ -1,7 +1,8 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes -from datetime import date, datetime, timedelta +from datetime import datetime, timedelta from calibre.utils.date import parse_date + class IndianExpress(BasicNewsRecipe): title = u'Indian Express' language = 'en_IN' @@ -14,7 +15,7 @@ class IndianExpress(BasicNewsRecipe): use_embedded_content = False remove_attributes = ['style', 'height', 'width'] ignore_duplicate_articles = {'url'} - + extra_css = ''' #storycenterbyline {font-size:small;} #img-cap {font-size:small;} @@ -23,7 +24,7 @@ class IndianExpress(BasicNewsRecipe): #sub-d{color:#202020; font-style:italic;} .ie-authorbox{font-size:small;} ''' - + resolve_internal_links = True remove_empty_feeds = True @@ -41,31 +42,31 @@ class IndianExpress(BasicNewsRecipe): ' custom-share o-story-paper-quite ie-network-commenting audio-player-tts-sec' ) ] - + def parse_index(self): - + section_list = [ ('Front Page', 'https://indianexpress.com/print/front-page/'), ('India', 'https://indianexpress.com/section/india/'), - #('Express Network', 'https://indianexpress.com/print/express-network/'), + # ('Express Network', 'https://indianexpress.com/print/express-network/'), ('Delhi Confidential', 'https://indianexpress.com/section/delhi-confidential/'), ('Opinion', 'http://indianexpress.com/section/opinion/'), ('UPSC-CSE Key', 'https://indianexpress.com/section/upsc-current-affairs/'), ('Explained', 'https://indianexpress.com/section/explained/'), ('Business', 'https://indianexpress.com/section/business/'), - #('Political Pulse', 'https://indianexpress.com/section/political-pulse/'), + # ('Political Pulse', 'https://indianexpress.com/section/political-pulse/'), ('Sunday Eye', 'https://indianexpress.com/section/express-sunday-eye/'), ('World', 'https://indianexpress.com/section/world/'), - #('Education', 'https://indianexpress.com/section/education/'), - #('Gadgets', 'https://indianexpress.com/section/technology/gadgets/'), + # ('Education', 'https://indianexpress.com/section/education/'), + # ('Gadgets', 'https://indianexpress.com/section/technology/gadgets/'), ('Tech Review', 'https://indianexpress.com/section/technology/tech-reviews/'), - #('Techhook', 'https://indianexpress.com/section/technology/techook/'), - #('Laptops', 'https://indianexpress.com/section/technology/laptops/'), - #('Mobiles & Tabs', 'https://indianexpress.com/section/technology/mobile-tabs/'), + # ('Techhook', 'https://indianexpress.com/section/technology/techook/'), + # ('Laptops', 'https://indianexpress.com/section/technology/laptops/'), + # ('Mobiles & Tabs', 'https://indianexpress.com/section/technology/mobile-tabs/'), ('Science', 'https://indianexpress.com/section/technology/science/'), ('Movie Review', 'https://indianexpress.com/section/entertainment/movie-review/'), ] - + feeds = [] # For each section title, fetch the article urls @@ -81,24 +82,24 @@ class IndianExpress(BasicNewsRecipe): if articles: feeds.append((section_title, articles)) return feeds - + def articles_from_page(self, soup): ans = [] for div in soup.findAll(attrs={'class':['northeast-topbox', 'explained-section-grid']}): for a in div.findAll('a', href=True): - if not a.find('img') and not '/section/' in a['href']: + if not a.find('img') and '/section/' not in a['href']: url = a['href'] title = self.tag_to_string(a) self.log('\t', title, '\n\t\t', url) ans.append({'title': title, 'url': url, 'description': ''}) return ans - + def articles_from_soup(self, soup): ans = [] div = soup.find('div', attrs={'class':['nation', 'o-opin']}) for art in div.findAll(attrs={'class':['articles', 'o-opin-article']}): for a in art.findAll('a', href=True): - if not a.find('img') and not '/profile/' in a['href']: + if not a.find('img') and '/profile/' not in a['href']: url = a['href'] title = self.tag_to_string(a) desc = '' @@ -140,4 +141,4 @@ class IndianExpress(BasicNewsRecipe): today = datetime.now() if (today - date) > timedelta(self.oldest_article): self.abort_article('Skipping old article') - return soup \ No newline at end of file + return soup