diff --git a/recipes/business_today.recipe b/recipes/business_today.recipe
index 44c5705ba3..1ea2f2b349 100644
--- a/recipes/business_today.recipe
+++ b/recipes/business_today.recipe
@@ -74,7 +74,7 @@ class BT(BasicNewsRecipe):
         # Insert feeds in specified order, if available
 
-        feedSort = ['Editor\'s Note']
+        feedSort = ['Editor\'s Note', 'Editors note']
         for i in feedSort:
             if i in sections:
                 feeds.append((i, sections[i]))
 
@@ -82,7 +82,8 @@ class BT(BasicNewsRecipe):
         # Done with the sorted feeds
 
         for i in feedSort:
-            del sections[i]
+            if i in sections:
+                del sections[i]
 
         # Append what is left over...
 
diff --git a/recipes/harpers.recipe b/recipes/harpers.recipe
index 059f4b5e08..6f0d8497d5 100644
--- a/recipes/harpers.recipe
+++ b/recipes/harpers.recipe
@@ -79,5 +79,10 @@ class Harpers(BasicNewsRecipe):
         .index-statement .index-tooltip { font-size: small; }
         """
 
+    def get_cover_url(self):
+        issues_soup = self.index_to_soup("https://harpers.org/issues/")
+        curr_issue_a_ele = issues_soup.select_one("div.issue-card a")
+        if curr_issue_a_ele.find("img"):
+            return curr_issue_a_ele.img["src"]
     feeds = [(u"Harper's Magazine", u'https://harpers.org/feed/')]
 
diff --git a/recipes/harpers_full.recipe b/recipes/harpers_full.recipe
index 159419d623..bf8f8dae79 100644
--- a/recipes/harpers_full.recipe
+++ b/recipes/harpers_full.recipe
@@ -131,7 +131,9 @@ class Harpers_full(BasicNewsRecipe):
         if not _issue_url:
             issues_soup = self.index_to_soup("https://harpers.org/issues/")
             curr_issue_a_ele = issues_soup.select_one("div.issue-card a")
             curr_issue_url = urljoin(self.base_url, curr_issue_a_ele["href"])
+            if curr_issue_a_ele.find("img"):
+                self.cover_url = curr_issue_a_ele.img["src"]
         else:
             curr_issue_url = _issue_url
 
diff --git a/recipes/hindu_business_line_print_edition.recipe b/recipes/hindu_business_line_print_edition.recipe
deleted file mode 100644
index b0811a087c..0000000000
--- a/recipes/hindu_business_line_print_edition.recipe
+++ /dev/null
@@ -1,94 +0,0 @@
-import json
-import re
-from collections import defaultdict
-from datetime import date
-
-from calibre.web.feeds.news import BasicNewsRecipe, classes
-
-
-def absurl(url):
-    if url.startswith('/'):
-        url = 'https://www.thehindubusinessline.com' + url
-    return url
-
-
-local_edition = None
-# Chennai is default edition, for other editions use 'bl_hyderabad', 'bl_bangalore', 'bl_mumbai'
-
-
-class BusinessLine(BasicNewsRecipe):
-    title = 'The Hindu BusinessLine | Print Edition'
-    __author__ = 'unkn0wn'
-    description = (
-        'The Hindu BusinessLine is known for its credibility, accuracy, in-depth analysis of markets and sober coverage'
-        ' of business news. BusinessLine reduces the daily grind of business to relevant, readable, byte-sized stories.'
-        ' The newspaper is extensively followed by the decision makers and change leaders from the world of business.'
-    )
-    language = 'en_IN'
-    no_stylesheets = True
-    masthead_url = 'https://www.thehindubusinessline.com/theme/images/bl-online/bllogo.png'
-    remove_attributes = ['style', 'height', 'width']
-    extra_css = '.caption{font-size:small; text-align:center;}'\
-        '.author{font-size:small; font-weight:bold;}'\
-        '.subhead, .subhead_lead {font-weight:bold;}'\
-        'img {display:block; margin:0 auto;}'
-
-    ignore_duplicate_articles = {'url'}
-
-    keep_only_tags = [
-        classes('articlepage')
-    ]
-
-    remove_tags = [
-        classes('hide-mobile comments-shares share-page editiondetails author-img')
-    ]
-
-    def preprocess_html(self, soup):
-        for cap in soup.findAll('p', attrs={'class':'caption'}):
-            cap.name = 'figcaption'
-        for img in soup.findAll('img', attrs={'data-original':True}):
-            img['src'] = img['data-original']
-        return soup
-
-    def parse_index(self):
-        dt = date.today().strftime('%Y-%m-%d')
-        # For past editions, set date to, for example, '2023-01-28'
-        # dt = '2023-01-28'
-        if local_edition:
-            url = absurl('/todays-paper/' + dt + '/' + local_edition + '/')
-        else:
-            url = absurl('/todays-paper/' + dt + '/bl_chennai/')
-        raw = self.index_to_soup(url, raw=True)
-        soup = self.index_to_soup(raw)
-        ans = self.hindu_parse_index(soup)
-        if not ans:
-            raise ValueError(
-                'The Hindu BusinessLine Newspaper is not published Today.'
-            )
-        cover = soup.find(attrs={'class':'hindu-ad'})
-        if cover:
-            self.cover_url = cover.img['src']
-        return ans
-
-    def hindu_parse_index(self, soup):
-        for script in soup.findAll('script'):
-            if not self.tag_to_string(script).strip().startswith('let grouped_articles = {}'):
-                continue
-            if script is not None:
-                art = re.search(r'grouped_articles = ({\".*)', self.tag_to_string(script))
-                data = json.JSONDecoder().raw_decode(art.group(1))[0]
-
-                feeds_dict = defaultdict(list)
-
-                a = json.dumps(data)
-                for sec in json.loads(a):
-                    for item in data[sec]:
-                        section = sec.replace('BL_', '')
-                        title = item['articleheadline']
-                        url = absurl(item['href'])
-                        desc = 'Page no.' + item['pageno'] + ' | ' + item['teaser_text'] or ''
-                        self.log('\t', title, '\n\t\t', url)
-                        feeds_dict[section].append({"title": title, "url": url, "description": desc})
-                return [(section, articles) for section, articles in feeds_dict.items()]
-            else:
-                return []
diff --git a/recipes/icons/hindu_business_line_print_edition.png b/recipes/icons/hindu_business_line_print_edition.png
deleted file mode 100644
index 94791ebf0d..0000000000
Binary files a/recipes/icons/hindu_business_line_print_edition.png and /dev/null differ
diff --git a/recipes/new_yorker.recipe b/recipes/new_yorker.recipe
index 352e7f26c8..8629c79f2b 100644
--- a/recipes/new_yorker.recipe
+++ b/recipes/new_yorker.recipe
@@ -80,27 +80,21 @@ class NewYorker(BasicNewsRecipe):
     #     return buf.getvalue()
 
     def parse_index(self):
-        # Get cover
-        cover_soup = self.index_to_soup('https://www.newyorker.com/archive')
-        cover_img = cover_soup.find(
-            attrs={'class': lambda x: x and 'MagazineSection__cover___' in x})
-        if cover_img is not None:
-            cover_img = cover_img.find('img')
-            if cover_img is not None:
-                self.cover_url = cover_img.get('src')
-                try:
-                    # the src original resolution w_280 was too low, replace w_280 with w_560
-                    cover_url_width_index = self.cover_url.find("w_")
-                    old_width = self.cover_url[cover_url_width_index:cover_url_width_index+5]
-                    self.cover_url = self.cover_url.replace(old_width, "w_560")
-                except Exception:
-                    self.log('Failed enlarging cover img, using the original one')
-                self.log('Found cover:', self.cover_url)
-
-        # Get content
-
         soup = self.index_to_soup(
             'https://www.newyorker.com/magazine?intcid=magazine')
+        cover_img = soup.find('picture',
+            attrs={'class': lambda x: x and 'asset-embed__responsive-asset' in x})
+        if cover_img is not None:
+            self.cover_url = cover_img.img['src']
+            self.log('Found cover:', self.cover_url)
+            try:
+                # the src original resolution w_280 was too low, replace w_280 with w_640
+                cover_url_width_index = self.cover_url.find("w_")
+                old_width = self.cover_url[cover_url_width_index:cover_url_width_index+5]
+                self.cover_url = self.cover_url.replace(old_width, "w_640")
+            except Exception:
+                self.log('Failed enlarging cover img, using the original one')
+
         feeds_dict = defaultdict(list)
         for section in soup.findAll('section',
                 attrs={'class': lambda x: x and 'SummaryRiverSection-' in x}):