# Provenance: patch 3936de93f58c5c1108e2e4e8bd6d6dace9efb1ec
# From: Kovid Goyal, Date: Tue, 14 Feb 2023 16:21:59 +0530
# Subject: [PATCH] Horizons by unkn0wn
# Adds new file recipes/horizons.recipe
'''
https://www.cirsd.org/en/horizons
'''

from urllib.parse import urljoin

from calibre.web.feeds.news import BasicNewsRecipe, classes

# Landing page that lists the available issues; relative links found while
# scraping are resolved against it.
INDEX_URL = 'https://www.cirsd.org/en/horizons'


class horizons(BasicNewsRecipe):
    '''
    Recipe that downloads one issue of Horizons from cirsd.org.

    The issue is picked from the gallery on the index page and its table of
    contents is turned into one feed per section heading.
    '''

    title = 'Horizons'
    __author__ = 'unkn0wn'
    description = (' Horizons – Journal of International Relations and Sustainable Development.'
                   ' Horizons serves as a high-level platform for influential voices from around the world to'
                   ' provide informed analysis and conduct reasoned exchanges on the full spectrum of issues'
                   ' that shape international developments.')
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'en'
    remove_attributes = ['style', 'height', 'width']
    masthead_url = 'https://www.cirsd.org/bundles/olpublic/images/horizons-logo.jpg'
    ignore_duplicate_articles = {'url'}
    extra_css = 'em{color:#404040;}'

    # Only the article container is kept from each downloaded page ...
    keep_only_tags = [
        dict(name='div', attrs={'class': 'article'})
    ]
    # ... minus the back link and the post footer inside it.
    remove_tags = [
        classes('back-link'),
        dict(name='div', attrs={'class': 'single-post-footer'})
    ]

    def _absolute_url(self, url):
        '''Return ``url`` resolved against the index page URL.'''
        return urljoin(INDEX_URL, url)

    def parse_index(self):
        '''
        Build the feed list for one Horizons issue.

        Sets ``self.cover_url`` (when the issue tile contains an image) and
        ``self.timefmt`` (when the tile contains a title) as side effects.

        Returns a list of ``(section name, articles)`` tuples; each article
        is a dict with the keys ``title``, ``url`` and ``description``.
        Sections without any usable article link are omitted.

        Raises ValueError when the index page lists no issue at all.
        '''
        soup = self.index_to_soup(INDEX_URL)
        issues = soup.findAll('a', href=True, attrs={'class': 'horizon-gallery-box'})
        if not issues:
            # Fail with a message that names the page instead of a bare IndexError.
            raise ValueError('No Horizons issue found at ' + INDEX_URL)
        issue_link = issues[0]  # use 1 for previous edition
        issue_url = self._absolute_url(issue_link['href'])

        # The cover is optional: a tile without an image must not abort the download.
        cover = issue_link.find('img', src=True)
        if cover is not None:
            self.cover_url = self._absolute_url(cover['src'])
            self.log(self.cover_url)

        issue_title = issue_link.find('div', attrs={'class': 'horizon-gallery-title'})
        if issue_title is not None:
            self.timefmt = ' [' + self.tag_to_string(issue_title).strip() + ']'
            self.log('Downloading Issue: ', self.timefmt)

        soup = self.index_to_soup(issue_url)

        feeds = []
        for heading in soup.findAll('h2', attrs={'class': 'mt-3'}):
            secname = self.tag_to_string(heading).strip()
            self.log(secname)

            # The article list is the next matching div after the heading;
            # skip headings that are not followed by one.
            listing = heading.findNext('div', attrs={'class': 'mb-3'})
            if listing is None:
                continue

            articles = []
            for item in listing.findAll('li', attrs={'class': 'mb-2'}):
                link = item.find('a', href=True)
                if link is None:
                    # List entry without a link: nothing to download.
                    continue
                article_url = self._absolute_url(link['href'])
                article_title = self.tag_to_string(link).strip()

                # The author line, when present, is used as the description.
                author = item.find('span', attrs={'class': 'section-author'})
                desc = ''
                if author is not None:
                    desc = self.tag_to_string(author).strip()

                self.log('\t', article_title, '\n\t', desc, '\n\t\t', article_url)
                articles.append({
                    'title': article_title,
                    'url': article_url,
                    'description': desc})
            if articles:
                feeds.append((secname, articles))
        return feeds