From 80863f5a9784c3fb610ac7e91651d9f9a71b8877 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 3 Apr 2022 08:31:50 +0530 Subject: [PATCH] The Diplomat by unkn0wn --- recipes/the_diplomat.recipe | 71 +++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 recipes/the_diplomat.recipe diff --git a/recipes/the_diplomat.recipe b/recipes/the_diplomat.recipe new file mode 100644 index 0000000000..fde4a0f1a7 --- /dev/null +++ b/recipes/the_diplomat.recipe @@ -0,0 +1,71 @@ +#!/usr/bin/env python + +from calibre.web.feeds.news import BasicNewsRecipe, classes + + +class Diplomat(BasicNewsRecipe): + title = u'The Diplomat' + description = 'The Diplomat is the premier international current-affairs magazine for the Asia-Pacific region. READ THE DIPLOMAT, KNOW THE ASIA-PACIFIC' + language = 'en' + __author__ = 'unkn0wn' + oldest_article = 14 # days + max_articles_per_feed = 100 + encoding = 'utf-8' + use_embedded_content = False + no_stylesheets = True + remove_attributes = ['style', 'height', 'width'] + masthead_url = 'https://thediplomat.com/wp-content/themes/td_theme_v3/assets/logo/diplomat_logo_black.svg' + ignore_duplicate_articles = {'url'} + extra_css = ''' + #td-meta { font-size: x-small } + #td-story-authors { font-size: small} + #td-story-image{font-size: small; font-style: italic;} + [id^="caption-attachment"] {font-size: small; font-style: italic;} + ''' + + def get_cover_url(self): + soup = self.index_to_soup('https://thediplomat.com') + tag = soup.find(attrs={'class': 'td-nav-mag'}) + if tag: + url = tag.find('img')['src'].split("/")[-1] + self.cover_url = ('https://magazine.thediplomat.com/media/1080/' + + url) + # /ads/magazine/cover/td-mag-s-1/issue_89_cover.jpg + # https://magazine.thediplomat.com/media/1080/issue_89_cover.jpg + return super().get_cover_url() + + keep_only_tags = [ + dict(name='header', attrs={'id': 'td-story-head'}), + classes('td-media--story td-prose td-story-magazine__link td-authors'), + ] + + remove_tags = [ + dict(name='aside', attrs={'id': 'td-box-newsletter'}), + classes('td-share td-ad td-ad-inline-txt'), + ] + + feeds = [ + ('Features', 'https://thediplomat.com/category/features/feed'), + ('Interviews', 'https://thediplomat.com/category/interviews/feed'), + ('Magazine Preview', 'https://thediplomat.com/category/magazine/feed'), + # REGIONS + ('Central Asia', 'https://thediplomat.com/regions/central-asia/feed'), + ('East Asia', 'https://thediplomat.com/regions/east-asia/feed'), + ('Oceania', 'https://thediplomat.com/regions/oceania-region/feed'), + ('South Asia', 'https://thediplomat.com/regions/south-asia/feed'), + ('South East Asia', + 'https://thediplomat.com/regions/southeast-asia/feed'), + # TOPICS + ('Diplomacy', 'https://thediplomat.com/topics/diplomacy/feed'), + ('Economy', 'https://thediplomat.com/topics/economy/feed'), + ('Environment', 'https://thediplomat.com/topics/environment/feed'), + ('Opinion', 'https://thediplomat.com/topics/opinion/feed'), + ('Politics', 'https://thediplomat.com/topics/politics/feed'), + ('Security', 'https://thediplomat.com/topics/security/feed'), + ('Society', 'https://thediplomat.com/topics/society/feed'), + ] + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'src': True}): + img['src'] = img['src'].replace("td-story-s-1", "td-story-s-2") + return soup