The Diplomat by unkn0wn

This commit is contained in:
Kovid Goyal 2022-04-03 08:31:50 +05:30
parent c98ad9a233
commit 80863f5a97
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -0,0 +1,71 @@
#!/usr/bin/env python
from calibre.web.feeds.news import BasicNewsRecipe, classes
class Diplomat(BasicNewsRecipe):
title = u'The Diplomat'
description = 'The Diplomat is the premier international current-affairs magazine for the Asia-Pacific region. READ THE DIPLOMAT, KNOW THE ASIA-PACIFIC'
language = 'en'
__author__ = 'unkn0wn'
oldest_article = 14 # days
max_articles_per_feed = 100
encoding = 'utf-8'
use_embedded_content = False
no_stylesheets = True
remove_attributes = ['style', 'height', 'width']
masthead_url = 'https://thediplomat.com/wp-content/themes/td_theme_v3/assets/logo/diplomat_logo_black.svg'
ignore_duplicate_articles = {'url'}
extra_css = '''
#td-meta { font-size: x-small }
#td-story-authors { font-size: small}
#td-story-image{font-size: small; font-style: italic;}
[id^="caption-attachment"] {font-size: small; font-style: italic;}
'''
def get_cover_url(self):
soup = self.index_to_soup('https://thediplomat.com')
tag = soup.find(attrs={'class': 'td-nav-mag'})
if tag:
url = tag.find('img')['src'].split("/")[-1]
self.cover_url = ('https://magazine.thediplomat.com/media/1080/' +
url)
# /ads/magazine/cover/td-mag-s-1/issue_89_cover.jpg
# https://magazine.thediplomat.com/media/1080/issue_89_cover.jpg
return super().get_cover_url()
keep_only_tags = [
dict(name='header', attrs={'id': 'td-story-head'}),
classes('td-media--story td-prose td-story-magazine__link td-authors'),
]
remove_tags = [
dict(name='aside', attrs={'id': 'td-box-newsletter'}),
classes('td-share td-ad td-ad-inline-txt'),
]
feeds = [
('Features', 'https://thediplomat.com/category/features/feed'),
('Interviews', 'https://thediplomat.com/category/interviews/feed'),
('Magazine Preview', 'https://thediplomat.com/category/magazine/feed'),
# REGIONS
('Central Asia', 'https://thediplomat.com/regions/central-asia/feed'),
('East Asia', 'https://thediplomat.com/regions/east-asia/feed'),
('Oceania', 'https://thediplomat.com/regions/oceania-region/feed'),
('South Asia', 'https://thediplomat.com/regions/south-asia/feed'),
('South East Asia',
'https://thediplomat.com/regions/southeast-asia/feed'),
# TOPICS
('Diplomacy', 'https://thediplomat.com/topics/diplomacy/feed'),
('Economy', 'https://thediplomat.com/topics/economy/feed'),
('Environment', 'https://thediplomat.com/topics/environment/feed'),
('Opinion', 'https://thediplomat.com/topics/opinion/feed'),
('Politics', 'https://thediplomat.com/topics/politics/feed'),
('Security', 'https://thediplomat.com/topics/security/feed'),
('Society', 'https://thediplomat.com/topics/society/feed'),
]
def preprocess_html(self, soup):
for img in soup.findAll('img', attrs={'src': True}):
img['src'] = img['src'].replace("td-story-s-1", "td-story-s-2")
return soup