#!/usr/bin/env python
'''
Recipe for The Economist's "The World in Brief" page.

https://www.economist.com/the-world-in-brief
'''

import re

from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe, classes


def new_tag(soup, name, attrs=()):
    '''
    Build a new ``name`` element belonging to ``soup``.

    When ``soup`` exposes a ``new_tag`` attribute it is called with the
    attributes converted to a dict; otherwise ``Tag`` is instantiated
    directly, passing ``None`` in place of empty attributes.
    '''
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on the soup object: construct the Tag by hand.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))


class Espresso(BasicNewsRecipe):
    '''Recipe that fetches the single "The World in Brief" page.'''

    title = 'The Economist Espresso'
    language = 'en_GB'
    __author__ = 'unkn0wn'
    description = (
        'Espresso is a rich, full-flavoured shot of daily global analysis '
        'from the editors of The Economist to get you up to speed, fast. '
        'Maximise your understanding of the most significant business, '
        'economic, political and cultural developments globally.'
    )
    cover_url = (
        'https://downloadr2.apkmirror.com/wp-content/uploads/2021/10/75/615777cc6611b.png'
    )
    no_stylesheets = True
    remove_attributes = ['height', 'width', 'style']
    use_embedded_content = False
    masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'

    # One CSS rule per literal; the pieces concatenate into a single string.
    extra_css = (
        ' h1 { text-align:center; }'
        ' ._main-image, ._description, .sub { text-align:center; font-size:small; }'
        ' ._quote-container { font-size:x-large; font-style:italic; color:#202020; } '
    )

    keep_only_tags = [{'name': 'main', 'attrs': {'id': 'content'}}]

    remove_tags = [
        classes('_podcast-promo _newsletter-promo-container _time-last-updated'),
        {'attrs': {'data-test-id': 'twib-audio-player'}},
    ]

    def parse_index(self):
        '''Return one 'Espresso' section holding the single brief article.'''
        brief = {
            'title': 'The World in Brief',
            'url': 'https://www.economist.com/the-world-in-brief',
            'description': 'Catch up quickly on the global stories that matter',
        }
        return [('Espresso', [brief])]

    def preprocess_html(self, soup):
        '''
        Adjust the parsed page in place and return it.

        Tags the first ``<p>`` sibling after the ``<h1>`` with class
        ``sub``, appends an ``<hr>`` to every ``_gobbet`` / ``_article``
        element, and rewrites the ``width=`` value in image URLs to 600.
        '''
        heading = soup.find('h1')
        if heading:
            subtitle = heading.find_next_sibling('p')
            if subtitle:
                subtitle['class'] = 'sub'

        # Close each story block with a horizontal rule.
        for story in soup.findAll(attrs={'class': ['_gobbet', '_article']}):
            story.append(new_tag(soup, 'hr'))

        for image in soup.findAll('img', src=True):
            image['src'] = re.sub(r'width=\d+', 'width=600', image['src'])

        return soup

    def get_browser(self, *args, **kwargs):
        '''Return the base recipe's browser with a mobile user agent set.'''
        mobile_agent = (
            'Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.6533.103 Mobile Safari/537.36 Lamarr'
        )
        # Overrides any user_agent the caller supplied.
        kwargs['user_agent'] = mobile_agent
        return BasicNewsRecipe.get_browser(self, *args, **kwargs)