From 5d222cfd74f95151f605ff69d11953bb307c3cbf Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 13 May 2022 07:29:54 +0530 Subject: [PATCH] Le Monde (English) by Darko Miletic Fixes #1973215 [New recipe for Le Monde in English](https://bugs.launchpad.net/calibre/+bug/1973215) --- recipes/icons/le_monde_en.png | Bin 0 -> 1200 bytes recipes/le_monde_en.recipe | 134 ++++++++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 recipes/icons/le_monde_en.png create mode 100644 recipes/le_monde_en.recipe diff --git a/recipes/icons/le_monde_en.png b/recipes/icons/le_monde_en.png new file mode 100644 index 0000000000000000000000000000000000000000..1ff0dd2806030d3b11056a3aeb97cab110c5f411 GIT binary patch literal 1200 zcmds#J!_Ov5QS%rpoZOzoni=9LO_JT6qZKWU{+)an-tn<;UGdlQG$rXuZY-)2wO~P zsYL`^8w)K01A>BRp|<)1tjA~WFR-%kvimNx_uezlIrBbV8J+G;ADR}C-f(#^=68R6 z_fGMgKQ^-}G9@c#*On+nRK!GFBt)_*RZ$al(GbnrG!-*37Ynhhv##PM?&2Yybv0DN zBwQjS5+zBMG)b2XNeZ-)MvKdw&gxUkR9!Vxv)WRbskvIHWl^x{rta#Yo(09(Fb&rT zjf9b_nx^TRp-ES$5d#`Zu-Kz9+DN0tAl$*WZRZyW>|lyYK4q`RgM6clGT|j`nBy^UQE?a_!(`@?@v?cCowb*Q3p! z_1Snm?jPA)e|p%u&+g4#yZQBb_u]*>)([^<]*)', + re.IGNORECASE), lambda match: match.group(1) + ' ' + match.group(2)), + # insert " | " between article type and description + (re.compile(r'(]*>[^<]*)()', + re.IGNORECASE), lambda match: match.group(1) + ' | ' + match.group(2)) + ] + + extra_css = ''' + h2 { font-size: 1em; } + h3 { font-size: 1em; } + .article__desc { font-weight: bold; } + .article__fact { font-weight: bold; text-transform: uppercase; } + .article__kicker { text-transform: uppercase; } + .article__legend { font-size: 0.6em; margin-bottom: 1em; } + .article__title { margin-top: 0em; } + ''' + + def get_browser(self): + br = BasicNewsRecipe.get_browser(self) + if self.username is not None and self.password is not None: + br.open('https://secure.lemonde.fr/sfuser/connexion') + br.select_form(nr=0) + br['email'] = self.username + br['password'] = self.password + br.submit() + return br + + def get_article_url(self, article): + url = BasicNewsRecipe.get_article_url(self, article) + # skip articles without relevant content (e.g., videos) + for el in 'blog chat live podcasts portfolio video visuel'.split(): + if '/' + el + '/' in url: + self.log(url) + self.abort_article() + return url + + def preprocess_html(self, soup): + # when an image is available in multiple sizes, select the smallest one + for img in soup.find_all('img', {'data-srcset': True}): + data_srcset = img['data-srcset'].split() + if len(data_srcset) > 1: + img['src'] = data_srcset[-2] + del img['data-srcset'] + return soup + + def postprocess_html(self, soup, first_fetch): + # remove local hyperlinks + for a in soup.find_all('a', {'href': True}): + if '.lemonde.fr/' in a['href']: + a.replace_with(self.tag_to_string(a)) + # clean up header + for ul in soup.find_all('ul', {'class': 'breadcrumb'}): + div = soup.new_tag('div') + category = '' + for li in ul.find_all('li', {'class': True}): + category += self.tag_to_string(li).strip().upper() + ' - ' + div.string = category[:-3] + ul.replace_with(div) + return soup