# -*- mode: python; coding: utf-8; -*-
# vim: set syntax=python fileencoding=utf-8

__license__ = "GPL v3"
__copyright__ = "2023, Tomás Di Domenico "

"""
www.eldiplo.org
"""

from urllib.parse import urljoin

from calibre.web.feeds.news import BasicNewsRecipe


class ElDiplo2023(BasicNewsRecipe):
    """Recipe for the southern-cone edition of Le Monde Diplomatique.

    Logs in through the site's login form when credentials are provided,
    locates the "Sumario" (table of contents) page linked from the home
    page, and builds one feed for the loose articles plus one feed per
    dossier found on that page.
    """

    title = "Le Monde Diplomatique - cono sur"
    __author__ = "Tomás Di Domenico"
    description = "Publicación de Le Monde Diplomatique para el cono sur."
    publisher = "Capital Intelectual"
    category = "News, Politics, Argentina, Uruguay, Paraguay, South America, World"
    oldest_article = 31
    no_stylesheets = True
    encoding = "utf8"
    use_embedded_content = False
    language = "es_AR"
    remove_empty_feeds = True
    publication_type = "magazine"
    delay = 1
    simultaneous_downloads = 1
    timeout = 8
    needs_subscription = True
    ignore_duplicate_articles = {"url"}
    temp_files = []
    fetch_retries = 10
    handle_gzip = True
    compress_news_images = True
    scale_news_images_to_device = True
    masthead_url = (
        "https://www.eldiplo.org/wp-content/themes/_polenta_/assets/diplo.png"
    )
    # Home page; also the base against which the "Sumario" link is resolved.
    INDEX = "https://www.eldiplo.org/"

    conversion_options = {
        "series": "El Dipló",
        "publisher": publisher,
        "base_font_size": 8,
        "tags": category,
    }

    keep_only_tags = [dict(name=["article"])]

    remove_tags = [dict(name=["button"])]

    extra_css = """
        .entry-title {
            text-align: center;
        }
        .text-right {
            text-align: right;
        }
        .bajada {
            display: block;
            font-family: sans-serif;
            text-align: center;
            font-size: 110%;
            padding: 2%;
        }
        .Destacado{
            display: block;
            font-size: 120%;
            font-weight: bold;
            font-style: italic;
            padding-left: 10%;
            padding-right: 10%;
        }
    """

    def get_browser(self):
        """Return a browser, logged in when credentials are available.

        The home page is always opened; the ``loginform`` form is filled in
        and submitted only when both ``username`` and ``password`` are set.
        """
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            br.select_form(id="loginform")
            br["log"] = self.username
            br["pwd"] = self.password
            br.submit()
        return br

    def _get_sumario_url(self):
        """Return the absolute URL of the "Sumario" page, or None.

        The home page is searched for a ``<span>`` whose text is "Sumario";
        the link is taken from the ``href`` of that span's parent. None is
        returned when the span, its parent or the ``href`` is missing.
        """
        soup_index = self.index_to_soup(self.INDEX)
        tag_sumario = soup_index.find("span", text="Sumario")
        if tag_sumario is None or tag_sumario.parent is None:
            return None
        href = tag_sumario.parent.get("href")
        if not href:
            return None
        # urljoin handles both site-relative and already-absolute hrefs.
        return urljoin(self.INDEX, href)

    def get_cover_url(self):
        """Return the cover image URL for the current issue.

        An explicitly configured, non-empty ``cover_url`` attribute wins and
        avoids any download. Otherwise the cover is the first ``<img>``
        inside the ``div.px-16`` container of the "Sumario" page. None is
        returned when the cover cannot be located.
        """
        # NOTE(review): a plain getattr(self, "cover_url", url) only falls
        # back to the scraped URL when the attribute is absent; an attribute
        # that exists but is None would hide the scraped cover. Confirm the
        # base class default against the calibre API documentation.
        configured = getattr(self, "cover_url", None)
        if configured:
            return configured

        url_sumario = self._get_sumario_url()
        if url_sumario is None:
            self.log("Sumario link not found; no cover available")
            return None

        soup = self.index_to_soup(url_sumario)
        container = soup.find("div", class_="px-16")
        img = container.find("img") if container is not None else None
        if img is None:
            self.log("Cover image not found in: ", url_sumario)
            return None
        return img.get("src")

    def _process_article(self, article):
        """Build an article dict from one entry of the "Sumario" page.

        Returns a dict with ``title``, ``url``, ``description`` (the byline,
        or an empty string) and an empty ``date``; returns None when the
        entry has no ``<a class="title">`` link to follow.
        """
        link = article.find("a", href=True, attrs={"class": "title"})
        if link is None:
            return None
        url = link["href"]

        title = self.tag_to_string(article).replace("Editorial", "Editorial: ")
        # Split on the LAST ", por" so a title that itself contains the
        # separator keeps its text and the byline is still extracted.
        head, separator, tail = title.rpartition(", por")
        if separator:
            title = head
            authors = f"por {tail}"
        else:
            authors = ""

        self.log("title: ", title, " url: ", url)
        return {"title": title, "url": url, "description": authors, "date": ""}

    def preprocess_html(self, soup):
        """Shrink the footnote paragraphs of an article, when present."""
        font_size = "90%"

        footnotes = soup.find("div", id="nota_pie")
        if footnotes is None:
            # Article without a footnote section: nothing to restyle.
            return soup

        # make the footnotes smaller
        for p in footnotes.find_all("p", recursive=False):
            p["style"] = f"font-size: {font_size};"

        return soup

    def parse_index(self):
        """Return the feeds of the current issue.

        The result is a list of ``(feed title, articles)`` tuples: first
        "Artículos" with the loose articles, then one tuple per dossier.
        An empty list is returned when the "Sumario" page or its
        ``div.sumario`` container cannot be found.
        """
        url_sumario = self._get_sumario_url()
        if url_sumario is None:
            self.log("Sumario link not found on: ", self.INDEX)
            return []

        self.log(url_sumario)

        soup_sumario = self.index_to_soup(url_sumario)
        sumario = soup_sumario.find("div", class_="sumario")
        if sumario is None:
            self.log("Sumario container not found in: ", url_sumario)
            return []

        articles = []
        dossiers = []

        for section in sumario.find_all("div", recursive=False):
            # Some wrappers may carry no class attribute at all.
            classes = section.get("class") or []

            if "dossier" in classes:
                dtitle = self.tag_to_string(section.find("h3"))
                entries = (
                    self._process_article(article)
                    for article in section.find_all("div", recursive=False)
                )
                darticles = [entry for entry in entries if entry is not None]
                dossiers.append((dtitle, darticles))
            else:
                entry = self._process_article(section)
                if entry is not None:
                    articles.append(entry)

        return [("Artículos", articles)] + dossiers