mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update recipe for El Diplo (Le Monde Diplomatique Cono Sur)
Complete rewrite of the recipe. Taking a more conservative and minimalistic approach, it updates the look and feel to closely match that of the original publication, including adding internal article images. It also fixes internal links, which are now fully functional.
This commit is contained in:
parent
a725dd21ac
commit
3ad8452879
@ -14,10 +14,10 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ElDiplo2023(BasicNewsRecipe):
|
||||
title = "Le Monde Diplomatique - cono sur"
|
||||
__author__ = "Darko Miletic and Tomás Di Domenico"
|
||||
__author__ = "Tomás Di Domenico"
|
||||
description = "Publicación de Le Monde Diplomatique para el cono sur."
|
||||
publisher = "Capital Intelectual"
|
||||
category = "news, politics, Argentina, Uruguay, Paraguay, South America, World"
|
||||
category = "News, Politics, Argentina, Uruguay, Paraguay, South America, World"
|
||||
oldest_article = 31
|
||||
no_stylesheets = True
|
||||
encoding = "utf8"
|
||||
@ -25,13 +25,11 @@ class ElDiplo2023(BasicNewsRecipe):
|
||||
language = "es_AR"
|
||||
remove_empty_feeds = True
|
||||
publication_type = "magazine"
|
||||
auto_cleanup = True
|
||||
delay = 1
|
||||
simultaneous_downloads = 1
|
||||
timeout = 8
|
||||
needs_subscription = True
|
||||
ignore_duplicate_articles = {"url"}
|
||||
articles_are_obfuscated = True
|
||||
temp_files = []
|
||||
fetch_retries = 10
|
||||
handle_gzip = True
|
||||
@ -42,6 +40,36 @@ class ElDiplo2023(BasicNewsRecipe):
|
||||
)
|
||||
INDEX = "https://www.eldiplo.org/"
|
||||
|
||||
conversion_options = {"series": "El Dipló", "publisher": publisher, "base_font_size": 8, "tags": category}
|
||||
|
||||
keep_only_tags = [dict(name=["article"])]
|
||||
|
||||
remove_tags = [dict(name=["button"])]
|
||||
|
||||
extra_css = """
|
||||
.entry-title {
|
||||
text-align: center;
|
||||
}
|
||||
.text-right {
|
||||
text-align: right;
|
||||
}
|
||||
.bajada {
|
||||
display: block;
|
||||
font-family: sans-serif;
|
||||
text-align: center;
|
||||
font-size: 110%;
|
||||
padding: 2%;
|
||||
}
|
||||
.Destacado{
|
||||
display: block;
|
||||
font-size: 120%;
|
||||
font-weight: bold;
|
||||
font-style: italic;
|
||||
padding-left: 10%;
|
||||
padding-right: 10%;
|
||||
}
|
||||
"""
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open(self.INDEX)
|
||||
@ -76,12 +104,13 @@ class ElDiplo2023(BasicNewsRecipe):
|
||||
return {"title": title, "url": url, "description": authors, "date": ""}
|
||||
|
||||
def preprocess_html(self, soup):
    """Tidy an article's parsed HTML before conversion.

    Two passes are made over *soup*, which is modified in place:

    1. Every ``<a>`` whose ``href`` contains ``#`` is replaced by its own
       children, so the link text stays but the anchor is dropped.
    2. Each ``<p>`` that is a direct child of ``<div id="nota_pie">``
       gets an inline ``font-size`` style so footnotes render smaller.

    Articles without a ``nota_pie`` container skip the second pass
    instead of failing with ``AttributeError``.

    Returns the same *soup* object that was passed in.
    """
    import re

    # Clean up internal references' anchor links, leaving the inner text.
    # TODO: it would be nice to eventually make the internal links work.
    for link in soup.find_all(name="a", attrs={"href": re.compile(r"#")}):
        link.replaceWithChildren()

    font_size = "90%"

    # Make the footnotes smaller. ``find`` yields None when the article
    # has no footnote container, so guard before descending into it.
    footnotes = soup.find("div", id="nota_pie")
    if footnotes is not None:
        for p in footnotes.findChildren("p", recursive=False):
            p["style"] = f"font-size: {font_size};"

    return soup
|
||||
|
||||
@ -119,21 +148,3 @@ class ElDiplo2023(BasicNewsRecipe):
|
||||
feeds += dossiers
|
||||
|
||||
return feeds
|
||||
|
||||
def get_obfuscated_article(self, url):
    """Download *url*, retrying on failure, and save it to a temp file.

    Up to ``self.fetch_retries`` attempts are made through
    ``self.browser`` using ``self.timeout``. On the first successful
    download the body is written to a persistent temporary file, the
    file object is appended to ``self.temp_files``, and the file's path
    is returned.

    Returns ``None`` when every attempt fails.

    Only ``Exception`` subclasses raised by the download trigger a
    retry; ``KeyboardInterrupt`` and ``SystemExit`` propagate so the
    fetch can be cancelled. Errors raised while writing the temporary
    file are not download failures and also propagate.
    """
    for _ in range(self.fetch_retries):
        try:
            response = self.browser.open(url, timeout=self.timeout)
            html = response.read()
        except Exception:
            self.info("Retrying download...")
            continue

        tfile = PersistentTemporaryFile("_fa.html")
        try:
            tfile.write(html)
        finally:
            # Always release the handle, even if the write fails.
            tfile.close()
        self.temp_files.append(tfile)
        return tfile.name

    return None
|
||||
|
Loading…
x
Reference in New Issue
Block a user