#!/usr/bin/env python

'''
zerodeux.fr
'''

from urllib.parse import urljoin

from calibre.web.feeds.news import BasicNewsRecipe


class ZeroDeuxRecipe(BasicNewsRecipe):
    '''
    Calibre news recipe for Zérodeux (zerodeux.fr), a quarterly
    contemporary art review.

    Articles are collected from one RSS feed per site category. Each
    article page is reduced to its ``<article id="single-post">`` element,
    and sharing / related-content widgets are stripped before conversion.
    '''

    title = 'Zérodeux'
    __author__ = 'Kabonix'
    description = 'Revue d\'art contemporain trimestrielle'
    publisher = 'Zérodeux'
    category = 'art, contemporary art, criticism'
    language = 'fr'
    encoding = 'utf-8'
    oldest_article = 60
    max_articles_per_feed = 25

    no_stylesheets = True
    remove_javascript = True
    auto_cleanup = False

    # Site origin: used to locate the cover image and to resolve image URLs
    # that are not already absolute.
    INDEX = 'https://www.zerodeux.fr'

    feeds = [
        ('Essais', 'https://www.zerodeux.fr/category/essais/feed/'),
        ('Guests', 'https://www.zerodeux.fr/category/guests/feed/'),
        ('Interviews', 'https://www.zerodeux.fr/category/interviews/feed/'),
        ('Reviews', 'https://www.zerodeux.fr/category/reviews/feed/'),
        ('News', 'https://www.zerodeux.fr/category/news/feed/'),
        ('Special Web', 'https://www.zerodeux.fr/category/specialweb/feed/'),
    ]

    def get_cover_url(self):
        '''
        Return the URL of the cover image shown on the home page.

        The image is looked up inside the lightbox image widget of the
        home page. The ``src`` value is resolved against the site origin so
        that relative or protocol-relative values still yield a fetchable
        URL. Returns ``None`` when the widget or its image is missing.
        '''
        soup = self.index_to_soup(self.INDEX)
        cover_div = soup.find('div', {'id': 'responsive_lightbox_image_widget-2'})
        if cover_div is None:
            return None
        cover_img = cover_div.find('img', {'class': 'rl-image-widget-image'})
        if cover_img is None:
            return None
        src = cover_img.get('src')
        if not src:
            return None
        # urljoin leaves an already absolute URL untouched.
        return urljoin(self.INDEX, src)

    keep_only_tags = [
        dict(name='article', attrs={'id': 'single-post'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': ['single-associate', 'single-info', 'transition']}),
        dict(name='div', attrs={'id': ['sidebar', 'menu_footer']}),
        dict(name='ul', attrs={'class': ['single-info']}),  # removes the "Partage" (share) block
        dict(name='li', text='Du même auteur :'),  # removes the "Du même auteur" (same author) heading
        dict(name='p', attrs={'class': ['recomand']}),  # removes the related-articles paragraph
    ]

    def preprocess_html(self, soup):
        '''
        Clean the parsed article before conversion.

        Removes inline ``style`` attributes from every tag, strips
        responsive / lazy-loading attributes from images, turns image URLs
        that start with ``/`` into absolute URLs, and drops the leftover
        "Partage :" and "Du même auteur :" text labels.

        :param soup: parsed HTML of the downloaded article.
        :return: the same soup object, modified in place.
        '''
        # Drop inline styles so that only extra_css drives the layout.
        for tag in soup.findAll(style=True):
            del tag['style']

        # Strip presentation / lazy-loading attributes from images.
        for img in soup.findAll('img'):
            for attr in ('srcset', 'sizes', 'loading', 'class', 'width',
                         'height', 'decoding', 'fetchpriority'):
                img.attrs.pop(attr, None)

            src = img.get('src', '')
            if src.startswith('/'):
                # urljoin handles both root-relative ("/a/b.jpg") and
                # protocol-relative ("//host/a.jpg") URLs; plain string
                # concatenation would corrupt the latter.
                img['src'] = urljoin(self.INDEX, src)

        # Remove the bare "Partage :" and "Du même auteur :" text labels.
        # findAll returns a list, so extracting nodes while looping is safe.
        for text in soup.findAll(text=True):
            if text.strip() in ('Partage :', 'Du même auteur :'):
                text.extract()

        return soup

    extra_css = '''
        h1 { font-size: 1.8em; font-weight: bold; margin: 0 0 1em 0; }
        h2 { font-size: 1.4em; font-weight: bold; margin: 1em 0; }
        .single-author { font-style: italic; margin-bottom: 1.5em; color: #666; }
        img { max-width: 100%; height: auto; margin: 1em auto; }
        figcaption { font-size: 0.9em; font-style: italic; color: #666; margin: 0.5em 0 1.5em 0; }
        p { margin-bottom: 1em; line-height: 1.5; }
        ol { margin: 1em 0 1em 2em; }
        blockquote { margin: 1em 0; padding: 0 1em; border-left: 3px solid #ccc; }
        .wp-block-image { margin: 1.5em 0; }
        .has-small-font-size { font-size: 0.9em; }
    '''

    def populate_article_metadata(self, article, soup, first):
        '''
        Set the article author from the page's ``div.single-author`` block.

        The text is whitespace-normalised and a leading "par " ("by"), in
        any letter case, is removed. The author is left untouched when the
        block is missing or contains no text.

        :param article: article object whose ``author`` attribute is set.
        :param soup: parsed HTML of the article.
        :param first: unused here.
        '''
        author_div = soup.find('div', {'class': 'single-author'})
        if author_div is None:
            return
        # Collapse newlines, tabs and non-breaking spaces into single spaces.
        author = ' '.join(author_div.get_text().split())
        if author[:4].lower() == 'par ':
            author = author[4:]
        if author:
            article.author = author