#!/usr/bin/env python
'''
zerodeux.fr
'''

from calibre.web.feeds.news import BasicNewsRecipe


class ZeroDeuxRecipe(BasicNewsRecipe):
    """Calibre recipe for Zérodeux, a quarterly contemporary-art review."""

    # Site root, used to fetch the cover and to absolutize root-relative URLs.
    BASE_URL = 'https://www.zerodeux.fr'

    title = 'Zérodeux'
    __author__ = 'Kabonix'
    description = "Revue d'art contemporain trimestrielle"
    publisher = 'Zérodeux'
    category = 'art, contemporary art, criticism'
    language = 'fr'
    encoding = 'utf-8'

    oldest_article = 60
    max_articles_per_feed = 25
    no_stylesheets = True
    remove_javascript = True
    auto_cleanup = False

    feeds = [
        ('Essais', 'https://www.zerodeux.fr/category/essais/feed/'),
        ('Guests', 'https://www.zerodeux.fr/category/guests/feed/'),
        ('Interviews', 'https://www.zerodeux.fr/category/interviews/feed/'),
        ('Reviews', 'https://www.zerodeux.fr/category/reviews/feed/'),
        ('News', 'https://www.zerodeux.fr/category/news/feed/'),
        ('Special Web', 'https://www.zerodeux.fr/category/specialweb/feed/'),
    ]

    keep_only_tags = [
        dict(name='article', attrs={'id': 'single-post'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': ['single-associate', 'single-info', 'transition']}),
        dict(name='div', attrs={'id': ['sidebar', 'menu_footer']}),
        # Removes the "Partage" (share) block.
        dict(name='ul', attrs={'class': ['single-info']}),
        # Removes the "Du même auteur" (by the same author) heading.
        dict(name='li', text='Du même auteur :'),
        # Removes the "related articles" paragraph.
        dict(name='p', attrs={'class': ['recomand']}),
    ]

    # Kept as adjacent literals so the resulting string is byte-identical to
    # the single-line stylesheet this recipe shipped with.
    extra_css = (
        ' h1 { font-size: 1.8em; font-weight: bold; margin: 0 0 1em 0; }'
        ' h2 { font-size: 1.4em; font-weight: bold; margin: 1em 0; }'
        ' .single-author { font-style: italic; margin-bottom: 1.5em; color: #666; }'
        ' img { max-width: 100%; height: auto; margin: 1em auto; }'
        ' figcaption { font-size: 0.9em; font-style: italic; color: #666; margin: 0.5em 0 1.5em 0; }'
        ' p { margin-bottom: 1em; line-height: 1.5; }'
        ' ol { margin: 1em 0 1em 2em; }'
        ' blockquote { margin: 1em 0; padding: 0 1em; border-left: 3px solid #ccc; }'
        ' .wp-block-image { margin: 1.5em 0; }'
        ' .has-small-font-size { font-size: 0.9em; }'
        ' '
    )

    def _absolute_url(self, url):
        """Return *url* with a scheme and host when it lacks them.

        Protocol-relative URLs (``//host/path``) only get the ``https:``
        scheme; root-relative URLs (``/path``) are prefixed with
        ``BASE_URL``. Any other value is returned unchanged.
        """
        # Test '//' first: it also starts with '/', but already names a host.
        if url.startswith('//'):
            return 'https:' + url
        if url.startswith('/'):
            return self.BASE_URL + url
        return url

    def get_cover_url(self):
        """Return the cover image URL found on the home page, or None.

        Looks for the ``img.rl-image-widget-image`` element inside the
        ``#responsive_lightbox_image_widget-2`` div of the soup returned by
        ``index_to_soup`` for the site root.
        """
        soup = self.index_to_soup(self.BASE_URL)
        cover_div = soup.find('div', {'id': 'responsive_lightbox_image_widget-2'})
        if not cover_div:
            return None
        cover_img = cover_div.find('img', {'class': 'rl-image-widget-image'})
        if not cover_img:
            return None
        src = cover_img.get('src')
        # A missing or empty src is treated as "no cover".
        return self._absolute_url(src) if src else None

    def preprocess_html(self, soup):
        """Strip inline styles and responsive-image attributes from *soup*.

        Also absolutizes root-relative and protocol-relative image sources
        and removes the stray "Partage :" / "Du même auteur :" text nodes.
        Returns the same, mutated, soup.
        """
        # Drop inline styles.
        for tag in soup.findAll(True):
            if 'style' in tag.attrs:
                del tag['style']

        # Clean up images.
        for img in soup.findAll('img'):
            for attr in ['srcset', 'sizes', 'loading', 'class', 'width',
                         'height', 'decoding', 'fetchpriority']:
                if attr in img.attrs:
                    del img[attr]
            src = img.get('src', '')
            if src.startswith('/'):
                img['src'] = self._absolute_url(src)

        # Remove the leftover "Partage :" and "Du même auteur :" labels.
        for text in soup.findAll(text=True):
            if text.strip() in ['Partage :', 'Du même auteur :']:
                text.extract()

        return soup

    def populate_article_metadata(self, article, soup, first):
        """Set ``article.author`` from the ``div.single-author`` byline.

        A leading ``'par '`` is stripped from the byline text. Nothing is
        changed when the div is absent.
        """
        author_div = soup.find('div', {'class': 'single-author'})
        if author_div:
            author = author_div.get_text().strip()
            if author.startswith('par '):
                author = author[4:]
            article.author = author