# Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-06-23 15:30:45 -04:00)
# 98 lines, 3.6 KiB, Python
#!/usr/bin/env python
|
|
|
|
'''
|
|
zerodeux.fr
|
|
'''
|
|
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
|
|
class ZeroDeuxRecipe(BasicNewsRecipe):
    '''
    Calibre recipe for Zérodeux (zerodeux.fr), described by the site as a
    quarterly contemporary-art review.

    Articles are collected from the per-category RSS feeds listed in
    ``feeds``. Each article page is reduced to its ``<article
    id="single-post">`` element, stripped of sharing / related-content
    blocks, and styled with ``extra_css``.
    '''

    title = 'Zérodeux'
    __author__ = 'Kabonix'
    description = "Revue d'art contemporain trimestrielle"
    publisher = 'Zérodeux'
    category = 'art, contemporary art, criticism'
    language = 'fr'
    encoding = 'utf-8'
    oldest_article = 60
    max_articles_per_feed = 25

    no_stylesheets = True
    remove_javascript = True
    auto_cleanup = False

    # One RSS feed per site category: (section title, feed URL).
    feeds = [
        ('Essais', 'https://www.zerodeux.fr/category/essais/feed/'),
        ('Guests', 'https://www.zerodeux.fr/category/guests/feed/'),
        ('Interviews', 'https://www.zerodeux.fr/category/interviews/feed/'),
        ('Reviews', 'https://www.zerodeux.fr/category/reviews/feed/'),
        ('News', 'https://www.zerodeux.fr/category/news/feed/'),
        ('Special Web', 'https://www.zerodeux.fr/category/specialweb/feed/')
    ]

    @staticmethod
    def _absolute_url(url):
        '''
        Return ``url`` as an absolute URL on zerodeux.fr.

        Protocol-relative URLs (``//host/path``) only get a scheme; naively
        prefixing the site origin to them would produce a broken
        ``https://www.zerodeux.fr//host/path``. Root-relative URLs
        (``/path``) get the site origin. Anything else is returned unchanged.
        '''
        if url.startswith('//'):
            return 'https:' + url
        if url.startswith('/'):
            return 'https://www.zerodeux.fr' + url
        return url

    def get_cover_url(self):
        '''
        Return the URL of the image in the home page's lightbox image
        widget, or ``None`` when the widget or its image cannot be found.
        '''
        soup = self.index_to_soup('https://www.zerodeux.fr')
        cover_div = soup.find('div', {'id': 'responsive_lightbox_image_widget-2'})
        if not cover_div:
            return None
        cover_img = cover_div.find('img', {'class': 'rl-image-widget-image'})
        if cover_img and 'src' in cover_img.attrs:
            return self._absolute_url(cover_img['src'])
        return None

    # Keep only the article element itself.
    keep_only_tags = [
        dict(name='article', attrs={'id': 'single-post'})
    ]

    remove_tags = [
        dict(name='div', attrs={'class': ['single-associate', 'single-info', 'transition']}),
        dict(name='div', attrs={'id': ['sidebar', 'menu_footer']}),
        dict(name='ul', attrs={'class': ['single-info']}),  # removes the "Partage" (share) block
        dict(name='li', text='Du même auteur :'),  # removes the "Du même auteur" (same author) heading
        dict(name='p', attrs={'class': ['recomand']})  # removes the related-articles paragraph
    ]

    def preprocess_html(self, soup):
        '''
        Clean up an article's parsed HTML and return the same ``soup``.

        Removes inline ``style`` attributes from every tag, strips
        responsive / lazy-loading attributes from images, makes relative
        image sources absolute, and removes the stray "Partage :" and
        "Du même auteur :" text nodes.
        '''
        # Drop inline styles so that only extra_css applies.
        for tag in soup.findAll(True):
            if 'style' in tag.attrs:
                del tag['style']

        # Strip presentation / loading hints from images and fix their URLs.
        for img in soup.findAll('img'):
            for attr in ['srcset', 'sizes', 'loading', 'class', 'width', 'height', 'decoding', 'fetchpriority']:
                if attr in img.attrs:
                    del img[attr]

            src = img.get('src', '')
            if src.startswith('/'):
                # Covers both root-relative and protocol-relative sources.
                img['src'] = self._absolute_url(src)

        # Remove the leftover "Partage :" and "Du même auteur :" labels.
        for text in soup.findAll(text=True):
            if text.strip() in ['Partage :', 'Du même auteur :']:
                text.extract()

        return soup

    extra_css = '''
        h1 { font-size: 1.8em; font-weight: bold; margin: 0 0 1em 0; }
        h2 { font-size: 1.4em; font-weight: bold; margin: 1em 0; }
        .single-author { font-style: italic; margin-bottom: 1.5em; color: #666; }
        img { max-width: 100%; height: auto; margin: 1em auto; }
        figcaption { font-size: 0.9em; font-style: italic; color: #666; margin: 0.5em 0 1.5em 0; }
        p { margin-bottom: 1em; line-height: 1.5; }
        ol { margin: 1em 0 1em 2em; }
        blockquote { margin: 1em 0; padding: 0 1em; border-left: 3px solid #ccc; }
        .wp-block-image { margin: 1.5em 0; }
        .has-small-font-size { font-size: 0.9em; }
    '''

    def populate_article_metadata(self, article, soup, first):
        '''
        Set ``article.author`` from the page's ``div.single-author`` element.

        A leading "par " (French for "by ") is removed from the extracted
        text. ``article.author`` is left untouched when the element is
        missing or contains no text, so an empty string never replaces an
        existing value. ``first`` is not used.
        '''
        author_div = soup.find('div', {'class': 'single-author'})
        if not author_div:
            return
        author = author_div.get_text().strip()
        if author.startswith('par '):
            author = author[4:].strip()
        if author:
            article.author = author
|