calibre/recipes/unz.recipe
2024-11-04 18:31:08 +05:30

59 lines
1.8 KiB
Python

#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe, classes
class Unz(BasicNewsRecipe):
    """Calibre news recipe for The Unz Review (https://www.unz.com).

    Articles are taken from the site feed listed in ``feeds``. The article
    age limit defaults to ``oldest_article`` days and can be overridden with
    the recipe-specific option ``days``.
    """

    # -- Publication metadata --------------------------------------------
    title = 'The Unz Review'
    description = (
        'A Collection of Interesting, Important, and Controversial Perspectives '
        'Largely Excluded from the American Mainstream Media.'
    )
    __author__ = 'unkn0wn'
    language = 'en_US'
    cover_url = 'https://www.unz.com/wp-content/themes/unzSite/IMAGES/unz_large_logo.png'

    # -- Download settings -----------------------------------------------
    # Default age limit in days. Must be defined before
    # ``recipe_specific_options`` below, which reads it for its 'default'.
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf-8'
    # browser_type = 'webengine'
    feeds = ['https://www.unz.com/feed']

    # -- Article clean-up ------------------------------------------------
    no_stylesheets = True
    remove_attributes = ['style', 'height', 'width']
    extra_css = '.byline, .caption { font-size: small; }'
    # <div> classes selected as the article content.
    keep_only_tags = [
        dict(
            name='div',
            attrs={'class': ['head', 'byline', 'page-thumb', 'section-holder']},
        ),
    ]
    # Embedded media and comment/reply links are dropped.
    remove_tags = [
        dict(name=['audio', 'iframe', 'svg']),
        classes('commentlink replylink'),
    ]
    remove_tags_after = [dict(name='div', attrs={'class': 'section-holder'})]

    # -- User-configurable options ---------------------------------------
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }

    def __init__(self, *args, **kwargs):
        """Initialise the recipe and apply the optional ``days`` override.

        All positional and keyword arguments are forwarded unchanged to
        ``BasicNewsRecipe.__init__``. If the ``days`` option is a non-empty
        string that parses as a number, it replaces ``oldest_article`` on
        this instance. A value that cannot be parsed is ignored with a
        warning and the class default stays in effect.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        # Read only after the base initialiser has run. Anything that is not
        # a non-empty string (for instance the definition dict declared on
        # the class) is not treated as a user override.
        days = self.recipe_specific_options.get('days')
        if days and isinstance(days, str):
            try:
                self.oldest_article = float(days)
            except ValueError:
                # A typo in the option should not abort the whole download.
                # NOTE(review): relies on BasicNewsRecipe.__init__ having set
                # ``self.log`` (the usual calibre recipe logger) - confirm.
                self.log.warn(
                    f'Ignoring invalid value for the "days" option: {days!r}; '
                    f'using the default of {self.oldest_article} days.'
                )

    def preprocess_html(self, soup):
        """Rename the first element matched by ``classes('head')`` to ``<h1>``.

        The soup is modified in place and the same object is returned. When
        no such element is found the soup is returned unchanged.
        """
        headline = soup.find(**classes('head'))
        if headline:
            headline.name = 'h1'
        return soup