This commit is contained in:
Kovid Goyal 2024-07-26 10:45:44 +05:30
commit 21583ad1d0
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
27 changed files with 375 additions and 3 deletions

View File

@ -54,6 +54,20 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
# Strip the opening <a ...> tag of links whose URL starts with
# http://estaticos and whose attributes end in '<digit>px;" target="_blank">'.
# [0-9] replaces the original [0-999], which is the very same one-digit
# character class written as though it were a numeric range.
preprocess_regexps = [
    (
        re.compile(
            r'<a href="http://estaticos.*?[0-9]px;" target="_blank">',
            re.DOTALL,  # let .*? run across line breaks inside the tag
        ),
        lambda m: '',
    ),
]
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
feeds = [
(u'Portada', u'http://www.20minutos.es/rss/'),

View File

@ -24,6 +24,20 @@ class ABCNews(BasicNewsRecipe):
max_articles_per_feed = 100
publication_type = 'newspaper'
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
# auto_cleanup = True # enable this as a backup option if recipe stops working
# use_embedded_content = False # if set to true will assume that all the article content is within the feed (i.e. won't try to fetch more data)

View File

@ -22,7 +22,7 @@ class AdvancedUserRecipe1296604369(BasicNewsRecipe):
description = 'Noticias de Spain y el mundo'
category = 'News,Spain,National,International,Economy'
oldest_article = 2
max_articles_per_feed = 10
max_articles_per_feed = 25
no_stylesheets = True
use_embedded_content = False
@ -31,6 +31,20 @@ class AdvancedUserRecipe1296604369(BasicNewsRecipe):
remove_javascript = True
language = 'es'
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
extra_css = """
p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% }

View File

@ -32,6 +32,20 @@ class AsianReviewOfBooks(BasicNewsRecipe):
img {display: block}
"""
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
conversion_options = {
'comment': description,
'tags': category,

View File

@ -151,6 +151,20 @@ class BBC(BasicNewsRecipe):
ignore_duplicate_articles = {'title', 'url'}
resolve_internal_links = True
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
feeds = [
('Top Stories', 'https://feeds.bbci.co.uk/news/rss.xml'),
('Science/Environment',

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
@ -10,6 +12,21 @@ class AdvancedUserRecipe1278162597(BasicNewsRecipe):
publisher = 'www.ce.cn - China Economic net - Beijing'
description = 'China Economic Net Magazine'
category = 'Economic News Magazine, Chinese, China'
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
feeds = [
(u'Stock Market 股市', u'http://finance.ce.cn/stock/index_6304.xml'),
(u'Money 理财', u'http://finance.ce.cn/money/index_6301.xml'),

View File

@ -70,6 +70,20 @@ class Clarin(BasicNewsRecipe):
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
}
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
keep_only_tags = [
dict(name='p' , attrs={'class' : 'volanta'}),
dict(name='h1' , attrs={'id': 'title'}),

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
@ -26,6 +28,20 @@ class CNN(BasicNewsRecipe):
]
remove_tags = [classes('video-inline_carousel')]
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
feeds = [
('Top News', 'http://rss.cnn.com/rss/cnn_topstories.rss'),
('World', 'http://rss.cnn.com/rss/cnn_world.rss'),

View File

@ -43,6 +43,20 @@ class ilCorriereEn(BasicNewsRecipe):
basename = '/'.join(segments[:3]) + '/' + \
'International/english/articoli/'
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
# the date has to be redone with the url structure
mlist1 = ['gennaio', 'febbraio', 'marzo', 'aprile', 'maggio', 'giugno',
'luglio', 'agosto', 'settembre', 'ottobre', 'novembre', 'dicembre']

View File

@ -28,6 +28,20 @@ class CorriereDellaSeraRecipe(BasicNewsRecipe):
remove_tags = [dict(id='gallery')]
ignore_duplicate_articles = {'title', 'url'}
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
feeds = [
('Homepage', 'http://xml2.corriereobjects.it/rss/homepage.xml'),
('Editoriali', 'http://xml2.corriereobjects.it/rss/editoriali.xml'),

View File

@ -20,6 +20,20 @@ class CourrierInternational(BasicNewsRecipe):
oldest_article = 7
language = 'fr'
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
max_articles_per_feed = 50
no_stylesheets = True

View File

@ -88,6 +88,20 @@ class elcorreo(BasicNewsRecipe):
p.name = 'div'
return soup
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
def get_browser(self, *args, **kwargs):
kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
br = BasicNewsRecipe.get_browser(self, *args, **kwargs)

View File

@ -31,6 +31,20 @@ class FoxNews(BasicNewsRecipe):
.author,.dateline{font-size: small}
"""
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
conversion_options = {
'comment': description,
'tags': category,

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# Calibre recipe for Instapaper.com (Stable version)
#
# Homepage: http://khromov.wordpress.com/projects/instapaper-calibre-recipe/
@ -29,6 +31,21 @@ class InstapaperRecipe(BasicNewsRecipe):
encoding = 'utf-8'
language = 'en'
remove_javascript = True
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
remove_tags = [
dict(name='div', attrs={'id': 'reflow'}),
dict(name='div', attrs={'id': 'modal_backer'}),

View File

@ -32,6 +32,20 @@ class JapanTimes(BasicNewsRecipe):
masthead_url = "https://cdn-japantimes.com/wp-content/themes/jt_theme/library/img/japantimes-logo-tagline.png"
extra_css = "body{font-family: Geneva,Arial,Helvetica,sans-serif}"
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
conversion_options = {
"comment": description,
"tags": category,

View File

@ -58,6 +58,20 @@ class LaJornada_mx(BasicNewsRecipe):
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
}
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
preprocess_regexps = [
(re.compile(r'<div class="inicial">(.*)</div><p class="s-s">', re.DOTALL | re.IGNORECASE),
lambda match: '<p class="inicial">' + match.group(1) + '</p><p class="s-s">')

View File

@ -23,6 +23,20 @@ class NationalPost(BasicNewsRecipe):
oldest_article = 1.5
use_embedded_content = False
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
keep_only_tags = [
dict(itemprop='headline'),
classes('featured-image'),

View File

@ -29,3 +29,9 @@ class ReutersJa(BasicNewsRecipe):
('スポーツ', 'https://www.nhk.or.jp/rss/news/cat7.xml?format=xml'),
('文化・エンタメ', 'https://www.nhk.or.jp/rss/news/cat2.xml?format=xml')
]
def preprocess_html(self, soup):
    """Copy each image's ``data-src`` value into ``src`` and return the soup.

    Every ``img`` element that carries a ``data-src`` attribute has its
    ``src`` attribute set to that value; the same soup object is returned.
    """
    lazy_images = soup.findAll('img', attrs={'data-src': True})
    for tag in lazy_images:
        tag['src'] = tag['data-src']
    return soup

View File

@ -22,9 +22,23 @@ class NewYorkPost(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'en'
language = 'en_US'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
}

View File

@ -86,7 +86,7 @@ class NewYorkTimes(BasicNewsRecipe):
description = 'Today\'s New York Times'
encoding = 'utf-8'
__author__ = 'Kovid Goyal'
language = 'en'
language = 'en_US'
ignore_duplicate_articles = {'title', 'url'}
no_stylesheets = True
compress_news_images = True

View File

@ -28,6 +28,20 @@ class SCMP(BasicNewsRecipe):
compress_news_images = True
ignore_duplicate_articles = {"title", "url"}
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
# used when unable to extract article from <script>, particularly in the Sports section
remove_tags = [
dict(

View File

@ -30,6 +30,20 @@ class Substack(BasicNewsRecipe):
needs_subscription = 'optional'
use_embedded_content = False
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
# Every Substack publication has an RSS feed at https://{name}.substack.com/feed.
# The same URL provides either all posts, or all free posts + previews of paid posts,
# depending on whether you're logged in.

View File

@ -27,6 +27,20 @@ class TagesspiegelRss(BasicNewsRecipe):
ignore_duplicate_articles = {'title', 'url'}
remove_empty_feeds = True
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
def get_browser(self):
    """Return the base-class browser built with verify_ssl_certificates=False."""
    # The keyword is forwarded unchanged to BasicNewsRecipe.get_browser.
    browser_options = {'verify_ssl_certificates': False}
    return BasicNewsRecipe.get_browser(self, **browser_options)

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
@ -13,6 +15,20 @@ class HindustanTimes(BasicNewsRecipe):
auto_cleanup = True
auto_cleanup_keep = '//div[@class="story-image shadowbox entry-content-asset"]'
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
feeds = [
('News',
'http://www.theverge.com/rss/index.xml'),

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
__license__ = 'GPL v3'
__copyright__ = '2014, Darko Miletic <darko.miletic at gmail.com>'
'''
@ -44,6 +46,20 @@ class WiredDailyNews(BasicNewsRecipe):
ul li{display: inline}
"""
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
remove_tags = [
classes('related-cne-video-component tags-component podcast_42 storyboard inset-left-component social-icons'),
dict(name=['meta', 'link', 'aside']),

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
__license__ = 'GPL v3'
__copyright__ = '2013, Armin Geller'
@ -34,6 +36,20 @@ class WirtschaftsWocheOnline(BasicNewsRecipe):
cover_source = 'https://www.ikiosk.de/shop/epaper/wirtschaftswoche.html'
masthead_url = 'http://www.wiwo.de/images/wiwo_logo/5748610/1-formatOriginal.png'
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
def get_cover_url(self):
cover_source_soup = self.index_to_soup(self.cover_source)
preview_image_div = cover_source_soup.find(attrs={'class': 'gallery'})

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
@ -55,6 +57,20 @@ class ZeitDe(BasicNewsRecipe):
dict(name='a', class_='faq-link'),
]
# Options exposed for this recipe. The single 'days' entry carries a short
# label, a longer usage hint, and a default built from str(oldest_article).
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        'default': str(oldest_article),
    },
}
def __init__(self, *args, **kwargs):
    """Initialise via BasicNewsRecipe, then apply the 'days' option.

    Once the base initialiser has run, the 'days' entry of
    ``self.recipe_specific_options`` is looked up. A non-empty string is
    converted with ``float()`` and stored as ``self.oldest_article``; any
    other value, or a missing key, leaves ``oldest_article`` untouched.
    ``float()`` raises ValueError if the string is not a valid number.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days = self.recipe_specific_options.get('days')
    # NOTE(review): presumably the base initialiser stores a user-supplied
    # value here as a string -- confirm against BasicNewsRecipe.
    if isinstance(days, str) and days:
        self.oldest_article = float(days)
feeds = [
(u'Startseite Die wichtigsten Themen auf einen Blick',
u'https://newsfeed.zeit.de/index'),