mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Updates recipe_specific_options
This commit is contained in:
parent
bdbfdf0f43
commit
16a1f2890b
@ -47,17 +47,30 @@ class IndiaToday(BasicNewsRecipe):
|
||||
def preprocess_raw_html(self, raw_html, url):
|
||||
return raw_html.replace('—', '--')
|
||||
|
||||
recipe_specific_options = {
|
||||
'date': {
|
||||
'short': 'The date of the edition to download (DD-MM-YYYY format)',
|
||||
'long': 'For example, 22-07-2024'
|
||||
}
|
||||
}
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup(
|
||||
'https://www.readwhere.com/magazine/the-india-today-group/India-Today/1154'
|
||||
)
|
||||
for citem in soup.findAll(
|
||||
'meta', content=lambda s: s and s.endswith('/magazine/300/new')
|
||||
):
|
||||
return citem['content'].replace('300', '600')
|
||||
d = self.recipe_specific_options.get('date')
|
||||
if not (d and isinstance(d, str)):
|
||||
soup = self.index_to_soup(
|
||||
'https://www.readwhere.com/magazine/the-india-today-group/India-Today/1154'
|
||||
)
|
||||
for citem in soup.findAll(
|
||||
'meta', content=lambda s: s and s.endswith('/magazine/300/new')
|
||||
):
|
||||
return citem['content'].replace('300', '600')
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup('https://www.indiatoday.in/magazine')
|
||||
issue = 'https://www.indiatoday.in/magazine'
|
||||
d = self.recipe_specific_options.get('date')
|
||||
if d and isinstance(d, str):
|
||||
issue = issue + '/' + d
|
||||
soup = self.index_to_soup(issue)
|
||||
|
||||
section = None
|
||||
sections = {}
|
||||
|
@ -81,7 +81,7 @@ class Liberation(BasicNewsRecipe):
|
||||
'les mutations des sociétés et des cultures.'
|
||||
)
|
||||
language = 'fr'
|
||||
oldest_article = 1
|
||||
oldest_article = 1.15
|
||||
remove_empty_feeds = True
|
||||
articles_are_obfuscated = True
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
@ -94,6 +94,20 @@ class Liberation(BasicNewsRecipe):
|
||||
blockquote { color:#202020; }
|
||||
'''
|
||||
|
||||
recipe_specific_options = {
|
||||
'days': {
|
||||
'short': 'Oldest article to download from this news source. In days ',
|
||||
'long': 'For example, 0.5, gives you articles from the past 12 hours',
|
||||
'default': str(oldest_article)
|
||||
}
|
||||
}
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
BasicNewsRecipe.__init__(self, *args, **kwargs)
|
||||
d = self.recipe_specific_options.get('days')
|
||||
if d and isinstance(d, str):
|
||||
self.oldest_article = float(d)
|
||||
|
||||
feeds = [
|
||||
('A la une', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/collection/accueil-une/?outputType=xml'),
|
||||
('Politique', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/politique/?outputType=xml'),
|
||||
|
@ -19,6 +19,20 @@ class LiveMint(BasicNewsRecipe):
|
||||
remove_attributes = ['style', 'height', 'width']
|
||||
masthead_url = 'https://images.livemint.com/static/livemint-logo-v1.svg'
|
||||
|
||||
recipe_specific_options = {
|
||||
'days': {
|
||||
'short': 'Oldest article to download from this news source. In days ',
|
||||
'long': 'For example, 0.5, gives you articles from the past 12 hours',
|
||||
'default': str(oldest_article)
|
||||
}
|
||||
}
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
BasicNewsRecipe.__init__(self, *args, **kwargs)
|
||||
d = self.recipe_specific_options.get('days')
|
||||
if d and isinstance(d, str):
|
||||
self.oldest_article = float(d)
|
||||
|
||||
remove_empty_feeds = True
|
||||
resolve_internal_links = True
|
||||
|
||||
|
@ -34,6 +34,13 @@ class outlook(BasicNewsRecipe):
|
||||
classes('ads-box info-img-absolute mobile-info-id story-dec-time-mobile sb-also-read ads-box1')
|
||||
]
|
||||
|
||||
recipe_specific_options = {
|
||||
'date': {
|
||||
'short': 'The date of the edition to download (DD-Month-YYYY format)',
|
||||
'long': 'For example, 10-june-2024'
|
||||
}
|
||||
}
|
||||
|
||||
def get_browser(self):
|
||||
return BasicNewsRecipe.get_browser(self, user_agent='common_words/based', verify_ssl_certificates=False)
|
||||
|
||||
@ -42,14 +49,27 @@ class outlook(BasicNewsRecipe):
|
||||
'\n***\nif this recipe fails, report it on: '
|
||||
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
|
||||
)
|
||||
soup = self.index_to_soup('https://www.outlookindia.com/magazine')
|
||||
a = soup.find('a', attrs={'aria-label':'magazine-cover-image'})
|
||||
self.cover_url = a.img['src'].split('?')[0]
|
||||
url = a['href']
|
||||
self.description = self.tag_to_string(a)
|
||||
self.timefmt = ' [' + self.tag_to_string(a.div).strip() + ']'
|
||||
self.log('Downloading issue:', url, self.timefmt)
|
||||
|
||||
d = self.recipe_specific_options.get('date')
|
||||
if d and isinstance(d, str):
|
||||
url = 'https://www.outlookindia.com/magazine/' + d
|
||||
else:
|
||||
soup = self.index_to_soup('https://www.outlookindia.com/magazine')
|
||||
a = soup.find('a', attrs={'aria-label':'magazine-cover-image'})
|
||||
url = a['href']
|
||||
|
||||
self.log('Downloading issue:', url)
|
||||
|
||||
soup = self.index_to_soup(url)
|
||||
cov = soup.find(attrs={'aria-label':'magazine-cover-image'})
|
||||
self.cover_url = cov.img['src'].split('?')[0]
|
||||
summ = soup.find(attrs={'data-test-id':'magazine-summary'})
|
||||
if summ:
|
||||
self.description = self.tag_to_string(summ)
|
||||
tme = soup.find(attrs={'class':'arr__timeago'})
|
||||
if tme:
|
||||
self.timefmt = ' [' + self.tag_to_string(tme).strip() + ']'
|
||||
|
||||
|
||||
ans = []
|
||||
|
||||
|
@ -26,6 +26,20 @@ class RT_eng(BasicNewsRecipe):
|
||||
remove_attributes = ['height', 'width', 'style']
|
||||
publication_type = 'newsportal'
|
||||
|
||||
recipe_specific_options = {
|
||||
'days': {
|
||||
'short': 'Oldest article to download from this news source. In days ',
|
||||
'long': 'For example, 0.5, gives you articles from the past 12 hours',
|
||||
'default': str(oldest_article)
|
||||
}
|
||||
}
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
BasicNewsRecipe.__init__(self, *args, **kwargs)
|
||||
d = self.recipe_specific_options.get('days')
|
||||
if d and isinstance(d, str):
|
||||
self.oldest_article = float(d)
|
||||
|
||||
extra_css = '''
|
||||
img {display:block; margin:0 auto;}
|
||||
em { color:#202020; }
|
||||
|
@ -56,8 +56,19 @@ class spectator(BasicNewsRecipe):
|
||||
]
|
||||
return br
|
||||
|
||||
recipe_specific_options = {
|
||||
'date': {
|
||||
'short': 'The date of the edition to download (DD-MM-YYYY format)',
|
||||
'long': 'For example, 20-07-2024'
|
||||
}
|
||||
}
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup('https://www.spectator.co.uk/magazine')
|
||||
index = 'https://www.spectator.co.uk/magazine'
|
||||
d = self.recipe_specific_options.get('date')
|
||||
if d and isinstance(d, str):
|
||||
index = index + '/' + d + '/'
|
||||
soup = self.index_to_soup(index)
|
||||
self.cover_url = soup.find(**classes(
|
||||
'magazine-header__container')).img['src'].split('?')[0]
|
||||
issue = self.tag_to_string(soup.find(**classes(
|
||||
|
@ -29,17 +29,32 @@ class TheWeek(BasicNewsRecipe):
|
||||
.article-info { font-size:small; }
|
||||
'''
|
||||
|
||||
recipe_specific_options = {
|
||||
'date': {
|
||||
'short': 'The date of the edition to download (YYYY.MM.DD format)',
|
||||
'long': 'For example, 2024.06.30'
|
||||
}
|
||||
}
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup(
|
||||
'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/'
|
||||
)
|
||||
for citem in soup.findAll(
|
||||
'meta', content=lambda s: s and s.endswith('view/3.jpg')
|
||||
):
|
||||
return citem['content']
|
||||
d = self.recipe_specific_options.get('date')
|
||||
if not (d and isinstance(d, str)):
|
||||
soup = self.index_to_soup(
|
||||
'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/'
|
||||
)
|
||||
for citem in soup.findAll(
|
||||
'meta', content=lambda s: s and s.endswith('view/3.jpg')
|
||||
):
|
||||
return citem['content']
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup('https://www.theweek.in/theweek.html')
|
||||
issue = 'https://www.theweek.in/theweek.html'
|
||||
|
||||
d = self.recipe_specific_options.get('date')
|
||||
if d and isinstance(d, str):
|
||||
issue = 'https://www.theweek.in/theweek.' + d + '.html'
|
||||
|
||||
soup = self.index_to_soup(issue)
|
||||
ans = []
|
||||
d = datetime.today()
|
||||
|
||||
|
@ -38,7 +38,21 @@ class WSJ(BasicNewsRecipe):
|
||||
resolve_internal_links = True
|
||||
ignore_duplicate_articles = {'url', 'title'}
|
||||
remove_empty_feeds = True
|
||||
oldest_article = 1 # days
|
||||
oldest_article = 1.2 # days
|
||||
|
||||
recipe_specific_options = {
|
||||
'days': {
|
||||
'short': 'Oldest article to download from this news source. In days ',
|
||||
'long': 'For example, 0.5, gives you articles from the past 12 hours',
|
||||
'default': str(oldest_article)
|
||||
}
|
||||
}
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
BasicNewsRecipe.__init__(self, *args, **kwargs)
|
||||
d = self.recipe_specific_options.get('days')
|
||||
if d and isinstance(d, str):
|
||||
self.oldest_article = float(d)
|
||||
|
||||
extra_css = '''
|
||||
#subhed, em { font-style:italic; color:#202020; }
|
||||
|
Loading…
x
Reference in New Issue
Block a user