mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Updates recipe_specific_options
This commit is contained in:
parent
bdbfdf0f43
commit
16a1f2890b
@ -47,17 +47,30 @@ class IndiaToday(BasicNewsRecipe):
|
|||||||
def preprocess_raw_html(self, raw_html, url):
|
def preprocess_raw_html(self, raw_html, url):
|
||||||
return raw_html.replace('—', '--')
|
return raw_html.replace('—', '--')
|
||||||
|
|
||||||
|
recipe_specific_options = {
|
||||||
|
'date': {
|
||||||
|
'short': 'The date of the edition to download (DD-MM-YYYY format)',
|
||||||
|
'long': 'For example, 22-07-2024'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
soup = self.index_to_soup(
|
d = self.recipe_specific_options.get('date')
|
||||||
'https://www.readwhere.com/magazine/the-india-today-group/India-Today/1154'
|
if not (d and isinstance(d, str)):
|
||||||
)
|
soup = self.index_to_soup(
|
||||||
for citem in soup.findAll(
|
'https://www.readwhere.com/magazine/the-india-today-group/India-Today/1154'
|
||||||
'meta', content=lambda s: s and s.endswith('/magazine/300/new')
|
)
|
||||||
):
|
for citem in soup.findAll(
|
||||||
return citem['content'].replace('300', '600')
|
'meta', content=lambda s: s and s.endswith('/magazine/300/new')
|
||||||
|
):
|
||||||
|
return citem['content'].replace('300', '600')
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.index_to_soup('https://www.indiatoday.in/magazine')
|
issue = 'https://www.indiatoday.in/magazine'
|
||||||
|
d = self.recipe_specific_options.get('date')
|
||||||
|
if d and isinstance(d, str):
|
||||||
|
issue = issue + '/' + d
|
||||||
|
soup = self.index_to_soup(issue)
|
||||||
|
|
||||||
section = None
|
section = None
|
||||||
sections = {}
|
sections = {}
|
||||||
|
@ -81,7 +81,7 @@ class Liberation(BasicNewsRecipe):
|
|||||||
'les mutations des sociétés et des cultures.'
|
'les mutations des sociétés et des cultures.'
|
||||||
)
|
)
|
||||||
language = 'fr'
|
language = 'fr'
|
||||||
oldest_article = 1
|
oldest_article = 1.15
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
articles_are_obfuscated = True
|
articles_are_obfuscated = True
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
@ -94,6 +94,20 @@ class Liberation(BasicNewsRecipe):
|
|||||||
blockquote { color:#202020; }
|
blockquote { color:#202020; }
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
recipe_specific_options = {
|
||||||
|
'days': {
|
||||||
|
'short': 'Oldest article to download from this news source. In days ',
|
||||||
|
'long': 'For example, 0.5, gives you articles from the past 12 hours',
|
||||||
|
'default': str(oldest_article)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
BasicNewsRecipe.__init__(self, *args, **kwargs)
|
||||||
|
d = self.recipe_specific_options.get('days')
|
||||||
|
if d and isinstance(d, str):
|
||||||
|
self.oldest_article = float(d)
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('A la une', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/collection/accueil-une/?outputType=xml'),
|
('A la une', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/collection/accueil-une/?outputType=xml'),
|
||||||
('Politique', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/politique/?outputType=xml'),
|
('Politique', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/politique/?outputType=xml'),
|
||||||
|
@ -19,6 +19,20 @@ class LiveMint(BasicNewsRecipe):
|
|||||||
remove_attributes = ['style', 'height', 'width']
|
remove_attributes = ['style', 'height', 'width']
|
||||||
masthead_url = 'https://images.livemint.com/static/livemint-logo-v1.svg'
|
masthead_url = 'https://images.livemint.com/static/livemint-logo-v1.svg'
|
||||||
|
|
||||||
|
recipe_specific_options = {
|
||||||
|
'days': {
|
||||||
|
'short': 'Oldest article to download from this news source. In days ',
|
||||||
|
'long': 'For example, 0.5, gives you articles from the past 12 hours',
|
||||||
|
'default': str(oldest_article)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
BasicNewsRecipe.__init__(self, *args, **kwargs)
|
||||||
|
d = self.recipe_specific_options.get('days')
|
||||||
|
if d and isinstance(d, str):
|
||||||
|
self.oldest_article = float(d)
|
||||||
|
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
resolve_internal_links = True
|
resolve_internal_links = True
|
||||||
|
|
||||||
|
@ -34,6 +34,13 @@ class outlook(BasicNewsRecipe):
|
|||||||
classes('ads-box info-img-absolute mobile-info-id story-dec-time-mobile sb-also-read ads-box1')
|
classes('ads-box info-img-absolute mobile-info-id story-dec-time-mobile sb-also-read ads-box1')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
recipe_specific_options = {
|
||||||
|
'date': {
|
||||||
|
'short': 'The date of the edition to download (DD-Month-YYYY format)',
|
||||||
|
'long': 'For example, 10-june-2024'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
return BasicNewsRecipe.get_browser(self, user_agent='common_words/based', verify_ssl_certificates=False)
|
return BasicNewsRecipe.get_browser(self, user_agent='common_words/based', verify_ssl_certificates=False)
|
||||||
|
|
||||||
@ -42,14 +49,27 @@ class outlook(BasicNewsRecipe):
|
|||||||
'\n***\nif this recipe fails, report it on: '
|
'\n***\nif this recipe fails, report it on: '
|
||||||
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
|
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
|
||||||
)
|
)
|
||||||
soup = self.index_to_soup('https://www.outlookindia.com/magazine')
|
|
||||||
a = soup.find('a', attrs={'aria-label':'magazine-cover-image'})
|
d = self.recipe_specific_options.get('date')
|
||||||
self.cover_url = a.img['src'].split('?')[0]
|
if d and isinstance(d, str):
|
||||||
url = a['href']
|
url = 'https://www.outlookindia.com/magazine/' + d
|
||||||
self.description = self.tag_to_string(a)
|
else:
|
||||||
self.timefmt = ' [' + self.tag_to_string(a.div).strip() + ']'
|
soup = self.index_to_soup('https://www.outlookindia.com/magazine')
|
||||||
self.log('Downloading issue:', url, self.timefmt)
|
a = soup.find('a', attrs={'aria-label':'magazine-cover-image'})
|
||||||
|
url = a['href']
|
||||||
|
|
||||||
|
self.log('Downloading issue:', url)
|
||||||
|
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
|
cov = soup.find(attrs={'aria-label':'magazine-cover-image'})
|
||||||
|
self.cover_url = cov.img['src'].split('?')[0]
|
||||||
|
summ = soup.find(attrs={'data-test-id':'magazine-summary'})
|
||||||
|
if summ:
|
||||||
|
self.description = self.tag_to_string(summ)
|
||||||
|
tme = soup.find(attrs={'class':'arr__timeago'})
|
||||||
|
if tme:
|
||||||
|
self.timefmt = ' [' + self.tag_to_string(tme).strip() + ']'
|
||||||
|
|
||||||
|
|
||||||
ans = []
|
ans = []
|
||||||
|
|
||||||
|
@ -26,6 +26,20 @@ class RT_eng(BasicNewsRecipe):
|
|||||||
remove_attributes = ['height', 'width', 'style']
|
remove_attributes = ['height', 'width', 'style']
|
||||||
publication_type = 'newsportal'
|
publication_type = 'newsportal'
|
||||||
|
|
||||||
|
recipe_specific_options = {
|
||||||
|
'days': {
|
||||||
|
'short': 'Oldest article to download from this news source. In days ',
|
||||||
|
'long': 'For example, 0.5, gives you articles from the past 12 hours',
|
||||||
|
'default': str(oldest_article)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
BasicNewsRecipe.__init__(self, *args, **kwargs)
|
||||||
|
d = self.recipe_specific_options.get('days')
|
||||||
|
if d and isinstance(d, str):
|
||||||
|
self.oldest_article = float(d)
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
img {display:block; margin:0 auto;}
|
img {display:block; margin:0 auto;}
|
||||||
em { color:#202020; }
|
em { color:#202020; }
|
||||||
|
@ -56,8 +56,19 @@ class spectator(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
return br
|
return br
|
||||||
|
|
||||||
|
recipe_specific_options = {
|
||||||
|
'date': {
|
||||||
|
'short': 'The date of the edition to download (DD-MM-YYYY format)',
|
||||||
|
'long': 'For example, 20-07-2024'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.index_to_soup('https://www.spectator.co.uk/magazine')
|
index = 'https://www.spectator.co.uk/magazine'
|
||||||
|
d = self.recipe_specific_options.get('date')
|
||||||
|
if d and isinstance(d, str):
|
||||||
|
index = index + '/' + d + '/'
|
||||||
|
soup = self.index_to_soup(index)
|
||||||
self.cover_url = soup.find(**classes(
|
self.cover_url = soup.find(**classes(
|
||||||
'magazine-header__container')).img['src'].split('?')[0]
|
'magazine-header__container')).img['src'].split('?')[0]
|
||||||
issue = self.tag_to_string(soup.find(**classes(
|
issue = self.tag_to_string(soup.find(**classes(
|
||||||
|
@ -29,17 +29,32 @@ class TheWeek(BasicNewsRecipe):
|
|||||||
.article-info { font-size:small; }
|
.article-info { font-size:small; }
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
recipe_specific_options = {
|
||||||
|
'date': {
|
||||||
|
'short': 'The date of the edition to download (YYYY.MM.DD format)',
|
||||||
|
'long': 'For example, 2024.06.30'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
soup = self.index_to_soup(
|
d = self.recipe_specific_options.get('date')
|
||||||
'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/'
|
if not (d and isinstance(d, str)):
|
||||||
)
|
soup = self.index_to_soup(
|
||||||
for citem in soup.findAll(
|
'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/'
|
||||||
'meta', content=lambda s: s and s.endswith('view/3.jpg')
|
)
|
||||||
):
|
for citem in soup.findAll(
|
||||||
return citem['content']
|
'meta', content=lambda s: s and s.endswith('view/3.jpg')
|
||||||
|
):
|
||||||
|
return citem['content']
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.index_to_soup('https://www.theweek.in/theweek.html')
|
issue = 'https://www.theweek.in/theweek.html'
|
||||||
|
|
||||||
|
d = self.recipe_specific_options.get('date')
|
||||||
|
if d and isinstance(d, str):
|
||||||
|
issue = 'https://www.theweek.in/theweek.' + d + '.html'
|
||||||
|
|
||||||
|
soup = self.index_to_soup(issue)
|
||||||
ans = []
|
ans = []
|
||||||
d = datetime.today()
|
d = datetime.today()
|
||||||
|
|
||||||
|
@ -38,7 +38,21 @@ class WSJ(BasicNewsRecipe):
|
|||||||
resolve_internal_links = True
|
resolve_internal_links = True
|
||||||
ignore_duplicate_articles = {'url', 'title'}
|
ignore_duplicate_articles = {'url', 'title'}
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
oldest_article = 1 # days
|
oldest_article = 1.2 # days
|
||||||
|
|
||||||
|
recipe_specific_options = {
|
||||||
|
'days': {
|
||||||
|
'short': 'Oldest article to download from this news source. In days ',
|
||||||
|
'long': 'For example, 0.5, gives you articles from the past 12 hours',
|
||||||
|
'default': str(oldest_article)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
BasicNewsRecipe.__init__(self, *args, **kwargs)
|
||||||
|
d = self.recipe_specific_options.get('days')
|
||||||
|
if d and isinstance(d, str):
|
||||||
|
self.oldest_article = float(d)
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
#subhed, em { font-style:italic; color:#202020; }
|
#subhed, em { font-style:italic; color:#202020; }
|
||||||
|
Loading…
x
Reference in New Issue
Block a user