mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
...
This commit is contained in:
parent
dc90b7840e
commit
26af835e54
@ -1,3 +1,5 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
import re
|
import re
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from datetime import date
|
from datetime import date
|
||||||
@ -77,12 +79,23 @@ class barrons(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
return br
|
return br
|
||||||
|
|
||||||
|
recipe_specific_options = {
|
||||||
|
'date': {
|
||||||
|
'short': 'The date of the edition to download (YYYYMMDD format)',
|
||||||
|
'long': 'For example, 20240722.\nIf it didn\'t work, try again later.'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
self.log(
|
self.log(
|
||||||
'\n***\nif this recipe fails, report it on: '
|
'\n***\nif this recipe fails, report it on: '
|
||||||
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
|
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
|
||||||
)
|
)
|
||||||
archive = self.index_to_soup('https://www.barrons.com/magazine?archives=' + date.today().strftime('%Y'))
|
issue_url = 'https://www.barrons.com/magazine?archives=' + date.today().strftime('%Y')
|
||||||
|
d = self.recipe_specific_options.get('date')
|
||||||
|
if d and isinstance(d, str):
|
||||||
|
issue_url = 'https://www.barrons.com/magazine?archives=' + d
|
||||||
|
archive = self.index_to_soup(issue_url)
|
||||||
issue = archive.find(**prefixed_classes('BarronsTheme--archive-box--'))
|
issue = archive.find(**prefixed_classes('BarronsTheme--archive-box--'))
|
||||||
self.timefmt = ' [' + self.tag_to_string(issue.find(**prefixed_classes('BarronsTheme--date--'))) + ']'
|
self.timefmt = ' [' + self.tag_to_string(issue.find(**prefixed_classes('BarronsTheme--date--'))) + ']'
|
||||||
self.description = self.tag_to_string(issue.find(**prefixed_classes('BarronsTheme--headline--')))
|
self.description = self.tag_to_string(issue.find(**prefixed_classes('BarronsTheme--headline--')))
|
||||||
|
@ -51,14 +51,14 @@ class BT(BasicNewsRecipe):
|
|||||||
'\n***\nif this recipe fails, report it on: '
|
'\n***\nif this recipe fails, report it on: '
|
||||||
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
|
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
|
||||||
)
|
)
|
||||||
soup = self.index_to_soup('https://www.businesstoday.in')
|
|
||||||
a = soup.findAll('a', attrs={'class':'mag_sld_img'})[1]
|
|
||||||
url = a['href']
|
|
||||||
|
|
||||||
d = self.recipe_specific_options.get('date')
|
d = self.recipe_specific_options.get('date')
|
||||||
if d and isinstance(d, str):
|
if d and isinstance(d, str):
|
||||||
url = 'https://www.businesstoday.in/magazine/issue/' + d
|
url = 'https://www.businesstoday.in/magazine/issue/' + d
|
||||||
else:
|
else:
|
||||||
|
soup = self.index_to_soup('https://www.businesstoday.in')
|
||||||
|
a = soup.findAll('a', attrs={'class':'mag_sld_img'})[1]
|
||||||
|
url = a['href']
|
||||||
self.cover_url = a.img['data-src'].split('?')[0]
|
self.cover_url = a.img['data-src'].split('?')[0]
|
||||||
|
|
||||||
self.log('issue =', url)
|
self.log('issue =', url)
|
||||||
|
@ -55,7 +55,10 @@ class ht(BasicNewsRecipe):
|
|||||||
if p and isinstance(p, str):
|
if p and isinstance(p, str):
|
||||||
today = p
|
today = p
|
||||||
|
|
||||||
self.timefmt = ' [%s]' % today
|
day, month, year = (int(x) for x in today.split('/'))
|
||||||
|
dt = date(year, month, day)
|
||||||
|
|
||||||
|
self.timefmt = ' [%s]' % dt.strftime('%b %d, %Y')
|
||||||
|
|
||||||
today = today.replace('/', '%2F')
|
today = today.replace('/', '%2F')
|
||||||
|
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
'''
|
'''
|
||||||
https://www.cirsd.org/en/horizons
|
https://www.cirsd.org/en/horizons
|
||||||
'''
|
'''
|
||||||
@ -28,23 +30,34 @@ class horizons(BasicNewsRecipe):
|
|||||||
classes('back-link'),
|
classes('back-link'),
|
||||||
dict(name='div', attrs={'class':'single-post-footer'})
|
dict(name='div', attrs={'class':'single-post-footer'})
|
||||||
]
|
]
|
||||||
|
|
||||||
|
recipe_specific_options = {
|
||||||
|
'issue_url': {
|
||||||
|
'short': 'The issue URL ',
|
||||||
|
'long': 'For example, https://www.cirsd.org/en/horizons/horizons-winter-2024--issue-no-25',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
return BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False)
|
return BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False)
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.index_to_soup('https://www.cirsd.org/en/horizons')
|
d = self.recipe_specific_options.get('issue_url')
|
||||||
a = soup.findAll('a', href=True, attrs={'class':'horizon-gallery-box'})[0] #use 1 for previous edition
|
if d and isinstance(d, str):
|
||||||
url = a['href']
|
url = d
|
||||||
if url.startswith('/'):
|
else:
|
||||||
url = 'https://www.cirsd.org' + url
|
soup = self.index_to_soup('https://www.cirsd.org/en/horizons')
|
||||||
self.cover_url = a.find('img')['src']
|
a = soup.findAll('a', href=True, attrs={'class':'horizon-gallery-box'})[0] #use 1 for previous edition
|
||||||
self.log(self.cover_url)
|
url = a['href']
|
||||||
issue = a.find('div', attrs={'class':'horizon-gallery-title'})
|
if url.startswith('/'):
|
||||||
if issue:
|
url = 'https://www.cirsd.org' + url
|
||||||
self.title = self.tag_to_string(issue).strip()
|
self.cover_url = a.find('img')['src']
|
||||||
self.timefmt = ' [' + self.tag_to_string(issue).strip().replace('Horizons ', '') + ']'
|
self.log(self.cover_url)
|
||||||
self.log('Downloading Issue: ', self.timefmt, self.title)
|
issue = a.find('div', attrs={'class':'horizon-gallery-title'})
|
||||||
|
if issue:
|
||||||
|
self.title = self.tag_to_string(issue).strip()
|
||||||
|
self.timefmt = ' [' + self.tag_to_string(issue).strip().replace('Horizons ', '') + ']'
|
||||||
|
self.log('Downloading Issue: ', self.timefmt, self.title)
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
|
|
||||||
feeds = []
|
feeds = []
|
||||||
|
@ -67,7 +67,18 @@ class LondonReviewOfBooksPayed(BasicNewsRecipe):
|
|||||||
img['src'] = 'https://www.lrb.co.uk/storage/400_filter/images/' + img['data-appsrc'].split('/images/')[-1]
|
img['src'] = 'https://www.lrb.co.uk/storage/400_filter/images/' + img['data-appsrc'].split('/images/')[-1]
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
recipe_specific_options = {
|
||||||
|
'issue_url': {
|
||||||
|
'short': 'The issue URL ',
|
||||||
|
'long': 'For example, https://www.lrb.co.uk/the-paper/v46/n01',
|
||||||
|
'default': 'https://www.lrb.co.uk/the-paper/'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
d = self.recipe_specific_options.get('issue_url')
|
||||||
|
if d and isinstance(d, str):
|
||||||
|
self.INDEX = d
|
||||||
soup = self.index_to_soup(self.INDEX)
|
soup = self.index_to_soup(self.INDEX)
|
||||||
container = soup.find('div', attrs={'class': 'article-issue-cover-image'})
|
container = soup.find('div', attrs={'class': 'article-issue-cover-image'})
|
||||||
if container:
|
if container:
|
||||||
|
@ -168,7 +168,11 @@ class WSJ(BasicNewsRecipe):
|
|||||||
sec_parse = json.loads(self.index_to_soup(index + v, raw=True))
|
sec_parse = json.loads(self.index_to_soup(index + v, raw=True))
|
||||||
data = sec_parse['articles']
|
data = sec_parse['articles']
|
||||||
for art in data:
|
for art in data:
|
||||||
dt = datetime.fromtimestamp(data[art]['pubdateNumber'] + time.timezone)
|
try:
|
||||||
|
tme = data[art]['pubdateNumber']
|
||||||
|
except Exception:
|
||||||
|
tme = data[art]['origPubdateNumber']
|
||||||
|
dt = datetime.fromtimestamp(tme + time.timezone)
|
||||||
if (datetime.now() - dt) > timedelta(self.oldest_article):
|
if (datetime.now() - dt) > timedelta(self.oldest_article):
|
||||||
continue
|
continue
|
||||||
title = data[art]['headline']
|
title = data[art]['headline']
|
||||||
|
Loading…
x
Reference in New Issue
Block a user