Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Merge branch 'kovidgoyal:master' into tolino

Commit ff93e10e00

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 https://ancientegyptmagazine.com
 '''

@@ -81,6 +81,19 @@ class TheAtlantic(BasicNewsRecipe):
     language = 'en'
     encoding = 'utf-8'

+    recipe_specific_options = {
+        'date': {
+            'short': 'The date of the edition to download (YYYY/MM format)',
+            'long': 'For example, 2024/05'
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('date')
+        if d and isinstance(d, str):
+            self.INDEX = 'https://www.theatlantic.com/magazine/toc/' + d + '/'
+
     keep_only_tags = [
         dict(itemprop=['headline']),
         classes(

@@ -234,6 +234,20 @@ class BBCNews(BasicNewsRecipe):
     #
     oldest_article = 1.5

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     # Number of simultaneous downloads. 20 is consistently working fine on the
     # BBC News feeds with no problems. Speeds things up from the default of 5.
     # If you have a lot of feeds and/or have increased oldest_article above 2

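The context comment kept above refers to BasicNewsRecipe's simultaneous_downloads attribute; a one-line sketch of the setting it describes (the value 20 is the one the comment reports as reliable):

    # Raise the download parallelism from calibre's default of 5; per the
    # comment above, 20 has proven reliable on the BBC feeds.
    simultaneous_downloads = 20
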
@@ -1,3 +1,6 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+
 import json
 import time
 from datetime import datetime
@@ -58,7 +61,7 @@ class Bloomberg(BasicNewsRecipe):
     remove_empty_feeds = True

     recipe_specific_options = {
-        'date': {
+        'issue': {
             'short': 'The ID of the edition to download (YY_XX format)',
             'long': 'For example, 24_17\nHint: Edition ID can be found at the end of its URL'
         }
@@ -86,7 +89,7 @@ class Bloomberg(BasicNewsRecipe):
         inx = 'https://cdn-mobapi.bloomberg.com'
         sec = self.index_to_soup(inx + '/wssmobile/v1/bw/news/list?limit=1', raw=True)
         id = json.loads(sec)['magazines'][0]['id']
-        past_edition = self.recipe_specific_options.get('date')
+        past_edition = self.recipe_specific_options.get('issue')
         if past_edition and isinstance(past_edition, str):
             id = past_edition
         edit = self.index_to_soup(inx + '/wssmobile/v1/bw/news/week/' + id, raw=True)

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 import json
 from datetime import datetime


@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 import json
 from urllib.parse import quote, urlparse

@@ -118,6 +120,13 @@ class CaravanMagazine(BasicNewsRecipe):
             return br
         return br

+    recipe_specific_options = {
+        'date': {
+            'short': 'The date of the edition to download (MM-YYYY format)',
+            'long': 'For example, 07-2024'
+        }
+    }
+
     def parse_index(self):
         self.log(
             '\n***\nif this recipe fails, report it on: '
@@ -125,9 +134,11 @@ class CaravanMagazine(BasicNewsRecipe):
         )

         api = 'https://api.caravanmagazine.in/api/trpc/magazines.getLatestIssue'
-        # for past editions
-        # inp = json.dumps({"0":{"json":{"month":6,"year":2023}}})
-        # api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&input=' + quote(inp, safe='')
+        d = self.recipe_specific_options.get('date')
+        if d and isinstance(d, str):
+            x = d.split('-')
+            inp = json.dumps({"0":{"json":{"month":int(x[0]),"year":int(x[1])}}})
+            api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&input=' + quote(inp, safe='')

         raw = json.loads(self.index_to_soup(api, raw=True))
         if isinstance(raw, list):

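For a concrete sense of what this hunk builds, a standalone sketch (outside the recipe) of the URL produced for the 07-2024 edition; the date string is an illustrative option value:

import json
from urllib.parse import quote

d = '07-2024'  # illustrative value of the 'date' option
month, year = (int(x) for x in d.split('-'))
inp = json.dumps({"0": {"json": {"month": month, "year": year}}})
api = ('https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear'
       '?batch=1&input=' + quote(inp, safe=''))
# api now ends with the percent-encoded JSON payload for month 7, year 2024
print(api)
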
@@ -23,13 +23,27 @@ class DeutscheWelle_bs(BasicNewsRecipe):
     keep_only_tags = [
         dict(name='article')
     ]

+
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     remove_tags = [
         dict(name=['footer', 'source']),
         dict(attrs={'data-tracking-name':'sharing-icons-inline'}),
         classes('kicker advertisement vjs-wrapper')
     ]


     feeds = [
         (u'Politika', u'http://rss.dw-world.de/rdf/rss-bos-pol'),
         (u'Evropa', u'http://rss.dw-world.de/rdf/rss-bos-eu'),

@@ -21,6 +21,20 @@ class DeutscheWelle(BasicNewsRecipe):
         dict(name='article')
     ]

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     remove_tags = [
         dict(name=['footer', 'source']),
         dict(attrs={'data-tracking-name':'sharing-icons-inline'}),

@@ -15,7 +15,21 @@ class DeutscheWelle_en(BasicNewsRecipe):
     remove_empty_feeds = True
     ignore_duplicate_articles = {'title', 'url'}
     remove_attributes = ['height', 'width', 'style']

+
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     keep_only_tags = [
         dict(name='article')
     ]

@@ -17,10 +17,24 @@ class DeutscheWelle_es(BasicNewsRecipe):

     remove_attributes = ['height', 'width', 'style']

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     keep_only_tags = [
         dict(name='article')
     ]


     remove_tags = [
         dict(name=['footer', 'source']),
         dict(attrs={'data-tracking-name':'sharing-icons-inline'}),
@@ -40,7 +54,7 @@ class DeutscheWelle_es(BasicNewsRecipe):
         ('Conozca Alemania', 'http://rss.dw-world.de/rdf/rss-sp-con')
     ]



     def preprocess_html(self, soup):
         for img in soup.findAll('img', srcset=True):
             img['src'] = img['srcset'].split()[6]

@@ -16,20 +16,34 @@ class DeutscheWelle_hr(BasicNewsRecipe):
     remove_empty_feeds = True
     masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
     remove_javascript = True


     ignore_duplicate_articles = {'title', 'url'}
     remove_attributes = ['height', 'width', 'style']

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     keep_only_tags = [
         dict(name='article')
     ]


     remove_tags = [
         dict(name=['footer', 'source']),
         dict(attrs={'data-tracking-name':'sharing-icons-inline'}),
         classes('kicker advertisement vjs-wrapper')
     ]


     feeds = [
         (u'Svijet', u'http://rss.dw-world.de/rdf/rss-cro-svijet'),
         (u'Europa', u'http://rss.dw-world.de/rdf/rss-cro-eu'),

@@ -15,12 +15,25 @@ class DeutscheWelle_pt(BasicNewsRecipe):
     publication_type = 'newsportal'
     remove_empty_feeds = True
     masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'



     remove_javascript = True
     ignore_duplicate_articles = {'title', 'url'}
     remove_attributes = ['height', 'width', 'style']

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     def preprocess_html(self, soup):
         for img in soup.findAll('img', srcset=True):
             img['src'] = img['srcset'].split()[6]
@@ -29,7 +42,7 @@ class DeutscheWelle_pt(BasicNewsRecipe):
     keep_only_tags = [
         dict(name='article')
     ]


     remove_tags = [
         dict(name=['footer', 'source']),
         dict(attrs={'data-tracking-name':'sharing-icons-inline'}),

@@ -16,7 +16,21 @@ class DeutscheWelle(BasicNewsRecipe):
     remove_empty_feeds = True
     ignore_duplicate_articles = {'title', 'url'}
     remove_attributes = ['height', 'width', 'style']

+
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     def preprocess_html(self, soup):
         for img in soup.findAll('img', srcset=True):
             img['src'] = img['srcset'].split()[6]

@@ -18,7 +18,21 @@ class DeutscheWelle_sr(BasicNewsRecipe):
     remove_javascript = True
     ignore_duplicate_articles = {'title', 'url'}
     remove_attributes = ['height', 'width', 'style']

+
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     def preprocess_html(self, soup):
         for img in soup.findAll('img', srcset=True):
             img['src'] = img['srcset'].split()[6]
@@ -27,13 +41,13 @@ class DeutscheWelle_sr(BasicNewsRecipe):
     keep_only_tags = [
         dict(name='article')
     ]


     remove_tags = [
         dict(name=['footer', 'source']),
         dict(attrs={'data-tracking-name':'sharing-icons-inline'}),
         classes('kicker advertisement vjs-wrapper')
     ]


     feeds = [
         (u'Politika', u'http://rss.dw-world.de/rdf/rss-ser-pol'),
         (u'Srbija', u'http://rss.dw-world.de/rdf/rss-ser-pol-ser'),

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 import json
 import re
 from datetime import date, datetime, timedelta

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 import re
 from datetime import datetime, timedelta
 from urllib.parse import quote

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 http://www.elcorreo.com/
 '''
@@ -22,6 +24,20 @@ class elcorreo(BasicNewsRecipe):
     max_articles_per_feed = 25  # articles
     compress_news_images = True

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     extra_css = '''
         .v-mdl-ath__inf, .v-mdl-ath__p--2, .v-mdl-ath__p {font-size:small; color:#404040;}
         .v-fc, .v-a-fig { text-align:center; font-size:small; }

@@ -27,6 +27,20 @@ class ElPais(BasicNewsRecipe):
     oldest_article = 2.1
     max_articles_per_feed = 25

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     use_embedded_content = False
     recursion = 5


@@ -19,6 +19,20 @@ class EpochTimes(BasicNewsRecipe):
     masthead_url = 'https://epochtimes-ny.newsmemory.com/eeLayout/epochtimes/1.0.a/images/webapp/banner.png'
     extra_css = '.post_caption, .text-sm, .uppercase {font-size:small;}'

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     keep_only_tags = [
         dict(name='article')
     ]

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 import json
 import re
 from urllib.parse import quote
@@ -31,6 +33,20 @@ class ft(BasicNewsRecipe):
         .o-topper__topic { font-size:small; color:#5c5c5c; }
     '''

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     keep_only_tags = [
         classes(
             'body_json o-topper__topic o-topper__headline o-topper__standfirst o-topper__visual article-info__time-byline main-image'

@@ -47,8 +47,20 @@ class ForeignPolicy(BasicNewsRecipe):
     ]
     remove_tags_after = [classes('post-content-main')]

+    recipe_specific_options = {
+        'issue': {
+            'short': 'Enter the Issue ID you want to download ',
+            'long': 'For example, 411131563'
+        }
+    }
+
     def parse_index(self):
-        soup = self.index_to_soup('https://foreignpolicy.com/the-magazine')
+        issue_url = 'https://foreignpolicy.com/the-magazine'
+        d = self.recipe_specific_options.get('issue')
+        if d and isinstance(d, str):
+            issue_url = issue_url + '/?issue_id=' + d
+
+        soup = self.index_to_soup(issue_url)
         img = soup.find('img', attrs={'src': lambda x: x and '-cover' in x})
         if img:
             self.cover_url = img['src'].split('?')[0] + '?w=800?quality=90'

@@ -127,6 +127,13 @@ class ForeignAffairsRecipe(BasicNewsRecipe):

     INDEX = 'https://www.foreignaffairs.com/magazine'

+    recipe_specific_options = {
+        'issue': {
+            'short': 'Enter the Issue Number you want to download ',
+            'long': 'For example, 2024/103/1'
+        }
+    }
+
     keep_only_tags = [
         classes('article-header article-body article-lead-image article-body-text'),
     ]
@@ -140,6 +147,10 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
     remove_empty_feeds = True

     def parse_index(self):
+        d = self.recipe_specific_options.get('issue')
+        if d and isinstance(d, str):
+            self.INDEX = 'https://www.foreignaffairs.com/issues/' + d
+
         soup = self.index_to_soup(self.INDEX)
         # get dates
         date = re.split(r'\s\|\s', self.tag_to_string(

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 from collections import defaultdict

 from calibre.web.feeds.news import BasicNewsRecipe, classes
@@ -53,8 +55,20 @@ class Frontline(BasicNewsRecipe):
             src.extract()
         return soup

+    recipe_specific_options = {
+        'issue': {
+            'short': 'Enter the Issue Number you want to download\n(Volume-Issue format)',
+            'long': 'For example, 41-12'
+        }
+    }
+
     def parse_index(self):
-        soup = self.index_to_soup('https://frontline.thehindu.com/current-issue/')
+        issue_url = 'https://frontline.thehindu.com/current-issue/'
+        d = self.recipe_specific_options.get('issue')
+        if d and isinstance(d, str):
+            issue_url = 'https://frontline.thehindu.com/magazine/issue/vol' + d
+
+        soup = self.index_to_soup(issue_url)

         if cover := soup.find('div', attrs={'class':'magazine'}):
             self.cover_url = cover.find(**classes('sptar-image')).img['data-original'].replace('_320', '_1200')
@@ -82,4 +96,4 @@ class Frontline(BasicNewsRecipe):
                 continue
             self.log(section, '\n\t', title, '\n\t', desc, '\n\t\t', url)
             feeds_dict[section].append({"title": title, "url": url, "description": desc})
-        return [(section, articles) for section, articles in feeds_dict.items()]
+        return [(section, articles) for section, articles in feeds_dict.items()]

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 from datetime import datetime, timedelta, timezone

 from calibre.utils.date import parse_date
@@ -29,6 +31,20 @@ class GlobalTimes(BasicNewsRecipe):
         blockquote, em {color:#202020;}
     '''

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     keep_only_tags = [
         classes(
             'article_column article_title author_share_left article_content'

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 harpers.org
 '''
@@ -70,7 +72,7 @@ class Harpers(BasicNewsRecipe):
         edition = self.recipe_specific_options.get('date')
         if edition and isinstance(edition, str):
             url = 'https://harpers.org/archive/' + edition
-            self.timefmt = ' [' +edition + ']'
+            self.timefmt = ' [' + edition + ']'

         soup = self.index_to_soup(url)
         cov_div = soup.find('div', attrs={'class':'issue-cover'})

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 import json

 from calibre.web.feeds.news import BasicNewsRecipe
@@ -37,6 +39,20 @@ class himal(BasicNewsRecipe):
     resolve_internal_links = True
     oldest_article = 30  # days

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     extra_css = '''
         .cap, .auth {font-size:small;}
         em, blockquote {color:#404040;}

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 import json
 import re
 from collections import defaultdict
@@ -32,7 +34,12 @@ class TheHindu(BasicNewsRecipe):
     recipe_specific_options = {
         'location': {
             'short': 'The name of the local edition',
-            'long': 'If The Hindu is available in your local town/city,\nset this to your location, for example, hyderabad',
+            'long': ('If The Hindu is available in your local town/city,\n'
+                     'set this to your location, for example, hyderabad\n'
+                     'Available Editions: bengaluru, chennai, coimbatore, delhi, '
+                     'erode, hyderabad, international, kochi, kolkata,\n'
+                     'kozhikode, madurai, mangalore, mumbai, thiruvananthapuram, '
+                     'tiruchirapalli, vijayawada, visakhapatnam'),
             'default': 'international'
         },
         'date': {

@@ -1,5 +1,5 @@
-from datetime import date
-
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 from calibre.web.feeds.news import BasicNewsRecipe, classes
+
+

@@ -24,6 +24,20 @@ class TheHindufeeds(BasicNewsRecipe):
     .italic {font-style:italic; color:#202020;}
     '''

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     ignore_duplicate_articles = {'url'}

     keep_only_tags = [
@@ -60,14 +74,9 @@ class TheHindufeeds(BasicNewsRecipe):
             src.extract()
         return soup

-    def __init__(self, *args, **kwargs):
-        BasicNewsRecipe.__init__(self, *args, **kwargs)
-        if self.output_profile.short_name.startswith('kindle'):
-            self.title = 'The Hindu (Feeds) ' + date.today().strftime('%b %d, %Y')
-

     def get_cover_url(self):
-        soup = self.index_to_soup('https://www.thehindu.com/todays-paper/')
+        soup = self.index_to_soup('https://www.thehindu.com/todays-paper/')
         if cover := soup.find(attrs={'class':'hindu-ad'}):
             return cover.img['src']

@@ -1,21 +1,11 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 import json
 from collections import defaultdict
 from datetime import date

 from calibre.web.feeds.news import BasicNewsRecipe

-# figure out your local_edition from the fetch news log of this recipe
-local_edition = 'Delhi'
-
-today = date.today().strftime('%d/%m/%Y')
-
-# for older edition, change today
-# today = '22/12/2023'
-
-day, month, year = (int(x) for x in today.split('/'))
-dt = date(year, month, day)
-today = today.replace('/', '%2F')
-
 index = 'https://epaper.hindustantimes.com'

 class ht(BasicNewsRecipe):
@@ -23,28 +13,51 @@ class ht(BasicNewsRecipe):
     language = 'en_IN'
     __author__ = 'unkn0wn'
     masthead_url = 'https://www.htmedia.in/wp-content/uploads/2020/08/HT-dot-com-logo-product.png'
-    timefmt = ' [' + dt.strftime('%b %d, %Y') + ']'
     description = 'Articles from the Hindustan Times epaper, digital edition'
     encoding = 'utf-8'
     delay = 1
     ignore_duplicate_articles = {'title'}

-    def __init__(self, *args, **kwargs):
-        BasicNewsRecipe.__init__(self, *args, **kwargs)
-        if self.output_profile.short_name.startswith('kindle'):
-            self.title = 'HT Print Edition ' + dt.strftime('%b %d, %Y')
-
     extra_css = '''
         .cap { text-align:center; font-size:small; }
        img { display:block; margin:0 auto; }
    '''

-    def parse_index(self):
+    recipe_specific_options = {
+        'location': {
+            'short': 'The name of the local edition',
+            'long': ('If The Hindustan Times is available in your local town/city,\n'
+                     'set this to your location, for example, Delhi\nAvailable Editions:'
+                     'Delhi, Mumbai, Chandigarh, Lucknow, Patna, Bengaluru, Pune, Gurgaon,'
+                     'Ludhiana, Rajasthan, Amritsar,\nEast UP, Haryana, Jammu, Navi Mumbai,'
+                     'Noida, Punjab, Ranchi, Thane, Uttarakhand, West UP'),
+            'default': 'Delhi'
+        },
+        'date': {
+            'short': 'The date of the edition to download (DD/MM/YYYY format)',
+            'long': 'For example, 22/12/2023'
+        }
+    }
+
+    def parse_index(self):
         self.log(
             '\n***\nif this recipe fails, report it on: '
             'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
         )
+        local_edition = 'Delhi'
+        d = self.recipe_specific_options.get('location')
+        if d and isinstance(d, str):
+            local_edition = d
+
+        today = date.today().strftime('%d/%m/%Y')
+
+        p = self.recipe_specific_options.get('date')
+        if p and isinstance(p, str):
+            today = p
+
+        self.timefmt = ' [%s]' % today
+
+        today = today.replace('/', '%2F')
+
         get_edition = index + '/Home/GetEditionSupplementHierarchy?EditionDate=' + today
         edi_data = json.loads(self.index_to_soup(get_edition, raw=True))
@@ -56,7 +69,7 @@ class ht(BasicNewsRecipe):
             if edi['EditionName'] == local_edition:
                 edi_name = edi['EditionName']
                 edi_id = str(edi['EditionId'])
-                self.log('Downloading', edi_name, 'Edition')
+                self.log('Downloading', edi_name, 'Edition', self.timefmt)

                 url = index + '/Home/GetAllpages?editionid=' + edi_id + '&editiondate=' + today
                 main_data = json.loads(self.index_to_soup(url, raw=True))

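A quick standalone check of the date handling this hunk moves into parse_index (the sample date is illustrative):

from datetime import date

today = '22/12/2023'  # illustrative 'date' option; defaults to date.today().strftime('%d/%m/%Y')
# The epaper endpoints expect the slashes percent-encoded:
assert today.replace('/', '%2F') == '22%2F12%2F2023'
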
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8

 from calibre.ebooks.BeautifulSoup import Tag
 from calibre.web.feeds.news import BasicNewsRecipe
@@ -47,17 +49,30 @@ class IndiaToday(BasicNewsRecipe):
     def preprocess_raw_html(self, raw_html, url):
         return raw_html.replace('—', '--')

+    recipe_specific_options = {
+        'date': {
+            'short': 'The date of the edition to download (DD-MM-YYYY format)',
+            'long': 'For example, 22-07-2024'
+        }
+    }
+
     def get_cover_url(self):
-        soup = self.index_to_soup(
-            'https://www.readwhere.com/magazine/the-india-today-group/India-Today/1154'
-        )
-        for citem in soup.findAll(
-            'meta', content=lambda s: s and s.endswith('/magazine/300/new')
-        ):
-            return citem['content'].replace('300', '600')
+        d = self.recipe_specific_options.get('date')
+        if not (d and isinstance(d, str)):
+            soup = self.index_to_soup(
+                'https://www.readwhere.com/magazine/the-india-today-group/India-Today/1154'
+            )
+            for citem in soup.findAll(
+                'meta', content=lambda s: s and s.endswith('/magazine/300/new')
+            ):
+                return citem['content'].replace('300', '600')

     def parse_index(self):
-        soup = self.index_to_soup('https://www.indiatoday.in/magazine')
+        issue = 'https://www.indiatoday.in/magazine'
+        d = self.recipe_specific_options.get('date')
+        if d and isinstance(d, str):
+            issue = issue + '/' + d
+        soup = self.index_to_soup(issue)

         section = None
         sections = {}

@@ -36,6 +36,20 @@ class LeMonde(BasicNewsRecipe):
         'publisher': publisher
     }

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     masthead_url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/5/54/Le_monde_logo.svg/800px-Le_monde_logo.svg.png'

     feeds = [

@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 # vim:fileencoding=utf-8
 from __future__ import unicode_literals

@@ -33,6 +34,20 @@ class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe):
     timefmt = ' [%d %b %Y]'
     no_stylesheets = True

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     feeds = [(u'Blogs', u'http://blog.mondediplo.net/spip.php?page=backend'),
              (u'Archives', u'http://www.monde-diplomatique.fr/rss/')]


@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 from calibre.web.feeds.news import BasicNewsRecipe



@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 liberation.fr
 '''
@@ -81,7 +83,7 @@ class Liberation(BasicNewsRecipe):
         'les mutations des sociétés et des cultures.'
     )
     language = 'fr'
-    oldest_article = 1
+    oldest_article = 1.15
     remove_empty_feeds = True
     articles_are_obfuscated = True
     ignore_duplicate_articles = {'title', 'url'}
@@ -94,6 +96,20 @@ class Liberation(BasicNewsRecipe):
         blockquote { color:#202020; }
     '''

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     feeds = [
         ('A la une', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/collection/accueil-une/?outputType=xml'),
         ('Politique', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/politique/?outputType=xml'),

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 import json
 import re
 from datetime import date
@@ -19,15 +21,22 @@ class LiveMint(BasicNewsRecipe):
     remove_attributes = ['style', 'height', 'width']
     masthead_url = 'https://images.livemint.com/static/livemint-logo-v1.svg'

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
     remove_empty_feeds = True
     resolve_internal_links = True

     def __init__(self, *args, **kwargs):
         BasicNewsRecipe.__init__(self, *args, **kwargs)
         if self.output_profile.short_name.startswith('kindle'):
             self.title = 'Mint | ' + date.today().strftime('%b %d, %Y')
             if is_saturday:
                 self.title = 'Mint Lounge | ' + date.today().strftime('%b %d, %Y')
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)

     def get_cover_url(self):
         today = date.today().strftime('%d/%m/%Y')
@@ -40,7 +49,7 @@ class LiveMint(BasicNewsRecipe):
             return cov['HighResolution']

 if is_saturday:

     title = 'Mint Lounge'
     masthead_url = 'https://lifestyle.livemint.com/mintlounge/static-images/lounge-logo.svg'

     oldest_article = 6.5  # days

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 https://www.military-history.org/
 '''

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 https://minervamagazine.com/
 '''

@@ -65,9 +65,20 @@ class MitTechnologyReview(BasicNewsRecipe):
         ),
     ]

+    recipe_specific_options = {
+        'issue_url': {
+            'short': 'The issue URL ',
+            'long': 'For example, https://www.technologyreview.com/magazines/the-education-issue/',
+            'default': 'http://www.technologyreview.com/magazine/'
+        }
+    }
+
     def parse_index(self):
-        # for past editions, change the issue link below
         issue = 'http://www.technologyreview.com/magazine/'
+        d = self.recipe_specific_options.get('issue_url')
+        if d and isinstance(d, str):
+            issue = d
         soup = self.index_to_soup(issue)
         if script := soup.find('script', id='preload'):
             raw = script.contents[0]

@@ -192,7 +192,6 @@ class NatGeo(BasicNewsRecipe):
         # self.cover_url = png[0] + '?w=1000&h=1000'
         self.cover_url = soup.find('meta', attrs={'property':'og:image'})['content'].split('?')[0] + '?w=1000'

-        name = soup.find(attrs={'class':lambda x: x and 'Header__Description' in x.split()})
         # self.title = 'National Geographic ' + self.tag_to_string(name)
         ans = {}
         if photoart := soup.find(attrs={'class':lambda x: x and 'BgImagePromo__Container__Text__Link' in x.split()}):

@@ -121,9 +121,19 @@ class NewYorkTimes(BasicNewsRecipe):
             tf.write(self.get_nyt_page(url))
             return tf.name

+    recipe_specific_options = {
+        'date': {
+            'short': 'The date of the edition to download (YYYY/MM/DD format)',
+            'long': 'For example, 2024/07/16'
+        }
+    }
+
     def read_todays_paper(self):
         INDEX = 'https://www.nytimes.com/section/todayspaper'
         # INDEX = 'file:///t/raw.html'
+        d = self.recipe_specific_options.get('date')
+        if d and isinstance(d, str):
+            INDEX = 'https://www.nytimes.com/issue/todayspaper/' + d + '/todays-new-york-times'
         return self.index_to_soup(self.get_nyt_page(INDEX, skip_wayback=True))

     def read_nyt_metadata(self):

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 from calibre.web.feeds.news import BasicNewsRecipe, classes


@@ -34,6 +36,13 @@ class outlook(BasicNewsRecipe):
         classes('ads-box info-img-absolute mobile-info-id story-dec-time-mobile sb-also-read ads-box1')
     ]

+    recipe_specific_options = {
+        'date': {
+            'short': 'The date of the edition to download (DD-Month-YYYY format)',
+            'long': 'For example, 10-june-2024'
+        }
+    }
+
     def get_browser(self):
         return BasicNewsRecipe.get_browser(self, user_agent='common_words/based', verify_ssl_certificates=False)

@@ -42,14 +51,27 @@ class outlook(BasicNewsRecipe):
             '\n***\nif this recipe fails, report it on: '
             'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
         )
-        soup = self.index_to_soup('https://www.outlookindia.com/magazine')
-        a = soup.find('a', attrs={'aria-label':'magazine-cover-image'})
-        self.cover_url = a.img['src'].split('?')[0]
-        url = a['href']
-        self.description = self.tag_to_string(a)
-        self.timefmt = ' [' + self.tag_to_string(a.div).strip() + ']'
-        self.log('Downloading issue:', url, self.timefmt)
+        d = self.recipe_specific_options.get('date')
+        if d and isinstance(d, str):
+            url = 'https://www.outlookindia.com/magazine/' + d
+        else:
+            soup = self.index_to_soup('https://www.outlookindia.com/magazine')
+            a = soup.find('a', attrs={'aria-label':'magazine-cover-image'})
+            url = a['href']
+
+        self.log('Downloading issue:', url)
+
+        soup = self.index_to_soup(url)
+        cov = soup.find(attrs={'aria-label':'magazine-cover-image'})
+        self.cover_url = cov.img['src'].split('?')[0]
+        summ = soup.find(attrs={'data-test-id':'magazine-summary'})
+        if summ:
+            self.description = self.tag_to_string(summ)
+        tme = soup.find(attrs={'class':'arr__timeago'})
+        if tme:
+            self.timefmt = ' [' + self.tag_to_string(tme).strip() + ']'


         ans = []


@@ -24,6 +24,20 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe):
     conversion_options = {'linearize_tables': True}
     masthead_url = 'http://www.people.com.cn/img/2010wb/images/logo.gif'

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     feeds = [
         (u'时政', u'http://www.people.com.cn/rss/politics.xml'),
         (u'国际', u'http://www.people.com.cn/rss/world.xml'),

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 from collections import OrderedDict

 from calibre import browser
@@ -31,19 +33,29 @@ class PhilosophyNow(BasicNewsRecipe):
         .articleImageCaption { font-size:small; text-align:center; }
         em, blockquote { color:#202020; }
     '''

+    recipe_specific_options = {
+        'issue': {
+            'short': 'Enter the Issue Number you want to download ',
+            'long': 'For example, 136'
+        }
+    }
+
     def parse_index(self):
         soup = self.index_to_soup('https://philosophynow.org/')
         div = soup.find('div', attrs={'id': 'aside_issue_cover'})
-        url = div.find('a', href=True)['href']
         issue = div.find('div', attrs={'id':'aside_issue_text'})
         if issue:
             self.log('Downloading issue:', self.tag_to_string(issue).strip())
             self.timefmt = ' [' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_date'})) + ']'
             self.title = 'Philosophy Now ' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_number'}))
+        url = 'https://philosophynow.org' + div.find('a', href=True)['href']
+
+        d = self.recipe_specific_options.get('issue')
+        if d and isinstance(d, str):
+            url = 'https://philosophynow.org/issues/' + d
+
+        soup = self.index_to_soup(url)

         div = soup.find('div', attrs={'id': 'issue_contents_cover_div'})
         cov_url = div.find('img', src=True)['src']
         self.cover_url = 'https://philosophynow.org' + cov_url
-        soup = self.index_to_soup('https://philosophynow.org' + url)
-        self.timefmt = ' [' + self.tag_to_string(soup.find('h1')) + ']'

         feeds = OrderedDict()


@@ -27,6 +27,20 @@ class Politico(BasicNewsRecipe):
     encoding = 'UTF-8'
     language = 'en'

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     remove_empty_feeds = True
     ignore_duplicate_articles = ['url']


@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 import json
 import time
 from datetime import datetime, timedelta

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 rt.com
 '''
@@ -26,6 +28,20 @@ class RT_eng(BasicNewsRecipe):
     remove_attributes = ['height', 'width', 'style']
     publication_type = 'newsportal'

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     extra_css = '''
         img {display:block; margin:0 auto;}
         em { color:#202020; }

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 https://sciencex.com/
 '''
@@ -26,6 +28,20 @@ class scix(BasicNewsRecipe):
         .article__info, .article-byline, .article-main__more, .d-print-block {font-size:small; color:#404040;}
     '''

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     resolve_internal_links = True
     remove_empty_feeds = True


@@ -59,16 +59,29 @@ class ScientificAmerican(BasicNewsRecipe):
             br.submit()
         return br

+    recipe_specific_options = {
+        'issue_url': {
+            'short': 'The issue URL ',
+            'long': (
+                'For example, https://www.scientificamerican.com/issue/sa/2024/07-01/'
+                '\nYou can also download special-editions, physics, health, mind magazines by pasting the URL here.'
+            )
+        }
+    }
+
     def parse_index(self):
-        # Get the cover, date and issue URL
-        fp_soup = self.index_to_soup("https://www.scientificamerican.com")
-        curr_issue_link = fp_soup.find(**prefixed_classes('latest_issue_links-'))
-        if not curr_issue_link:
-            self.abort_recipe_processing("Unable to find issue link")
-        issue_url = 'https://www.scientificamerican.com' + curr_issue_link.a["href"]
-        # for past editions https://www.scientificamerican.com/archive/issues/
-        # issue_url = 'https://www.scientificamerican.com/issue/sa/2024/01-01/'
-        soup = self.index_to_soup(issue_url)
+        d = self.recipe_specific_options.get('issue_url')
+        if d and isinstance(d, str):
+            issue = d
+        else:
+            fp_soup = self.index_to_soup("https://www.scientificamerican.com")
+            curr_issue_link = fp_soup.find(**prefixed_classes('latest_issue_links-'))
+            if not curr_issue_link:
+                self.abort_recipe_processing("Unable to find issue link")
+            issue = 'https://www.scientificamerican.com' + curr_issue_link.a["href"]
+
+        soup = self.index_to_soup(issue)
         script = soup.find("script", id="__DATA__")
         if not script:
             self.abort_recipe_processing("Unable to find script")

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 from calibre.web.feeds.news import BasicNewsRecipe, classes


@@ -56,8 +58,19 @@ class spectator(BasicNewsRecipe):
         ]
         return br

+    recipe_specific_options = {
+        'date': {
+            'short': 'The date of the edition to download (DD-MM-YYYY format)',
+            'long': 'For example, 20-07-2024'
+        }
+    }
+
     def parse_index(self):
-        soup = self.index_to_soup('https://www.spectator.co.uk/magazine')
+        index = 'https://www.spectator.co.uk/magazine'
+        d = self.recipe_specific_options.get('date')
+        if d and isinstance(d, str):
+            index = index + '/' + d + '/'
+        soup = self.index_to_soup(index)
         self.cover_url = soup.find(**classes(
             'magazine-header__container')).img['src'].split('?')[0]
         issue = self.tag_to_string(soup.find(**classes(

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 from datetime import datetime

 from calibre.web.feeds.news import BasicNewsRecipe, classes
@@ -29,17 +31,32 @@ class TheWeek(BasicNewsRecipe):
         .article-info { font-size:small; }
     '''

+    recipe_specific_options = {
+        'date': {
+            'short': 'The date of the edition to download (YYYY.MM.DD format)',
+            'long': 'For example, 2024.06.30'
+        }
+    }
+
     def get_cover_url(self):
-        soup = self.index_to_soup(
-            'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/'
-        )
-        for citem in soup.findAll(
-            'meta', content=lambda s: s and s.endswith('view/3.jpg')
-        ):
-            return citem['content']
+        d = self.recipe_specific_options.get('date')
+        if not (d and isinstance(d, str)):
+            soup = self.index_to_soup(
+                'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/'
+            )
+            for citem in soup.findAll(
+                'meta', content=lambda s: s and s.endswith('view/3.jpg')
+            ):
+                return citem['content']

     def parse_index(self):
-        soup = self.index_to_soup('https://www.theweek.in/theweek.html')
+        issue = 'https://www.theweek.in/theweek.html'
+
+        d = self.recipe_specific_options.get('date')
+        if d and isinstance(d, str):
+            issue = 'https://www.theweek.in/theweek.' + d + '.html'
+
+        soup = self.index_to_soup(issue)
         ans = []
         d = datetime.today()


@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 import json
 import re


@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 __license__ = 'GPL v3'
 __copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
@@ -28,6 +30,20 @@ class TheWashingtonPost(BasicNewsRecipe):
     publication_type = 'newspaper'
     remove_attributes = ['style', 'width', 'height']

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     extra_css = '''
         .img { text-align:center; font-size:small; }
         .auth { font-weight:bold; font-size:small; }

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 https://www.world-archaeology.com
 '''

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 import json
 import time
 from datetime import datetime, timedelta

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 import json
 import time
 from datetime import datetime, timedelta

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 import json
 import time
 from datetime import datetime, timedelta
@@ -38,7 +40,21 @@ class WSJ(BasicNewsRecipe):
     resolve_internal_links = True
     ignore_duplicate_articles = {'url', 'title'}
     remove_empty_feeds = True
-    oldest_article = 1  # days
+    oldest_article = 1.2  # days
+
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)

     extra_css = '''
         #subhed, em { font-style:italic; color:#202020; }

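Nearly every hunk above applies the same two-part pattern: declare recipe_specific_options on the class, then coerce and apply the user-supplied string in __init__ (or parse_index). A consolidated sketch of that pattern; the recipe name and feed URL are illustrative, the option plumbing mirrors the diff:

#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe


class Example(BasicNewsRecipe):
    title = 'Example Feed'  # illustrative
    oldest_article = 2  # class-level default, in days

    # Declared options are surfaced by calibre for this recipe; the keys and
    # the short/long/default fields follow the shape used throughout the diff.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article)
        }
    }

    def __init__(self, *args, **kwargs):
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        # At run time the chosen value arrives as a string; coerce and apply it.
        d = self.recipe_specific_options.get('days')
        if d and isinstance(d, str):
            self.oldest_article = float(d)

    feeds = [('News', 'https://example.com/rss')]  # illustrative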