Merge branch 'kovidgoyal:master' into tolino

commit ff93e10e00
beedaddy 2024-07-23 14:09:40 +02:00, committed by GitHub
56 changed files with 695 additions and 101 deletions
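
Nearly every hunk below applies the same pattern: a recipe declares a recipe_specific_options dict (surfaced in calibre's recipe-customization UI) and reads the user-supplied string at construction time. A minimal sketch of the recurring 'days' variant, assembled from the hunks below; the class name and title are hypothetical:

    from calibre.web.feeds.news import BasicNewsRecipe


    class ExampleRecipe(BasicNewsRecipe):  # hypothetical recipe, for illustration only
        title = 'Example News'
        oldest_article = 2  # days; the declared default mirrors this value

        recipe_specific_options = {
            'days': {
                'short': 'Oldest article to download from this news source. In days ',
                'long': 'For example, 0.5, gives you articles from the past 12 hours',
                'default': str(oldest_article)
            }
        }

        def __init__(self, *args, **kwargs):
            BasicNewsRecipe.__init__(self, *args, **kwargs)
            d = self.recipe_specific_options.get('days')
            if d and isinstance(d, str):  # option values arrive as strings
                self.oldest_article = float(d)

The 'date', 'issue' and 'issue_url' variants in the magazine recipes follow the same shape, substituting the value into the recipe's index URL in parse_index instead of overriding oldest_article.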

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
https://ancientegyptmagazine.com
'''

View File

@ -81,6 +81,19 @@ class TheAtlantic(BasicNewsRecipe):
language = 'en'
encoding = 'utf-8'
recipe_specific_options = {
'date': {
'short': 'The date of the edition to download (YYYY/MM format)',
'long': 'For example, 2024/05'
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('date')
if d and isinstance(d, str):
self.INDEX = 'https://www.theatlantic.com/magazine/toc/' + d + '/'
keep_only_tags = [
dict(itemprop=['headline']),
classes(

View File

@ -234,6 +234,20 @@ class BBCNews(BasicNewsRecipe):
#
oldest_article = 1.5
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
# Number of simultaneous downloads. 20 is consistently working fine on the
# BBC News feeds with no problems. Speeds things up from the default of 5.
# If you have a lot of feeds and/or have increased oldest_article above 2

View File

@ -1,3 +1,6 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import time
from datetime import datetime
@ -58,7 +61,7 @@ class Bloomberg(BasicNewsRecipe):
remove_empty_feeds = True
recipe_specific_options = {
'date': {
'issue': {
'short': 'The ID of the edition to download (YY_XX format)',
'long': 'For example, 24_17\nHint: Edition ID can be found at the end of its URL'
}
@ -86,7 +89,7 @@ class Bloomberg(BasicNewsRecipe):
inx = 'https://cdn-mobapi.bloomberg.com'
sec = self.index_to_soup(inx + '/wssmobile/v1/bw/news/list?limit=1', raw=True)
id = json.loads(sec)['magazines'][0]['id']
past_edition = self.recipe_specific_options.get('date')
past_edition = self.recipe_specific_options.get('issue')
if past_edition and isinstance(past_edition, str):
id = past_edition
edit = self.index_to_soup(inx + '/wssmobile/v1/bw/news/week/' + id, raw=True)
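
The rename from 'date' to 'issue' matches what the value actually is: a YY_XX edition ID rather than a date. A sketch of the lookup above, condensed from the hunk into one illustrative method; when no issue is given, the newest magazine ID comes from the mobile API:

    import json

    def current_or_requested_issue(self):  # illustrative helper, not in the recipe
        inx = 'https://cdn-mobapi.bloomberg.com'
        past_edition = self.recipe_specific_options.get('issue')
        if past_edition and isinstance(past_edition, str):
            return past_edition  # e.g. '24_17', found at the end of the edition URL
        sec = self.index_to_soup(inx + '/wssmobile/v1/bw/news/list?limit=1', raw=True)
        return json.loads(sec)['magazines'][0]['id']  # newest edition ID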

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
from datetime import datetime

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
from urllib.parse import quote, urlparse
@ -118,6 +120,13 @@ class CaravanMagazine(BasicNewsRecipe):
return br
recipe_specific_options = {
'date': {
'short': 'The date of the edition to download (MM-YYYY format)',
'long': 'For example, 07-2024'
}
}
def parse_index(self):
self.log(
'\n***\nif this recipe fails, report it on: '
@ -125,9 +134,11 @@ class CaravanMagazine(BasicNewsRecipe):
)
api = 'https://api.caravanmagazine.in/api/trpc/magazines.getLatestIssue'
# for past editions
# inp = json.dumps({"0":{"json":{"month":6,"year":2023}}})
# api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&input=' + quote(inp, safe='')
d = self.recipe_specific_options.get('date')
if d and isinstance(d, str):
x = d.split('-')
inp = json.dumps({"0":{"json":{"month":int(x[0]),"year":int(x[1])}}})
api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&input=' + quote(inp, safe='')
raw = json.loads(self.index_to_soup(api, raw=True))
if isinstance(raw, list):
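
The previously commented-out past-edition query becomes live code driven by the 'date' option. A sketch of just the MM-YYYY-to-API conversion, using the endpoints shown above; the function name is hypothetical:

    import json
    from urllib.parse import quote

    def caravan_api_url(option_value=None):
        # default endpoint returns the latest issue
        api = 'https://api.caravanmagazine.in/api/trpc/magazines.getLatestIssue'
        if option_value:  # 'MM-YYYY', e.g. '07-2024'
            month, year = option_value.split('-')
            inp = json.dumps({'0': {'json': {'month': int(month), 'year': int(year)}}})
            api = ('https://api.caravanmagazine.in/api/trpc/'
                   'magazines.getForMonthAndYear?batch=1&input=' + quote(inp, safe=''))
        return api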

View File

@ -24,6 +24,20 @@ class DeutscheWelle_bs(BasicNewsRecipe):
dict(name='article')
]
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
remove_tags = [
dict(name=['footer', 'source']),
dict(attrs={'data-tracking-name':'sharing-icons-inline'}),

View File

@ -21,6 +21,20 @@ class DeutscheWelle(BasicNewsRecipe):
dict(name='article')
]
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
remove_tags = [
dict(name=['footer', 'source']),
dict(attrs={'data-tracking-name':'sharing-icons-inline'}),

View File

@ -16,6 +16,20 @@ class DeutscheWelle_en(BasicNewsRecipe):
ignore_duplicate_articles = {'title', 'url'}
remove_attributes = ['height', 'width', 'style']
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
keep_only_tags = [
dict(name='article')
]

View File

@ -17,6 +17,20 @@ class DeutscheWelle_es(BasicNewsRecipe):
remove_attributes = ['height', 'width', 'style']
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
keep_only_tags = [
dict(name='article')
]

View File

@ -20,6 +20,20 @@ class DeutscheWelle_hr(BasicNewsRecipe):
ignore_duplicate_articles = {'title', 'url'}
remove_attributes = ['height', 'width', 'style']
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
keep_only_tags = [
dict(name='article')
]

View File

@ -16,11 +16,24 @@ class DeutscheWelle_pt(BasicNewsRecipe):
remove_empty_feeds = True
masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'
remove_javascript = True
ignore_duplicate_articles = {'title', 'url'}
remove_attributes = ['height', 'width', 'style']
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
def preprocess_html(self, soup):
for img in soup.findAll('img', srcset=True):
img['src'] = img['srcset'].split()[6]

View File

@ -17,6 +17,20 @@ class DeutscheWelle(BasicNewsRecipe):
ignore_duplicate_articles = {'title', 'url'}
remove_attributes = ['height', 'width', 'style']
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
def preprocess_html(self, soup):
for img in soup.findAll('img', srcset=True):
img['src'] = img['srcset'].split()[6]

View File

@ -19,6 +19,20 @@ class DeutscheWelle_sr(BasicNewsRecipe):
ignore_duplicate_articles = {'title', 'url'}
remove_attributes = ['height', 'width', 'style']
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
def preprocess_html(self, soup):
for img in soup.findAll('img', srcset=True):
img['src'] = img['srcset'].split()[6]

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import re
from datetime import date, datetime, timedelta

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import re
from datetime import datetime, timedelta
from urllib.parse import quote

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
http://www.elcorreo.com/
'''
@ -22,6 +24,20 @@ class elcorreo(BasicNewsRecipe):
max_articles_per_feed = 25 # articles
compress_news_images = True
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
extra_css = '''
.v-mdl-ath__inf, .v-mdl-ath__p--2, .v-mdl-ath__p {font-size:small; color:#404040;}
.v-fc, .v-a-fig { text-align:center; font-size:small; }

View File

@ -27,6 +27,20 @@ class ElPais(BasicNewsRecipe):
oldest_article = 2.1
max_articles_per_feed = 25
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
use_embedded_content = False
recursion = 5

View File

@ -19,6 +19,20 @@ class EpochTimes(BasicNewsRecipe):
masthead_url = 'https://epochtimes-ny.newsmemory.com/eeLayout/epochtimes/1.0.a/images/webapp/banner.png'
extra_css = '.post_caption, .text-sm, .uppercase {font-size:small;}'
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
keep_only_tags = [
dict(name='article')
]

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import re
from urllib.parse import quote
@ -31,6 +33,20 @@ class ft(BasicNewsRecipe):
.o-topper__topic { font-size:small; color:#5c5c5c; }
'''
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
keep_only_tags = [
classes(
'body_json o-topper__topic o-topper__headline o-topper__standfirst o-topper__visual article-info__time-byline main-image'

View File

@ -47,8 +47,20 @@ class ForeignPolicy(BasicNewsRecipe):
]
remove_tags_after = [classes('post-content-main')]
recipe_specific_options = {
'issue': {
'short': 'Enter the Issue ID you want to download ',
'long': 'For example, 411131563'
}
}
def parse_index(self):
soup = self.index_to_soup('https://foreignpolicy.com/the-magazine')
issue_url = 'https://foreignpolicy.com/the-magazine'
d = self.recipe_specific_options.get('issue')
if d and isinstance(d, str):
issue_url = issue_url + '/?issue_id=' + d
soup = self.index_to_soup(issue_url)
img = soup.find('img', attrs={'src': lambda x: x and '-cover' in x})
if img:
self.cover_url = img['src'].split('?')[0] + '?w=800&quality=90'

View File

@ -127,6 +127,13 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
INDEX = 'https://www.foreignaffairs.com/magazine'
recipe_specific_options = {
'issue': {
'short': 'Enter the Issue Number you want to download ',
'long': 'For example, 2024/103/1'
}
}
keep_only_tags = [
classes('article-header article-body article-lead-image article-body-text'),
]
@ -140,6 +147,10 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
remove_empty_feeds = True
def parse_index(self):
d = self.recipe_specific_options.get('issue')
if d and isinstance(d, str):
self.INDEX = 'https://www.foreignaffairs.com/issues/' + d
soup = self.index_to_soup(self.INDEX)
# get dates
date = re.split(r'\s\|\s', self.tag_to_string(

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from collections import defaultdict
from calibre.web.feeds.news import BasicNewsRecipe, classes
@ -53,8 +55,20 @@ class Frontline(BasicNewsRecipe):
src.extract()
return soup
recipe_specific_options = {
'issue': {
'short': 'Enter the Issue Number you want to download\n(Volume-Issue format)',
'long': 'For example, 41-12'
}
}
def parse_index(self):
soup = self.index_to_soup('https://frontline.thehindu.com/current-issue/')
issue_url = 'https://frontline.thehindu.com/current-issue/'
d = self.recipe_specific_options.get('issue')
if d and isinstance(d, str):
issue_url = 'https://frontline.thehindu.com/magazine/issue/vol' + d
soup = self.index_to_soup(issue_url)
if cover := soup.find('div', attrs={'class':'magazine'}):
self.cover_url = cover.find(**classes('sptar-image')).img['data-original'].replace('_320', '_1200')

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from datetime import datetime, timedelta, timezone
from calibre.utils.date import parse_date
@ -29,6 +31,20 @@ class GlobalTimes(BasicNewsRecipe):
blockquote, em {color:#202020;}
'''
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
keep_only_tags = [
classes(
'article_column article_title author_share_left article_content'

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
harpers.org
'''

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
from calibre.web.feeds.news import BasicNewsRecipe
@ -37,6 +39,20 @@ class himal(BasicNewsRecipe):
resolve_internal_links = True
oldest_article = 30 # days
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
extra_css = '''
.cap, .auth {font-size:small;}
em, blockquote {color:#404040;}

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import re
from collections import defaultdict
@ -32,7 +34,12 @@ class TheHindu(BasicNewsRecipe):
recipe_specific_options = {
'location': {
'short': 'The name of the local edition',
'long': 'If The Hindu is available in your local town/city,\nset this to your location, for example, hyderabad',
'long': ('If The Hindu is available in your local town/city,\n'
'set this to your location, for example, hyderabad\n'
'Available Editions: bengaluru, chennai, coimbatore, delhi, '
'erode, hyderabad, international, kochi, kolkata,\n'
'kozhikode, madurai, mangalore, mumbai, thiruvananthapuram, '
'tiruchirapalli, vijayawada, visakhapatnam'),
'default': 'international'
},
'date': {

View File

@ -1,5 +1,5 @@
from datetime import date
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe, classes
@ -24,6 +24,20 @@ class TheHindufeeds(BasicNewsRecipe):
.italic {font-style:italic; color:#202020;}
'''
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
ignore_duplicate_articles = {'url'}
keep_only_tags = [
@ -60,11 +74,6 @@ class TheHindufeeds(BasicNewsRecipe):
src.extract()
return soup
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
if self.output_profile.short_name.startswith('kindle'):
self.title = 'The Hindu (Feeds) ' + date.today().strftime('%b %d, %Y')
def get_cover_url(self):
soup = self.index_to_soup('https://www.thehindu.com/todays-paper/')

View File

@ -1,21 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
from collections import defaultdict
from datetime import date
from calibre.web.feeds.news import BasicNewsRecipe
# figure out your local_edition from the fetch news log of this recipe
local_edition = 'Delhi'
today = date.today().strftime('%d/%m/%Y')
# for older edition, change today
# today = '22/12/2023'
day, month, year = (int(x) for x in today.split('/'))
dt = date(year, month, day)
today = today.replace('/', '%2F')
index = 'https://epaper.hindustantimes.com'
class ht(BasicNewsRecipe):
@ -23,28 +13,51 @@ class ht(BasicNewsRecipe):
language = 'en_IN'
__author__ = 'unkn0wn'
masthead_url = 'https://www.htmedia.in/wp-content/uploads/2020/08/HT-dot-com-logo-product.png'
timefmt = ' [' + dt.strftime('%b %d, %Y') + ']'
description = 'Articles from the Hindustan Times epaper, digital edition'
encoding = 'utf-8'
delay = 1
ignore_duplicate_articles = {'title'}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
if self.output_profile.short_name.startswith('kindle'):
self.title = 'HT Print Edition ' + dt.strftime('%b %d, %Y')
extra_css = '''
.cap { text-align:center; font-size:small; }
img { display:block; margin:0 auto; }
'''
def parse_index(self):
recipe_specific_options = {
'location': {
'short': 'The name of the local edition',
'long': ('If The Hindustan Times is available in your local town/city,\n'
'set this to your location, for example, Delhi\nAvailable Editions: '
'Delhi, Mumbai, Chandigarh, Lucknow, Patna, Bengaluru, Pune, Gurgaon, '
'Ludhiana, Rajasthan, Amritsar,\nEast UP, Haryana, Jammu, Navi Mumbai, '
'Noida, Punjab, Ranchi, Thane, Uttarakhand, West UP'),
'default': 'Delhi'
},
'date': {
'short': 'The date of the edition to download (DD/MM/YYYY format)',
'long': 'For example, 22/12/2023'
}
}
def parse_index(self):
self.log(
'\n***\nif this recipe fails, report it on: '
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
)
local_edition = 'Delhi'
d = self.recipe_specific_options.get('location')
if d and isinstance(d, str):
local_edition = d
today = date.today().strftime('%d/%m/%Y')
p = self.recipe_specific_options.get('date')
if p and isinstance(p, str):
today = p
self.timefmt = ' [%s]' % today
today = today.replace('/', '%2F')
get_edition = index + '/Home/GetEditionSupplementHierarchy?EditionDate=' + today
edi_data = json.loads(self.index_to_soup(get_edition, raw=True))
@ -56,7 +69,7 @@ class ht(BasicNewsRecipe):
if edi['EditionName'] == local_edition:
edi_name = edi['EditionName']
edi_id = str(edi['EditionId'])
self.log('Downloading', edi_name, 'Edition')
self.log('Downloading', edi_name, 'Edition', self.timefmt)
url = index + '/Home/GetAllpages?editionid=' + edi_id + '&editiondate=' + today
main_data = json.loads(self.index_to_soup(url, raw=True))
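
The net effect of this refactor: edition name and date move from import time into parse_index, so the 'location' and 'date' options can vary per run. A sketch of the URL construction, assuming the epaper endpoint shown above; the function name is hypothetical:

    from datetime import date

    def ht_edition_list_url(index='https://epaper.hindustantimes.com', user_date=None):
        # the 'date' option supplies 'DD/MM/YYYY'; default is today's paper
        today = user_date or date.today().strftime('%d/%m/%Y')
        quoted = today.replace('/', '%2F')  # endpoint expects the slashes URL-encoded
        return index + '/Home/GetEditionSupplementHierarchy?EditionDate=' + quoted

    # e.g. ht_edition_list_url(user_date='22/12/2023')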

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe
@ -47,7 +49,16 @@ class IndiaToday(BasicNewsRecipe):
def preprocess_raw_html(self, raw_html, url):
return raw_html.replace('—', '--')
recipe_specific_options = {
'date': {
'short': 'The date of the edition to download (DD-MM-YYYY format)',
'long': 'For example, 22-07-2024'
}
}
def get_cover_url(self):
d = self.recipe_specific_options.get('date')
if not (d and isinstance(d, str)):
soup = self.index_to_soup(
'https://www.readwhere.com/magazine/the-india-today-group/India-Today/1154'
)
@ -57,7 +68,11 @@ class IndiaToday(BasicNewsRecipe):
return citem['content'].replace('300', '600')
def parse_index(self):
soup = self.index_to_soup('https://www.indiatoday.in/magazine')
issue = 'https://www.indiatoday.in/magazine'
d = self.recipe_specific_options.get('date')
if d and isinstance(d, str):
issue = issue + '/' + d
soup = self.index_to_soup(issue)
section = None
sections = {}

View File

@ -36,6 +36,20 @@ class LeMonde(BasicNewsRecipe):
'publisher': publisher
}
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
masthead_url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/5/54/Le_monde_logo.svg/800px-Le_monde_logo.svg.png'
feeds = [

View File

@ -1,3 +1,4 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals
@ -33,6 +34,20 @@ class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe):
timefmt = ' [%d %b %Y]'
no_stylesheets = True
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
feeds = [(u'Blogs', u'http://blog.mondediplo.net/spip.php?page=backend'),
(u'Archives', u'http://www.monde-diplomatique.fr/rss/')]

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
liberation.fr
'''
@ -81,7 +83,7 @@ class Liberation(BasicNewsRecipe):
'les mutations des sociétés et des cultures.'
)
language = 'fr'
oldest_article = 1
oldest_article = 1.15
remove_empty_feeds = True
articles_are_obfuscated = True
ignore_duplicate_articles = {'title', 'url'}
@ -94,6 +96,20 @@ class Liberation(BasicNewsRecipe):
blockquote { color:#202020; }
'''
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
feeds = [
('A la une', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/collection/accueil-une/?outputType=xml'),
('Politique', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/politique/?outputType=xml'),

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import re
from datetime import date
@ -19,15 +21,22 @@ class LiveMint(BasicNewsRecipe):
remove_attributes = ['style', 'height', 'width']
masthead_url = 'https://images.livemint.com/static/livemint-logo-v1.svg'
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
remove_empty_feeds = True
resolve_internal_links = True
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
if self.output_profile.short_name.startswith('kindle'):
self.title = 'Mint | ' + date.today().strftime('%b %d, %Y')
if is_saturday:
self.title = 'Mint Lounge | ' + date.today().strftime('%b %d, %Y')
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
def get_cover_url(self):
today = date.today().strftime('%d/%m/%Y')
@ -40,7 +49,7 @@ class LiveMint(BasicNewsRecipe):
return cov['HighResolution']
if is_saturday:
title = 'Mint Lounge'
masthead_url = 'https://lifestyle.livemint.com/mintlounge/static-images/lounge-logo.svg'
oldest_article = 6.5 # days

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
https://www.military-history.org/
'''

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
https://minervamagazine.com/
'''

View File

@ -65,9 +65,20 @@ class MitTechnologyReview(BasicNewsRecipe):
),
]
recipe_specific_options = {
'issue_url': {
'short': 'The issue URL ',
'long': 'For example, https://www.technologyreview.com/magazines/the-education-issue/',
'default': 'http://www.technologyreview.com/magazine/'
}
}
def parse_index(self):
# for past editions, change the issue link below
issue = 'http://www.technologyreview.com/magazine/'
d = self.recipe_specific_options.get('issue_url')
if d and isinstance(d, str):
issue = d
soup = self.index_to_soup(issue)
if script := soup.find('script', id='preload'):
raw = script.contents[0]

View File

@ -192,7 +192,6 @@ class NatGeo(BasicNewsRecipe):
# self.cover_url = png[0] + '?w=1000&h=1000'
self.cover_url = soup.find('meta', attrs={'property':'og:image'})['content'].split('?')[0] + '?w=1000'
name = soup.find(attrs={'class':lambda x: x and 'Header__Description' in x.split()})
# self.title = 'National Geographic ' + self.tag_to_string(name)
ans = {}
if photoart := soup.find(attrs={'class':lambda x: x and 'BgImagePromo__Container__Text__Link' in x.split()}):

View File

@ -121,9 +121,19 @@ class NewYorkTimes(BasicNewsRecipe):
tf.write(self.get_nyt_page(url))
return tf.name
recipe_specific_options = {
'date': {
'short': 'The date of the edition to download (YYYY/MM/DD format)',
'long': 'For example, 2024/07/16'
}
}
def read_todays_paper(self):
INDEX = 'https://www.nytimes.com/section/todayspaper'
# INDEX = 'file:///t/raw.html'
d = self.recipe_specific_options.get('date')
if d and isinstance(d, str):
INDEX = 'https://www.nytimes.com/issue/todayspaper/' + d + '/todays-new-york-times'
return self.index_to_soup(self.get_nyt_page(INDEX, skip_wayback=True))
def read_nyt_metadata(self):

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe, classes
@ -34,6 +36,13 @@ class outlook(BasicNewsRecipe):
classes('ads-box info-img-absolute mobile-info-id story-dec-time-mobile sb-also-read ads-box1')
]
recipe_specific_options = {
'date': {
'short': 'The date of the edition to download (DD-Month-YYYY format)',
'long': 'For example, 10-june-2024'
}
}
def get_browser(self):
return BasicNewsRecipe.get_browser(self, user_agent='common_words/based', verify_ssl_certificates=False)
@ -42,14 +51,27 @@ class outlook(BasicNewsRecipe):
'\n***\nif this recipe fails, report it on: '
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
)
d = self.recipe_specific_options.get('date')
if d and isinstance(d, str):
url = 'https://www.outlookindia.com/magazine/' + d
else:
soup = self.index_to_soup('https://www.outlookindia.com/magazine')
a = soup.find('a', attrs={'aria-label':'magazine-cover-image'})
self.cover_url = a.img['src'].split('?')[0]
url = a['href']
self.description = self.tag_to_string(a)
self.timefmt = ' [' + self.tag_to_string(a.div).strip() + ']'
self.log('Downloading issue:', url, self.timefmt)
self.log('Downloading issue:', url)
soup = self.index_to_soup(url)
cov = soup.find(attrs={'aria-label':'magazine-cover-image'})
self.cover_url = cov.img['src'].split('?')[0]
summ = soup.find(attrs={'data-test-id':'magazine-summary'})
if summ:
self.description = self.tag_to_string(summ)
tme = soup.find(attrs={'class':'arr__timeago'})
if tme:
self.timefmt = ' [' + self.tag_to_string(tme).strip() + ']'
ans = []

View File

@ -24,6 +24,20 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe):
conversion_options = {'linearize_tables': True}
masthead_url = 'http://www.people.com.cn/img/2010wb/images/logo.gif'
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
feeds = [
(u'时政', u'http://www.people.com.cn/rss/politics.xml'),
(u'国际', u'http://www.people.com.cn/rss/world.xml'),

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from collections import OrderedDict
from calibre import browser
@ -32,18 +34,28 @@ class PhilosophyNow(BasicNewsRecipe):
em, blockquote { color:#202020; }
'''
recipe_specific_options = {
'issue': {
'short': 'Enter the Issue Number you want to download ',
'long': 'For example, 136'
}
}
def parse_index(self):
soup = self.index_to_soup('https://philosophynow.org/')
div = soup.find('div', attrs={'id': 'aside_issue_cover'})
url = div.find('a', href=True)['href']
issue = div.find('div', attrs={'id':'aside_issue_text'})
if issue:
self.log('Downloading issue:', self.tag_to_string(issue).strip())
self.timefmt = ' [' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_date'})) + ']'
self.title = 'Philosophy Now ' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_number'}))
url = 'https://philosophynow.org' + div.find('a', href=True)['href']
d = self.recipe_specific_options.get('issue')
if d and isinstance(d, str):
url = 'https://philosophynow.org/issues/' + d
soup = self.index_to_soup(url)
div = soup.find('div', attrs={'id': 'issue_contents_cover_div'})
cov_url = div.find('img', src=True)['src']
self.cover_url = 'https://philosophynow.org' + cov_url
soup = self.index_to_soup('https://philosophynow.org' + url)
self.timefmt = ' [' + self.tag_to_string(soup.find('h1')) + ']'
feeds = OrderedDict()

View File

@ -27,6 +27,20 @@ class Politico(BasicNewsRecipe):
encoding = 'UTF-8'
language = 'en'
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
remove_empty_feeds = True
ignore_duplicate_articles = ['url']

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import time
from datetime import datetime, timedelta

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
rt.com
'''
@ -26,6 +28,20 @@ class RT_eng(BasicNewsRecipe):
remove_attributes = ['height', 'width', 'style']
publication_type = 'newsportal'
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
extra_css = '''
img {display:block; margin:0 auto;}
em { color:#202020; }

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
https://sciencex.com/
'''
@ -26,6 +28,20 @@ class scix(BasicNewsRecipe):
.article__info, .article-byline, .article-main__more, .d-print-block {font-size:small; color:#404040;}
'''
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
resolve_internal_links = True
remove_empty_feeds = True

View File

@ -59,16 +59,29 @@ class ScientificAmerican(BasicNewsRecipe):
br.submit()
return br
recipe_specific_options = {
'issue_url': {
'short': 'The issue URL ',
'long': (
'For example, https://www.scientificamerican.com/issue/sa/2024/07-01/'
'\nYou can also download special-editions, physics, health, mind magazines by pasting the URL here.'
)
}
}
def parse_index(self):
# Get the cover, date and issue URL
d = self.recipe_specific_options.get('issue_url')
if d and isinstance(d, str):
issue = d
else:
fp_soup = self.index_to_soup("https://www.scientificamerican.com")
curr_issue_link = fp_soup.find(**prefixed_classes('latest_issue_links-'))
if not curr_issue_link:
self.abort_recipe_processing("Unable to find issue link")
issue_url = 'https://www.scientificamerican.com' + curr_issue_link.a["href"]
# for past editions https://www.scientificamerican.com/archive/issues/
# issue_url = 'https://www.scientificamerican.com/issue/sa/2024/01-01/'
soup = self.index_to_soup(issue_url)
issue = 'https://www.scientificamerican.com' + curr_issue_link.a["href"]
soup = self.index_to_soup(issue)
script = soup.find("script", id="__DATA__")
if not script:
self.abort_recipe_processing("Unable to find script")

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe, classes
@ -56,8 +58,19 @@ class spectator(BasicNewsRecipe):
]
return br
recipe_specific_options = {
'date': {
'short': 'The date of the edition to download (DD-MM-YYYY format)',
'long': 'For example, 20-07-2024'
}
}
def parse_index(self):
soup = self.index_to_soup('https://www.spectator.co.uk/magazine')
index = 'https://www.spectator.co.uk/magazine'
d = self.recipe_specific_options.get('date')
if d and isinstance(d, str):
index = index + '/' + d + '/'
soup = self.index_to_soup(index)
self.cover_url = soup.find(**classes(
'magazine-header__container')).img['src'].split('?')[0]
issue = self.tag_to_string(soup.find(**classes(

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from datetime import datetime
from calibre.web.feeds.news import BasicNewsRecipe, classes
@ -29,7 +31,16 @@ class TheWeek(BasicNewsRecipe):
.article-info { font-size:small; }
'''
recipe_specific_options = {
'date': {
'short': 'The date of the edition to download (YYYY.MM.DD format)',
'long': 'For example, 2024.06.30'
}
}
def get_cover_url(self):
d = self.recipe_specific_options.get('date')
if not (d and isinstance(d, str)):
soup = self.index_to_soup(
'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/'
)
@ -39,7 +50,13 @@ class TheWeek(BasicNewsRecipe):
return citem['content']
def parse_index(self):
soup = self.index_to_soup('https://www.theweek.in/theweek.html')
issue = 'https://www.theweek.in/theweek.html'
d = self.recipe_specific_options.get('date')
if d and isinstance(d, str):
issue = 'https://www.theweek.in/theweek.' + d + '.html'
soup = self.index_to_soup(issue)
ans = []
d = datetime.today()

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import re

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
@ -28,6 +30,20 @@ class TheWashingtonPost(BasicNewsRecipe):
publication_type = 'newspaper'
remove_attributes = ['style', 'width', 'height']
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
extra_css = '''
.img { text-align:center; font-size:small; }
.auth { font-weight:bold; font-size:small; }

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
https://www.world-archaeology.com
'''

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import time
from datetime import datetime, timedelta

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import time
from datetime import datetime, timedelta

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import time
from datetime import datetime, timedelta
@ -38,7 +40,21 @@ class WSJ(BasicNewsRecipe):
resolve_internal_links = True
ignore_duplicate_articles = {'url', 'title'}
remove_empty_feeds = True
oldest_article = 1 # days
oldest_article = 1.2 # days
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
extra_css = '''
#subhed, em { font-style:italic; color:#202020; }