This commit is contained in:
Kovid Goyal 2024-07-22 18:46:59 +05:30
commit c9fefbcd54
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
37 changed files with 276 additions and 28 deletions

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
https://ancientegyptmagazine.com
'''

View File

@ -234,6 +234,20 @@ class BBCNews(BasicNewsRecipe):
#
oldest_article = 1.5
# Options declared by this recipe. The single 'days' entry carries a short
# and a long description plus a default.
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        # The default is the string form of the oldest_article value in scope here.
        'default': str(oldest_article)
    }
}
def __init__(self, *args, **kwargs):
    '''
    Initialise the recipe and apply the 'days' option when one is present.

    All arguments are forwarded unchanged to BasicNewsRecipe.__init__.
    Afterwards, a non-empty string stored under 'days' in
    self.recipe_specific_options is converted with float() and assigned
    to self.oldest_article; any other value leaves oldest_article alone.
    '''
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days_value = self.recipe_specific_options.get('days')
    # Only a non-empty string counts as a supplied value.
    # NOTE(review): presumably the base initialiser stores chosen option
    # values as strings -- confirm against BasicNewsRecipe.
    if not days_value or not isinstance(days_value, str):
        return
    self.oldest_article = float(days_value)
# Number of simultaneous downloads. 20 is consistently working fine on the
# BBC News feeds with no problems. Speeds things up from the default of 5.
# If you have a lot of feeds and/or have increased oldest_article above 2

View File

@ -1,3 +1,6 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import time
from datetime import datetime
@ -58,7 +61,7 @@ class Bloomberg(BasicNewsRecipe):
remove_empty_feeds = True
recipe_specific_options = {
'date': {
'issue': {
'short': 'The ID of the edition to download (YY_XX format)',
'long': 'For example, 24_17\nHint: Edition ID can be found at the end of its URL'
}
@ -86,7 +89,7 @@ class Bloomberg(BasicNewsRecipe):
inx = 'https://cdn-mobapi.bloomberg.com'
sec = self.index_to_soup(inx + '/wssmobile/v1/bw/news/list?limit=1', raw=True)
id = json.loads(sec)['magazines'][0]['id']
past_edition = self.recipe_specific_options.get('date')
past_edition = self.recipe_specific_options.get('issue')
if past_edition and isinstance(past_edition, str):
id = past_edition
edit = self.index_to_soup(inx + '/wssmobile/v1/bw/news/week/' + id, raw=True)

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
from datetime import datetime

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import re
from datetime import date, datetime, timedelta

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import re
from datetime import datetime, timedelta
from urllib.parse import quote

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
http://www.elcorreo.com/
'''
@ -22,6 +24,20 @@ class elcorreo(BasicNewsRecipe):
max_articles_per_feed = 25 # articles
compress_news_images = True
# Options declared by this recipe. The single 'days' entry carries a short
# and a long description plus a default.
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        # The default is the string form of the oldest_article value in scope here.
        'default': str(oldest_article)
    }
}
def __init__(self, *args, **kwargs):
    '''
    Initialise the recipe and apply the 'days' option when one is present.

    All arguments are forwarded unchanged to BasicNewsRecipe.__init__.
    Afterwards, a non-empty string stored under 'days' in
    self.recipe_specific_options is converted with float() and assigned
    to self.oldest_article; any other value leaves oldest_article alone.
    '''
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days_value = self.recipe_specific_options.get('days')
    # Only a non-empty string counts as a supplied value.
    # NOTE(review): presumably the base initialiser stores chosen option
    # values as strings -- confirm against BasicNewsRecipe.
    if not days_value or not isinstance(days_value, str):
        return
    self.oldest_article = float(days_value)
extra_css = '''
.v-mdl-ath__inf, .v-mdl-ath__p--2, .v-mdl-ath__p {font-size:small; color:#404040;}
.v-fc, .v-a-fig { text-align:center; font-size:small; }

View File

@ -27,6 +27,20 @@ class ElPais(BasicNewsRecipe):
oldest_article = 2.1
max_articles_per_feed = 25
# Options declared by this recipe. The single 'days' entry carries a short
# and a long description plus a default.
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        # The default is the string form of the oldest_article value in scope here.
        'default': str(oldest_article)
    }
}
def __init__(self, *args, **kwargs):
    '''
    Initialise the recipe and apply the 'days' option when one is present.

    All arguments are forwarded unchanged to BasicNewsRecipe.__init__.
    Afterwards, a non-empty string stored under 'days' in
    self.recipe_specific_options is converted with float() and assigned
    to self.oldest_article; any other value leaves oldest_article alone.
    '''
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days_value = self.recipe_specific_options.get('days')
    # Only a non-empty string counts as a supplied value.
    # NOTE(review): presumably the base initialiser stores chosen option
    # values as strings -- confirm against BasicNewsRecipe.
    if not days_value or not isinstance(days_value, str):
        return
    self.oldest_article = float(days_value)
use_embedded_content = False
recursion = 5

View File

@ -19,6 +19,20 @@ class EpochTimes(BasicNewsRecipe):
masthead_url = 'https://epochtimes-ny.newsmemory.com/eeLayout/epochtimes/1.0.a/images/webapp/banner.png'
extra_css = '.post_caption, .text-sm, .uppercase {font-size:small;}'
# Options declared by this recipe. The single 'days' entry carries a short
# and a long description plus a default.
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        # The default is the string form of the oldest_article value in scope here.
        'default': str(oldest_article)
    }
}
def __init__(self, *args, **kwargs):
    '''
    Initialise the recipe and apply the 'days' option when one is present.

    All arguments are forwarded unchanged to BasicNewsRecipe.__init__.
    Afterwards, a non-empty string stored under 'days' in
    self.recipe_specific_options is converted with float() and assigned
    to self.oldest_article; any other value leaves oldest_article alone.
    '''
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days_value = self.recipe_specific_options.get('days')
    # Only a non-empty string counts as a supplied value.
    # NOTE(review): presumably the base initialiser stores chosen option
    # values as strings -- confirm against BasicNewsRecipe.
    if not days_value or not isinstance(days_value, str):
        return
    self.oldest_article = float(days_value)
keep_only_tags = [
dict(name='article')
]

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import re
from urllib.parse import quote
@ -31,6 +33,20 @@ class ft(BasicNewsRecipe):
.o-topper__topic { font-size:small; color:#5c5c5c; }
'''
# Options declared by this recipe. The single 'days' entry carries a short
# and a long description plus a default.
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        # The default is the string form of the oldest_article value in scope here.
        'default': str(oldest_article)
    }
}
def __init__(self, *args, **kwargs):
    '''
    Initialise the recipe and apply the 'days' option when one is present.

    All arguments are forwarded unchanged to BasicNewsRecipe.__init__.
    Afterwards, a non-empty string stored under 'days' in
    self.recipe_specific_options is converted with float() and assigned
    to self.oldest_article; any other value leaves oldest_article alone.
    '''
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days_value = self.recipe_specific_options.get('days')
    # Only a non-empty string counts as a supplied value.
    # NOTE(review): presumably the base initialiser stores chosen option
    # values as strings -- confirm against BasicNewsRecipe.
    if not days_value or not isinstance(days_value, str):
        return
    self.oldest_article = float(days_value)
keep_only_tags = [
classes(
'body_json o-topper__topic o-topper__headline o-topper__standfirst o-topper__visual article-info__time-byline main-image'

View File

@ -47,8 +47,20 @@ class ForeignPolicy(BasicNewsRecipe):
]
remove_tags_after = [classes('post-content-main')]
# Options declared by this recipe. The single 'issue' entry carries only
# descriptive text; no default is given.
recipe_specific_options = {
    # parse_index appends a supplied issue value to the magazine URL
    # as '/?issue_id=<value>'.
    'issue': {
        'short': 'Enter the Issue ID you want to download ',
        'long': 'For example, 411131563'
    }
}
def parse_index(self):
soup = self.index_to_soup('https://foreignpolicy.com/the-magazine')
issue_url = 'https://foreignpolicy.com/the-magazine'
d = self.recipe_specific_options.get('issue')
if d and isinstance(d, str):
issue_url = issue_url + '/?issue_id=' + d
soup = self.index_to_soup(issue_url)
img = soup.find('img', attrs={'src': lambda x: x and '-cover' in x})
if img:
self.cover_url = img['src'].split('?')[0] + '?w=800?quality=90'

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from datetime import datetime, timedelta, timezone
from calibre.utils.date import parse_date
@ -29,6 +31,20 @@ class GlobalTimes(BasicNewsRecipe):
blockquote, em {color:#202020;}
'''
# Options declared by this recipe. The single 'days' entry carries a short
# and a long description plus a default.
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        # The default is the string form of the oldest_article value in scope here.
        'default': str(oldest_article)
    }
}
def __init__(self, *args, **kwargs):
    '''
    Initialise the recipe and apply the 'days' option when one is present.

    All arguments are forwarded unchanged to BasicNewsRecipe.__init__.
    Afterwards, a non-empty string stored under 'days' in
    self.recipe_specific_options is converted with float() and assigned
    to self.oldest_article; any other value leaves oldest_article alone.
    '''
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days_value = self.recipe_specific_options.get('days')
    # Only a non-empty string counts as a supplied value.
    # NOTE(review): presumably the base initialiser stores chosen option
    # values as strings -- confirm against BasicNewsRecipe.
    if not days_value or not isinstance(days_value, str):
        return
    self.oldest_article = float(days_value)
keep_only_tags = [
classes(
'article_column article_title author_share_left article_content'

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
harpers.org
'''

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
from calibre.web.feeds.news import BasicNewsRecipe
@ -37,6 +39,20 @@ class himal(BasicNewsRecipe):
resolve_internal_links = True
oldest_article = 30 # days
# Options declared by this recipe. The single 'days' entry carries a short
# and a long description plus a default.
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        # The default is the string form of the oldest_article value in scope here.
        'default': str(oldest_article)
    }
}
def __init__(self, *args, **kwargs):
    '''
    Initialise the recipe and apply the 'days' option when one is present.

    All arguments are forwarded unchanged to BasicNewsRecipe.__init__.
    Afterwards, a non-empty string stored under 'days' in
    self.recipe_specific_options is converted with float() and assigned
    to self.oldest_article; any other value leaves oldest_article alone.
    '''
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days_value = self.recipe_specific_options.get('days')
    # Only a non-empty string counts as a supplied value.
    # NOTE(review): presumably the base initialiser stores chosen option
    # values as strings -- confirm against BasicNewsRecipe.
    if not days_value or not isinstance(days_value, str):
        return
    self.oldest_article = float(days_value)
extra_css = '''
.cap, .auth {font-size:small;}
em, blockquote {color:#404040;}

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import re
from collections import defaultdict
@ -32,7 +34,7 @@ class TheHindu(BasicNewsRecipe):
recipe_specific_options = {
'location': {
'short': 'The name of the local edition',
'long': 'If The Hindu is available in your local town/city,\nset this to your location, for example, hyderabad',
'long': 'If The Hindu is available in your local town/city,\nset this to your location, for example, hyderabad\nAvailable Editions: bengaluru, chennai, coimbatore, delhi, erode, hyderabad, international, kochi, kolkata,\nkozhikode, madurai, mangalore, mumbai, thiruvananthapuram, tiruchirapalli, vijayawada, visakhapatnam',
'default': 'international'
},
'date': {

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from datetime import date
from calibre.web.feeds.news import BasicNewsRecipe, classes
@ -24,6 +26,20 @@ class TheHindufeeds(BasicNewsRecipe):
.italic {font-style:italic; color:#202020;}
'''
# Options declared by this recipe. The single 'days' entry carries a short
# and a long description plus a default.
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        # The default is the string form of the oldest_article value in scope here.
        'default': str(oldest_article)
    }
}
def __init__(self, *args, **kwargs):
    '''
    Initialise the recipe and apply the 'days' option when one is present.

    All arguments are forwarded unchanged to BasicNewsRecipe.__init__.
    Afterwards, a non-empty string stored under 'days' in
    self.recipe_specific_options is converted with float() and assigned
    to self.oldest_article; any other value leaves oldest_article alone.
    '''
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days_value = self.recipe_specific_options.get('days')
    # Only a non-empty string counts as a supplied value.
    # NOTE(review): presumably the base initialiser stores chosen option
    # values as strings -- confirm against BasicNewsRecipe.
    if not days_value or not isinstance(days_value, str):
        return
    self.oldest_article = float(days_value)
ignore_duplicate_articles = {'url'}
keep_only_tags = [

View File

@ -1,20 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
from collections import defaultdict
from datetime import date
from calibre.web.feeds.news import BasicNewsRecipe
# figure out your local_edition from the fetch news log of this recipe
local_edition = 'Delhi'
# Edition date as a DD/MM/YYYY string, taken from the current day.
today = date.today().strftime('%d/%m/%Y')
# for older edition, change today
# today = '22/12/2023'
# Split the string back into numbers and build a date object from them.
day, month, year = (int(x) for x in today.split('/'))
dt = date(year, month, day)
# Replace each '/' with its percent-encoded form '%2F'.
today = today.replace('/', '%2F')
# Base URL that the request paths are appended to.
index = 'https://epaper.hindustantimes.com'
@ -23,28 +14,49 @@ class ht(BasicNewsRecipe):
language = 'en_IN'
__author__ = 'unkn0wn'
masthead_url = 'https://www.htmedia.in/wp-content/uploads/2020/08/HT-dot-com-logo-product.png'
timefmt = ' [' + dt.strftime('%b %d, %Y') + ']'
description = 'Articles from the Hindustan Times epaper, digital edition'
encoding = 'utf-8'
delay = 1
ignore_duplicate_articles = {'title'}
def __init__(self, *args, **kwargs):
    '''
    Initialise the recipe and, for kindle output profiles, date the title.

    All arguments are forwarded unchanged to BasicNewsRecipe.__init__.
    When the output profile's short name starts with 'kindle', the title
    becomes 'HT Print Edition ' followed by dt formatted as '%b %d, %Y'.
    '''
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    profile_name = self.output_profile.short_name
    if not profile_name.startswith('kindle'):
        return
    self.title = 'HT Print Edition ' + dt.strftime('%b %d, %Y')
extra_css = '''
.cap { text-align:center; font-size:small; }
img { display:block; margin:0 auto; }
'''
def parse_index(self):
# Options declared by this recipe: which local edition to fetch and,
# optionally, which date's edition.
recipe_specific_options = {
    # parse_index uses a supplied value as local_edition and compares it
    # with each edition's 'EditionName'.
    'location': {
        'short': 'The name of the local edition',
        'long': 'If The Hindustan Times is available in your local town/city,\nset this to your location, for example, Delhi\nAvailable Editions: Delhi, Mumbai, Chandigarh, Lucknow, Patna, Bengaluru, Pune, Gurgaon, Ludhiana, Rajasthan, Amritsar,\nEast UP, Haryana, Jammu, Navi Mumbai, Noida, Punjab, Ranchi, Thane, Uttarakhand, West UP',
        'default': 'Delhi'
    },
    # No default here: parse_index falls back to the current day when no
    # date string is supplied.
    'date': {
        'short': 'The date of the edition to download (DD/MM/YYYY format)',
        'long': 'For example, 22/12/2023'
    }
}
def parse_index(self):
self.log(
'\n***\nif this recipe fails, report it on: '
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
)
local_edition = 'Delhi'
d = self.recipe_specific_options.get('location')
if d and isinstance(d, str):
local_edition = d
today = date.today().strftime('%d/%m/%Y')
p = self.recipe_specific_options.get('date')
if p and isinstance(p, str):
today = p
self.timefmt = ' [%s]' % today
day, month, year = (int(x) for x in today.split('/'))
dt = date(year, month, day)
today = today.replace('/', '%2F')
get_edition = index + '/Home/GetEditionSupplementHierarchy?EditionDate=' + today
edi_data = json.loads(self.index_to_soup(get_edition, raw=True))
@ -56,7 +68,7 @@ class ht(BasicNewsRecipe):
if edi['EditionName'] == local_edition:
edi_name = edi['EditionName']
edi_id = str(edi['EditionId'])
self.log('Downloading', edi_name, 'Edition')
self.log('Downloading', edi_name, 'Edition', self.timefmt)
url = index + '/Home/GetAllpages?editionid=' + edi_id + '&editiondate=' + today
main_data = json.loads(self.index_to_soup(url, raw=True))

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe

View File

@ -36,6 +36,20 @@ class LeMonde(BasicNewsRecipe):
'publisher': publisher
}
# Options declared by this recipe. The single 'days' entry carries a short
# and a long description plus a default.
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        # The default is the string form of the oldest_article value in scope here.
        'default': str(oldest_article)
    }
}
def __init__(self, *args, **kwargs):
    '''
    Initialise the recipe and apply the 'days' option when one is present.

    All arguments are forwarded unchanged to BasicNewsRecipe.__init__.
    Afterwards, a non-empty string stored under 'days' in
    self.recipe_specific_options is converted with float() and assigned
    to self.oldest_article; any other value leaves oldest_article alone.
    '''
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days_value = self.recipe_specific_options.get('days')
    # Only a non-empty string counts as a supplied value.
    # NOTE(review): presumably the base initialiser stores chosen option
    # values as strings -- confirm against BasicNewsRecipe.
    if not days_value or not isinstance(days_value, str):
        return
    self.oldest_article = float(days_value)
masthead_url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/5/54/Le_monde_logo.svg/800px-Le_monde_logo.svg.png'
feeds = [

View File

@ -1,3 +1,4 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals
@ -33,6 +34,20 @@ class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe):
timefmt = ' [%d %b %Y]'
no_stylesheets = True
# Options declared by this recipe. The single 'days' entry carries a short
# and a long description plus a default.
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        # The default is the string form of the oldest_article value in scope here.
        'default': str(oldest_article)
    }
}
def __init__(self, *args, **kwargs):
    '''
    Initialise the recipe and apply the 'days' option when one is present.

    All arguments are forwarded unchanged to BasicNewsRecipe.__init__.
    Afterwards, a non-empty string stored under 'days' in
    self.recipe_specific_options is converted with float() and assigned
    to self.oldest_article; any other value leaves oldest_article alone.
    '''
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days_value = self.recipe_specific_options.get('days')
    # Only a non-empty string counts as a supplied value.
    # NOTE(review): presumably the base initialiser stores chosen option
    # values as strings -- confirm against BasicNewsRecipe.
    if not days_value or not isinstance(days_value, str):
        return
    self.oldest_article = float(days_value)
feeds = [(u'Blogs', u'http://blog.mondediplo.net/spip.php?page=backend'),
(u'Archives', u'http://www.monde-diplomatique.fr/rss/')]

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
liberation.fr
'''

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import re
from datetime import date
@ -35,10 +37,6 @@ class LiveMint(BasicNewsRecipe):
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
if self.output_profile.short_name.startswith('kindle'):
self.title = 'Mint | ' + date.today().strftime('%b %d, %Y')
if is_saturday:
self.title = 'Mint Lounge | ' + date.today().strftime('%b %d, %Y')
def get_cover_url(self):
today = date.today().strftime('%d/%m/%Y')
@ -51,7 +49,7 @@ class LiveMint(BasicNewsRecipe):
return cov['HighResolution']
if is_saturday:
self.title = 'Mint Lounge'
masthead_url = 'https://lifestyle.livemint.com/mintlounge/static-images/lounge-logo.svg'
oldest_article = 6.5 # days

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
https://www.military-history.org/
'''

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
https://minervamagazine.com/
'''

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe, classes

View File

@ -27,6 +27,20 @@ class Politico(BasicNewsRecipe):
encoding = 'UTF-8'
language = 'en'
# Options declared by this recipe. The single 'days' entry carries a short
# and a long description plus a default.
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        # The default is the string form of the oldest_article value in scope here.
        'default': str(oldest_article)
    }
}
def __init__(self, *args, **kwargs):
    '''
    Initialise the recipe and apply the 'days' option when one is present.

    All arguments are forwarded unchanged to BasicNewsRecipe.__init__.
    Afterwards, a non-empty string stored under 'days' in
    self.recipe_specific_options is converted with float() and assigned
    to self.oldest_article; any other value leaves oldest_article alone.
    '''
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days_value = self.recipe_specific_options.get('days')
    # Only a non-empty string counts as a supplied value.
    # NOTE(review): presumably the base initialiser stores chosen option
    # values as strings -- confirm against BasicNewsRecipe.
    if not days_value or not isinstance(days_value, str):
        return
    self.oldest_article = float(days_value)
remove_empty_feeds = True
ignore_duplicate_articles = ['url']

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import time
from datetime import datetime, timedelta

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
rt.com
'''

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe, classes

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from datetime import datetime
from calibre.web.feeds.news import BasicNewsRecipe, classes

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import re

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
@ -28,6 +30,20 @@ class TheWashingtonPost(BasicNewsRecipe):
publication_type = 'newspaper'
remove_attributes = ['style', 'width', 'height']
# Options declared by this recipe. The single 'days' entry carries a short
# and a long description plus a default.
recipe_specific_options = {
    'days': {
        'short': 'Oldest article to download from this news source. In days ',
        'long': 'For example, 0.5, gives you articles from the past 12 hours',
        # The default is the string form of the oldest_article value in scope here.
        'default': str(oldest_article)
    }
}
def __init__(self, *args, **kwargs):
    '''
    Initialise the recipe and apply the 'days' option when one is present.

    All arguments are forwarded unchanged to BasicNewsRecipe.__init__.
    Afterwards, a non-empty string stored under 'days' in
    self.recipe_specific_options is converted with float() and assigned
    to self.oldest_article; any other value leaves oldest_article alone.
    '''
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days_value = self.recipe_specific_options.get('days')
    # Only a non-empty string counts as a supplied value.
    # NOTE(review): presumably the base initialiser stores chosen option
    # values as strings -- confirm against BasicNewsRecipe.
    if not days_value or not isinstance(days_value, str):
        return
    self.oldest_article = float(days_value)
extra_css = '''
.img { text-align:center; font-size:small; }
.auth { font-weight:bold; font-size:small; }

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
https://www.world-archaeology.com
'''

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import time
from datetime import datetime, timedelta

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import time
from datetime import datetime, timedelta

View File

@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
import time
from datetime import datetime, timedelta