From a655f3cfac8ac470ec4198784cf87a4bf764946c Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Mon, 26 Jun 2023 00:59:52 +0530
Subject: [PATCH 1/3] Bloomberg recipes update
increased delay.
---
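Note: delay and oldest_article are standard BasicNewsRecipe attributes — delay is the pause, in seconds, that calibre inserts between consecutive downloads, and oldest_article limits feed items by age in days (fractional values work, as the 1.25 used later in this series shows). A minimal sketch of how they sit in a recipe; the class name and feed URL are placeholders, not part of this patch:

    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleRecipe(BasicNewsRecipe):
        title = 'Example'
        oldest_article = 1.25   # days; only items newer than this are kept
        delay = 5               # seconds between fetches, to avoid rate limiting
        max_articles_per_feed = 50
        feeds = [('News', 'https://example.com/rss')]  # placeholder feed

Raising delay slows the overall fetch but makes the recipe much less likely to trip Bloomberg's rate limiting; the Google News queries used below already restrict results with when:27h (roughly the last 27 hours), which is presumably why oldest_article can be dropped from bloomberg.recipe.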
recipes/bloomberg-business-week.recipe | 8 ++++----
recipes/bloomberg.recipe | 10 +++++-----
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/recipes/bloomberg-business-week.recipe b/recipes/bloomberg-business-week.recipe
index 763cc0185a..70dcd76418 100644
--- a/recipes/bloomberg-business-week.recipe
+++ b/recipes/bloomberg-business-week.recipe
@@ -14,7 +14,7 @@ class Bloomberg(BasicNewsRecipe):
ignore_duplicate_articles = {'url'}
resolve_internal_links = True
masthead_url = 'https://assets.bwbx.io/s3/javelin/public/hub/images/BW-Logo-Black-cc9035fbb3.svg'
- delay = 2
+ delay = 3 # seconds
extra_css = '''
#auth {font-size:small; font-weight:bold;}
#time {font-size:small;}
@@ -94,7 +94,7 @@ class Bloomberg(BasicNewsRecipe):
subhead = '<div id="subhead">' + data['summary'] + '</div>'
html = '<html><body>' + subhead + '<div>' + body + '</div></body></html>'
return html
diff --git a/recipes/bloomberg.recipe b/recipes/bloomberg.recipe
index ef8fee5e8c..ec72c73443 100644
--- a/recipes/bloomberg.recipe
+++ b/recipes/bloomberg.recipe
@@ -13,8 +13,8 @@ class Bloomberg(BasicNewsRecipe):
remove_attributes = ['style', 'height', 'width']
ignore_duplicate_articles = {'url', 'title'}
resolve_internal_links = True
- oldest_article = 2 # days
- delay = 1.5
+
+ delay = 5 # seconds
extra_css = '''
#auth {font-size:small; font-weight:bold;}
@@ -53,11 +53,11 @@ class Bloomberg(BasicNewsRecipe):
feeds = [
('Features',
'https://news.google.com/rss/search?q=when:27h+allinurl:bloomberg.com%2Fnews%2Ffeatures%2F&hl=en-US&gl=US&ceid=US:en'),
- ('News',
- 'https://news.google.com/rss/search?q=when:27h+allinurl:bloomberg.com%2Fnews%2Farticles%2F&hl=en-US&gl=US&ceid=US:en'),
('Opinion', 'https://news.google.com/rss/search?q=when:27h+allinurl:bloomberg.com%2Fopinion%2F&hl=en-US&gl=US&ceid=US:en'),
('Newsletters',
'https://news.google.com/rss/search?q=when:27h+allinurl:bloomberg.com%2Fnews%2Fnewsletters%2F&hl=en-US&gl=US&ceid=US:en'),
+ ('News',
+ 'https://news.google.com/rss/search?q=when:27h+allinurl:bloomberg.com%2Fnews%2Farticles%2F&hl=en-US&gl=US&ceid=US:en'),
('Others', 'https://news.google.com/rss/search?q=when:27h+allinurl:bloomberg.com&hl=en-US&gl=US&ceid=US:en')
]
@@ -84,7 +84,7 @@ class Bloomberg(BasicNewsRecipe):
subhead = '<div id="subhead">' + data['abstract'][0] + '<br>' + data['abstract'][1] + '</div>'
else:
if 'summary' in data:
- subhead = '<div id="subhead">' + data['summary'] + '</div>'
+ subhead = '<div id="subhead"></div>'
if 'byline' in data:
if data['byline'] is not None:
From 0c6f9949cd3e4ecdc42ccc361bded44f3c2e32eb Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Mon, 26 Jun 2023 01:18:59 +0530
Subject: [PATCH 2/3] ...
---
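Note: extra_css in a calibre recipe is inserted verbatim into a style tag of every generated article, so the new .chart selector only has an effect if Bloomberg's markup (or the recipe's own preprocessing) actually emits elements carrying that class — an assumption here, not something visible in the patch. A minimal sketch of the mechanism:

    from calibre.web.feeds.news import BasicNewsRecipe

    class CssDemo(BasicNewsRecipe):
        title = 'extra_css demo'  # placeholder recipe, not part of this patch
        # same rule this patch adds; the ids/classes must match the HTML the
        # recipe actually produces
        extra_css = '#time, .chart {font-size:small;}'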
recipes/bloomberg-business-week.recipe | 2 +-
recipes/bloomberg.recipe | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/recipes/bloomberg-business-week.recipe b/recipes/bloomberg-business-week.recipe
index 70dcd76418..941604dfb0 100644
--- a/recipes/bloomberg-business-week.recipe
+++ b/recipes/bloomberg-business-week.recipe
@@ -17,7 +17,7 @@ class Bloomberg(BasicNewsRecipe):
delay = 3 # seconds
extra_css = '''
#auth {font-size:small; font-weight:bold;}
- #time {font-size:small;}
+ #time, .chart {font-size:small;}
#subhead {font-style:italic; color:#404040;}
.news-figure-caption-text, #cap {font-size:small; text-align:center;}
.news-figure-credit {font-size:small; text-align:center; color:#202020;}
diff --git a/recipes/bloomberg.recipe b/recipes/bloomberg.recipe
index ec72c73443..8ac2aca586 100644
--- a/recipes/bloomberg.recipe
+++ b/recipes/bloomberg.recipe
@@ -18,7 +18,7 @@ class Bloomberg(BasicNewsRecipe):
extra_css = '''
#auth {font-size:small; font-weight:bold;}
- #time {font-size:small;}
+ #time, .chart {font-size:small;}
#subhead {font-style:italic; color:#404040;}
#cat {font-size:small; color:gray;}
.news-figure-caption-text, #cap {font-size:small; text-align:center;}
From 3865197e1b810f55729598fe994d7fc3b47b4014 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Mon, 26 Jun 2023 01:29:55 +0530
Subject: [PATCH 3/3] Update google_news.recipe
---
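Note: this patch routes every Google News entry through calibre's obfuscated-article hook. When articles_are_obfuscated is True, calibre calls get_obfuscated_article(url) for each feed item and expects back the path of a local file containing the article HTML, which is then processed as if it had been downloaded normally. Below is a stripped-down sketch of that contract only — it omits the redirect handling and the video/podcast filtering the real change adds, and the class name is a placeholder:

    from calibre.ptempfile import PersistentTemporaryFile
    from calibre.web.feeds.news import BasicNewsRecipe

    class ObfuscationDemo(BasicNewsRecipe):
        title = 'Obfuscation hook demo'   # placeholder, not part of this patch
        articles_are_obfuscated = True

        def get_obfuscated_article(self, url):
            raw = self.get_browser().open(url).read()  # fetch whatever the feed item links to
            pt = PersistentTemporaryFile('.html')      # must outlive this call
            pt.write(raw)
            pt.close()
            return pt.name                             # calibre reads and parses this file

In the actual change, the Google News interstitial page is fetched first, its first outbound link is taken as the real article URL, and links pointing at video, media or podcast sections are aborted before download.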
recipes/google_news.recipe | 29 +++++++++++++++++++++++++++--
1 file changed, 27 insertions(+), 2 deletions(-)
diff --git a/recipes/google_news.recipe b/recipes/google_news.recipe
index 042810743b..42c4cc666a 100644
--- a/recipes/google_news.recipe
+++ b/recipes/google_news.recipe
@@ -2,6 +2,7 @@
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ptempfile import PersistentTemporaryFile
import json
# a search topic, filled into the string below. You can change this to anything Google News should be searched for...
@@ -16,9 +17,9 @@ class google_news_de(BasicNewsRecipe):
title = 'Google News'
cover_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/da/Google_News_icon.svg/500px-Google_News_icon.svg.png'
# Author
- __author__ = 'Volker Heggemann, VoHe'
+ __author__ = 'Volker Heggemann, VoHe, unkn0wn'
# oldest article to download (in days) ---- can be edited by the user
- oldest_article = 2
+ oldest_article = 1.25
# self-explanatory ---- can be edited by the user
max_articles_per_feed = 200
# speeds up the download on fast computers, but be careful (I tested a maximum of 20)
@@ -36,6 +37,30 @@ class google_news_de(BasicNewsRecipe):
# remove the rubbish (in ebook)
auto_cleanup = True
+
+ articles_are_obfuscated = True
+
+ def get_obfuscated_article(self, url):
+ br = self.get_browser()
+ try:
+ br.open(url)
+ except Exception as e:
+ url = e.hdrs.get('location')
+ soup = self.index_to_soup(url)
+ link = soup.find('a', href=True)
+ skip_sections =[ # add sections you want to skip
+ '/video/', '/videos/', '/media/', 'podcast-'
+ ]
+ if any(x in link['href'] for x in skip_sections):
+ self.log('Aborting Article ', link['href'])
+ self.abort_article('skipping video links')
+
+ self.log('Found link: ', link['href'])
+ html = br.open(link['href']).read()
+ pt = PersistentTemporaryFile('.html')
+ pt.write(html)
+ pt.close()
+ return pt.name
# now the content description and URL follows
# feel free to add or remove what you need ---- can be edited by the user