From 15dd52d0f1a75533839f4e9443679a3107dc4942 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Wed, 28 Feb 2024 09:53:44 +0530
Subject: [PATCH 1/3] Update the_week_magazine_free.recipe
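
The feeds in this recipe are Google News RSS queries, so each entry links to a
news.google.com redirect page rather than to theweek.com itself.
get_obfuscated_article() opens that page, treats its first anchor as the real
article URL, skips video sections, stores the link in web_url, and downloads
the article HTML directly; populate_article_metadata() later rewrites
article.url to that resolved link. A rough standalone sketch of the resolution
step follows: urllib and a regex stand in for calibre's browser and soup, and
the assumption that the first anchor on the redirect page points at the
article is taken from the recipe itself.

# Illustration only: resolve a Google News RSS item link the way
# get_obfuscated_article() does with soup.a['href'].
import re
from urllib.request import Request, urlopen

SKIP_SECTIONS = ('/video/', '/videos/', '/multimedia/')

def resolve_google_news_link(url):
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    html = urlopen(req).read().decode('utf-8', 'replace')
    m = re.search(r'<a[^>]+href="([^"]+)"', html)   # first anchor on the page
    if not m:
        return None
    link = m.group(1)
    if any(x in link for x in SKIP_SECTIONS):       # same skip list as the recipe
        return None
    return link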
---
 recipes/the_week_magazine_free.recipe | 105 +++++++++++++++++++++-----
 1 file changed, 86 insertions(+), 19 deletions(-)

diff --git a/recipes/the_week_magazine_free.recipe b/recipes/the_week_magazine_free.recipe
index 923cc239c5..c424635e93 100644
--- a/recipes/the_week_magazine_free.recipe
+++ b/recipes/the_week_magazine_free.recipe
@@ -1,27 +1,94 @@
-__license__ = 'GPL v3'
-__copyright__ = '2010, JOlo'
 '''
 www.theweek.com
 '''
-
-from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.web.feeds.news import BasicNewsRecipe, classes
+from urllib.parse import quote
 
 
 class TheWeek(BasicNewsRecipe):
-    title = 'TheWeek.com'
-    __author__ = 'Jim Olo'
-    description = "The best of the US and international media. Daily coverage of commentary and analysis of the day's events, as well as arts, entertainment, people and gossip, and political cartoons." # noqa
-    publisher = 'The Week Publications, Inc.'
-    masthead_url = 'http://test.theweek.com/images/logo_theweek.gif'
-    cover_url = masthead_url
-    category = 'news, politics, USA'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    no_stylesheets = True
+    title = 'The Week'
+    __author__ = 'unkn0wn'
+    description = (
+        'The Week is for readers who want to know what\'s going on in the world, without having to read '
+        'several daily newspapers or get wrapped up in the endless news cycle. For every important story, '
+        'our editors carefully select commentary from all sides of the debate and artfully stitch them together '
+        'into one concise read. By showing you every perspective, we enable you to form your own opinion.'
+    )
+    language = 'en_US'
     encoding = 'utf-8'
-    use_embedded_content = False
-    language = 'en'
-    auto_cleanup = True
-    feeds = [
-        (u'Latest articles', u'http://theweek.com/rss.xml'),
+    no_stylesheets = True
+    remove_javascript = True
+    remove_attributes = ['width', 'height', 'style']
+
+    ignore_duplicate_articles = {'title', 'url'}
+    remove_empty_feeds = True
+    resolve_internal_links = True
+    simultaneous_downloads = 1
+    web_url = ''
+
+    extra_css = '''
+        img {display:block; margin:0 auto;}
+        .caption__text--hero, .credit { font-size:small; text-align:center; }
+        .header__strapline, em, i { color:#202020; }
+        .article-type__breadcrumb { color:grey; }
+        .author-byline__author-text {font-size:small; }
+    '''
+
+    def get_cover_url(self):
+        import json
+        url = 'https://usmagazine.theweek.com/timelines.json'
+        data = json.loads(self.index_to_soup(url, raw=True))
+        for x in data['timelines'][:5]:
+            if '-cover-' in x['image']:
+                return 'https://usmagazine.theweek.com' + x['image'][1:]
+
+    articles_are_obfuscated = True
+
+    def get_obfuscated_article(self, url):
+        br = self.get_browser()
+        soup = self.index_to_soup(url)
+        link = soup.a['href']
+        skip_sections = [ # add sections you want to skip
+            '/video/', '/videos/', '/multimedia/',
+        ]
+        if any(x in link for x in skip_sections):
+            self.abort_article('skipping video links ' + link)
+        self.web_url = link
+        html = br.open(link).read()
+        return ({ 'data': html, 'url': link })
+
+    keep_only_tags = [
+        classes('article-type__breadcrumb header__title header__strapline image image--hero author-byline__author-text article__body')
     ]
+
+    remove_tags = [
+        dict(name='aside'),
+        classes(
+            'blueconic-article__wrapper ad-unit van_vid_carousel tag-links'
+        )
+    ]
+
+    def preprocess_html(self, soup):
+        for img in soup.findAll('img', attrs={'data-pin-media':True}):
+            img['src'] = img['data-pin-media'].replace('.jpg', '-768-80.jpg')
+        return soup
+
+    feeds = []
+    when = '168' # hours (7 days)
+    index = 'https://theweek.com/'
+    sections = [
+        'politics', 'news', 'cartoons', 'tech', 'science', 'health',
+        'culture-life', 'business', 'travel', 'arts-life', 'history'
+    ]
+    for sec in sections:
+        a = 'https://news.google.com/rss/search?q=when:{}h+allinurl:{}&hl=en-IN&gl=US&ceid=US:en'
+        feeds.append((sec.capitalize(), a.format(when, quote(index + sec, safe=''))))
+    feeds.append(('Others', a.format(when, quote(index, safe=''))))
+
+    def populate_article_metadata(self, article, soup, first):
+        article.title = article.title.replace(' - The Week', '')
+        desc = soup.find(**classes('header__strapline'))
+        if desc:
+            article.summary = self.tag_to_string(desc)
+            article.text_summary = article.summary
+        article.url = self.web_url

From 215513510fc15fa24c87a66fdfa31e8985febab6 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Wed, 28 Feb 2024 10:08:58 +0530
Subject: [PATCH 2/3] ...
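
This change derives the Google News search window from calibre's standard
oldest_article setting (when = oldest_article*24 hours) instead of a
hard-coded hour count, and folds the section path into a single URL-encoded
allinurl: value. For illustration, one Moneycontrol feed URL comes out roughly
as below; the values are examples, not settings added by the patch.

# Example only: assembling one Google News RSS query the way the recipe does.
from urllib.parse import quote

oldest_article = 1             # days, as set on the recipe
when = oldest_article * 24     # Google News expects when:<hours>h
index = 'https://www.moneycontrol.com/'
sec = 'markets'                # one entry from business_sections
url = ('https://news.google.com/rss/search?q=when:{}h+allinurl:{}'
       '&hl=en-IN&gl=IN&ceid=IN:en').format(
    when, quote(index + 'news/business/' + sec, safe=''))
print(url)
# https://news.google.com/rss/search?q=when:24h+allinurl:https%3A%2F%2Fwww.moneycontrol.com%2Fnews%2Fbusiness%2Fmarkets&hl=en-IN&gl=IN&ceid=IN:en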
---
 recipes/moneycontrol.recipe           | 13 +++++++------
 recipes/the_week_magazine_free.recipe |  3 ++-
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/recipes/moneycontrol.recipe b/recipes/moneycontrol.recipe
index 2563b5cf50..4fcc5c5760 100644
--- a/recipes/moneycontrol.recipe
+++ b/recipes/moneycontrol.recipe
@@ -16,6 +16,7 @@ class MoneyControlRecipe(BasicNewsRecipe):
     ignore_duplicate_articles = {'title', 'url'}
     remove_empty_feeds = True
     resolve_internal_links = True
+    oldest_article = 1 # days
 
     extra_css = '''
         img {display:block; margin:0 auto;}
@@ -65,7 +66,7 @@ class MoneyControlRecipe(BasicNewsRecipe):
 
     feeds = []
 
-    when = 27 # hours
+    when = oldest_article*24
     index = 'https://www.moneycontrol.com/'
 
     business_sections = [
@@ -73,12 +74,12 @@ class MoneyControlRecipe(BasicNewsRecipe):
         'personal-finance', 'commodities', 'trade', 'companies'
     ]
 
-    a = 'https://news.google.com/rss/search?q=when:{}h+allinurl:{}{}&hl=en-IN&gl=IN&ceid=IN:en'
+    a = 'https://news.google.com/rss/search?q=when:{}h+allinurl:{}&hl=en-IN&gl=IN&ceid=IN:en'
 
     for sec in business_sections:
         allinurl_a = index + 'news/business'
-        feeds.append((sec.capitalize(), a.format(when, quote(allinurl_a, safe=''), '%2F' + sec + '%2F')))
-    feeds.append(('Business' , a.format(when, quote(allinurl_a, safe=''), '')))
+        feeds.append((sec.capitalize(), a.format(when, quote(allinurl_a + '/' + sec, safe=''))))
+    feeds.append(('Business', a.format(when, quote(allinurl_a, safe=''))))
 
     news_sections = [
         'india', 'world', 'opinion', 'politics', 'technology', 'trends', 'lifestyle'
@@ -86,8 +87,8 @@ class MoneyControlRecipe(BasicNewsRecipe):
 
     for sec in news_sections:
         allinurl_b = index + 'news'
-        feeds.append((sec.capitalize(), a.format(when, quote(allinurl_b, safe=''), '%2F' + sec + '%2F')))
-    feeds.append(('News', a.format(when, quote(allinurl_b, safe=''), '')))
+        feeds.append((sec.capitalize(), a.format(when, quote(allinurl_b + '/' + sec, safe=''))))
+    feeds.append(('News', a.format(when, quote(allinurl_b, safe=''))))
 
     feeds.append(
         ('Others', 'https://news.google.com/rss/search?q=when:{}h+allinurl:{}&hl=en-IN&gl=IN&ceid=IN:en'.format(when, quote(index, safe='')))

diff --git a/recipes/the_week_magazine_free.recipe b/recipes/the_week_magazine_free.recipe
index c424635e93..db4a44acaa 100644
--- a/recipes/the_week_magazine_free.recipe
+++ b/recipes/the_week_magazine_free.recipe
@@ -24,6 +24,7 @@ class TheWeek(BasicNewsRecipe):
     remove_empty_feeds = True
     resolve_internal_links = True
     simultaneous_downloads = 1
+    oldest_article = 7 # days
     web_url = ''
 
     extra_css = '''
@@ -74,7 +75,7 @@ class TheWeek(BasicNewsRecipe):
         return soup
 
     feeds = []
-    when = '168' # hours (7 days)
+    when = oldest_article*24
     index = 'https://theweek.com/'
     sections = [
         'politics', 'news', 'cartoons', 'tech', 'science', 'health',

From 745409c1967db16028950d45a32d3992d5700c7c Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Wed, 28 Feb 2024 10:34:03 +0530
Subject: [PATCH 3/3] The Week UK
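
The new UK recipe is a near copy of the US one: the language code and the
magazine cover endpoint change (ukmagazine.theweek.com instead of
usmagazine.theweek.com), while the feeds, tag filtering and image handling are
shared. As a rough illustration, the cover lookup boils down to the sketch
below; the timelines.json layout and the leading character stripped from the
stored image path are taken on trust from the recipe code.

# Sketch only: pick the newest '-cover-' image from the UK magazine timeline,
# mirroring get_cover_url(). Endpoint and field names follow the recipe.
import json
from urllib.request import urlopen

def uk_cover_url():
    base = 'https://ukmagazine.theweek.com'
    data = json.loads(urlopen(base + '/timelines.json').read())
    for x in data['timelines'][:5]:        # only the most recent entries
        if '-cover-' in x['image']:
            return base + x['image'][1:]   # drop the path's first character, as the recipe does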
---
 recipes/icons/the_week_magazine_free.png | Bin 157 -> 286 bytes
 recipes/icons/the_week_uk.png            | Bin 0 -> 286 bytes
 recipes/the_week_uk.recipe               | 95 +++++++++++++++++++++++
 3 files changed, 95 insertions(+)
 create mode 100644 recipes/icons/the_week_uk.png
 create mode 100644 recipes/the_week_uk.recipe

diff --git a/recipes/icons/the_week_magazine_free.png b/recipes/icons/the_week_magazine_free.png
index 4fc029a27ff442c2159df3fa8d32eee043a2b876..f8d3c9013f4c04491ea6bd067e00f8c5dfe8c75c 100644
GIT binary patch
[base85-encoded PNG icon data (157 -> 286 bytes) omitted]

diff --git a/recipes/icons/the_week_uk.png b/recipes/icons/the_week_uk.png
new file mode 100644
index 0000000000000000000000000000000000000000..f8d3c9013f4c04491ea6bd067e00f8c5dfe8c75c
GIT binary patch
[base85-encoded PNG icon data (286 bytes, identical to the updated the_week_magazine_free.png) omitted]

diff --git a/recipes/the_week_uk.recipe b/recipes/the_week_uk.recipe
new file mode 100644
index 0000000000..b3a0cb58e9
--- /dev/null
+++ b/recipes/the_week_uk.recipe
@@ -0,0 +1,95 @@
+'''
+www.theweek.com
+'''
+from calibre.web.feeds.news import BasicNewsRecipe, classes
+from urllib.parse import quote
+
+
+class TheWeek(BasicNewsRecipe):
+    title = 'The Week'
+    __author__ = 'unkn0wn'
+    description = (
+        'The Week is for readers who want to know what\'s going on in the world, without having to read '
+        'several daily newspapers or get wrapped up in the endless news cycle. For every important story, '
+        'our editors carefully select commentary from all sides of the debate and artfully stitch them together '
+        'into one concise read. By showing you every perspective, we enable you to form your own opinion.'
+    )
+    language = 'en_GB'
+    encoding = 'utf-8'
+    no_stylesheets = True
+    remove_javascript = True
+    remove_attributes = ['width', 'height', 'style']
+
+    ignore_duplicate_articles = {'title', 'url'}
+    remove_empty_feeds = True
+    resolve_internal_links = True
+    simultaneous_downloads = 1
+    oldest_article = 7 # days
+    web_url = ''
+
+    extra_css = '''
+        img {display:block; margin:0 auto;}
+        .caption__text--hero, .credit { font-size:small; text-align:center; }
+        .header__strapline, em, i { color:#202020; }
+        .article-type__breadcrumb { color:grey; }
+        .author-byline__author-text {font-size:small; }
+    '''
+
+    def get_cover_url(self):
+        import json
+        url = 'https://ukmagazine.theweek.com/timelines.json'
+        data = json.loads(self.index_to_soup(url, raw=True))
+        for x in data['timelines'][:5]:
+            if '-cover-' in x['image']:
+                return 'https://ukmagazine.theweek.com' + x['image'][1:]
+
+    articles_are_obfuscated = True
+
+    def get_obfuscated_article(self, url):
+        br = self.get_browser()
+        soup = self.index_to_soup(url)
+        link = soup.a['href']
+        skip_sections = [ # add sections you want to skip
+            '/video/', '/videos/', '/multimedia/',
+        ]
+        if any(x in link for x in skip_sections):
+            self.abort_article('skipping video links ' + link)
+        self.web_url = link
+        html = br.open(link).read()
+        return ({ 'data': html, 'url': link })
+
+    keep_only_tags = [
+        classes('article-type__breadcrumb header__title header__strapline image image--hero author-byline__author-text article__body')
+    ]
+
+    remove_tags = [
+        dict(name='aside'),
+        classes(
+            'blueconic-article__wrapper ad-unit van_vid_carousel tag-links'
+        )
+    ]
+
+    def preprocess_html(self, soup):
+        for img in soup.findAll('img', attrs={'data-pin-media':True}):
+            img['src'] = img['data-pin-media'].replace('.jpg', '-768-80.jpg')
+        return soup
+
+    feeds = []
+    when = oldest_article*24
+    index = 'https://theweek.com/'
+    sections = [
+        'politics', 'news', 'cartoons', 'tech', 'science', 'health',
+        'culture-life', 'business', 'travel', 'arts-life', 'history'
+    ]
+    for sec in sections:
+        a = 'https://news.google.com/rss/search?q=when:{}h+allinurl:{}&hl=en-IN&gl=US&ceid=US:en'
+        feeds.append((sec.capitalize(), a.format(when, quote(index + sec, safe=''))))
+    feeds.append(('Others', a.format(when, quote(index, safe=''))))
+
+    def populate_article_metadata(self, article, soup, first):
+        article.title = article.title.replace(' - The Week', '')
+        desc = soup.find(**classes('header__strapline'))
+        if desc:
+            article.summary = self.tag_to_string(desc)
+            article.text_summary = article.summary
+        article.url = self.web_url
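
Both The Week recipes rely on preprocess_html() to swap each article image for
the larger rendition exposed through the data-pin-media attribute. Outside
calibre the same transformation looks roughly like this; BeautifulSoup stands
in for the soup object calibre hands to the recipe, and the '-768-80.jpg'
suffix is the rendition the recipe itself requests. The recipes can then be
exercised locally with calibre's usual "ebook-convert <name>.recipe .epub
--test -vv" workflow.

# Standalone sketch of the image upgrade done in preprocess_html().
from bs4 import BeautifulSoup

def upgrade_images(html):
    soup = BeautifulSoup(html, 'html.parser')
    for img in soup.find_all('img', attrs={'data-pin-media': True}):
        # prefer the pinned media URL, bumped to the 768px-wide rendition
        img['src'] = img['data-pin-media'].replace('.jpg', '-768-80.jpg')
    return str(soup)

print(upgrade_images('<img src="t.jpg" data-pin-media="https://example.com/pic.jpg">'))
# the img src now points at https://example.com/pic-768-80.jpg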