From a01b02ad66534a97fbb3ff57a130438fcdf4ce5b Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Tue, 27 Aug 2024 12:54:17 +0530
Subject: [PATCH] Update times_online.recipe

Remove the Google News RSS feeds and build the section index by scraping the thetimes.com front page instead.
---
 recipes/times_online.recipe | 60 ++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/recipes/times_online.recipe b/recipes/times_online.recipe
index 6449f489a3..8f00a45f74 100644
--- a/recipes/times_online.recipe
+++ b/recipes/times_online.recipe
@@ -1,7 +1,7 @@
-from urllib.parse import quote
+#!/usr/bin/env python
+import random
 
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
-from calibre.scraper.simple import read_url
 from calibre.web.feeds.news import BasicNewsRecipe, prefixed_classes
 
 
@@ -11,6 +11,11 @@ def resize(x):
     elif '?crop=' in x:
         return x + '&resize=600'
 
+def absurl(url):
+    # Site-relative hrefs (leading '/') get the thetimes.com origin; anything else passes through.
+    origin = 'https://www.thetimes.com' if url.startswith('/') else ''
+    return origin + url
+
 class times(BasicNewsRecipe):
     title = 'The Times and Sunday Times'
     __author__ = 'unkn0wn'
@@ -30,8 +35,7 @@ class times(BasicNewsRecipe):
     remove_empty_feeds = True
     resolve_internal_links = True
     simultaneous_downloads = 1
-    oldest_article = 1 # days
-    web_url = ''
+    browser_type = 'webengine'
 
     def get_cover_url(self):
         soup = self.index_to_soup('https://www.frontpages.com/the-times/')
@@ -88,36 +92,34 @@ class times(BasicNewsRecipe):
             fig['class'] = 'sub'
         return soup
 
-    articles_are_obfuscated = True
+    def parse_index(self):
+        '''Scrape the thetimes.com front page into a list of (section, articles) feeds.'''
+        soup = self.index_to_soup('https://www.thetimes.com/')
+        main = soup.find('div', attrs={'id': 'main-container', 'data-edition-date': True})
+        if main is None:  # fail with a readable message instead of a TypeError on the next line
+            self.abort_recipe_processing('thetimes.com front page has no dated #main-container; layout changed?')
+        self.timefmt = ' [%s]' % main['data-edition-date']
 
-    def get_obfuscated_article(self, url):
-        soup = self.index_to_soup(url)
-        link = soup.a['href']
-        skip_sections =[ # add sections you want to skip
-            '/video/', '/videos/', '/multimedia/',
-        ]
-        if any(x in link for x in skip_sections):
-            self.abort_article('skipping video links ', link)
-        self.web_url = link
-        html = self.index_to_soup(link, raw=True)
-        return ({ 'data': html, 'url': link })
+        feeds = []
 
-    feeds = []
-    when = oldest_article*24
-    index = 'https://www.thetimes.com/'
-    sections = [
-        'politics', 'world', 'uk/politics', 'uk/scotland', 'uk', 'comment', 'business-money', 'sport',
-        'life-style', 'culture', 'magazine', 'travel', 'sunday-times', 'edition', 'article'
-    ]
-    for sec in sections:
-        a = 'https://news.google.com/rss/search?q=when:{}h+allinurl:{}&hl=en-GB&gl=GB&ceid=GB:en'
-        feeds.append((sec.capitalize(), a.format(when, quote(index + sec, safe=''))))
-    feeds.append(('Others', a.format(when, quote(index, safe=''))))
+        for sec in main.findAll('section', attrs={'id': lambda x: x and x.startswith('section-')}, recursive=False):
+            section = sec['id'].replace('section-', '').capitalize()
+            self.log(section)
+            articles = []
+            for a in sec.findAll(**prefixed_classes('Item-headline')):
+                if a.find('a'):  # headlines without a link cannot be fetched
+                    url = absurl(a.a['href']).split('?')[0]  # drop the query string
+                    title = self.tag_to_string(a)
+                    self.log('          ', title, '\n\t', url)
+                    articles.append({'title': title, 'url': url})
+            feeds.append((section, articles))
+        return feeds
 
     def preprocess_raw_html(self, raw, url):
         access = '"userState":{"isLoggedIn":false,"isMetered":false,"hasAccess":true}'
         if access not in raw and 'comment/cartoons' not in url:
-            raw_ar = read_url([], 'https://archive.is/latest/' + url)
+            dom = random.choice(('fo', 'is', 'li', 'md', 'ph', 'vn'))
+            raw_ar = self.index_to_soup('https://archive.' + dom + '/latest/' + url)
             archive = BeautifulSoup(str(raw_ar))
             if archive.find('div', attrs={'id':'top'}):
                 content = archive.find('article', attrs={'id':False})
@@ -133,9 +135,7 @@ class times(BasicNewsRecipe):
         return raw
 
     def populate_article_metadata(self, article, soup, first):
-        article.title = article.title.replace(' - The Times', '')
         desc = soup.find(**prefixed_classes('responsive__StandfirstContainer-'))
         if desc:
             article.summary = self.tag_to_string(desc)
             article.text_summary = article.summary
-        article.url = self.web_url