This commit is contained in:
Kovid Goyal 2024-06-30 11:49:14 +05:30
parent 13f9c072d2
commit e00e213ba5
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 6 additions and 4 deletions

View File

@ -1,10 +1,10 @@
import json
import random
import time
from collections import defaultdict
from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes
from html5_parser import parse
from collections import defaultdict
def get_contents(x):
@ -119,7 +119,6 @@ class Bloomberg(BasicNewsRecipe):
if h3 and h3.text:
sec = self.tag_to_string(h3)
self.log(sec)
articles = []
a = div.find(**prefixed_classes('MagazinePageMagazineArchive_storyLink__'))
url = a['href']
if url.startswith('http') is False:

View File

@ -1,7 +1,10 @@
from urllib.parse import quote
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.scraper.simple import read_url
from calibre.web.feeds.news import BasicNewsRecipe, prefixed_classes
def resize(x):
if 'resize' in x:
return x.split('&resize')[0] + '&resize=600'
@ -75,7 +78,7 @@ class times(BasicNewsRecipe):
for a in soup.findAll('a', href=True):
a['href'] = 'http' + a['href'].split('http')[-1]
div = soup.findAll(attrs={'style': lambda x: x and x.startswith(
'color:rgb(51, 51, 51);font-family:TimesDigitalW04-Regular'
'color:rgb(51, 51, 51);font-family:TimesDigitalW04-Regular'
)})
for p in div:
p.name = 'p'
@ -101,7 +104,7 @@ class times(BasicNewsRecipe):
feeds = []
when = oldest_article*24
index = 'https://www.thetimes.com/'
index = 'https://www.thetimes.com/'
sections = [
'politics', 'world', 'uk/politics', 'uk/scotland', 'uk', 'comment', 'business-money', 'sport',
'life-style', 'culture', 'magazine', 'travel', 'sunday-times', 'edition', 'article'