This commit is contained in:
unkn0w7n 2025-06-08 13:48:50 +05:30
parent af0039485f
commit ff3bd6d906

View File

@ -4,13 +4,31 @@ import json
from datetime import datetime, timedelta
from itertools import zip_longest
from urllib.parse import quote, urlencode
from mechanize import Request
from calibre import browser
from calibre.ptempfile import PersistentTemporaryFile
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe, classes
def get_article(article_id):
    """Fetch the raw JPML payload for a WSJ article from the Dow Jones MATS service.

    article_id -- the article identifier taken from the mobile section feed.
    Returns the response body as bytes (JPML markup for the article).
    """
    # `Request` comes from the module-level mechanize import; the previous
    # function-local re-import was redundant and has been removed.
    mat_url = 'https://mats.mobile.dowjones.io/translate/' + article_id + '/jpml'
    headers = {
        'User-Agent': 'okhttp/4.10.0',
        'Accept-Encoding': 'gzip',
        'Cache-Control': 'no-cache',
        # API key split into adjacent char literals to keep it out of trivial scrapes.
        'x-api-key': ('e''0''5''9''9''5''f''f''4''4''2''1''4''3''2''5''5''e''b''8''3''8''1''f''7''2''d''4''9''1''3''b''f''7''5''0''3''d''6''c'), # noqa: ISC001
    }
    br = browser()
    req = Request(
        mat_url,
        headers=headers,
    )
    res = br.open(req)
    return res.read()
class WSJ(BasicNewsRecipe):
title = 'The Wall Street Journal'
__author__ = 'unkn0wn'
@ -150,7 +168,6 @@ class WSJ(BasicNewsRecipe):
import os
from contextlib import closing
from calibre import browser
from calibre.utils.img import save_cover_data_to
br = browser()
@ -233,7 +250,7 @@ class WSJ(BasicNewsRecipe):
desc = mobi['description']['content']['text']
art_id = arts['id']
self.log(' ', title, '\n\t', desc)
art_cont = self.get_article(art_id)
art_cont = get_article(art_id)
pt = PersistentTemporaryFile('.html')
pt.write(art_cont)
pt.close()
@ -242,26 +259,6 @@ class WSJ(BasicNewsRecipe):
feeds.append((section, articles))
return feeds
def preprocess_raw_html(self, raw, url):
    """Normalise fetched article markup by parsing and pretty-printing it."""
    soup = BeautifulSoup(raw)
    return soup.prettify()
def get_article(self, article_id):
    """Download and return the raw JPML bytes for *article_id* from the MATS service."""
    from calibre import browser
    endpoint = 'https://mats.mobile.dowjones.io/translate/' + article_id + '/jpml'
    request_headers = {
        'User-Agent': 'okhttp/4.10.0',
        'Accept-Encoding': 'gzip',
        'Cache-Control': 'no-cache',
        # API key split into adjacent char literals to keep it out of trivial scrapes.
        'x-api-key': ('e''0''5''9''9''5''f''f''4''4''2''1''4''3''2''5''5''e''b''8''3''8''1''f''7''2''d''4''9''1''3''b''f''7''5''0''3''d''6''c'), # noqa: ISC001
    }
    response = browser().open(Request(endpoint, headers=request_headers))
    return response.read()
def populate_article_metadata(self, article, soup, first):
lnk = soup.find('div', attrs={'id': 'share-link'})
if lnk: