This commit is contained in:
unkn0w7n 2025-06-08 13:48:50 +05:30
parent af0039485f
commit ff3bd6d906

View File

@ -4,13 +4,31 @@ import json
from datetime import datetime, timedelta from datetime import datetime, timedelta
from itertools import zip_longest from itertools import zip_longest
from urllib.parse import quote, urlencode from urllib.parse import quote, urlencode
from mechanize import Request
from calibre import browser
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe, classes from calibre.web.feeds.news import BasicNewsRecipe, classes
def get_article(article_id):
    """Fetch the JPML markup for a WSJ article from the Dow Jones MATS service.

    :param article_id: Dow Jones article identifier string.
    :return: raw response bytes (the JPML document) for the article.
    """
    # Fix: drop the redundant function-local ``from mechanize import Request`` —
    # Request is already imported at module level.
    mat_url = 'https://mats.mobile.dowjones.io/translate/' + article_id + '/jpml'
    headers = {
        # Mimic the official mobile client so the endpoint accepts the request.
        'User-Agent': 'okhttp/4.10.0',
        'Accept-Encoding': 'gzip',
        'Cache-Control': 'no-cache',
        # Key split into adjacent string literals (implicit concatenation) to
        # avoid trivial scraping; ISC001 warning suppressed deliberately.
        'x-api-key': ('e''0''5''9''9''5''f''f''4''4''2''1''4''3''2''5''5''e''b''8''3''8''1''f''7''2''d''4''9''1''3''b''f''7''5''0''3''d''6''c'),  # noqa: ISC001
    }
    br = browser()
    req = Request(
        mat_url,
        headers=headers,
    )
    res = br.open(req)
    return res.read()
class WSJ(BasicNewsRecipe): class WSJ(BasicNewsRecipe):
title = 'The Wall Street Journal' title = 'The Wall Street Journal'
__author__ = 'unkn0wn' __author__ = 'unkn0wn'
@ -150,7 +168,6 @@ class WSJ(BasicNewsRecipe):
import os import os
from contextlib import closing from contextlib import closing
from calibre import browser
from calibre.utils.img import save_cover_data_to from calibre.utils.img import save_cover_data_to
br = browser() br = browser()
@ -233,7 +250,7 @@ class WSJ(BasicNewsRecipe):
desc = mobi['description']['content']['text'] desc = mobi['description']['content']['text']
art_id = arts['id'] art_id = arts['id']
self.log(' ', title, '\n\t', desc) self.log(' ', title, '\n\t', desc)
art_cont = self.get_article(art_id) art_cont = get_article(art_id)
pt = PersistentTemporaryFile('.html') pt = PersistentTemporaryFile('.html')
pt.write(art_cont) pt.write(art_cont)
pt.close() pt.close()
@ -242,26 +259,6 @@ class WSJ(BasicNewsRecipe):
feeds.append((section, articles)) feeds.append((section, articles))
return feeds return feeds
def preprocess_raw_html(self, raw, url):
    """Parse the downloaded markup and return it pretty-printed."""
    soup = BeautifulSoup(raw)
    return soup.prettify()
def get_article(self, article_id):
    """Download the JPML rendition of *article_id* from the MATS endpoint."""
    from calibre import browser
    endpoint = 'https://mats.mobile.dowjones.io/translate/' + article_id + '/jpml'
    # Headers imitate the official mobile app; the API key is assembled from
    # adjacent string literals (implicit concatenation).
    request_headers = {
        'User-Agent': 'okhttp/4.10.0',
        'Accept-Encoding': 'gzip',
        'Cache-Control': 'no-cache',
        'x-api-key': ('e''0''5''9''9''5''f''f''4''4''2''1''4''3''2''5''5''e''b''8''3''8''1''f''7''2''d''4''9''1''3''b''f''7''5''0''3''d''6''c'),  # noqa: ISC001
    }
    response = browser().open(Request(endpoint, headers=request_headers))
    return response.read()
def populate_article_metadata(self, article, soup, first): def populate_article_metadata(self, article, soup, first):
lnk = soup.find('div', attrs={'id': 'share-link'}) lnk = soup.find('div', attrs={'id': 'share-link'})
if lnk: if lnk: