#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
from itertools import zip_longest
from urllib.parse import quote, urlencode
from calibre import browser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe, classes
def get_article(article_id):
    """Fetch the raw ``jpml`` payload for one article.

    The request goes to the ``mats.mobile.dowjones.io`` translate endpoint
    with the fixed set of HTTP headers below.

    :param article_id: identifier inserted verbatim into the request URL.
    :return: the response body exactly as produced by ``read()``.
    """
    # Imported lazily, as in the rest of this recipe, so the module can be
    # loaded without touching mechanize until an article is requested.
    from mechanize import Request

    request = Request(
        ''.join(('https://mats.mobile.dowjones.io/translate/', article_id, '/jpml')),
        headers={
            'User-Agent': 'okhttp/4.10.0',
            'Accept-Encoding': 'gzip',
            'Cache-Control': 'no-cache',
            # The key is intentionally kept as adjacent one-character literals.
            'x-api-key': ('e''0''5''9''9''5''f''f''4''4''2''1''4''3''2''5''5''e''b''8''3''8''1''f''7''2''d''4''9''1''3''b''f''7''5''0''3''d''6''c'),  # noqa: ISC001
        },
    )
    response = browser().open(request)
    return response.read()
class WSJ(BasicNewsRecipe):
# --- Recipe metadata -------------------------------------------------------
title = 'WSJ. Magazine'
__author__ = 'unkn0wn'
description = (
'Eight times a year the print edition of WSJ. Magazine covers contemporary culture '
'and the luminaries shaping it—with exclusive features and award-winning photography '
'documenting the worlds of entertainment, fashion, design, art, food, travel and more.'
)
language = 'en_US'
# --- Download / clean-up settings -------------------------------------------
# These names are BasicNewsRecipe configuration attributes; their exact
# semantics are defined by calibre's recipe API, not by this file.
encoding = 'utf-8'
no_javascript = True
no_stylesheets = True
remove_attributes = ['style', 'height', 'width']
resolve_internal_links = True
simultaneous_downloads = 20
# --- User-selectable options ------------------------------------------------
# 'res' is the image-width option. media_bucket() looks up 'res' and, when the
# value is a non-empty string, uses it as the '?width=' query value; otherwise
# it falls back to '?width=600', which matches 'default' below.
# NOTE(review): here 'res' maps to a dict, so that isinstance(str) check
# presumably relies on calibre substituting the user's chosen value at run
# time -- confirm against the calibre recipe API.
recipe_specific_options = {
'res': {
# 'short' and 'long' are the help texts shown for this option.
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
'default': '600',
},
}
# --- Styling ----------------------------------------------------------------
# Additional CSS: italic sub-headline/emphasis, small text for byline, timing
# and flashline elements, small centred captions (.figc), and centred block
# images.
extra_css = '''
#subhed, em { font-style:italic; color:#202020; }
#byline, #time-to-read, #orig-pubdate-string, .article-byline, time, #flashline { font-size:small; }
.figc { font-size:small; text-align:center; }
img {display:block; margin:0 auto;}
'''
# --- Content filtering ------------------------------------------------------
# Tag specifications for elements to strip: selected <panel> and <span>
# embeds, anything matched by classes('lamrelated-articles-inset-panel'), and
# <p> elements whose id is one of the metadata field names listed below.
remove_tags = [
dict(name='panel', attrs={'id': 'summary-image'}),
dict(name='panel', attrs={'layout': 'inline'}),
dict(name='panel', attrs={'embed': 'inner-article-ad'}),
dict(name='span', attrs={'embed': 'ticker'}),
classes('lamrelated-articles-inset-panel'),
dict(
name='p',
attrs={
# ids of <p> elements matched by this spec.
'id': [
'keywords',
'orig-pubdate-number',
'type',
'is-custom-flashline',
'grouphed',
'author-ids',
'article-manifest',
'body-extract',
'category',
'sub-category',
'socialhed',
'summary',
'deckline',
'article-flashline',
]
},
),
]
# Tag spec selecting <p id="orig-pubdate-string">.
# NOTE(review): presumably everything before this element is discarded, per
# calibre's remove_tags_before semantics -- confirm in the calibre docs.
remove_tags_before = [dict(name='p', attrs={'id': 'orig-pubdate-string'})]
def media_bucket(self, x):
res = '?width=600'
w = self.recipe_specific_options.get('res')
if w and isinstance(w, str):
res = '?width=' + w
if x.get('type', '') == 'image':
if (
x.get('subtype', '') == 'graphic'
or 'images.wsj.net' not in x['manifest-url']
):
return '