mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Merge branch 'master' of https://github.com/unkn0w7n/calibre
This commit is contained in:
commit
904bccb353
@ -28,7 +28,7 @@ def process_node(node):
|
||||
if ntype == 'PARAGRAPH':
|
||||
if node.get('textHtml'):
|
||||
return f'<p>{node.get("textHtml")}</p>'
|
||||
return f'<p>{node.get("tex", "")}</p>'
|
||||
return f'<p>{node.get("text", "")}</p>'
|
||||
elif ntype == 'IMAGE':
|
||||
alt = "" if node.get("altText") is None else node.get("altText")
|
||||
cap = ""
|
||||
@ -49,6 +49,7 @@ def process_node(node):
|
||||
print('** ', ntype)
|
||||
return ''
|
||||
|
||||
|
||||
def safe_dict(data, *names):
|
||||
ans = data
|
||||
for x in names:
|
||||
@ -187,15 +188,14 @@ class EconomistWorld(BasicNewsRecipe):
|
||||
}
|
||||
|
||||
def get_browser(self, *args, **kwargs):
|
||||
kwargs['user_agent'] = 'Mozilla/5.0 (Linux; Android 14; 330333QCG Build/AP1A.140705.005; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/125.0.6422.165 Mobile Safari/537.36 Lamarr/3.37.0-3037003 (android)' # noqa
|
||||
kwargs['user_agent'] = (
|
||||
'Mozilla/5.0 (Linux; Android 14; 330333QCG Build/AP1A.140705.005; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/125.0.6422.165 Mobile Safari/537.36 Lamarr/3.37.0-3037003 (android)' # noqa
|
||||
)
|
||||
br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
|
||||
br.addheaders += [
|
||||
('x-requested-with', 'com.economist.lamarr')
|
||||
]
|
||||
br.addheaders += [('x-requested-with', 'com.economist.lamarr')]
|
||||
return br
|
||||
|
||||
def economist_test_article(self):
|
||||
self.cover_url = None
|
||||
return [('Articles', [{'title':'test',
|
||||
'url':'https://www.economist.com/the-world-ahead/2024/11/20/ten-business-trends-for-2025-and-forecasts-for-15-industries'
|
||||
}])]
|
||||
@ -257,9 +257,12 @@ class EconomistWorld(BasicNewsRecipe):
|
||||
# open('/t/raw.html', 'wb').write(raw.encode('utf-8'))
|
||||
root_ = parse(raw)
|
||||
if '/interactive/' in url:
|
||||
return '<html><body><article><h1>' + root_.xpath('//h1')[0].text + '</h1><em>' \
|
||||
+ 'This article is supposed to be read in a browser' \
|
||||
+ '</em></article></body></html>'
|
||||
return (
|
||||
'<html><body><article><h1>'
|
||||
+ root_.xpath('//h1')[0].text + '</h1><em>'
|
||||
+ 'This article is supposed to be read in a browser'
|
||||
+ '</em></article></body></html>'
|
||||
)
|
||||
|
||||
script = root_.xpath('//script[@id="__NEXT_DATA__"]')
|
||||
|
||||
|
@ -45,7 +45,7 @@ class IndianExpress(BasicNewsRecipe):
|
||||
'digital-subscriber-only h-text-widget ie-premium ie-first-publish adboxtop adsizes immigrationimg '
|
||||
'next-story-wrap ie-ie-share next-story-box brand-logo quote_section ie-customshare osv-ad-class '
|
||||
'custom-share o-story-paper-quite ie-network-commenting audio-player-tts-sec o-story-list subscriber_hide '
|
||||
'author-social author-follow author-img premium_widget_below_article'
|
||||
'author-social author-follow author-img premium_widget_below_article author-block'
|
||||
)
|
||||
]
|
||||
|
||||
@ -136,9 +136,13 @@ class IndianExpress(BasicNewsRecipe):
|
||||
return citem['content'].replace('300', '600')
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
if h2 := soup.find(attrs={'itemprop': 'description'}):
|
||||
if h2 := (soup.find(attrs={"itemprop": "description"}) or soup.find(**classes("synopsis"))):
|
||||
h2.name = 'p'
|
||||
h2['id'] = 'sub-d'
|
||||
for span in soup.findAll(
|
||||
"span", attrs={"class": ["ie-custom-caption", "custom-caption"]}
|
||||
):
|
||||
span["id"] = "img-cap"
|
||||
for img in soup.findAll('img', attrs={'data-src': True}):
|
||||
img['src'] = img['data-src']
|
||||
if span := soup.find('span', content=True, attrs={'itemprop': 'dateModified'}):
|
||||
|
Loading…
x
Reference in New Issue
Block a user