This commit is contained in:
Kovid Goyal 2022-09-27 10:54:10 +05:30
commit 7ceb4caa8c
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -106,10 +106,11 @@ class SCMP(BasicNewsRecipe):
caption_text = child.get("attribs", {}).get("alt") or child.get( caption_text = child.get("attribs", {}).get("alt") or child.get(
"attribs", {} "attribs", {}
).get("title") ).get("title")
caption_tag = soup.new_tag("span") if caption_text:
caption_tag.string = caption_text new_ele = soup.new_tag("span")
caption_tag["class"] = "caption" new_ele.append(caption_text)
child_html += str(caption_tag) new_ele["class"] = "caption"
child_html += str(new_ele)
ele["class"] = "article-img" ele["class"] = "article-img"
ele.append(BeautifulSoup(child_html)) ele.append(BeautifulSoup(child_html))
@ -118,15 +119,20 @@ class SCMP(BasicNewsRecipe):
soup = BeautifulSoup(raw_html) soup = BeautifulSoup(raw_html)
for script in soup.find_all("script"): for script in soup.find_all("script"):
if not script.text.startswith("window.__APOLLO_STATE__"): if not script.contents:
continue
if not script.contents[0].startswith("window.__APOLLO_STATE__"):
continue continue
article_js = re.sub( article_js = re.sub(
r"window.__APOLLO_STATE__\s*=\s*", "", script.text.strip() r"window.__APOLLO_STATE__\s*=\s*", "", script.contents[0].strip()
) )
if article_js.endswith(";"): if article_js.endswith(";"):
article_js = article_js[:-1] article_js = article_js[:-1]
article = json.loads(article_js) try:
break article = json.loads(article_js)
break
except json.JSONDecodeError:
self.log.exception("Unable to parse __APOLLO_STATE__")
if not (article and article.get("contentService")): if not (article and article.get("contentService")):
# Sometimes the page does not have article content in the <script> # Sometimes the page does not have article content in the <script>