mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-11-05 12:03:03 -05:00
Update nytimes.py
fix JSON decode error
This commit is contained in:
parent
3b036b4853
commit
882b2dd517
@@ -9,7 +9,7 @@ from xml.sax.saxutils import escape, quoteattr
|
|||||||
|
|
||||||
from calibre.utils.iso8601 import parse_iso8601
|
from calibre.utils.iso8601 import parse_iso8601
|
||||||
|
|
||||||
module_version = 14 # needed for live updates
|
module_version = 15 # needed for live updates
|
||||||
pprint
|
pprint
|
||||||
|
|
||||||
|
|
||||||
@@ -198,8 +198,8 @@ def article_parse(data):
|
|||||||
def clean_js_json(text):
|
def clean_js_json(text):
|
||||||
text = re.sub(r'\bundefined\b', 'null', text)
|
text = re.sub(r'\bundefined\b', 'null', text)
|
||||||
text = re.sub(
|
text = re.sub(
|
||||||
r',?\s*"[^"]+"\s*:\s*function\s*\([^)]*\)\s*\{.*?\}',
|
r'{\"checkGate\":.*',
|
||||||
'',
|
'null}}',
|
||||||
text,
|
text,
|
||||||
flags=re.DOTALL
|
flags=re.DOTALL
|
||||||
)
|
)
|
||||||
@@ -207,7 +207,7 @@ def clean_js_json(text):
|
|||||||
|
|
||||||
|
|
||||||
def json_to_html(raw):
|
def json_to_html(raw):
|
||||||
data = json.JSONDecoder(strict=False).raw_decode(raw)[0]
|
data = json.loads(clean_js_json(raw))
|
||||||
# open('/t/raw.json', 'w').write(json.dumps(data, indent=2))
|
# open('/t/raw.json', 'w').write(json.dumps(data, indent=2))
|
||||||
try:
|
try:
|
||||||
data = data['initialData']['data']
|
data = data['initialData']['data']
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user