mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix incorrect assignment of articles to sections in NYT recipe
This commit is contained in:
parent
c19cbe3d4a
commit
22eb77f518
@ -171,8 +171,9 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
script = soup.findAll('script', text=lambda x: x and 'window.__preloadedData' in x)[0]
|
script = soup.findAll('script', text=lambda x: x and 'window.__preloadedData' in x)[0]
|
||||||
script = type(u'')(script)
|
script = type(u'')(script)
|
||||||
data = json.loads(script[script.find('{'):].strip().rstrip(';'))['initialState']
|
data = json.loads(script[script.find('{'):].strip().rstrip(';'))['initialState']
|
||||||
containers, sections = [], {}
|
containers, sections = {}, {}
|
||||||
article_map = {}
|
article_map = {}
|
||||||
|
gc_pat = re.compile(r'groupings.(\d+).containers.(\d+)')
|
||||||
pat = re.compile(r'groupings.(\d+).containers.(\d+).relations.(\d+)')
|
pat = re.compile(r'groupings.(\d+).containers.(\d+).relations.(\d+)')
|
||||||
for key in data:
|
for key in data:
|
||||||
if 'Article' in key:
|
if 'Article' in key:
|
||||||
@ -189,7 +190,8 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
sdata = data[key]
|
sdata = data[key]
|
||||||
tname = sdata.get('__typename')
|
tname = sdata.get('__typename')
|
||||||
if tname == 'LegacyCollectionContainer':
|
if tname == 'LegacyCollectionContainer':
|
||||||
containers.append(sdata['label'] or sdata['name'])
|
m = gc_pat.search(key)
|
||||||
|
containers[int(m.group(2))] = sdata['label'] or sdata['name']
|
||||||
elif tname == 'LegacyCollectionRelation':
|
elif tname == 'LegacyCollectionRelation':
|
||||||
m = pat.search(key)
|
m = pat.search(key)
|
||||||
grouping, container, relation = map(int, m.groups())
|
grouping, container, relation = map(int, m.groups())
|
||||||
@ -200,17 +202,16 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
sections[container].append(asset['id'].split(':', 1)[1])
|
sections[container].append(asset['id'].split(':', 1)[1])
|
||||||
|
|
||||||
feeds = []
|
feeds = []
|
||||||
for i, section_title in enumerate(containers):
|
for container_num in sorted(containers):
|
||||||
if i in sections:
|
section_title = containers[container_num]
|
||||||
articles = sections[i]
|
if container_num in sections:
|
||||||
|
articles = sections[container_num]
|
||||||
if articles:
|
if articles:
|
||||||
self.log('\n' + section_title)
|
|
||||||
feeds.append((section_title, []))
|
feeds.append((section_title, []))
|
||||||
for artid in articles:
|
for artid in articles:
|
||||||
if artid in article_map:
|
if artid in article_map:
|
||||||
art = article_map[artid]
|
art = article_map[artid]
|
||||||
feeds[-1][1].append(art)
|
feeds[-1][1].append(art)
|
||||||
self.log('\t' + art['title'])
|
|
||||||
|
|
||||||
def skey(x):
|
def skey(x):
|
||||||
name = x[0].strip()
|
name = x[0].strip()
|
||||||
@ -218,6 +219,11 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
return 0, ''
|
return 0, ''
|
||||||
return 1, name.lower()
|
return 1, name.lower()
|
||||||
feeds.sort(key=skey)
|
feeds.sort(key=skey)
|
||||||
|
for section, articles in feeds:
|
||||||
|
self.log('\n' + section)
|
||||||
|
for article in articles:
|
||||||
|
self.log(article['title'] + ' - ' + article['url'])
|
||||||
|
# raise SystemExit(1)
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def parse_highlights(self, container):
|
def parse_highlights(self, container):
|
||||||
|
@ -171,8 +171,9 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
script = soup.findAll('script', text=lambda x: x and 'window.__preloadedData' in x)[0]
|
script = soup.findAll('script', text=lambda x: x and 'window.__preloadedData' in x)[0]
|
||||||
script = type(u'')(script)
|
script = type(u'')(script)
|
||||||
data = json.loads(script[script.find('{'):].strip().rstrip(';'))['initialState']
|
data = json.loads(script[script.find('{'):].strip().rstrip(';'))['initialState']
|
||||||
containers, sections = [], {}
|
containers, sections = {}, {}
|
||||||
article_map = {}
|
article_map = {}
|
||||||
|
gc_pat = re.compile(r'groupings.(\d+).containers.(\d+)')
|
||||||
pat = re.compile(r'groupings.(\d+).containers.(\d+).relations.(\d+)')
|
pat = re.compile(r'groupings.(\d+).containers.(\d+).relations.(\d+)')
|
||||||
for key in data:
|
for key in data:
|
||||||
if 'Article' in key:
|
if 'Article' in key:
|
||||||
@ -189,7 +190,8 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
sdata = data[key]
|
sdata = data[key]
|
||||||
tname = sdata.get('__typename')
|
tname = sdata.get('__typename')
|
||||||
if tname == 'LegacyCollectionContainer':
|
if tname == 'LegacyCollectionContainer':
|
||||||
containers.append(sdata['label'] or sdata['name'])
|
m = gc_pat.search(key)
|
||||||
|
containers[int(m.group(2))] = sdata['label'] or sdata['name']
|
||||||
elif tname == 'LegacyCollectionRelation':
|
elif tname == 'LegacyCollectionRelation':
|
||||||
m = pat.search(key)
|
m = pat.search(key)
|
||||||
grouping, container, relation = map(int, m.groups())
|
grouping, container, relation = map(int, m.groups())
|
||||||
@ -200,17 +202,16 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
sections[container].append(asset['id'].split(':', 1)[1])
|
sections[container].append(asset['id'].split(':', 1)[1])
|
||||||
|
|
||||||
feeds = []
|
feeds = []
|
||||||
for i, section_title in enumerate(containers):
|
for container_num in sorted(containers):
|
||||||
if i in sections:
|
section_title = containers[container_num]
|
||||||
articles = sections[i]
|
if container_num in sections:
|
||||||
|
articles = sections[container_num]
|
||||||
if articles:
|
if articles:
|
||||||
self.log('\n' + section_title)
|
|
||||||
feeds.append((section_title, []))
|
feeds.append((section_title, []))
|
||||||
for artid in articles:
|
for artid in articles:
|
||||||
if artid in article_map:
|
if artid in article_map:
|
||||||
art = article_map[artid]
|
art = article_map[artid]
|
||||||
feeds[-1][1].append(art)
|
feeds[-1][1].append(art)
|
||||||
self.log('\t' + art['title'])
|
|
||||||
|
|
||||||
def skey(x):
|
def skey(x):
|
||||||
name = x[0].strip()
|
name = x[0].strip()
|
||||||
@ -218,6 +219,11 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
return 0, ''
|
return 0, ''
|
||||||
return 1, name.lower()
|
return 1, name.lower()
|
||||||
feeds.sort(key=skey)
|
feeds.sort(key=skey)
|
||||||
|
for section, articles in feeds:
|
||||||
|
self.log('\n' + section)
|
||||||
|
for article in articles:
|
||||||
|
self.log(article['title'] + ' - ' + article['url'])
|
||||||
|
# raise SystemExit(1)
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def parse_highlights(self, container):
|
def parse_highlights(self, container):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user