mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Fix Economist not downloading all articles from Technology Quarterly
This commit is contained in:
parent
2425e2c09b
commit
384e24a06a
@ -77,32 +77,23 @@ class Economist(BasicNewsRecipe):
|
|||||||
continue
|
continue
|
||||||
self.log('Found section: %s'%section_title)
|
self.log('Found section: %s'%section_title)
|
||||||
articles = []
|
articles = []
|
||||||
for h5 in section.findAll('h5'):
|
subsection = ''
|
||||||
article_title = self.tag_to_string(h5).strip()
|
for node in section.findAll(attrs={'class':'article'}):
|
||||||
if not article_title:
|
subsec = node.findPreviousSibling('h5')
|
||||||
continue
|
if subsec is not None:
|
||||||
data = h5.findNextSibling(attrs={'class':'article'})
|
subsection = self.tag_to_string(subsec)
|
||||||
if data is None: continue
|
prefix = (subsection+': ') if subsection else ''
|
||||||
a = data.find('a', href=True)
|
a = node.find('a', href=True)
|
||||||
if a is None: continue
|
if a is not None:
|
||||||
url = a['href']
|
url = a['href']
|
||||||
if url.startswith('/'): url = 'http://www.economist.com'+url
|
if url.startswith('/'): url = 'http://www.economist.com'+url
|
||||||
url += '/print'
|
url += '/print'
|
||||||
article_title += ': %s'%self.tag_to_string(a).strip()
|
title = self.tag_to_string(a)
|
||||||
articles.append({'title':article_title, 'url':url,
|
if title:
|
||||||
'description':'', 'date':''})
|
title = prefix + title
|
||||||
if not articles:
|
self.log('\tFound article:', title)
|
||||||
# We have last or first section
|
articles.append({'title':title, 'url':url,
|
||||||
for art in section.findAll(attrs={'class':'article'}):
|
'description':'', 'date':''})
|
||||||
a = art.find('a', href=True)
|
|
||||||
if a is not None:
|
|
||||||
url = a['href']
|
|
||||||
if url.startswith('/'): url = 'http://www.economist.com'+url
|
|
||||||
url += '/print'
|
|
||||||
title = self.tag_to_string(a)
|
|
||||||
if title:
|
|
||||||
articles.append({'title':title, 'url':url,
|
|
||||||
'description':'', 'date':''})
|
|
||||||
|
|
||||||
if articles:
|
if articles:
|
||||||
if section_title not in feeds:
|
if section_title not in feeds:
|
||||||
|
@ -69,32 +69,23 @@ class Economist(BasicNewsRecipe):
|
|||||||
continue
|
continue
|
||||||
self.log('Found section: %s'%section_title)
|
self.log('Found section: %s'%section_title)
|
||||||
articles = []
|
articles = []
|
||||||
for h5 in section.findAll('h5'):
|
subsection = ''
|
||||||
article_title = self.tag_to_string(h5).strip()
|
for node in section.findAll(attrs={'class':'article'}):
|
||||||
if not article_title:
|
subsec = node.findPreviousSibling('h5')
|
||||||
continue
|
if subsec is not None:
|
||||||
data = h5.findNextSibling(attrs={'class':'article'})
|
subsection = self.tag_to_string(subsec)
|
||||||
if data is None: continue
|
prefix = (subsection+': ') if subsection else ''
|
||||||
a = data.find('a', href=True)
|
a = node.find('a', href=True)
|
||||||
if a is None: continue
|
if a is not None:
|
||||||
url = a['href']
|
url = a['href']
|
||||||
if url.startswith('/'): url = 'http://www.economist.com'+url
|
if url.startswith('/'): url = 'http://www.economist.com'+url
|
||||||
url += '/print'
|
url += '/print'
|
||||||
article_title += ': %s'%self.tag_to_string(a).strip()
|
title = self.tag_to_string(a)
|
||||||
articles.append({'title':article_title, 'url':url,
|
if title:
|
||||||
'description':'', 'date':''})
|
title = prefix + title
|
||||||
if not articles:
|
self.log('\tFound article:', title)
|
||||||
# We have last or first section
|
articles.append({'title':title, 'url':url,
|
||||||
for art in section.findAll(attrs={'class':'article'}):
|
'description':'', 'date':''})
|
||||||
a = art.find('a', href=True)
|
|
||||||
if a is not None:
|
|
||||||
url = a['href']
|
|
||||||
if url.startswith('/'): url = 'http://www.economist.com'+url
|
|
||||||
url += '/print'
|
|
||||||
title = self.tag_to_string(a)
|
|
||||||
if title:
|
|
||||||
articles.append({'title':title, 'url':url,
|
|
||||||
'description':'', 'date':''})
|
|
||||||
|
|
||||||
if articles:
|
if articles:
|
||||||
if section_title not in feeds:
|
if section_title not in feeds:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user