More robust cover fetching for the economist

Kovid Goyal 2011-11-12 23:03:20 +05:30
parent 19b54b8008
commit d26a9127a2
2 changed files with 22 additions and 13 deletions


@@ -55,12 +55,17 @@ class Economist(BasicNewsRecipe):
     '''
     def get_cover_url(self):
-        br = self.browser
-        br.open(self.INDEX)
-        issue = br.geturl().split('/')[4]
-        self.log('Fetching cover for issue: %s'%issue)
-        cover_url = "http://media.economist.com/sites/default/files/imagecache/print-cover-full/print-covers/%s_CNA400_0.jpg" %(issue.translate(None,'-'))
-        return cover_url
+        soup = self.index_to_soup('http://www.economist.com/printedition/covers')
+        div = soup.find('div', attrs={'class':lambda x: x and
+            'print-cover-links' in x})
+        a = div.find('a', href=True)
+        url = a.get('href')
+        if url.startswith('/'):
+            url = 'http://www.economist.com' + url
+        soup = self.index_to_soup(url)
+        div = soup.find('div', attrs={'class':'cover-content'})
+        img = div.find('img', src=True)
+        return img.get('src')
     def parse_index(self):
         return self.economist_parse_index()
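
The change drops the old scheme of building the cover URL from the issue date embedded in the print-edition URL, and instead scrapes http://www.economist.com/printedition/covers for the link to the current issue's cover page, then reads the full-size image URL from that page. The following is a minimal standalone sketch of that two-step lookup, runnable outside calibre under Python 3: urllib and BeautifulSoup stand in for the recipe's self.index_to_soup() helper, and the selectors are copied straight from the diff, so it only works as long as the Economist pages still use that markup.

    from urllib.request import urlopen
    from bs4 import BeautifulSoup

    BASE = 'http://www.economist.com'

    def fetch_soup(url):
        # Stand-in for the recipe's self.index_to_soup(): download a page and parse it.
        return BeautifulSoup(urlopen(url).read(), 'html.parser')

    def get_cover_url():
        # Step 1: the covers index page links to the current issue's cover page.
        soup = fetch_soup(BASE + '/printedition/covers')
        div = soup.find('div', attrs={'class': lambda x: x and 'print-cover-links' in x})
        a = div.find('a', href=True)
        url = a.get('href')
        if url.startswith('/'):
            url = BASE + url
        # Step 2: the cover page itself carries the full-size cover image.
        soup = fetch_soup(url)
        div = soup.find('div', attrs={'class': 'cover-content'})
        img = div.find('img', src=True)
        return img.get('src')

    if __name__ == '__main__':
        print(get_cover_url())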


@@ -39,13 +39,17 @@ class Economist(BasicNewsRecipe):
     delay = 1
     def get_cover_url(self):
-        br = self.browser
-        br.open(self.INDEX)
-        issue = br.geturl().split('/')[4]
-        self.log('Fetching cover for issue: %s'%issue)
-        cover_url = "http://media.economist.com/sites/default/files/imagecache/print-cover-full/print-covers/%s_CNA400_0.jpg" %(issue.translate(None,'-'))
-        return cover_url
+        soup = self.index_to_soup('http://www.economist.com/printedition/covers')
+        div = soup.find('div', attrs={'class':lambda x: x and
+            'print-cover-links' in x})
+        a = div.find('a', href=True)
+        url = a.get('href')
+        if url.startswith('/'):
+            url = 'http://www.economist.com' + url
+        soup = self.index_to_soup(url)
+        div = soup.find('div', attrs={'class':'cover-content'})
+        img = div.find('img', src=True)
+        return img.get('src')
     def parse_index(self):
         try: