Fix #7120 (GoComics.com)

This commit is contained in:
Kovid Goyal 2010-10-09 20:24:23 -06:00
parent 65a61bb0a8
commit 541a62398c

View File

@@ -11,8 +11,8 @@ import mechanize
class GoComics(BasicNewsRecipe): class GoComics(BasicNewsRecipe):
title = 'GoComics' title = 'GoComics'
__author__ = 'Starson17' __author__ = 'Starson17'
__version__ = '1.02' __version__ = '1.03'
__date__ = '14 August 2010' __date__ = '09 October 2010'
description = u'200+ Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.' description = u'200+ Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
category = 'news, comics' category = 'news, comics'
language = 'en' language = 'en'
@@ -273,6 +273,7 @@ class GoComics(BasicNewsRecipe):
# ("Wit of the World","http://www.gocomics.com/witoftheworld"), # ("Wit of the World","http://www.gocomics.com/witoftheworld"),
# ("Don Wright","http://www.gocomics.com/donwright"), # ("Don Wright","http://www.gocomics.com/donwright"),
]: ]:
print 'Working on: ', title
articles = self.make_links(url) articles = self.make_links(url)
if articles: if articles:
feeds.append((title, articles)) feeds.append((title, articles))
@@ -286,28 +287,30 @@ class GoComics(BasicNewsRecipe):
page_soup = self.index_to_soup(url) page_soup = self.index_to_soup(url)
if page_soup: if page_soup:
try: try:
strip_title = page_soup.h1.a.string strip_title = page_soup.find(name='div', attrs={'class':'top'}).h1.a.string
except: except:
strip_title = 'Error - no page_soup.h1.a.string' strip_title = 'Error - no Title found'
try: try:
date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string
if not date_title:
date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string
except: except:
date_title = 'Error - no page_soup.h1.li.string' date_title = 'Error - no Date found'
title = strip_title + ' - ' + date_title title = strip_title + ' - ' + date_title
for i in range(2): for i in range(2):
try: try:
strip_url_date = page_soup.h1.a['href'] strip_url_date = page_soup.find(name='div', attrs={'class':'top'}).h1.a['href']
break #success - this is normal exit break #success - this is normal exit
except: except:
strip_url_date = None
continue #try to get strip_url_date again continue #try to get strip_url_date again
continue # give up on this strip date
for i in range(2): for i in range(2):
try: try:
prev_strip_url_date = page_soup.find('a', attrs={'class': 'prev'})['href'] prev_strip_url_date = page_soup.find('a', attrs={'class': 'prev'})['href']
break #success - this is normal exit break #success - this is normal exit
except: except:
prev_strip_url_date = None
continue #try to get prev_strip_url_date again continue #try to get prev_strip_url_date again
continue # give up on this prev strip date
if strip_url_date: if strip_url_date:
page_url = 'http://www.gocomics.com' + strip_url_date page_url = 'http://www.gocomics.com' + strip_url_date
else: else: