News download: Fix regression that led to spurious error messages when downloading multipage articles

This commit is contained in:
Kovid Goyal 2010-07-05 19:06:13 -06:00
parent 875b7b85b3
commit a4e398d73c
2 changed files with 14 additions and 10 deletions

View File

@ -336,7 +336,7 @@ class NYTimes(BasicNewsRecipe):
 self.log(">>> No class:'columnGroup first' found <<<")
 # Change class="kicker" to <h3>
 kicker = soup.find(True, {'class':'kicker'})
-if kicker and kicker.contents[0]:
+if kicker and kicker.contents and kicker.contents[0]:
 h3Tag = Tag(soup, "h3")
 h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker,
 use_alt=False)))
@ -460,8 +460,10 @@ class NYTimes(BasicNewsRecipe):
 return self.massageNCXText(self.tag_to_string(p,use_alt=False))
 return None
-article.author = extract_author(soup)
-article.summary = article.text_summary = extract_description(soup)
+if not article.author:
+    article.author = extract_author(soup)
+if not article.summary:
+    article.summary = article.text_summary = extract_description(soup)
 def strip_anchors(self,soup):
 paras = soup.findAll(True)

View File

@ -683,13 +683,15 @@ class BasicNewsRecipe(Recipe):
 base.extract()
 ans = self.postprocess_html(soup, first_fetch)
-try:
-    article = self.feed_objects[f].articles[a]
-except:
-    self.log.exception('Failed to get article object for postprocessing')
-    pass
-else:
-    self.populate_article_metadata(article, ans, first_fetch)
+if job_info:
+    url, f, a, feed_len = job_info
+    try:
+        article = self.feed_objects[f].articles[a]
+    except:
+        self.log.exception('Failed to get article object for postprocessing')
+        pass
+    else:
+        self.populate_article_metadata(article, ans, first_fetch)
 return ans