From a4e398d73cb9d5ef098af5cf857a4c6efac0128a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 5 Jul 2010 19:06:13 -0600 Subject: [PATCH] News download: Fix regression that led to spurious error messages when downloading multipage articles --- resources/recipes/nytimes_sub.recipe | 8 +++++--- src/calibre/web/feeds/news.py | 16 +++++++++------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe index f4101ca299..1814132667 100644 --- a/resources/recipes/nytimes_sub.recipe +++ b/resources/recipes/nytimes_sub.recipe @@ -336,7 +336,7 @@ class NYTimes(BasicNewsRecipe): self.log(">>> No class:'columnGroup first' found <<<") # Change class="kicker" to <h3>
kicker = soup.find(True, {'class':'kicker'}) - if kicker and kicker.contents[0]: + if kicker and kicker.contents and kicker.contents[0]: h3Tag = Tag(soup, "h3") h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker, use_alt=False))) @@ -460,8 +460,10 @@ class NYTimes(BasicNewsRecipe): return self.massageNCXText(self.tag_to_string(p,use_alt=False)) return None - article.author = extract_author(soup) - article.summary = article.text_summary = extract_description(soup) + if not article.author: + article.author = extract_author(soup) + if not article.summary: + article.summary = article.text_summary = extract_description(soup) def strip_anchors(self,soup): paras = soup.findAll(True) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index b1af210011..a5478f96a9 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -683,13 +683,15 @@ class BasicNewsRecipe(Recipe): base.extract() ans = self.postprocess_html(soup, first_fetch) - try: - article = self.feed_objects[f].articles[a] - except: - self.log.exception('Failed to get article object for postprocessing') - pass - else: - self.populate_article_metadata(article, ans, first_fetch) + if job_info: + url, f, a, feed_len = job_info + try: + article = self.feed_objects[f].articles[a] + except: + self.log.exception('Failed to get article object for postprocessing') + pass + else: + self.populate_article_metadata(article, ans, first_fetch) return ans