From c83e888bb9b14bdd93288e5e420ffa026bfa8f28 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 28 May 2010 12:33:54 -0600 Subject: [PATCH] Fix NYTimes recipe to skip ads --- resources/recipes/nytimes_sub.recipe | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe index a3ef2555f4..86bb3409f2 100644 --- a/resources/recipes/nytimes_sub.recipe +++ b/resources/recipes/nytimes_sub.recipe @@ -82,6 +82,7 @@ class NYTimes(BasicNewsRecipe): 'articleExtras', 'articleInline', 'blog_sidebar', + 'businessSearchBar', 'cCol', 'entertainmentSearchBar', 'footer', @@ -286,9 +287,14 @@ class NYTimes(BasicNewsRecipe): raw = self.browser.open('http://www.nytimes.com'+content).read() return BeautifulSoup(raw.decode('cp1252', 'replace')) ''' + # Skip ad pages before actual article + skip_tag = soup.find(True, {'name':'skip'}) + if skip_tag is not None: + soup = self.index_to_soup(skip_tag.parent['href']) return self.strip_anchors(soup) def postprocess_html(self,soup, True): + print "\npostprocess_html()\n" if self.one_picture_per_article: # Remove all images after first @@ -411,6 +417,7 @@ class NYTimes(BasicNewsRecipe): return soup def postprocess_book(self, oeb, opts, log) : + print "\npostprocess_book()\n" def extract_byline(href) : #