From 3a12b18dc353a7256d30c55267af94f035a97338 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 2 Jun 2010 10:46:53 -0600
Subject: [PATCH] NYTimes recipes: rebuild the ad-skip link as an absolute ?pagewanted=all URL

---
 resources/recipes/nytimes.recipe     |  8 ++++++--
 resources/recipes/nytimes_sub.recipe | 16 ++++++----------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/resources/recipes/nytimes.recipe b/resources/recipes/nytimes.recipe
index 33758e8c47..eba717027e 100644
--- a/resources/recipes/nytimes.recipe
+++ b/resources/recipes/nytimes.recipe
@@ -391,10 +391,14 @@ class NYTimes(BasicNewsRecipe):
         return ans
 
     def preprocess_html(self, soup):
-        # Skip ad pages before actual article
+        # An ad page may be served before the article; follow its 'skip' link
         skip_tag = soup.find(True, {'name':'skip'})
         if skip_tag is not None:
-            soup = self.index_to_soup(skip_tag.parent['href'])
+            href = skip_tag.parent['href']  # assumed site-relative -- TODO confirm
+            self.log.warn("Found forwarding link: %s" % href)  # expected event, not an error
+            url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', href) + '?pagewanted=all'
+            self.log.warn("Skipping ad to article at '%s'" % url)
+            soup = self.index_to_soup(url)  # continue with the refetched page
         return self.strip_anchors(soup)
 
     def postprocess_html(self,soup, True):
diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe
index 79c0d49223..c08b06572d 100644
--- a/resources/recipes/nytimes_sub.recipe
+++ b/resources/recipes/nytimes_sub.recipe
@@ -280,18 +280,14 @@ class NYTimes(BasicNewsRecipe):
         return ans
 
     def preprocess_html(self, soup):
-        '''
-        refresh = soup.find('meta', {'http-equiv':'refresh'})
-        if refresh is None:
-            return soup
-        content = refresh.get('content').partition('=')[2]
-        raw = self.browser.open('http://www.nytimes.com'+content).read()
-        return BeautifulSoup(raw.decode('cp1252', 'replace'))
-        '''
-        # Skip ad pages before actual article
+        # An ad page may be served before the article; follow its 'skip' link
         skip_tag = soup.find(True, {'name':'skip'})
         if skip_tag is not None:
-            soup = self.index_to_soup(skip_tag.parent['href'])
+            href = skip_tag.parent['href']  # assumed site-relative -- TODO confirm
+            self.log.warn("Found forwarding link: %s" % href)  # expected event, not an error
+            url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', href) + '?pagewanted=all'
+            self.log.warn("Skipping ad to article at '%s'" % url)
+            soup = self.index_to_soup(url)  # continue with the refetched page
         return self.strip_anchors(soup)
 
     def postprocess_html(self,soup, True):