Update NY Times some more

This commit is contained in:
Kovid Goyal 2014-01-12 19:29:09 +05:30
parent a60a80d125
commit 2d8dfc3a28
2 changed files with 181 additions and 190 deletions

View File

@ -33,7 +33,6 @@ class NYTimes(BasicNewsRecipe):
# and 30 will get the most popular measured over 30 days.
# you still only get up to 20 articles in each category
# set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
headlinesOnly = True
@ -82,6 +81,7 @@ class NYTimes(BasicNewsRecipe):
# The maximum number of articles that will be downloaded
max_articles_per_feed = 100
use_embedded_content = False
# Whether to omit duplicates of articles (typically arising when articles are indexed in
# more than one section). If True, only the first occurrence will be downloaded.
@ -122,7 +122,6 @@ class NYTimes(BasicNewsRecipe):
(u'Tech - Open', u'http://open.blogs.nytimes.com/feed/')
]
if headlinesOnly:
title='New York Times Headlines'
description = 'Headlines from the New York Times'
@ -168,8 +167,7 @@ class NYTimes(BasicNewsRecipe):
cover_margins = (18,18,'grey99')
remove_tags_before = dict(id='article')
remove_tags_after = dict(id='article')
keep_only_tags = dict(id=['article', 'story', 'content'])
remove_tags = [
dict(attrs={'class':[
'articleFooter',
@ -184,6 +182,7 @@ class NYTimes(BasicNewsRecipe):
'entry-response module',
'leftNavTabs',
'metaFootnote',
'inside-story',
'module box nav',
'nextArticleLink',
'nextArticleLink clearfix',
@ -223,6 +222,7 @@ class NYTimes(BasicNewsRecipe):
'credit'
]}),
dict(attrs={'class':lambda x: x and 'related-coverage-marginalia' in x.split()}),
dict(attrs={'class':lambda x: x and 'interactive' in x.split()}),
dict(name='div', attrs={'class':re.compile('toolsList')}), # bits
dict(name='div', attrs={'class':re.compile('postNavigation')}), # bits
dict(name='div', attrs={'class':'tweet'}),
@ -271,11 +271,13 @@ class NYTimes(BasicNewsRecipe):
'related-content', # added for DealBook
'whats-next',
]),
dict(name=['script', 'noscript', 'style','form','hr', 'button', 'meta'])]
dict(name=['script', 'noscript', 'style','form','hr', 'button', 'meta', 'footer'])]
no_stylesheets = True
extra_css = '''
.articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
.credit { font-weight: normal; text-align: right; font-size: 50%; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.credit { font-weight: normal; text-align: right; font-size:
50%; line-height:1em; margin-top:5px; margin-left:0;
margin-right:0; margin-bottom: 0; }
.byline { text-align: left; font-size: 50%; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
.dateline { text-align: left; font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.kicker { font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
@ -291,7 +293,6 @@ class NYTimes(BasicNewsRecipe):
.asidenote {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;font-weight:bold;}
.source {text-align: left; font-size: x-small; }'''
articles = {}
key = None
ans = []
@ -412,7 +413,6 @@ class NYTimes(BasicNewsRecipe):
def short_title(self):
return self.title
def article_to_soup(self, url_or_raw, raw=False):
from contextlib import closing
import copy
@ -446,7 +446,6 @@ class NYTimes(BasicNewsRecipe):
usrc = self.preprocess_raw_html(usrc, url_or_raw)
return BeautifulSoup(usrc, markupMassage=nmassage)
def massageNCXText(self, description):
# Kindle TOC descriptions won't render certain characters
if description:
@ -501,7 +500,7 @@ class NYTimes(BasicNewsRecipe):
if authorAttribution:
author = self.tag_to_string(authorAttribution, use_alt=False)
feed = self.key if self.key is not None else 'Uncategorized'
if not self.articles.has_key(feed):
if feed not in self.articles:
self.ans.append(feed)
self.articles[feed] = []
self.articles[feed].append(
@ -536,7 +535,6 @@ class NYTimes(BasicNewsRecipe):
desc = ''
return(title,url,author,desc)
have_emailed = False
emailed_soup = self.index_to_soup('http://www.nytimes.com/most-popular-emailed?period='+self.popularPeriod)
for h3tag in emailed_soup.findAll('h3'):
@ -565,7 +563,7 @@ class NYTimes(BasicNewsRecipe):
dict(title=title, url=url, date=strftime('%a, %d %b'),
description=desc, author=author,
content=''))
viewed_ans = [(k, popular_articles[k]) for k in key_list if popular_articles.has_key(k)]
viewed_ans = [(k, popular_articles[k]) for k in key_list if k in popular_articles]
for x in viewed_ans:
ans.append(x)
return ans
@ -588,10 +586,10 @@ class NYTimes(BasicNewsRecipe):
tech_articles[f.title] = []
for a in f.articles:
tech_articles[f.title].append(
dict(title=a.title, url=a.url, date=a.date,
dict(title=a.title, url=a.url.partition('?')[0], date=a.date,
description=a.summary, author=a.author,
content=a.content))
tech_ans = [(k, tech_articles[k]) for k in key_list if tech_articles.has_key(k)]
tech_ans = [(k, tech_articles[k]) for k in key_list if k in tech_articles]
for x in tech_ans:
ans.append(x)
return ans
@ -630,10 +628,9 @@ class NYTimes(BasicNewsRecipe):
for lidiv in div.findAll('li'):
self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
self.ans = [(k, self.articles[k]) for k in self.ans if k in self.articles]
return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
def parse_todays_index(self):
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
@ -663,7 +660,7 @@ class NYTimes(BasicNewsRecipe):
if not skipping:
self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
self.ans = [(k, self.articles[k]) for k in self.ans if k in self.articles]
return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
def parse_headline_index(self):
@ -709,13 +706,13 @@ class NYTimes(BasicNewsRecipe):
description = self.tag_to_string(desc,use_alt=False)
else:
description = ''
if not self.articles.has_key(section_name):
if section_name not in self.articles:
self.ans.append(section_name)
self.articles[section_name] = []
print('Title '+title+' author '+author)
self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
self.ans = [(k, self.articles[k]) for k in self.ans if k in self.articles]
return self.filter_ans(self.ans)
def parse_index(self):
@ -735,7 +732,7 @@ class NYTimes(BasicNewsRecipe):
if kill_all or (self.recursions==0):
a.replaceWith(self.tag_to_string(a,False))
else:
if a.has_key('href'):
if 'href' in a:
if a['href'].startswith('http://www.nytimes'):
if not a['href'].endswith('pagewanted=all'):
url = re.sub(r'\?.*', '', a['href'])
@ -743,13 +740,13 @@ class NYTimes(BasicNewsRecipe):
a.replaceWith(self.tag_to_string(a,False))
else:
a['href'] = url+'?pagewanted=all'
elif not (a['href'].startswith('http://pogue') or \
a['href'].startswith('http://bits') or \
a['href'].startswith('http://travel') or \
a['href'].startswith('http://business') or \
a['href'].startswith('http://tech') or \
a['href'].startswith('http://health') or \
a['href'].startswith('http://dealbook') or \
elif not (a['href'].startswith('http://pogue') or
a['href'].startswith('http://bits') or
a['href'].startswith('http://travel') or
a['href'].startswith('http://business') or
a['href'].startswith('http://tech') or
a['href'].startswith('http://health') or
a['href'].startswith('http://dealbook') or
a['href'].startswith('http://open')):
a.replaceWith(self.tag_to_string(a,False))
return soup
@ -764,7 +761,7 @@ class NYTimes(BasicNewsRecipe):
return None
## print("HANDLING AD FORWARD:")
## print(soup)
# print(soup)
if self.keep_only_tags:
body = Tag(soup, 'body')
try:
@ -802,7 +799,6 @@ class NYTimes(BasicNewsRecipe):
return soup
def preprocess_html(self, soup):
#print(strftime("%H:%M:%S")+" -- PREPROCESS TITLE="+self.tag_to_string(soup.title))
skip_tag = soup.find(True, {'name':'skip'})
@ -864,9 +860,9 @@ class NYTimes(BasicNewsRecipe):
img = atag.find('img')
if img is not None:
atag.replaceWith(img)
elif not atag.has_key('href'):
elif 'href' not in atag:
atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
elif not (atag['href'].startswith('http://www.nytimes') or atag['href'].startswith('http://pogue') or \
elif not (atag['href'].startswith('http://www.nytimes') or atag['href'].startswith('http://pogue') or
atag['href'].startswith('http://bits') or atag['href'].startswith('http://open')):
atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
hdr = soup.find('address')
@ -956,8 +952,10 @@ class NYTimes(BasicNewsRecipe):
year = str(st.tm_year)
month = "%.2d" % st.tm_mon
day = "%.2d" % st.tm_mday
imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/') + len('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/')
highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/' + popuphtml[imgstartpos:popuphtml.find('.jpg',imgstartpos)+4]
imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/') + \
len('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/')
highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' + \
month +'/' + day +'/' + popuphtml[imgstartpos:popuphtml.find('.jpg',imgstartpos)+4]
popupSoup = BeautifulSoup(popuphtml)
highResTag = popupSoup.find('img', {'src':highResImageLink})
if highResTag:
@ -987,7 +985,6 @@ class NYTimes(BasicNewsRecipe):
except:
self.log("Error removing Enlarge this text")
return self.strip_anchors(soup,False)
def postprocess_html(self,soup,first_fetch):
@ -1111,7 +1108,6 @@ class NYTimes(BasicNewsRecipe):
except:
self.log("ERROR: fixing credit format")
try:
# Change <h1> to <h3> - used in editorial blogs
masthead = soup.find("h1")
@ -1205,4 +1201,3 @@ class NYTimes(BasicNewsRecipe):
self.log("Error creating article descriptions")
return

View File

@ -33,7 +33,6 @@ class NYTimes(BasicNewsRecipe):
# and 30 will get the most popular measured over 30 days.
# you still only get up to 20 articles in each category
# set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
headlinesOnly = False
@ -82,6 +81,7 @@ class NYTimes(BasicNewsRecipe):
# The maximum number of articles that will be downloaded
max_articles_per_feed = 100
use_embedded_content = False
# Whether to omit duplicates of articles (typically arising when articles are indexed in
# more than one section). If True, only the first occurrence will be downloaded.
@ -122,7 +122,6 @@ class NYTimes(BasicNewsRecipe):
(u'Tech - Open', u'http://open.blogs.nytimes.com/feed/')
]
if headlinesOnly:
title='New York Times Headlines'
description = 'Headlines from the New York Times'
@ -168,8 +167,7 @@ class NYTimes(BasicNewsRecipe):
cover_margins = (18,18,'grey99')
remove_tags_before = dict(id='article')
remove_tags_after = dict(id='article')
keep_only_tags = dict(id=['article', 'story', 'content'])
remove_tags = [
dict(attrs={'class':[
'articleFooter',
@ -184,6 +182,7 @@ class NYTimes(BasicNewsRecipe):
'entry-response module',
'leftNavTabs',
'metaFootnote',
'inside-story',
'module box nav',
'nextArticleLink',
'nextArticleLink clearfix',
@ -223,6 +222,7 @@ class NYTimes(BasicNewsRecipe):
'credit'
]}),
dict(attrs={'class':lambda x: x and 'related-coverage-marginalia' in x.split()}),
dict(attrs={'class':lambda x: x and 'interactive' in x.split()}),
dict(name='div', attrs={'class':re.compile('toolsList')}), # bits
dict(name='div', attrs={'class':re.compile('postNavigation')}), # bits
dict(name='div', attrs={'class':'tweet'}),
@ -254,6 +254,7 @@ class NYTimes(BasicNewsRecipe):
'masthead-nav',
'memberTools',
'navigation', 'navigation-ghost', 'navigation-modal', 'navigation-edge',
'page-footer',
'portfolioInline',
'readerReviews',
'readerReviewsCount',
@ -270,11 +271,13 @@ class NYTimes(BasicNewsRecipe):
'related-content', # added for DealBook
'whats-next',
]),
dict(name=['script', 'noscript', 'style','form','hr', 'button', 'meta'])]
dict(name=['script', 'noscript', 'style','form','hr', 'button', 'meta', 'footer'])]
no_stylesheets = True
extra_css = '''
.articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
.credit { font-weight: normal; text-align: right; font-size: 50%; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.credit { font-weight: normal; text-align: right; font-size:
50%; line-height:1em; margin-top:5px; margin-left:0;
margin-right:0; margin-bottom: 0; }
.byline { text-align: left; font-size: 50%; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
.dateline { text-align: left; font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.kicker { font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
@ -290,7 +293,6 @@ class NYTimes(BasicNewsRecipe):
.asidenote {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;font-weight:bold;}
.source {text-align: left; font-size: x-small; }'''
articles = {}
key = None
ans = []
@ -419,7 +421,6 @@ class NYTimes(BasicNewsRecipe):
def short_title(self):
return self.title
def article_to_soup(self, url_or_raw, raw=False):
from contextlib import closing
import copy
@ -453,7 +454,6 @@ class NYTimes(BasicNewsRecipe):
usrc = self.preprocess_raw_html(usrc, url_or_raw)
return BeautifulSoup(usrc, markupMassage=nmassage)
def massageNCXText(self, description):
# Kindle TOC descriptions won't render certain characters
if description:
@ -508,7 +508,7 @@ class NYTimes(BasicNewsRecipe):
if authorAttribution:
author = self.tag_to_string(authorAttribution, use_alt=False)
feed = self.key if self.key is not None else 'Uncategorized'
if not self.articles.has_key(feed):
if feed not in self.articles:
self.ans.append(feed)
self.articles[feed] = []
self.articles[feed].append(
@ -543,7 +543,6 @@ class NYTimes(BasicNewsRecipe):
desc = ''
return(title,url,author,desc)
have_emailed = False
emailed_soup = self.index_to_soup('http://www.nytimes.com/most-popular-emailed?period='+self.popularPeriod)
for h3tag in emailed_soup.findAll('h3'):
@ -572,7 +571,7 @@ class NYTimes(BasicNewsRecipe):
dict(title=title, url=url, date=strftime('%a, %d %b'),
description=desc, author=author,
content=''))
viewed_ans = [(k, popular_articles[k]) for k in key_list if popular_articles.has_key(k)]
viewed_ans = [(k, popular_articles[k]) for k in key_list if k in popular_articles]
for x in viewed_ans:
ans.append(x)
return ans
@ -595,10 +594,10 @@ class NYTimes(BasicNewsRecipe):
tech_articles[f.title] = []
for a in f.articles:
tech_articles[f.title].append(
dict(title=a.title, url=a.url, date=a.date,
dict(title=a.title, url=a.url.partition('?')[0], date=a.date,
description=a.summary, author=a.author,
content=a.content))
tech_ans = [(k, tech_articles[k]) for k in key_list if tech_articles.has_key(k)]
tech_ans = [(k, tech_articles[k]) for k in key_list if k in tech_articles]
for x in tech_ans:
ans.append(x)
return ans
@ -637,10 +636,9 @@ class NYTimes(BasicNewsRecipe):
for lidiv in div.findAll('li'):
self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
self.ans = [(k, self.articles[k]) for k in self.ans if k in self.articles]
return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
def parse_todays_index(self):
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
@ -670,7 +668,7 @@ class NYTimes(BasicNewsRecipe):
if not skipping:
self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
self.ans = [(k, self.articles[k]) for k in self.ans if k in self.articles]
return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
def parse_headline_index(self):
@ -716,13 +714,13 @@ class NYTimes(BasicNewsRecipe):
description = self.tag_to_string(desc,use_alt=False)
else:
description = ''
if not self.articles.has_key(section_name):
if section_name not in self.articles:
self.ans.append(section_name)
self.articles[section_name] = []
print('Title '+title+' author '+author)
self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
self.ans = [(k, self.articles[k]) for k in self.ans if k in self.articles]
return self.filter_ans(self.ans)
def parse_index(self):
@ -742,7 +740,7 @@ class NYTimes(BasicNewsRecipe):
if kill_all or (self.recursions==0):
a.replaceWith(self.tag_to_string(a,False))
else:
if a.has_key('href'):
if 'href' in a:
if a['href'].startswith('http://www.nytimes'):
if not a['href'].endswith('pagewanted=all'):
url = re.sub(r'\?.*', '', a['href'])
@ -750,13 +748,13 @@ class NYTimes(BasicNewsRecipe):
a.replaceWith(self.tag_to_string(a,False))
else:
a['href'] = url+'?pagewanted=all'
elif not (a['href'].startswith('http://pogue') or \
a['href'].startswith('http://bits') or \
a['href'].startswith('http://travel') or \
a['href'].startswith('http://business') or \
a['href'].startswith('http://tech') or \
a['href'].startswith('http://health') or \
a['href'].startswith('http://dealbook') or \
elif not (a['href'].startswith('http://pogue') or
a['href'].startswith('http://bits') or
a['href'].startswith('http://travel') or
a['href'].startswith('http://business') or
a['href'].startswith('http://tech') or
a['href'].startswith('http://health') or
a['href'].startswith('http://dealbook') or
a['href'].startswith('http://open')):
a.replaceWith(self.tag_to_string(a,False))
return soup
@ -771,7 +769,7 @@ class NYTimes(BasicNewsRecipe):
return None
## print("HANDLING AD FORWARD:")
## print(soup)
# print(soup)
if self.keep_only_tags:
body = Tag(soup, 'body')
try:
@ -809,7 +807,6 @@ class NYTimes(BasicNewsRecipe):
return soup
def preprocess_html(self, soup):
#print(strftime("%H:%M:%S")+" -- PREPROCESS TITLE="+self.tag_to_string(soup.title))
skip_tag = soup.find(True, {'name':'skip'})
@ -871,9 +868,9 @@ class NYTimes(BasicNewsRecipe):
img = atag.find('img')
if img is not None:
atag.replaceWith(img)
elif not atag.has_key('href'):
elif 'href' not in atag:
atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
elif not (atag['href'].startswith('http://www.nytimes') or atag['href'].startswith('http://pogue') or \
elif not (atag['href'].startswith('http://www.nytimes') or atag['href'].startswith('http://pogue') or
atag['href'].startswith('http://bits') or atag['href'].startswith('http://open')):
atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
hdr = soup.find('address')
@ -963,8 +960,10 @@ class NYTimes(BasicNewsRecipe):
year = str(st.tm_year)
month = "%.2d" % st.tm_mon
day = "%.2d" % st.tm_mday
imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/') + len('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/')
highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/' + popuphtml[imgstartpos:popuphtml.find('.jpg',imgstartpos)+4]
imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/') + \
len('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/')
highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' + \
month +'/' + day +'/' + popuphtml[imgstartpos:popuphtml.find('.jpg',imgstartpos)+4]
popupSoup = BeautifulSoup(popuphtml)
highResTag = popupSoup.find('img', {'src':highResImageLink})
if highResTag:
@ -994,7 +993,6 @@ class NYTimes(BasicNewsRecipe):
except:
self.log("Error removing Enlarge this text")
return self.strip_anchors(soup,False)
def postprocess_html(self,soup,first_fetch):
@ -1118,7 +1116,6 @@ class NYTimes(BasicNewsRecipe):
except:
self.log("ERROR: fixing credit format")
try:
# Change <h1> to <h3> - used in editorial blogs
masthead = soup.find("h1")
@ -1212,4 +1209,3 @@ class NYTimes(BasicNewsRecipe):
self.log("Error creating article descriptions")
return