diff --git a/recipes/nytimes.recipe b/recipes/nytimes.recipe index 6afa4318d7..bdf61bc15f 100644 --- a/recipes/nytimes.recipe +++ b/recipes/nytimes.recipe @@ -1,1289 +1,170 @@ -#!/usr/bin/env python2 -# -*- coding: utf-8 -*- -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' -''' -nytimes.com -''' -import re -import string -import time -from calibre import strftime -from datetime import timedelta, date -from time import sleep -from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2018, Kovid Goyal + +from __future__ import absolute_import, division, print_function, unicode_literals + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.utils.date import strptime + +is_web_edition = True +# The sections to download when downloading the web edition, comment out +# the section you are not interested in +web_sections = [ + ('World', 'world'), + ('U.S.', 'us'), + ('Politics', 'politics'), + ('New York', 'nyregion'), + ('Business', 'business'), + ('Technology', 'technology'), + ('Sports', 'sports'), + ('Science', 'science'), + ('Health', 'health'), + ('Opinion', 'opinion'), + ('Arts', 'arts'), + ('Books', 'books'), + ('Movies', 'movies'), + ('Music', 'arts/music'), + ('Television', 'arts/television'), + ('Style', 'style'), + ('Dining & Wine', 'dining'), + ('Fashion & Style', 'fashion'), + ('Home & Garden', 'garden'), + ('Travel', 'travel'), + ('Education', 'education'), + ('Multimedia', 'multimedia'), + ('Obituaries', 'obituaries'), + ('Sunday Magazine', 'magazine') +] -class NYTimes(BasicNewsRecipe): +def classes(classes): + q = frozenset(classes.split(' ')) + return dict(attrs={ + 'class': lambda x: x and frozenset(x.split()).intersection(q)}) - recursions = 1 # set this to zero to omit Related articles lists - # speeds up processing by preventing index page links from being followed - match_regexps = [r'/[12][0-9][0-9][0-9]/[0-9]+/'] - # set getTechBlogs to True to include the technology blogs - # set tech_oldest_article to control article age - # set tech_max_articles_per_feed to control article count - getTechBlogs = True - remove_empty_feeds = True - tech_oldest_article = 14 - tech_max_articles_per_feed = 25 +class NewYorkTimes(BasicNewsRecipe): - # set getPopularArticles to False if you don't want the Most E-mailed and Most Viewed articles - # otherwise you will get up to 20 of the most popular e-mailed and viewed articles (in each category) - # This is currently disabled because the NYT is changing this functionality - # on their website to a new "Trending" page - getPopularArticles = False - popularPeriod = '1' # set this to the number of days to include in the measurement - # e.g. 7 will get the most popular measured over the last 7 days - # and 30 will get the most popular measured over 30 days. - # you still only get up to 20 articles in each category - - # set headlinesOnly to True for the headlines-only version. If True, - # webEdition is ignored. - headlinesOnly = True - - # set webEdition to True for the Web edition of the newspaper. Set oldest_web_article to the - # number of days old an article can be for inclusion. If oldest_web_article = None all articles - # will be included. 
Note: oldest_web_article is ignored if webEdition = - # False - webEdition = False - oldest_web_article = None - - # download higher resolution images than the small thumbnails typically included in the article - # the down side of having large beautiful images is the file size is much - # larger, on the order of 7MB per paper - useHighResImages = True + title = 'The New York Times' + if is_web_edition: + description = 'New York Times (Web). You can edit the recipe to remove sections you are not interested in.' + else: + description = 'Today\'s New York Times' + encoding = 'utf-8' + __author__ = 'Kovid Goyal' + language = 'en' + ignore_duplicate_articles = {'title', 'url'} + no_stylesheets = True compress_news_images = True compress_news_images_auto_size = 5 - # replace paid Kindle Version: the name will be changed to "The New York Times" to cause - # previous paid versions of the new york times to best sent to the back - # issues folder on the kindle - replaceKindleVersion = False - - # includeSections: List of sections to include. If empty, all sections found will be included. - # Otherwise, only the sections named will be included. For example, - # - # includeSections = ['Politics','Sports'] - # - # would cause only the Politics and Sports sections to be included. - - includeSections = [] # by default, all sections included - - # excludeSections: List of sections to exclude. If empty, all sections found will be included. - # Otherwise, the sections named will be excluded. For example, - # - # excludeSections = ['Politics','Sports'] - # - # would cause the Politics and Sports sections to be excluded. This parameter can be used - # in conjuction with includeSections although in most cases using one or the other, but - # not both, is sufficient. - - excludeSections = [] - - # one_picture_per_article specifies that calibre should only use the first image - # from an article (if one exists). If one_picture_per_article = True, the image - # will be moved to a location between the headline and the byline. - # If one_picture_per_article = False, all images from the article will be included - # and shown in their original location. - one_picture_per_article = False - - # The maximum number of articles that will be downloaded - max_articles_per_feed = 100 - use_embedded_content = False - - # Whether to omit duplicates of articles (typically arsing when articles are indexed in - # more than one section). If True, only the first occurance will be - # downloaded. - filterDuplicates = True - - # Sections to collect for the Web edition. 
- # Delete any you don't want, or use includeSections or excludeSections - web_sections = [(u'World', u'world'), - (u'U.S.', u'national'), - (u'Politics', u'politics'), - (u'New York', u'nyregion'), - (u'Business', 'business'), - (u'Technology', u'technology'), - (u'Sports', u'sports'), - (u'Science', u'science'), - (u'Health', u'health'), - (u'Opinion', u'opinion'), - (u'Arts', u'arts'), - (u'Books', u'books'), - (u'Movies', u'movies'), - (u'Music', u'arts/music'), - (u'Television', u'arts/television'), - (u'Style', u'style'), - (u'Dining & Wine', u'dining'), - (u'Fashion & Style', u'fashion'), - (u'Home & Garden', u'garden'), - (u'Travel', u'travel'), - ('Education', u'education'), - ('Multimedia', u'multimedia'), - (u'Obituaries', u'obituaries'), - (u'Sunday Magazine', u'magazine') - ] - - tech_feeds = [ - (u'Tech - News', u'http://pogue.blogs.nytimes.com/feed/'), - (u'Tech - Gadgetwise', u'http://gadgetwise.blogs.nytimes.com/feed/'), - (u'Tech - Open', u'http://open.blogs.nytimes.com/feed/') + keep_only_tags = [ + dict(id='story-header'), + classes('story-body-supplemental story-interrupter'), + ] + remove_tags = [ + dict(attrs={'aria-label':'tools'.split()}), + dict(attrs={'data-videoid':True}), + dict(name='button'), + dict(id=lambda x: x and x.startswith('story-ad-')), + dict(name='a', href=lambda x: x and '#story-continues-' in x), + dict(name='a', href=lambda x: x and '#whats-next' in x), + dict(id=lambda x: x and 'sharetools-' in x), + dict(id='newsletter-promo'.split()), ] - if headlinesOnly: - title = 'New York Times Headlines' - description = 'Headlines from the New York Times' - needs_subscription = False - elif webEdition: - title = 'New York Times (Web)' - description = 'New York Times on the Web' - needs_subscription = False - elif replaceKindleVersion: - title = 'The New York Times' - description = 'Today\'s New York Times' - needs_subscription = False - else: - title = 'New York Times' - description = 'Today\'s New York Times' - needs_subscription = False + def read_nyt_metadata(self): + INDEX = 'https://www.nytimes.com/section/todayspaper' + # INDEX = 'file:///t/raw.html' + soup = self.index_to_soup(INDEX) + pdate = soup.find('meta', attrs={'name':'pdate', 'content': True})['content'] + date = strptime(pdate, '%Y%m%d', assume_utc=False, as_utc=False) + self.cover_url = 'https://static01.nyt.com/images/{}/nytfrontpage/scan.jpg'.format(date.strftime('%Y/%m/%d')) + self.timefmt = date.strftime(' [%d %b, %Y]') + return soup - def decode_url_date(self, url): - urlitems = url.split('/') - try: - d = date(int(urlitems[3]), int(urlitems[4]), int(urlitems[5])) - except: - try: - d = date(int(urlitems[4]), int(urlitems[5]), int(urlitems[6])) - except: - return None - return d + def parse_todays_sections(self, container): + for h2 in container.findAll('h2', **classes('headline')): + title = self.tag_to_string(h2) + a = h2.find('a', href=True) + url = a['href'] + if '?' 
in url: + url = url.split('?')[0] + p = h2.findParent(**classes('story-body')) + desc = '' + if p is not None: + s = p.find(**classes('summary')) + if s is not None: + desc = self.tag_to_string(s) + self.log('\t', title, ': ', url) + self.log('\t\t', desc) + yield {'title': title, 'url': url, 'description': desc} - if oldest_web_article is None: - earliest_date = date.today() - else: - earliest_date = date.today() - timedelta(days=oldest_web_article) - oldest_article = 365 # by default, a long time ago - - __author__ = 'GRiker/Kovid Goyal/Nick Redding' - language = 'en' - requires_version = (0, 7, 5) - encoding = 'utf-8' - - timefmt = '' - - # simultaneous_downloads = 1 # no longer required to deal with ads - - cover_margins = (18, 18, 'grey99') - - keep_only_tags = dict(id=['article', 'story', 'content']) - remove_tags = [ - dict(attrs={'class': [ - 'articleFooter', - 'articleTools', - 'rfd', 'story-footer-links', 'page-footer', - 'columnGroup singleRule', - 'columnGroup last', - 'columnGroup last', - 'doubleRule', - 'dottedLine', - 'entry-meta', - 'entry-response module', - 'leftNavTabs', - 'metaFootnote', - 'inside-story', - 'module box nav', - 'nextArticleLink', - 'nextArticleLink clearfix', - 'post-tools', - 'relatedSearchesModule', - 'side_tool', - 'singleAd', - 'postCategory column', - 'refer tagRefer', # added for bits blog post - 'entry entry-utility', # added for DealBook - 'entry-tags', # added for DealBook - 'footer promos clearfix', # added for DealBook - 'footer links clearfix', # added for DealBook - 'tabsContainer', # added for other blog downloads - 'column lastColumn', # added for other blog downloads - 'pageHeaderWithLabel', # added for other gadgetwise downloads - 'column two', # added for other blog downloads - 'column two last', # added for other blog downloads - 'column three', # added for other blog downloads - 'column three last', # added for other blog downloads - 'column four', # added for other blog downloads - 'column four last', # added for other blog downloads - 'column last', # added for other blog downloads - 'entry entry-related', - 'subNavigation tabContent active', # caucus blog navigation - 'mediaOverlay slideshow', - 'wideThumb', - 'video', # added 02-11-2011 - 'videoHeader', # added 02-11-2011 - 'articleInlineVideoHolder', # added 02-11-2011 - 'assetCompanionAd', - 'nytint-sectionHeader', - re.compile('^subNavigation'), - re.compile('^leaderboard'), - re.compile('^module'), - re.compile('commentCount'), - 'lede-container', - 'credit', - 'caption-video', - 'upshot-social' - ]}), - dict( - attrs={'class': lambda x: x and 'related-coverage-marginalia' in x.split()}), - dict(attrs={'class': lambda x: x and 'hidden' in x.split()}), - dict(attrs={'class': lambda x: x and 'interactive' in x.split()}), - dict(attrs={'class': lambda x: x and 'SectionBarShare' in x.split('-')}), - dict(attrs={'class': lambda x: x and 'ResponsiveAd' in x.split('-')}), - dict(attrs={'class': lambda x: x and 'skip-to-text-link' in x.split()}), - dict(attrs={'class': lambda x: x and 'sharetools' in x.split()}), - dict(attrs={'class': lambda x: x and 'ad' in x.split()}), - dict(attrs={'class': lambda x: x and 'video' in x.split()}), - dict(attrs={'class': lambda x: x and 'visually-hidden' in x.split()}), - dict(name='div', attrs={'class': re.compile('toolsList')}), # bits - dict(name='div', attrs={ - 'class': re.compile('postNavigation')}), # bits - dict(name='div', attrs={'class': 'tweet'}), - dict(name='span', attrs={'class': 'commentCount meta'}), - dict(name='div', attrs={'id': 
'header'}), - # bits, pogue, gadgetwise, open - dict(name='div', attrs={'id': re.compile('commentsContainer')}), - # pogue, gadgetwise - dict(name='ul', attrs={'class': re.compile('entry-tools')}), - # pogue, gadgetwise - dict(name='div', attrs={'class': re.compile('nocontent')}), - dict(name='div', attrs={'id': re.compile('respond')}), # open - dict(name='div', attrs={'class': re.compile('entry-tags')}), # pogue - dict(name='h4', attrs={'class': 'headline'}), - dict(id=[ - 'adxLeaderboard', - 'pagelinks', - 'adxSponLink', - 'anchoredAd_module', - 'anchoredAd_spot', - 'archive', - 'articleExtras', - 'articleInline', - 'blog_sidebar', - 'businessSearchBar', - 'cCol', - 'entertainmentSearchBar', - 'footer', - 'header', - 'header_search', - 'inlineBox', - 'login', - 'masthead', - 'masthead-nav', - 'masthead-social', - 'memberTools', - 'navigation', 'navigation-ghost', 'navigation-modal', 'navigation-edge', - 'page-footer', - 'portfolioInline', - 'readerReviews', - 'readerReviewsCount', - 'relatedArticles', - 'relatedTopics', - 'respond', - 'ribbon', - 'side_search', - 'side_index', - 'side_tool', - 'toolsRight', - 'skybox', # added for DealBook - 'TopAd', # added for DealBook - 'related-content', # added for DealBook - 'whats-next', - 'newsletter-promo', - ]), - dict(name=['script', 'noscript', 'style', 'form', 'hr', 'button', 'meta', 'footer'])] - no_stylesheets = True - extra_css = ''' - .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; } - .credit { font-weight: normal; text-align: right; font-size: - 50%; line-height:1em; margin-top:5px; margin-left:0; - margin-right:0; margin-bottom: 0; } - .byline { text-align: left; font-size: 50%; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; } - .dateline { text-align: left; font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } - .kicker { font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } - .timestamp { font-weight: normal; text-align: left; font-size: 50%; } - .caption { font-size: 50%; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } - a:link {text-decoration: none; } - .date{font-size: 50%; } - .update{font-size: 50%; } - .articleBody { } - .authorId {text-align: left; font-size: 50%; } - .image {text-align: center;} - .aside {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;} - .asidenote {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;font-weight:bold;} - .source {text-align: left; font-size: x-small; }''' - - articles = {} - key = None - ans = [] - url_list = [] - - def filter_ans(self, ans): - total_article_count = 0 - idx = 0 - idx_max = len(ans) - 1 - while idx <= idx_max: - if self.includeSections != []: - if ans[idx][0] not in self.includeSections: - print "SECTION NOT INCLUDED: ", ans[idx][0] - del ans[idx] - idx_max = idx_max - 1 - continue - if ans[idx][0] in self.excludeSections: - print "SECTION EXCLUDED: ", ans[idx][0] - del ans[idx] - idx_max = idx_max - 1 - continue - if True: # self.verbose - self.log("Section %s: %d articles" % - (ans[idx][0], len(ans[idx][1]))) - for article in ans[idx][1]: - total_article_count += 1 - if True: # self.verbose - self.log("\t%-40.40s... \t%-60.60s..." 
% (article['title'].encode('cp1252', 'replace'), - article['url'].encode('cp1252', 'replace'))) - idx = idx + 1 - - self.log("Queued %d articles" % total_article_count) - return ans - - def exclude_url(self, url): - if not url.startswith("http"): - return True - # added for DealBook - if not url.endswith(".html") and 'dealbook.nytimes.com' not in url: - return True - if 'nytimes.com' not in url: - return True - if 'cn.nytimes.com' in url: - return True - if '/es/' in url: - return True - if 'podcast' in url: - return True - if '/video/' in url: - return True - if '/multimedia/' in url: - return True - if '/slideshow/' in url: - return True - if '/magazine/index' in url: - return True - if '/interactive/' in url: - return True - if '/reference/' in url: - return True - if '/premium/' in url: - return True - if '#comment' in url: - return True - if '#postComment' in url: - return True - if '#postcomment' in url: - return True - if re.search('/\d\d\d\d/\d\d/\d\d/', url) is None: - print("NO DATE IN " + url) - return True - return False - - def fixChars(self, string): - # Replace lsquo (\x91) - fixed = re.sub("\x91", "‘", string) - - # Replace rsquo (\x92) - fixed = re.sub("\x92", "’", fixed) - - # Replace ldquo (\x93) - fixed = re.sub("\x93", "“", fixed) - - # Replace rdquo (\x94) - fixed = re.sub("\x94", "”", fixed) - - # Replace ndash (\x96) - fixed = re.sub("\x96", "–", fixed) - - # Replace mdash (\x97) - fixed = re.sub("\x97", "—", fixed) - - return fixed - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - return br - - cover_tag = 'NY_NYT' - - def get_cover_url(self): - from datetime import date - today = date.today() - cover = 'https://static01.nyt.com/images/' \ - + today.strftime('%Y') + '/' + today.strftime('%m') + '/' \ - + today.strftime('%d') + '/nytfrontpage/scan.jpg' - self.log(cover) - br = BasicNewsRecipe.get_browser(self) - try: - br.open(cover) - except: - self.log("\nCover unavailable") - cover = None - return cover - - masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' - - def short_title(self): - return self.title - - def article_to_soup(self, url_or_raw, raw=False): - from contextlib import closing - import copy - from calibre.ebooks.chardet import xml_to_unicode - print("ARTICLE_TO_SOUP " + url_or_raw) - if re.match(r'\w+://', url_or_raw): - br = self.clone_browser(self.browser) - open_func = getattr(br, 'open_novisit', br.open) - with closing(open_func(url_or_raw)) as f: - _raw = f.read() - if not _raw: - raise RuntimeError( - 'Could not fetch index from %s' % url_or_raw) - else: - _raw = url_or_raw - if raw: - return _raw - if not isinstance(_raw, unicode) and self.encoding: - if callable(self.encoding): - _raw = self.encoding(_raw) - else: - _raw = _raw.decode(self.encoding, 'replace') - - nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE) - nmassage.extend(self.preprocess_regexps) - nmassage += [(re.compile(r'<!DOCTYPE .+?>', re.DOTALL), lambda m: '')] - # Some websites have buggy doctype declarations that mess up beautifulsoup - # Remove comments as they can leave detritus when extracting tags leaves - # multiple nested comments - nmassage.append((re.compile(r'<!--.+?-->', re.DOTALL), lambda m: '')) - usrc = xml_to_unicode(_raw, self.verbose, strip_encoding_pats=True)[0] - usrc = self.preprocess_raw_html(usrc, url_or_raw) - return BeautifulSoup(usrc, markupMassage=nmassage) - - def massageNCXText(self, description): - # Kindle TOC descriptions won't render certain characters - if description: - massaged = unicode(BeautifulStoneSoup( 
description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)) - # Replace '&' with '&' - massaged = re.sub("&", "&", massaged) - massaged = re.sub("&", "&", massaged) - return self.fixChars(massaged) - else: - return description - - def feed_title(self, div): - return ''.join(div.findAll(text=True, recursive=True)).strip() - - def handle_article(self, div): - thumbnail = div.find('div', 'thumbnail') - if thumbnail: - thumbnail.extract() - return self.handle_base_article(div) - - # Handle '<article>
' in world, u.s., etc - def handle_article_tag(self, div): - thumbnail = div.find('figure', 'media photo') - if not thumbnail: - thumbnail = div.find('div', 'thumb') - if thumbnail: - thumbnail.extract() - div = div.find('div', 'story-body') - if not div: - return - return self.handle_base_article(div) - - def handle_base_article(self, div): - a = div.find('a', href=True) - if not a: - return - url = re.sub(r'\?.*', '', a['href']) - if self.exclude_url(url): - return - url += '?pagewanted=all' - if self.filterDuplicates: - if url in self.url_list: - return - if self.webEdition: - date_tag = self.decode_url_date(url) - if date_tag is not None: - if self.oldest_web_article is not None: - if date_tag < self.earliest_date: - self.log("Skipping article %s" % url) - return - else: - self.log("Skipping article %s" % url) - return - self.url_list.append(url) - title = self.tag_to_string(a, use_alt=True).strip() - description = '' - pubdate = strftime('%a, %d %b') - summary = div.find(True, attrs={'class': 'summary'}) - if summary: - description = self.tag_to_string(summary, use_alt=False) - author = '' - authorAttribution = div.find(True, attrs={'class': 'byline'}) - if authorAttribution: - author = self.tag_to_string(authorAttribution, use_alt=False) - else: - authorAttribution = div.find(True, attrs={'class': 'byline'}) - if authorAttribution: - author = self.tag_to_string(authorAttribution, use_alt=False) - feed = self.key if self.key is not None else 'Uncategorized' - if feed not in self.articles: - self.ans.append(feed) - self.articles[feed] = [] - self.articles[feed].append( - dict(title=title, url=url, date=pubdate, - description=description, author=author, - content='')) - - def get_popular_articles(self, ans): - if self.getPopularArticles: - popular_articles = {} - key_list = [] - - def handleh3(h3tag): - try: - url = h3tag.a['href'] - except: - return ('', '', '', '') - url = re.sub(r'\?.*', '', url) - if self.exclude_url(url): - return ('', '', '', '') - url += '?pagewanted=all' - title = self.tag_to_string(h3tag.a, False) - h6tag = h3tag.findNextSibling('h6') - if h6tag is not None: - author = self.tag_to_string(h6tag, False) - else: - author = '' - ptag = h3tag.findNextSibling('p') - if ptag is not None: - desc = self.tag_to_string(ptag, False) - else: - desc = '' - return(title, url, author, desc) - - have_emailed = False - emailed_soup = self.index_to_soup( - 'http://www.nytimes.com/most-popular-emailed?period=' + self.popularPeriod) - for h3tag in emailed_soup.findAll('h3'): - (title, url, author, desc) = handleh3(h3tag) - if url == '': - continue - if not have_emailed: - key_list.append('Most E-Mailed') - popular_articles['Most E-Mailed'] = [] - have_emailed = True - popular_articles['Most E-Mailed'].append( - dict(title=title, url=url, date=strftime('%a, %d %b'), - description=desc, author=author, - content='')) - have_viewed = False - viewed_soup = self.index_to_soup( - 'http://www.nytimes.com/most-popular-viewed?period=' + self.popularPeriod) - for h3tag in viewed_soup.findAll('h3'): - (title, url, author, desc) = handleh3(h3tag) - if url == '': - continue - if not have_viewed: - key_list.append('Most Viewed') - popular_articles['Most Viewed'] = [] - have_viewed = True - popular_articles['Most Viewed'].append( - dict(title=title, url=url, date=strftime('%a, %d %b'), - description=desc, author=author, - content='')) - viewed_ans = [(k, popular_articles[k]) - for k in key_list if k in popular_articles] - for x in viewed_ans: - ans.append(x) - return ans - - def 
get_tech_feeds(self, ans): - if self.getTechBlogs: - tech_articles = {} - key_list = [] - save_oldest_article = self.oldest_article - save_max_articles_per_feed = self.max_articles_per_feed - self.oldest_article = self.tech_oldest_article - self.max_articles_per_feed = self.tech_max_articles_per_feed - self.feeds = self.tech_feeds - tech = self.parse_feeds() - self.oldest_article = save_oldest_article - self.max_articles_per_feed = save_max_articles_per_feed - self.feeds = None - for f in tech: - key_list.append(f.title) - tech_articles[f.title] = [] - for a in f.articles: - tech_articles[f.title].append( - dict(title=a.title, url=a.url.partition('?')[0], date=a.date, - description=a.summary, author=a.author, - content=a.content)) - tech_ans = [(k, tech_articles[k]) - for k in key_list if k in tech_articles] - for x in tech_ans: - ans.append(x) - return ans - - def parse_web_edition(self): - - for (sec_title, index_url) in self.web_sections: - if self.includeSections != []: - if sec_title not in self.includeSections: - print "SECTION NOT INCLUDED: ", sec_title - continue - if sec_title in self.excludeSections: - print "SECTION EXCLUDED: ", sec_title - continue - try: - soup = self.index_to_soup( - 'https://www.nytimes.com/pages/' + index_url + '/index.html') - except: - continue - print 'Index URL: ' + 'https://www.nytimes.com/pages/' + index_url + '/index.html' - - self.key = sec_title - # Find each article - for div in soup.findAll('article'): - self.handle_article_tag(div) - for div in soup.findAll(True, attrs={ - 'class': ['section-headline', 'ledeStory', 'story', 'story headline', 'sectionHeader', 'headlinesOnly multiline flush']}): - if div['class'] in ['story', 'story headline', 'storyHeader']: - self.handle_article(div) - elif div['class'] == 'ledeStory': - divsub = div.find('div', 'storyHeader') - if divsub is not None: - self.handle_article(divsub) - ulrefer = div.find('ul', 'refer') - if ulrefer is not None: - for lidiv in ulrefer.findAll('li'): - self.handle_article(lidiv) - elif div['class'] == 'headlinesOnly multiline flush': - for lidiv in div.findAll('li'): - self.handle_article(lidiv) - - self.ans = [(k, self.articles[k]) - for k in self.ans if k in self.articles] - return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans))) - - def parse_todays_index(self): - - soup = self.index_to_soup( - 'https://www.nytimes.com/pages/todayspaper/index.html') - skipping = False - # Find each article - for div in soup.findAll(True, - attrs={'class': ['section-headline', 'story', 'story headline', 'sectionHeader', 'headlinesOnly multiline flush']}): - if div['class'] in ['section-headline', 'sectionHeader']: - self.key = string.capwords(self.feed_title(div)) - self.key = self.key.replace('Op-ed', 'Op-Ed') - self.key = self.key.replace('U.s.', 'U.S.') - self.key = self.key.replace('N.y.', 'N.Y.') - skipping = False - if self.includeSections != []: - if self.key not in self.includeSections: - print "SECTION NOT INCLUDED: ", self.key - skipping = True - if self.key in self.excludeSections: - print "SECTION EXCLUDED: ", self.key - skipping = True - - elif div['class'] in ['story', 'story headline']: - if not skipping: - self.handle_article(div) - elif div['class'] == 'headlinesOnly multiline flush': - for lidiv in div.findAll('li'): - if not skipping: - self.handle_article(lidiv) - - self.ans = [(k, self.articles[k]) - for k in self.ans if k in self.articles] - return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans))) - - def 
parse_headline_index(self): - soup = self.index_to_soup( - 'https://www.nytimes.com/pages/todaysheadlines/') - pubdate = strftime('%a, %d %b') - section = None - articles = [] + def parse_todays_page(self): + soup = self.read_nyt_metadata() + section = soup.find(id='collection-todays-new-york-times') feeds = [] - for h6 in soup.findAll('h6'): - section = self.tag_to_string(h6).strip() - articles = [] - table = h6.parent.findNextSibling('table') - if table is None: - continue - for a in table.findAll('a', attrs={'class':'headURL'}): - title = self.tag_to_string(a) - url = a['href'].partition('?')[0] - if self.exclude_url(url) or (self.filterDuplicates and url in self.url_list): - continue - self.url_list.append(url) - desc = '' - h4 = a.findNextSibling('h4') - if h4 is not None: - desc += self.tag_to_string(h4) - p = a.findNextSibling('p') - if p is not None: - desc += ' ' + self.tag_to_string(p) - articles.append({'title':title, 'url':url + '?pagewanted=all', 'date':pubdate, 'description':desc}) + for h1 in section.findAll('h1')[1:]: + section_title = self.tag_to_string(h1) + self.log('Found section:', section_title) + articles = list(self.parse_todays_sections(h1.parent)) if articles: - feeds.append((section, articles)) - self.ans = feeds - return self.filter_ans(self.ans) + feeds.append((section_title, articles)) + return feeds + + def parse_highlights(self, container): + for article in container.findAll('article', **classes('story')): + h2 = article.find('h2') + if h2 is not None: + title = self.tag_to_string(h2) + a = h2.find('a', href=True) + if a is not None: + url = a['href'] + desc = '' + p = article.find(**classes('summary')) + if p is not None: + desc = self.tag_to_string(p) + yield {'title': title, 'url': url, 'description': desc} + + def parse_web_section(self, soup, slug): + + def log(article): + self.log('\t', article['title'], ':', article['url']) + if article.get('description'): + self.log('\t\t', article['description']) + + container = soup.find(itemtype='http://schema.org/CollectionPage') + highlights = container.find('section', **classes('highlights')) + for article in self.parse_highlights(highlights): + log(article) + yield article + extra = container.find('section', attrs={'data-collection-type': True}) + if extra is not None: + title = self.tag_to_string(extra.find('h2')) + for article in self.parse_highlights(extra): + article['title'] = '{}: {}'.format(title, article['title']) + log(article) + yield article + + def parse_web_sections(self): + feeds = [] + for section_title, slug in web_sections: + url = 'https://www.nytimes.com/section/' + slug + try: + soup = self.index_to_soup(url) + except Exception: + self.log.error('Failed to download section:', url) + continue + self.log('Found section:', section_title) + articles = list(self.parse_web_section(soup, slug)) + if articles: + feeds.append((section_title, articles)) + if self.test and len(feeds) >= self.test[0]: + break + return feeds def parse_index(self): - if self.headlinesOnly: - return self.parse_headline_index() - elif self.webEdition: - return self.parse_web_edition() - else: - return self.parse_todays_index() - - def strip_anchors(self, soup, kill_all=False): - paras = soup.findAll(True) - for para in paras: - aTags = para.findAll('a') - for a in aTags: - if a.img is None: - if kill_all or (self.recursions == 0): - a.replaceWith(self.tag_to_string(a, False)) - else: - if 'href' in a: - if a['href'].startswith('http://www.nytimes'): - if not a['href'].endswith('pagewanted=all'): - url = re.sub(r'\?.*', '', 
a['href']) - if self.exclude_url(url): - a.replaceWith( - self.tag_to_string(a, False)) - else: - a['href'] = url + '?pagewanted=all' - elif not (a['href'].startswith('http://pogue') or - a['href'].startswith('http://bits') or - a['href'].startswith('http://travel') or - a['href'].startswith('http://business') or - a['href'].startswith('http://tech') or - a['href'].startswith('http://health') or - a['href'].startswith('http://dealbook') or - a['href'].startswith('http://open')): - a.replaceWith(self.tag_to_string(a, False)) - return soup - - def handle_tags(self, soup): - try: - print("HANDLE TAGS: TITLE = " + self.tag_to_string(soup.title)) - except: - print("HANDLE TAGS: NO TITLE") - if soup is None: - print("ERROR: handle_tags received NoneType") - return None - - if self.keep_only_tags: - body = Tag(soup, 'body') - try: - if isinstance(self.keep_only_tags, dict): - self.keep_only_tags = [self.keep_only_tags] - for spec in self.keep_only_tags: - for tag in soup.find('body').findAll(**spec): - body.insert(len(body.contents), tag) - soup.find('body').replaceWith(body) - except AttributeError: # soup has no body element - pass - - def remove_beyond(tag, next): - while tag is not None and getattr(tag, 'name', None) != 'body': - after = getattr(tag, next) - while after is not None: - ns = getattr(tag, next) - after.extract() - after = ns - tag = tag.parent - - if self.remove_tags_after is not None: - rt = [self.remove_tags_after] if isinstance( - self.remove_tags_after, dict) else self.remove_tags_after - for spec in rt: - tag = soup.find(**spec) - remove_beyond(tag, 'nextSibling') - - if self.remove_tags_before is not None: - tag = soup.find(**self.remove_tags_before) - remove_beyond(tag, 'previousSibling') - - for kwds in self.remove_tags: - for tag in soup.findAll(**kwds): - tag.extract() - - return soup - - def preprocess_html(self, soup): - skip_tag = soup.find(True, {'name': 'skip'}) - if skip_tag is not None: - url = 'http://www.nytimes.com' + skip_tag.parent['href'] - self.log.warn("Skipping ad to article at '%s'" % url) - sleep(5) - soup = self.handle_tags(self.article_to_soup(url)) - - # check if the article is from one of the tech blogs - blog = soup.find( - 'div', attrs={'id': ['pogue', 'bits', 'gadgetwise', 'open']}) - - if blog is not None: - old_body = soup.find('body') - new_body = Tag(soup, 'body') - new_body.append(soup.find('div', attrs={'id': 'content'})) - new_body.find('div', attrs={'id': 'content'})[ - 'id'] = 'blogcontent' # identify for postprocess_html - old_body.replaceWith(new_body) - for divr in soup.findAll('div', attrs={'class': re.compile('w190 right')}): - if divr.find(text=re.compile('Sign up')): - divr.extract() - divr = soup.find( - 'div', attrs={'class': re.compile('^relatedArticlesModule')}) - if divr is not None: - print("PROCESSING RELATED: " + - self.tag_to_string(soup.title, False)) - # handle related articles - rlist = [] - ul = divr.find('ul') - if ul is not None: - for li in ul.findAll('li'): - atag = li.find('a') - if atag is not None: - if atag['href'].startswith('http://pogue') or atag['href'].startswith('http://bits') or \ - atag['href'].startswith('http://open'): - atag.find(text=True).replaceWith( - self.massageNCXText(self.tag_to_string(atag, False))) - rlist.append(atag) - divr.extract() - if rlist != []: - asidediv = Tag(soup, 'div', [('class', 'aside')]) - if soup.find('hr') is None: - asidediv.append(Tag(soup, 'hr')) - h4 = Tag(soup, 'h4', [('class', 'asidenote')]) - h4.insert(0, "Related Posts") - asidediv.append(h4) - ul = Tag(soup, 
'ul') - for r in rlist: - li = Tag(soup, 'li', [('class', 'aside')]) - r['class'] = 'aside' - li.append(r) - ul.append(li) - asidediv.append(ul) - asidediv.append(Tag(soup, 'hr')) - smain = soup.find('body') - smain.append(asidediv) - else: - print("CANNOT FIND RELATED: " + - self.tag_to_string(soup.title, False)) - for atag in soup.findAll('a'): - img = atag.find('img') - if img is not None: - atag.replaceWith(img) - elif 'href' not in atag: - atag.replaceWith( - atag.renderContents().decode('cp1252', 'replace')) - elif not (atag['href'].startswith('http://www.nytimes') or atag['href'].startswith('http://pogue') or - atag['href'].startswith('http://bits') or atag['href'].startswith('http://open')): - atag.replaceWith( - atag.renderContents().decode('cp1252', 'replace')) - hdr = soup.find('address') - if hdr is not None: - hdr.name = 'span' - for span_credit in soup.findAll('span', 'credit'): - sp = Tag(soup, 'span') - span_credit.replaceWith(sp) - sp.append(Tag(soup, 'br')) - sp.append(span_credit) - sp.append(Tag(soup, 'br')) - - else: # nytimes article - - related = [] # these will be the related articles - first_outer = None # first related outer tag - first_related = None # first related tag - for outerdiv in soup.findAll(attrs={'class': re.compile('articleInline runaroundLeft')}): - for rdiv in soup.findAll('div', 'columnGroup doubleRule'): - if rdiv.find('h3') is not None: - if self.tag_to_string(rdiv.h3, False).startswith('Related'): - rdiv.h3.find(text=True).replaceWith( - "Related articles") - rdiv.h3['class'] = 'asidenote' - for litag in rdiv.findAll('li'): - if litag.find('a') is not None: - if litag.find('a')['href'].startswith('http://www.nytimes.com'): - url = re.sub( - r'\?.*', '', litag.find('a')['href']) - litag.find('a')[ - 'href'] = url + '?pagewanted=all' - litag.extract() - related.append(litag) - if first_related is None: - first_related = rdiv - first_outer = outerdiv - else: - litag.extract() - for h6tag in rdiv.findAll('h6'): - if h6tag.find('a') is not None: - if h6tag.find('a')['href'].startswith('http://www.nytimes.com'): - url = re.sub( - r'\?.*', '', h6tag.find('a')['href']) - h6tag.find('a')[ - 'href'] = url + '?pagewanted=all' - h6tag.extract() - related.append(h6tag) - if first_related is None: - first_related = rdiv - first_outer = outerdiv - else: - h6tag.extract() - if related != []: - for r in related: - if r.h6: # don't want the anchor inside a h6 tag - r.h6.replaceWith(r.h6.a) - first_related.ul.append(r) - first_related.insert(0, Tag(soup, 'hr')) - first_related.append(Tag(soup, 'hr')) - first_related['class'] = 'aside' - # replace the outer tag with the related tag - first_outer.replaceWith(first_related) - - for rdiv in soup.findAll(attrs={'class': re.compile('articleInline runaroundLeft')}): - rdiv.extract() - - kicker_tag = soup.find(attrs={'class': 'kicker'}) - if kicker_tag: # remove Op_Ed author head shots - tagline = self.tag_to_string(kicker_tag) - if tagline == 'Op-Ed Columnist': - img_div = soup.find('div', 'inlineImage module') - if img_div: - img_div.extract() - - if self.useHighResImages: - try: - # open up all the "Enlarge this Image" pop-ups and download - # the full resolution jpegs - enlargeThisList = soup.findAll( - 'div', {'class': 'icon enlargeThis'}) - if enlargeThisList: - for popupref in enlargeThisList: - popupreflink = popupref.find('a') - if popupreflink: - reflinkstring = str(popupreflink['href']) - refstart = reflinkstring.find( - "javascript:pop_me_up2('") + len("javascript:pop_me_up2('") - refend = reflinkstring.find( 
- ".html", refstart) + len(".html") - reflinkstring = reflinkstring[refstart:refend] - - popuppage = self.browser.open(reflinkstring) - popuphtml = popuppage.read() - popuppage.close() - if popuphtml: - st = time.localtime() - year = str(st.tm_year) - month = "%.2d" % st.tm_mon - day = "%.2d" % st.tm_mday - imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' + month + '/' + day + '/') + \ - len('http://graphics8.nytimes.com/images/' + - year + '/' + month + '/' + day + '/') - highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' + \ - month + '/' + day + '/' + \ - popuphtml[imgstartpos:popuphtml.find( - '.jpg', imgstartpos) + 4] - popupSoup = BeautifulSoup(popuphtml) - highResTag = popupSoup.find( - 'img', {'src': highResImageLink}) - if highResTag: - try: - newWidth = highResTag['width'] - newHeight = highResTag['height'] - imageTag = popupref.parent.find( - "img") - except: - self.log( - "Error: finding width and height of img") - popupref.extract() - if imageTag: - try: - imageTag[ - 'src'] = highResImageLink - imageTag['width'] = newWidth - imageTag['height'] = newHeight - except: - self.log( - "Error setting the src width and height parameters") - except Exception: - self.log("Error pulling high resolution images") - - try: - # in case pulling images failed, delete the enlarge this - # text - enlargeThisList = soup.findAll( - 'div', {'class': 'icon enlargeThis'}) - if enlargeThisList: - for popupref in enlargeThisList: - popupref.extract() - except: - self.log("Error removing Enlarge this text") - - return self.strip_anchors(soup, False) - - def postprocess_html(self, soup, first_fetch): - if not first_fetch: # remove Related links - for aside in soup.findAll('div', 'aside'): - aside.extract() - soup = self.strip_anchors(soup, True) - - for t in soup.findAll('time', attrs={'class':'dateline'}): - t.name = 'div' - - if soup.find('div', attrs={'id': 'blogcontent'}) is None: - if first_fetch: - aside = soup.find('div', 'aside') - if aside is not None: # move the related list to the end of the article - art = soup.find('div', attrs={'id': 'article'}) - if art is None: - art = soup.find('div', attrs={'class': 'article'}) - if art is not None: - art.append(aside) - try: - if self.one_picture_per_article: - # Remove all images after first - largeImg = soup.find(True, {'class': 'articleSpanImage'}) - inlineImgs = soup.findAll( - True, {'class': 'inlineImage module'}) - if largeImg: - for inlineImg in inlineImgs: - inlineImg.extract() - else: - if inlineImgs: - firstImg = inlineImgs[0] - for inlineImg in inlineImgs[1:]: - inlineImg.extract() - # Move firstImg before article body - cgFirst = soup.find( - True, {'class': re.compile('columnGroup *first')}) - if cgFirst: - # Strip all sibling NavigableStrings: noise - navstrings = cgFirst.findAll( - text=True, recursive=False) - [ns.extract() for ns in navstrings] - headline_found = False - tag = cgFirst.find(True) - insertLoc = 0 - while True: - insertLoc += 1 - if hasattr(tag, 'class') and tag['class'] == 'articleHeadline': - headline_found = True - break - tag = tag.nextSibling - if not tag: - headline_found = False - break - if headline_found: - cgFirst.insert(insertLoc, firstImg) - else: - self.log( - ">>> No class:'columnGroup first' found <<<") - except: - self.log("ERROR: One picture per article in postprocess_html") - - try: - # Change captions to italic - for caption in soup.findAll(True, {'class': 'caption'}): - if caption and len(caption) > 0: - cTag = Tag(soup, "p", [("class", "caption")]) 
- c = self.fixChars(self.tag_to_string( - caption, use_alt=False)).strip() - mp_off = c.find("More Photos") - if mp_off >= 0: - c = c[:mp_off] - cTag.insert(0, c) - caption.replaceWith(cTag) - except: - self.log("ERROR: Problem in change captions to italic") - - try: - # Change <nyt_headline> to <h2> - h1 = soup.find('h1') - blogheadline = str(h1) # added for dealbook - if h1: - headline = h1.find("nyt_headline") - if headline: - tag = Tag(soup, "h2") - tag['class'] = "headline" - tag.insert(0, self.fixChars(headline.contents[0])) - h1.replaceWith(tag) - elif blogheadline.find('entry-title'): # added for dealbook - tag = Tag(soup, "h2") # added for dealbook - tag['class'] = "headline" # added for dealbook - # added for dealbook - tag.insert(0, self.fixChars(h1.contents[0])) - h1.replaceWith(tag) # added for dealbook - - else: - # Blog entry - replace headline, remove <hr> tags - BCC I - think this is no longer functional 1-18-2011 - headline = soup.find('title') - if headline: - tag = Tag(soup, "h2") - tag['class'] = "headline" - tag.insert(0, self.fixChars( - self.tag_to_string(headline, False))) - soup.insert(0, tag) - hrs = soup.findAll('hr') - for hr in hrs: - hr.extract() - except: - self.log("ERROR: Problem in Change <nyt_headline> to <h2>") - - try: - # if this is from a blog (dealbook, fix the byline format - bylineauthor = soup.find( - 'address', attrs={'class': 'byline author vcard'}) - if bylineauthor: - tag = Tag(soup, "h6") - tag['class'] = "byline" - tag.insert(0, self.fixChars( - self.tag_to_string(bylineauthor, False))) - bylineauthor.replaceWith(tag) - except: - self.log("ERROR: fixing byline author format") - - try: - # if this is a blog (dealbook) fix the credit style for the - # pictures - blogcredit = soup.find('div', attrs={'class': 'credit'}) - if blogcredit: - tag = Tag(soup, "h6") - tag['class'] = "credit" - tag.insert(0, self.fixChars( - self.tag_to_string(blogcredit, False))) - blogcredit.replaceWith(tag) - except: - self.log("ERROR: fixing credit format") - - try: - # Change <h1> to <h3> - used in editorial blogs - masthead = soup.find("h1") - if masthead: - # Nuke the href - if masthead.a: - del(masthead.a['href']) - tag = Tag(soup, "h3") - tag.insert(0, self.fixChars(masthead.contents[0])) - masthead.replaceWith(tag) - except: - self.log( - "ERROR: Problem in Change <h1> to <h3> - used in editorial blogs") - - try: - # Change <span class="bold"> to <b> - for subhead in soup.findAll(True, {'class': 'bold'}): - if subhead.contents: - bTag = Tag(soup, "b") - bTag.insert(0, subhead.contents[0]) - subhead.replaceWith(bTag) - except: - self.log( - "ERROR: Problem in Change <h1> to <h3> - used in editorial blogs") - try: - # remove the update tag - blogupdated = soup.find('span', {'class': 'update'}) - if blogupdated: - blogupdated.replaceWith("") - except: - self.log("ERROR: Removing strong tag") - - try: - divTag = soup.find('div', attrs={'id': 'articleBody'}) - if divTag: - divTag['class'] = divTag['id'] - except: - self.log( - "ERROR: Problem in soup.find(div,attrs={id:articleBody})") - - try: - # Add class="authorId" to <div>
so we can format with CSS - divTag = soup.find('div', attrs={'id': 'authorId'}) - if divTag and divTag.contents[0]: - tag = Tag(soup, "p") - tag['class'] = "authorId" - tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0], - use_alt=False))) - divTag.replaceWith(tag) - except: - self.log( - "ERROR: Problem in Add class=authorId to <div>
so we can format with CSS") - return soup - - def populate_article_metadata(self, article, soup, first): - if not first: - return - idxdiv = soup.find('div', attrs={'class': 'articleSpanImage'}) - if idxdiv is not None: - if idxdiv.img: - self.add_toc_thumbnail(article, re.sub( - r'links\\link\d+\\', '', idxdiv.img['src'])) - else: - img = soup.find('body').find('img') - if img is not None: - self.add_toc_thumbnail(article, re.sub( - r'links\\link\d+\\', '', img['src'])) - shortparagraph = "" - try: - if len(article.text_summary.strip()) == 0: - articlebodies = soup.findAll( - 'div', attrs={'class': 'articleBody'}) - if articlebodies: - for articlebody in articlebodies: - if articlebody: - paras = articlebody.findAll('p') - for p in paras: - refparagraph = self.massageNCXText( - self.tag_to_string(p, use_alt=False)).strip() - # account for blank paragraphs and short - # paragraphs by appending them to longer ones - if len(refparagraph) > 0: - if len(refparagraph) > 70: # approximately one line of text - newpara = shortparagraph + refparagraph - newparaDateline, newparaEm, newparaDesc = newpara.partition( - '—') - if newparaEm == '': - newparaDateline, newparaEm, newparaDesc = newpara.partition( - '—') - if newparaEm == '': - newparaDesc = newparaDateline - article.summary = article.text_summary = newparaDesc.strip() - return - else: - shortparagraph = refparagraph + " " - if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"): - shortparagraph = shortparagraph + "- " - else: - article.summary = article.text_summary = self.massageNCXText( - article.text_summary) - except: - self.log("Error creating article descriptions") - return + if is_web_edition: + return self.parse_web_sections() + return self.parse_todays_page() diff --git a/recipes/nytimes_sub.recipe b/recipes/nytimes_sub.recipe index 925c0c1494..cae243517a 100644 --- a/recipes/nytimes_sub.recipe +++ b/recipes/nytimes_sub.recipe @@ -1,1314 +1,170 @@ -#!/usr/bin/env python2 -# -*- coding: utf-8 -*- -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' -''' -nytimes.com -''' -import re -import string -import time -from calibre import strftime -from datetime import timedelta, date -from time import sleep -from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2018, Kovid Goyal + +from __future__ import absolute_import, division, print_function, unicode_literals + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.utils.date import strptime + +is_web_edition = False +# The sections to download when downloading the web edition, comment out +# the section you are not interested in +web_sections = [ + ('World', 'world'), + ('U.S.', 'us'), + ('Politics', 'politics'), + ('New York', 'nyregion'), + ('Business', 'business'), + ('Technology', 'technology'), + ('Sports', 'sports'), + ('Science', 'science'), + ('Health', 'health'), + ('Opinion', 'opinion'), + ('Arts', 'arts'), + ('Books', 'books'), + ('Movies', 'movies'), + ('Music', 'arts/music'), + ('Television', 'arts/television'), + ('Style', 'style'), + ('Dining & Wine', 'dining'), + ('Fashion & Style', 'fashion'), + ('Home & Garden', 'garden'), + ('Travel', 'travel'), + ('Education', 'education'), + ('Multimedia', 'multimedia'), + ('Obituaries', 'obituaries'), + ('Sunday Magazine', 'magazine') +] -class NYTimes(BasicNewsRecipe): +def classes(classes): + q = 
frozenset(classes.split(' ')) + return dict(attrs={ + 'class': lambda x: x and frozenset(x.split()).intersection(q)}) - recursions = 1 # set this to zero to omit Related articles lists - # speeds up processing by preventing index page links from being followed - match_regexps = [r'/[12][0-9][0-9][0-9]/[0-9]+/'] - # set getTechBlogs to True to include the technology blogs - # set tech_oldest_article to control article age - # set tech_max_articles_per_feed to control article count - getTechBlogs = True - remove_empty_feeds = True - tech_oldest_article = 14 - tech_max_articles_per_feed = 25 +class NewYorkTimes(BasicNewsRecipe): - # set getPopularArticles to False if you don't want the Most E-mailed and Most Viewed articles - # otherwise you will get up to 20 of the most popular e-mailed and viewed articles (in each category) - # This is currently disabled because the NYT is changing this functionality - # on their website to a new "Trending" page - getPopularArticles = False - popularPeriod = '1' # set this to the number of days to include in the measurement - # e.g. 7 will get the most popular measured over the last 7 days - # and 30 will get the most popular measured over 30 days. - # you still only get up to 20 articles in each category - - # set headlinesOnly to True for the headlines-only version. If True, - # webEdition is ignored. - headlinesOnly = False - - # set webEdition to True for the Web edition of the newspaper. Set oldest_web_article to the - # number of days old an article can be for inclusion. If oldest_web_article = None all articles - # will be included. Note: oldest_web_article is ignored if webEdition = - # False - webEdition = False - oldest_web_article = None - - # download higher resolution images than the small thumbnails typically included in the article - # the down side of having large beautiful images is the file size is much - # larger, on the order of 7MB per paper - useHighResImages = True + title = 'The New York Times' + if is_web_edition: + description = 'New York Times (Web). You can edit the recipe to remove sections you are not interested in.' + else: + description = 'Today\'s New York Times' + encoding = 'utf-8' + __author__ = 'Kovid Goyal' + language = 'en' + ignore_duplicate_articles = {'title', 'url'} + no_stylesheets = True compress_news_images = True compress_news_images_auto_size = 5 - # replace paid Kindle Version: the name will be changed to "The New York Times" to cause - # previous paid versions of the new york times to best sent to the back - # issues folder on the kindle - replaceKindleVersion = False - - # includeSections: List of sections to include. If empty, all sections found will be included. - # Otherwise, only the sections named will be included. For example, - # - # includeSections = ['Politics','Sports'] - # - # would cause only the Politics and Sports sections to be included. - - includeSections = [] # by default, all sections included - - # excludeSections: List of sections to exclude. If empty, all sections found will be included. - # Otherwise, the sections named will be excluded. For example, - # - # excludeSections = ['Politics','Sports'] - # - # would cause the Politics and Sports sections to be excluded. This parameter can be used - # in conjuction with includeSections although in most cases using one or the other, but - # not both, is sufficient. - - excludeSections = [] - - # one_picture_per_article specifies that calibre should only use the first image - # from an article (if one exists). 
If one_picture_per_article = True, the image - # will be moved to a location between the headline and the byline. - # If one_picture_per_article = False, all images from the article will be included - # and shown in their original location. - one_picture_per_article = False - - # The maximum number of articles that will be downloaded - max_articles_per_feed = 100 - use_embedded_content = False - - # Whether to omit duplicates of articles (typically arsing when articles are indexed in - # more than one section). If True, only the first occurance will be - # downloaded. - filterDuplicates = True - - # Sections to collect for the Web edition. - # Delete any you don't want, or use includeSections or excludeSections - web_sections = [(u'World', u'world'), - (u'U.S.', u'national'), - (u'Politics', u'politics'), - (u'New York', u'nyregion'), - (u'Business', 'business'), - (u'Technology', u'technology'), - (u'Sports', u'sports'), - (u'Science', u'science'), - (u'Health', u'health'), - (u'Opinion', u'opinion'), - (u'Arts', u'arts'), - (u'Books', u'books'), - (u'Movies', u'movies'), - (u'Music', u'arts/music'), - (u'Television', u'arts/television'), - (u'Style', u'style'), - (u'Dining & Wine', u'dining'), - (u'Fashion & Style', u'fashion'), - (u'Home & Garden', u'garden'), - (u'Travel', u'travel'), - ('Education', u'education'), - ('Multimedia', u'multimedia'), - (u'Obituaries', u'obituaries'), - (u'Sunday Magazine', u'magazine') - ] - - tech_feeds = [ - (u'Tech - News', u'http://pogue.blogs.nytimes.com/feed/'), - (u'Tech - Gadgetwise', u'http://gadgetwise.blogs.nytimes.com/feed/'), - (u'Tech - Open', u'http://open.blogs.nytimes.com/feed/') + keep_only_tags = [ + dict(id='story-header'), + classes('story-body-supplemental story-interrupter'), + ] + remove_tags = [ + dict(attrs={'aria-label':'tools'.split()}), + dict(attrs={'data-videoid':True}), + dict(name='button'), + dict(id=lambda x: x and x.startswith('story-ad-')), + dict(name='a', href=lambda x: x and '#story-continues-' in x), + dict(name='a', href=lambda x: x and '#whats-next' in x), + dict(id=lambda x: x and 'sharetools-' in x), + dict(id='newsletter-promo'.split()), ] - if headlinesOnly: - title = 'New York Times Headlines' - description = 'Headlines from the New York Times' - needs_subscription = 'optional' - elif webEdition: - title = 'New York Times (Web)' - description = 'New York Times on the Web' - needs_subscription = 'optional' - elif replaceKindleVersion: - title = 'The New York Times' - description = 'Today\'s New York Times' - needs_subscription = 'optional' - else: - title = 'New York Times' - description = 'Today\'s New York Times' - needs_subscription = 'optional' + def read_nyt_metadata(self): + INDEX = 'https://www.nytimes.com/section/todayspaper' + # INDEX = 'file:///t/raw.html' + soup = self.index_to_soup(INDEX) + pdate = soup.find('meta', attrs={'name':'pdate', 'content': True})['content'] + date = strptime(pdate, '%Y%m%d', assume_utc=False, as_utc=False) + self.cover_url = 'https://static01.nyt.com/images/{}/nytfrontpage/scan.jpg'.format(date.strftime('%Y/%m/%d')) + self.timefmt = date.strftime(' [%d %b, %Y]') + return soup - def decode_url_date(self, url): - urlitems = url.split('/') - try: - d = date(int(urlitems[3]), int(urlitems[4]), int(urlitems[5])) - except: - try: - d = date(int(urlitems[4]), int(urlitems[5]), int(urlitems[6])) - except: - return None - return d + def parse_todays_sections(self, container): + for h2 in container.findAll('h2', **classes('headline')): + title = self.tag_to_string(h2) + a = 
h2.find('a', href=True) + url = a['href'] + if '?' in url: + url = url.split('?')[0] + p = h2.findParent(**classes('story-body')) + desc = '' + if p is not None: + s = p.find(**classes('summary')) + if s is not None: + desc = self.tag_to_string(s) + self.log('\t', title, ': ', url) + self.log('\t\t', desc) + yield {'title': title, 'url': url, 'description': desc} - if oldest_web_article is None: - earliest_date = date.today() - else: - earliest_date = date.today() - timedelta(days=oldest_web_article) - oldest_article = 365 # by default, a long time ago - - __author__ = 'GRiker/Kovid Goyal/Nick Redding' - language = 'en' - requires_version = (0, 7, 5) - encoding = 'utf-8' - - timefmt = '' - - # simultaneous_downloads = 1 # no longer required to deal with ads - - cover_margins = (18, 18, 'grey99') - - keep_only_tags = dict(id=['article', 'story', 'content']) - remove_tags = [ - dict(attrs={'class': [ - 'articleFooter', - 'articleTools', - 'rfd', 'story-footer-links', 'page-footer', - 'columnGroup singleRule', - 'columnGroup last', - 'columnGroup last', - 'doubleRule', - 'dottedLine', - 'entry-meta', - 'entry-response module', - 'leftNavTabs', - 'metaFootnote', - 'inside-story', - 'module box nav', - 'nextArticleLink', - 'nextArticleLink clearfix', - 'post-tools', - 'relatedSearchesModule', - 'side_tool', - 'singleAd', - 'postCategory column', - 'refer tagRefer', # added for bits blog post - 'entry entry-utility', # added for DealBook - 'entry-tags', # added for DealBook - 'footer promos clearfix', # added for DealBook - 'footer links clearfix', # added for DealBook - 'tabsContainer', # added for other blog downloads - 'column lastColumn', # added for other blog downloads - 'pageHeaderWithLabel', # added for other gadgetwise downloads - 'column two', # added for other blog downloads - 'column two last', # added for other blog downloads - 'column three', # added for other blog downloads - 'column three last', # added for other blog downloads - 'column four', # added for other blog downloads - 'column four last', # added for other blog downloads - 'column last', # added for other blog downloads - 'entry entry-related', - 'subNavigation tabContent active', # caucus blog navigation - 'mediaOverlay slideshow', - 'wideThumb', - 'video', # added 02-11-2011 - 'videoHeader', # added 02-11-2011 - 'articleInlineVideoHolder', # added 02-11-2011 - 'assetCompanionAd', - 'nytint-sectionHeader', - re.compile('^subNavigation'), - re.compile('^leaderboard'), - re.compile('^module'), - re.compile('commentCount'), - 'lede-container', - 'credit', - 'caption-video', - 'upshot-social' - ]}), - dict( - attrs={'class': lambda x: x and 'related-coverage-marginalia' in x.split()}), - dict(attrs={'class': lambda x: x and 'hidden' in x.split()}), - dict(attrs={'class': lambda x: x and 'interactive' in x.split()}), - dict(attrs={'class': lambda x: x and 'SectionBarShare' in x.split('-')}), - dict(attrs={'class': lambda x: x and 'ResponsiveAd' in x.split('-')}), - dict(attrs={'class': lambda x: x and 'skip-to-text-link' in x.split()}), - dict(attrs={'class': lambda x: x and 'sharetools' in x.split()}), - dict(attrs={'class': lambda x: x and 'ad' in x.split()}), - dict(attrs={'class': lambda x: x and 'video' in x.split()}), - dict(attrs={'class': lambda x: x and 'visually-hidden' in x.split()}), - dict(name='div', attrs={'class': re.compile('toolsList')}), # bits - dict(name='div', attrs={ - 'class': re.compile('postNavigation')}), # bits - dict(name='div', attrs={'class': 'tweet'}), - dict(name='span', attrs={'class': 
'commentCount meta'}), - dict(name='div', attrs={'id': 'header'}), - # bits, pogue, gadgetwise, open - dict(name='div', attrs={'id': re.compile('commentsContainer')}), - # pogue, gadgetwise - dict(name='ul', attrs={'class': re.compile('entry-tools')}), - # pogue, gadgetwise - dict(name='div', attrs={'class': re.compile('nocontent')}), - dict(name='div', attrs={'id': re.compile('respond')}), # open - dict(name='div', attrs={'class': re.compile('entry-tags')}), # pogue - dict(name='h4', attrs={'class': 'headline'}), - dict(id=[ - 'adxLeaderboard', - 'pagelinks', - 'adxSponLink', - 'anchoredAd_module', - 'anchoredAd_spot', - 'archive', - 'articleExtras', - 'articleInline', - 'blog_sidebar', - 'businessSearchBar', - 'cCol', - 'entertainmentSearchBar', - 'footer', - 'header', - 'header_search', - 'inlineBox', - 'login', - 'masthead', - 'masthead-nav', - 'masthead-social', - 'memberTools', - 'navigation', 'navigation-ghost', 'navigation-modal', 'navigation-edge', - 'page-footer', - 'portfolioInline', - 'readerReviews', - 'readerReviewsCount', - 'relatedArticles', - 'relatedTopics', - 'respond', - 'ribbon', - 'side_search', - 'side_index', - 'side_tool', - 'toolsRight', - 'skybox', # added for DealBook - 'TopAd', # added for DealBook - 'related-content', # added for DealBook - 'whats-next', - 'newsletter-promo', - ]), - dict(name=['script', 'noscript', 'style', 'form', 'hr', 'button', 'meta', 'footer'])] - no_stylesheets = True - extra_css = ''' - .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; } - .credit { font-weight: normal; text-align: right; font-size: - 50%; line-height:1em; margin-top:5px; margin-left:0; - margin-right:0; margin-bottom: 0; } - .byline { text-align: left; font-size: 50%; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; } - .dateline { text-align: left; font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } - .kicker { font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } - .timestamp { font-weight: normal; text-align: left; font-size: 50%; } - .caption { font-size: 50%; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } - a:link {text-decoration: none; } - .date{font-size: 50%; } - .update{font-size: 50%; } - .articleBody { } - .authorId {text-align: left; font-size: 50%; } - .image {text-align: center;} - .aside {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;} - .asidenote {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;font-weight:bold;} - .source {text-align: left; font-size: x-small; }''' - - articles = {} - key = None - ans = [] - url_list = [] - - def filter_ans(self, ans): - total_article_count = 0 - idx = 0 - idx_max = len(ans) - 1 - while idx <= idx_max: - if self.includeSections != []: - if ans[idx][0] not in self.includeSections: - print "SECTION NOT INCLUDED: ", ans[idx][0] - del ans[idx] - idx_max = idx_max - 1 - continue - if ans[idx][0] in self.excludeSections: - print "SECTION EXCLUDED: ", ans[idx][0] - del ans[idx] - idx_max = idx_max - 1 - continue - if True: # self.verbose - self.log("Section %s: %d articles" % - (ans[idx][0], len(ans[idx][1]))) - for article in ans[idx][1]: - total_article_count += 1 - if True: # self.verbose - self.log("\t%-40.40s... \t%-60.60s..." 
% (article['title'].encode('cp1252', 'replace'), - article['url'].encode('cp1252', 'replace'))) - idx = idx + 1 - - self.log("Queued %d articles" % total_article_count) - return ans - - def exclude_url(self, url): - if not url.startswith("http"): - return True - # added for DealBook - if not url.endswith(".html") and 'dealbook.nytimes.com' not in url: - return True - if 'nytimes.com' not in url: - return True - if 'cn.nytimes.com' in url: - return True - if '/es/' in url: - return True - if 'podcast' in url: - return True - if '/video/' in url: - return True - if '/multimedia/' in url: - return True - if '/slideshow/' in url: - return True - if '/magazine/index' in url: - return True - if '/interactive/' in url: - return True - if '/reference/' in url: - return True - if '/premium/' in url: - return True - if '#comment' in url: - return True - if '#postComment' in url: - return True - if '#postcomment' in url: - return True - if re.search('/\d\d\d\d/\d\d/\d\d/', url) is None: - print("NO DATE IN " + url) - return True - return False - - def fixChars(self, string): - # Replace lsquo (\x91) - fixed = re.sub("\x91", "‘", string) - - # Replace rsquo (\x92) - fixed = re.sub("\x92", "’", fixed) - - # Replace ldquo (\x93) - fixed = re.sub("\x93", "“", fixed) - - # Replace rdquo (\x94) - fixed = re.sub("\x94", "”", fixed) - - # Replace ndash (\x96) - fixed = re.sub("\x96", "–", fixed) - - # Replace mdash (\x97) - fixed = re.sub("\x97", "—", fixed) - - return fixed - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - if False and self.username is not None and self.password is not None: - # disabled because the idiotic nyt has moved to using an all JS - # login process with the further involvement of recaptcha, they - # apparently dont want their subscribing users to access their - # content conveniently - # Sample POST request - # URL: https://myaccount.nytimes.com/svc/account/auth/v1/login - # Origin: https://myaccount.nytimes.com - # Referer: https://myaccount.nytimes.com/auth/login - # Payload: - # auth_token: "H4sIAAAAAAAAAw3LQQqDMBBG4btk7cLJxKbjZST5ZwQpGEkUWmzv3my/x7tdtXaUvdlyfg5zs0Mpr83c0INu1XAuV926d7H30aW5mYI82XvhaXB7uXb0UZjD76uTInFSRCKf05gMPvIqijHALECyRE2RRs9A9jIRkyE+xEx1xR+mstBMkAAAAA==" # noqa - # captcha_result: "03AEHxwuxyNRK8s99A7rMvZOvWGfXUuy9HqbRfC0uSREKhf6lpR5eL0I6hp-PHBN8wshkxSU4piWvyD7n7xESxnwidq19esqPvWiTV7oTUGDkqFltTbq9Dk4xjAaZ7nczevNJR_xeG8tZhF72_EmwL_AGCR83a8aylDXzX7e_PDzh9JC9wA03rEVn66Q7uY29NGUOQd0Ux8frNelwCYKfUJaIdYCjzBnaCCQwp6mufm7rWVdQxX2Togq2-g6MKdpjpyXqjCQxFzYaX1Jcm5XS4bcVlB7F_tt5W-6FqfxsEqGUPhH1QTc8LEb8aRJ39QCMUNWGQ3Uz96ZK1LYOODwM9hv0wNecjUkl7fzdzikHC-o5m1Dg3pzaNlb1vQeejkNzt03QLk7CxhgisnbR-LmClpl-6BULP4un0HVvt0YkgY08osSYDEBVfT2I" # noqa - # password : "asdasd" - # remember_me : "Y" - # username : "sdasd" - # where auth_token is taken from data-auth-options attribute of - # and the captcha result comes from recaptcha - #
- br.open('https://www.nytimes.com/auth/login') - br.form = list(br.forms())[0] - br['userid'] = self.username - br['password'] = self.password - br.submit().read() - # if 'Please try again' in raw: - # raise Exception('Your username and password are incorrect') - return br - - cover_tag = 'NY_NYT' - - def get_cover_url(self): - from datetime import date - today = date.today() - cover = 'https://static01.nyt.com/images/' \ - + today.strftime('%Y') + '/' + today.strftime('%m') + '/' \ - + today.strftime('%d') + '/nytfrontpage/scan.jpg' - self.log(cover) - br = BasicNewsRecipe.get_browser(self) - try: - br.open(cover) - except: - self.log("\nCover unavailable") - cover = None - return cover - - masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' - - def short_title(self): - return self.title - - def article_to_soup(self, url_or_raw, raw=False): - from contextlib import closing - import copy - from calibre.ebooks.chardet import xml_to_unicode - print("ARTICLE_TO_SOUP " + url_or_raw) - if re.match(r'\w+://', url_or_raw): - br = self.clone_browser(self.browser) - open_func = getattr(br, 'open_novisit', br.open) - with closing(open_func(url_or_raw)) as f: - _raw = f.read() - if not _raw: - raise RuntimeError( - 'Could not fetch index from %s' % url_or_raw) - else: - _raw = url_or_raw - if raw: - return _raw - if not isinstance(_raw, unicode) and self.encoding: - if callable(self.encoding): - _raw = self.encoding(_raw) - else: - _raw = _raw.decode(self.encoding, 'replace') - - nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE) - nmassage.extend(self.preprocess_regexps) - nmassage += [(re.compile(r'', re.DOTALL), lambda m: '')] - # Some websites have buggy doctype declarations that mess up beautifulsoup - # Remove comments as they can leave detritus when extracting tags leaves - # multiple nested comments - nmassage.append((re.compile(r'', re.DOTALL), lambda m: '')) - usrc = xml_to_unicode(_raw, self.verbose, strip_encoding_pats=True)[0] - usrc = self.preprocess_raw_html(usrc, url_or_raw) - return BeautifulSoup(usrc, markupMassage=nmassage) - - def massageNCXText(self, description): - # Kindle TOC descriptions won't render certain characters - if description: - massaged = unicode(BeautifulStoneSoup( - description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)) - # Replace '&' with '&' - massaged = re.sub("&", "&", massaged) - massaged = re.sub("&", "&", massaged) - return self.fixChars(massaged) - else: - return description - - def feed_title(self, div): - return ''.join(div.findAll(text=True, recursive=True)).strip() - - def handle_article(self, div): - thumbnail = div.find('div', 'thumbnail') - if thumbnail: - thumbnail.extract() - return self.handle_base_article(div) - - # Handle '
<article>' in world, u.s., etc - def handle_article_tag(self, div): - thumbnail = div.find('figure', 'media photo') - if not thumbnail: - thumbnail = div.find('div', 'thumb') - if thumbnail: - thumbnail.extract() - div = div.find('div', 'story-body') - if not div: - return - return self.handle_base_article(div) - - def handle_base_article(self, div): - a = div.find('a', href=True) - if not a: - return - url = re.sub(r'\?.*', '', a['href']) - if self.exclude_url(url): - return - url += '?pagewanted=all' - if self.filterDuplicates: - if url in self.url_list: - return - if self.webEdition: - date_tag = self.decode_url_date(url) - if date_tag is not None: - if self.oldest_web_article is not None: - if date_tag < self.earliest_date: - self.log("Skipping article %s" % url) - return - else: - self.log("Skipping article %s" % url) - return - self.url_list.append(url) - title = self.tag_to_string(a, use_alt=True).strip() - description = '' - pubdate = strftime('%a, %d %b') - summary = div.find(True, attrs={'class': 'summary'}) - if summary: - description = self.tag_to_string(summary, use_alt=False) - author = '' - authorAttribution = div.find(True, attrs={'class': 'byline'}) - if authorAttribution: - author = self.tag_to_string(authorAttribution, use_alt=False) - else: - authorAttribution = div.find(True, attrs={'class': 'byline'}) - if authorAttribution: - author = self.tag_to_string(authorAttribution, use_alt=False) - feed = self.key if self.key is not None else 'Uncategorized' - if feed not in self.articles: - self.ans.append(feed) - self.articles[feed] = [] - self.articles[feed].append( - dict(title=title, url=url, date=pubdate, - description=description, author=author, - content='')) - - def get_popular_articles(self, ans): - if self.getPopularArticles: - popular_articles = {} - key_list = [] - - def handleh3(h3tag): - try: - url = h3tag.a['href'] - except: - return ('', '', '', '') - url = re.sub(r'\?.*', '', url) - if self.exclude_url(url): - return ('', '', '', '') - url += '?pagewanted=all' - title = self.tag_to_string(h3tag.a, False) - h6tag = h3tag.findNextSibling('h6') - if h6tag is not None: - author = self.tag_to_string(h6tag, False) - else: - author = '' - ptag = h3tag.findNextSibling('p') - if ptag is not None: - desc = self.tag_to_string(ptag, False) - else: - desc = '' - return(title, url, author, desc) - - have_emailed = False - emailed_soup = self.index_to_soup( - 'http://www.nytimes.com/most-popular-emailed?period=' + self.popularPeriod) - for h3tag in emailed_soup.findAll('h3'): - (title, url, author, desc) = handleh3(h3tag) - if url == '': - continue - if not have_emailed: - key_list.append('Most E-Mailed') - popular_articles['Most E-Mailed'] = [] - have_emailed = True - popular_articles['Most E-Mailed'].append( - dict(title=title, url=url, date=strftime('%a, %d %b'), - description=desc, author=author, - content='')) - have_viewed = False - viewed_soup = self.index_to_soup( - 'http://www.nytimes.com/most-popular-viewed?period=' + self.popularPeriod) - for h3tag in viewed_soup.findAll('h3'): - (title, url, author, desc) = handleh3(h3tag) - if url == '': - continue - if not have_viewed: - key_list.append('Most Viewed') - popular_articles['Most Viewed'] = [] - have_viewed = True - popular_articles['Most Viewed'].append( - dict(title=title, url=url, date=strftime('%a, %d %b'), - description=desc, author=author, - content='')) - viewed_ans = [(k, popular_articles[k]) - for k in key_list if k in popular_articles] - for x in viewed_ans: - ans.append(x) - return ans - - def 
get_tech_feeds(self, ans): - if self.getTechBlogs: - tech_articles = {} - key_list = [] - save_oldest_article = self.oldest_article - save_max_articles_per_feed = self.max_articles_per_feed - self.oldest_article = self.tech_oldest_article - self.max_articles_per_feed = self.tech_max_articles_per_feed - self.feeds = self.tech_feeds - tech = self.parse_feeds() - self.oldest_article = save_oldest_article - self.max_articles_per_feed = save_max_articles_per_feed - self.feeds = None - for f in tech: - key_list.append(f.title) - tech_articles[f.title] = [] - for a in f.articles: - tech_articles[f.title].append( - dict(title=a.title, url=a.url.partition('?')[0], date=a.date, - description=a.summary, author=a.author, - content=a.content)) - tech_ans = [(k, tech_articles[k]) - for k in key_list if k in tech_articles] - for x in tech_ans: - ans.append(x) - return ans - - def parse_web_edition(self): - - for (sec_title, index_url) in self.web_sections: - if self.includeSections != []: - if sec_title not in self.includeSections: - print "SECTION NOT INCLUDED: ", sec_title - continue - if sec_title in self.excludeSections: - print "SECTION EXCLUDED: ", sec_title - continue - try: - soup = self.index_to_soup( - 'https://www.nytimes.com/pages/' + index_url + '/index.html') - except: - continue - print 'Index URL: ' + 'https://www.nytimes.com/pages/' + index_url + '/index.html' - - self.key = sec_title - # Find each article - for div in soup.findAll('article'): - self.handle_article_tag(div) - for div in soup.findAll(True, attrs={ - 'class': ['section-headline', 'ledeStory', 'story', 'story headline', 'sectionHeader', 'headlinesOnly multiline flush']}): - if div['class'] in ['story', 'story headline', 'storyHeader']: - self.handle_article(div) - elif div['class'] == 'ledeStory': - divsub = div.find('div', 'storyHeader') - if divsub is not None: - self.handle_article(divsub) - ulrefer = div.find('ul', 'refer') - if ulrefer is not None: - for lidiv in ulrefer.findAll('li'): - self.handle_article(lidiv) - elif div['class'] == 'headlinesOnly multiline flush': - for lidiv in div.findAll('li'): - self.handle_article(lidiv) - - self.ans = [(k, self.articles[k]) - for k in self.ans if k in self.articles] - return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans))) - - def parse_todays_index(self): - - soup = self.index_to_soup( - 'https://www.nytimes.com/pages/todayspaper/index.html') - skipping = False - # Find each article - for div in soup.findAll(True, - attrs={'class': ['section-headline', 'story', 'story headline', 'sectionHeader', 'headlinesOnly multiline flush']}): - if div['class'] in ['section-headline', 'sectionHeader']: - self.key = string.capwords(self.feed_title(div)) - self.key = self.key.replace('Op-ed', 'Op-Ed') - self.key = self.key.replace('U.s.', 'U.S.') - self.key = self.key.replace('N.y.', 'N.Y.') - skipping = False - if self.includeSections != []: - if self.key not in self.includeSections: - print "SECTION NOT INCLUDED: ", self.key - skipping = True - if self.key in self.excludeSections: - print "SECTION EXCLUDED: ", self.key - skipping = True - - elif div['class'] in ['story', 'story headline']: - if not skipping: - self.handle_article(div) - elif div['class'] == 'headlinesOnly multiline flush': - for lidiv in div.findAll('li'): - if not skipping: - self.handle_article(lidiv) - - self.ans = [(k, self.articles[k]) - for k in self.ans if k in self.articles] - return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans))) - - def 
parse_headline_index(self): - soup = self.index_to_soup( - 'https://www.nytimes.com/pages/todaysheadlines/') - pubdate = strftime('%a, %d %b') - section = None - articles = [] + def parse_todays_page(self): + soup = self.read_nyt_metadata() + section = soup.find(id='collection-todays-new-york-times') feeds = [] - for h6 in soup.findAll('h6'): - section = self.tag_to_string(h6).strip() - articles = [] - table = h6.parent.findNextSibling('table') - if table is None: - continue - for a in table.findAll('a', attrs={'class':'headURL'}): - title = self.tag_to_string(a) - url = a['href'].partition('?')[0] - if self.exclude_url(url) or (self.filterDuplicates and url in self.url_list): - continue - self.url_list.append(url) - desc = '' - h4 = a.findNextSibling('h4') - if h4 is not None: - desc += self.tag_to_string(h4) - p = a.findNextSibling('p') - if p is not None: - desc += ' ' + self.tag_to_string(p) - articles.append({'title':title, 'url':url + '?pagewanted=all', 'date':pubdate, 'description':desc}) + for h1 in section.findAll('h1')[1:]: + section_title = self.tag_to_string(h1) + self.log('Found section:', section_title) + articles = list(self.parse_todays_sections(h1.parent)) if articles: - feeds.append((section, articles)) - self.ans = feeds - return self.filter_ans(self.ans) + feeds.append((section_title, articles)) + return feeds + + def parse_highlights(self, container): + for article in container.findAll('article', **classes('story')): + h2 = article.find('h2') + if h2 is not None: + title = self.tag_to_string(h2) + a = h2.find('a', href=True) + if a is not None: + url = a['href'] + desc = '' + p = article.find(**classes('summary')) + if p is not None: + desc = self.tag_to_string(p) + yield {'title': title, 'url': url, 'description': desc} + + def parse_web_section(self, soup, slug): + + def log(article): + self.log('\t', article['title'], ':', article['url']) + if article.get('description'): + self.log('\t\t', article['description']) + + container = soup.find(itemtype='http://schema.org/CollectionPage') + highlights = container.find('section', **classes('highlights')) + for article in self.parse_highlights(highlights): + log(article) + yield article + extra = container.find('section', attrs={'data-collection-type': True}) + if extra is not None: + title = self.tag_to_string(extra.find('h2')) + for article in self.parse_highlights(extra): + article['title'] = '{}: {}'.format(title, article['title']) + log(article) + yield article + + def parse_web_sections(self): + feeds = [] + for section_title, slug in web_sections: + url = 'https://www.nytimes.com/section/' + slug + try: + soup = self.index_to_soup(url) + except Exception: + self.log.error('Failed to download section:', url) + continue + self.log('Found section:', section_title) + articles = list(self.parse_web_section(soup, slug)) + if articles: + feeds.append((section_title, articles)) + if self.test and len(feeds) >= self.test[0]: + break + return feeds def parse_index(self): - if self.headlinesOnly: - return self.parse_headline_index() - elif self.webEdition: - return self.parse_web_edition() - else: - return self.parse_todays_index() - - def strip_anchors(self, soup, kill_all=False): - paras = soup.findAll(True) - for para in paras: - aTags = para.findAll('a') - for a in aTags: - if a.img is None: - if kill_all or (self.recursions == 0): - a.replaceWith(self.tag_to_string(a, False)) - else: - if 'href' in a: - if a['href'].startswith('http://www.nytimes'): - if not a['href'].endswith('pagewanted=all'): - url = re.sub(r'\?.*', '', 
a['href']) - if self.exclude_url(url): - a.replaceWith( - self.tag_to_string(a, False)) - else: - a['href'] = url + '?pagewanted=all' - elif not (a['href'].startswith('http://pogue') or - a['href'].startswith('http://bits') or - a['href'].startswith('http://travel') or - a['href'].startswith('http://business') or - a['href'].startswith('http://tech') or - a['href'].startswith('http://health') or - a['href'].startswith('http://dealbook') or - a['href'].startswith('http://open')): - a.replaceWith(self.tag_to_string(a, False)) - return soup - - def handle_tags(self, soup): - try: - print("HANDLE TAGS: TITLE = " + self.tag_to_string(soup.title)) - except: - print("HANDLE TAGS: NO TITLE") - if soup is None: - print("ERROR: handle_tags received NoneType") - return None - - if self.keep_only_tags: - body = Tag(soup, 'body') - try: - if isinstance(self.keep_only_tags, dict): - self.keep_only_tags = [self.keep_only_tags] - for spec in self.keep_only_tags: - for tag in soup.find('body').findAll(**spec): - body.insert(len(body.contents), tag) - soup.find('body').replaceWith(body) - except AttributeError: # soup has no body element - pass - - def remove_beyond(tag, next): - while tag is not None and getattr(tag, 'name', None) != 'body': - after = getattr(tag, next) - while after is not None: - ns = getattr(tag, next) - after.extract() - after = ns - tag = tag.parent - - if self.remove_tags_after is not None: - rt = [self.remove_tags_after] if isinstance( - self.remove_tags_after, dict) else self.remove_tags_after - for spec in rt: - tag = soup.find(**spec) - remove_beyond(tag, 'nextSibling') - - if self.remove_tags_before is not None: - tag = soup.find(**self.remove_tags_before) - remove_beyond(tag, 'previousSibling') - - for kwds in self.remove_tags: - for tag in soup.findAll(**kwds): - tag.extract() - - return soup - - def preprocess_html(self, soup): - skip_tag = soup.find(True, {'name': 'skip'}) - if skip_tag is not None: - url = 'http://www.nytimes.com' + skip_tag.parent['href'] - self.log.warn("Skipping ad to article at '%s'" % url) - sleep(5) - soup = self.handle_tags(self.article_to_soup(url)) - - # check if the article is from one of the tech blogs - blog = soup.find( - 'div', attrs={'id': ['pogue', 'bits', 'gadgetwise', 'open']}) - - if blog is not None: - old_body = soup.find('body') - new_body = Tag(soup, 'body') - new_body.append(soup.find('div', attrs={'id': 'content'})) - new_body.find('div', attrs={'id': 'content'})[ - 'id'] = 'blogcontent' # identify for postprocess_html - old_body.replaceWith(new_body) - for divr in soup.findAll('div', attrs={'class': re.compile('w190 right')}): - if divr.find(text=re.compile('Sign up')): - divr.extract() - divr = soup.find( - 'div', attrs={'class': re.compile('^relatedArticlesModule')}) - if divr is not None: - print("PROCESSING RELATED: " + - self.tag_to_string(soup.title, False)) - # handle related articles - rlist = [] - ul = divr.find('ul') - if ul is not None: - for li in ul.findAll('li'): - atag = li.find('a') - if atag is not None: - if atag['href'].startswith('http://pogue') or atag['href'].startswith('http://bits') or \ - atag['href'].startswith('http://open'): - atag.find(text=True).replaceWith( - self.massageNCXText(self.tag_to_string(atag, False))) - rlist.append(atag) - divr.extract() - if rlist != []: - asidediv = Tag(soup, 'div', [('class', 'aside')]) - if soup.find('hr') is None: - asidediv.append(Tag(soup, 'hr')) - h4 = Tag(soup, 'h4', [('class', 'asidenote')]) - h4.insert(0, "Related Posts") - asidediv.append(h4) - ul = Tag(soup, 
'ul') - for r in rlist: - li = Tag(soup, 'li', [('class', 'aside')]) - r['class'] = 'aside' - li.append(r) - ul.append(li) - asidediv.append(ul) - asidediv.append(Tag(soup, 'hr')) - smain = soup.find('body') - smain.append(asidediv) - else: - print("CANNOT FIND RELATED: " + - self.tag_to_string(soup.title, False)) - for atag in soup.findAll('a'): - img = atag.find('img') - if img is not None: - atag.replaceWith(img) - elif 'href' not in atag: - atag.replaceWith( - atag.renderContents().decode('cp1252', 'replace')) - elif not (atag['href'].startswith('http://www.nytimes') or atag['href'].startswith('http://pogue') or - atag['href'].startswith('http://bits') or atag['href'].startswith('http://open')): - atag.replaceWith( - atag.renderContents().decode('cp1252', 'replace')) - hdr = soup.find('address') - if hdr is not None: - hdr.name = 'span' - for span_credit in soup.findAll('span', 'credit'): - sp = Tag(soup, 'span') - span_credit.replaceWith(sp) - sp.append(Tag(soup, 'br')) - sp.append(span_credit) - sp.append(Tag(soup, 'br')) - - else: # nytimes article - - related = [] # these will be the related articles - first_outer = None # first related outer tag - first_related = None # first related tag - for outerdiv in soup.findAll(attrs={'class': re.compile('articleInline runaroundLeft')}): - for rdiv in soup.findAll('div', 'columnGroup doubleRule'): - if rdiv.find('h3') is not None: - if self.tag_to_string(rdiv.h3, False).startswith('Related'): - rdiv.h3.find(text=True).replaceWith( - "Related articles") - rdiv.h3['class'] = 'asidenote' - for litag in rdiv.findAll('li'): - if litag.find('a') is not None: - if litag.find('a')['href'].startswith('http://www.nytimes.com'): - url = re.sub( - r'\?.*', '', litag.find('a')['href']) - litag.find('a')[ - 'href'] = url + '?pagewanted=all' - litag.extract() - related.append(litag) - if first_related is None: - first_related = rdiv - first_outer = outerdiv - else: - litag.extract() - for h6tag in rdiv.findAll('h6'): - if h6tag.find('a') is not None: - if h6tag.find('a')['href'].startswith('http://www.nytimes.com'): - url = re.sub( - r'\?.*', '', h6tag.find('a')['href']) - h6tag.find('a')[ - 'href'] = url + '?pagewanted=all' - h6tag.extract() - related.append(h6tag) - if first_related is None: - first_related = rdiv - first_outer = outerdiv - else: - h6tag.extract() - if related != []: - for r in related: - if r.h6: # don't want the anchor inside a h6 tag - r.h6.replaceWith(r.h6.a) - first_related.ul.append(r) - first_related.insert(0, Tag(soup, 'hr')) - first_related.append(Tag(soup, 'hr')) - first_related['class'] = 'aside' - # replace the outer tag with the related tag - first_outer.replaceWith(first_related) - - for rdiv in soup.findAll(attrs={'class': re.compile('articleInline runaroundLeft')}): - rdiv.extract() - - kicker_tag = soup.find(attrs={'class': 'kicker'}) - if kicker_tag: # remove Op_Ed author head shots - tagline = self.tag_to_string(kicker_tag) - if tagline == 'Op-Ed Columnist': - img_div = soup.find('div', 'inlineImage module') - if img_div: - img_div.extract() - - if self.useHighResImages: - try: - # open up all the "Enlarge this Image" pop-ups and download - # the full resolution jpegs - enlargeThisList = soup.findAll( - 'div', {'class': 'icon enlargeThis'}) - if enlargeThisList: - for popupref in enlargeThisList: - popupreflink = popupref.find('a') - if popupreflink: - reflinkstring = str(popupreflink['href']) - refstart = reflinkstring.find( - "javascript:pop_me_up2('") + len("javascript:pop_me_up2('") - refend = reflinkstring.find( 
- ".html", refstart) + len(".html") - reflinkstring = reflinkstring[refstart:refend] - - popuppage = self.browser.open(reflinkstring) - popuphtml = popuppage.read() - popuppage.close() - if popuphtml: - st = time.localtime() - year = str(st.tm_year) - month = "%.2d" % st.tm_mon - day = "%.2d" % st.tm_mday - imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' + month + '/' + day + '/') + \ - len('http://graphics8.nytimes.com/images/' + - year + '/' + month + '/' + day + '/') - highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' + \ - month + '/' + day + '/' + \ - popuphtml[imgstartpos:popuphtml.find( - '.jpg', imgstartpos) + 4] - popupSoup = BeautifulSoup(popuphtml) - highResTag = popupSoup.find( - 'img', {'src': highResImageLink}) - if highResTag: - try: - newWidth = highResTag['width'] - newHeight = highResTag['height'] - imageTag = popupref.parent.find( - "img") - except: - self.log( - "Error: finding width and height of img") - popupref.extract() - if imageTag: - try: - imageTag[ - 'src'] = highResImageLink - imageTag['width'] = newWidth - imageTag['height'] = newHeight - except: - self.log( - "Error setting the src width and height parameters") - except Exception: - self.log("Error pulling high resolution images") - - try: - # in case pulling images failed, delete the enlarge this - # text - enlargeThisList = soup.findAll( - 'div', {'class': 'icon enlargeThis'}) - if enlargeThisList: - for popupref in enlargeThisList: - popupref.extract() - except: - self.log("Error removing Enlarge this text") - - return self.strip_anchors(soup, False) - - def postprocess_html(self, soup, first_fetch): - if not first_fetch: # remove Related links - for aside in soup.findAll('div', 'aside'): - aside.extract() - soup = self.strip_anchors(soup, True) - - for t in soup.findAll('time', attrs={'class':'dateline'}): - t.name = 'div' - - if soup.find('div', attrs={'id': 'blogcontent'}) is None: - if first_fetch: - aside = soup.find('div', 'aside') - if aside is not None: # move the related list to the end of the article - art = soup.find('div', attrs={'id': 'article'}) - if art is None: - art = soup.find('div', attrs={'class': 'article'}) - if art is not None: - art.append(aside) - try: - if self.one_picture_per_article: - # Remove all images after first - largeImg = soup.find(True, {'class': 'articleSpanImage'}) - inlineImgs = soup.findAll( - True, {'class': 'inlineImage module'}) - if largeImg: - for inlineImg in inlineImgs: - inlineImg.extract() - else: - if inlineImgs: - firstImg = inlineImgs[0] - for inlineImg in inlineImgs[1:]: - inlineImg.extract() - # Move firstImg before article body - cgFirst = soup.find( - True, {'class': re.compile('columnGroup *first')}) - if cgFirst: - # Strip all sibling NavigableStrings: noise - navstrings = cgFirst.findAll( - text=True, recursive=False) - [ns.extract() for ns in navstrings] - headline_found = False - tag = cgFirst.find(True) - insertLoc = 0 - while True: - insertLoc += 1 - if hasattr(tag, 'class') and tag['class'] == 'articleHeadline': - headline_found = True - break - tag = tag.nextSibling - if not tag: - headline_found = False - break - if headline_found: - cgFirst.insert(insertLoc, firstImg) - else: - self.log( - ">>> No class:'columnGroup first' found <<<") - except: - self.log("ERROR: One picture per article in postprocess_html") - - try: - # Change captions to italic - for caption in soup.findAll(True, {'class': 'caption'}): - if caption and len(caption) > 0: - cTag = Tag(soup, "p", [("class", "caption")]) 
- c = self.fixChars(self.tag_to_string( - caption, use_alt=False)).strip() - mp_off = c.find("More Photos") - if mp_off >= 0: - c = c[:mp_off] - cTag.insert(0, c) - caption.replaceWith(cTag) - except: - self.log("ERROR: Problem in change captions to italic") - - try: - # Change <nyt_headline> to <h2> - h1 = soup.find('h1') - blogheadline = str(h1) # added for dealbook - if h1: - headline = h1.find("nyt_headline") - if headline: - tag = Tag(soup, "h2") - tag['class'] = "headline" - tag.insert(0, self.fixChars(headline.contents[0])) - h1.replaceWith(tag) - elif blogheadline.find('entry-title'): # added for dealbook - tag = Tag(soup, "h2") # added for dealbook - tag['class'] = "headline" # added for dealbook - # added for dealbook - tag.insert(0, self.fixChars(h1.contents[0])) - h1.replaceWith(tag) # added for dealbook - - else: - # Blog entry - replace headline, remove <hr> tags - BCC I - # think this is no longer functional 1-18-2011 - headline = soup.find('title') - if headline: - tag = Tag(soup, "h2") - tag['class'] = "headline" - tag.insert(0, self.fixChars( - self.tag_to_string(headline, False))) - soup.insert(0, tag) - hrs = soup.findAll('hr') - for hr in hrs: - hr.extract() - except: - self.log("ERROR: Problem in Change <nyt_headline> to <h2>") - - try: - # if this is from a blog (dealbook), fix the byline format - bylineauthor = soup.find( - 'address', attrs={'class': 'byline author vcard'}) - if bylineauthor: - tag = Tag(soup, "h6") - tag['class'] = "byline" - tag.insert(0, self.fixChars( - self.tag_to_string(bylineauthor, False))) - bylineauthor.replaceWith(tag) - except: - self.log("ERROR: fixing byline author format") - - try: - # if this is a blog (dealbook) fix the credit style for the - # pictures - blogcredit = soup.find('div', attrs={'class': 'credit'}) - if blogcredit: - tag = Tag(soup, "h6") - tag['class'] = "credit" - tag.insert(0, self.fixChars( - self.tag_to_string(blogcredit, False))) - blogcredit.replaceWith(tag) - except: - self.log("ERROR: fixing credit format") - - try: - # Change <h1> to <h3> - used in editorial blogs - masthead = soup.find("h1") - if masthead: - # Nuke the href - if masthead.a: - del(masthead.a['href']) - tag = Tag(soup, "h3") - tag.insert(0, self.fixChars(masthead.contents[0])) - masthead.replaceWith(tag) - except: - self.log( - "ERROR: Problem in Change <h1> to <h3> - used in editorial blogs") - - try: - # Change <span class="bold"> to <b> - for subhead in soup.findAll(True, {'class': 'bold'}): - if subhead.contents: - bTag = Tag(soup, "b") - bTag.insert(0, subhead.contents[0]) - subhead.replaceWith(bTag) - except: - self.log( - "ERROR: Problem in Change <h1> to <h3> - used in editorial blogs") - try: - # remove the update tag - blogupdated = soup.find('span', {'class': 'update'}) - if blogupdated: - blogupdated.replaceWith("") - except: - self.log("ERROR: Removing strong tag") - - try: - divTag = soup.find('div', attrs={'id': 'articleBody'}) - if divTag: - divTag['class'] = divTag['id'] - except: - self.log( - "ERROR: Problem in soup.find(div,attrs={id:articleBody})") - - try: - # Add class="authorId" to <div> so we can format with CSS - divTag = soup.find('div', attrs={'id': 'authorId'}) - if divTag and divTag.contents[0]: - tag = Tag(soup, "p") - tag['class'] = "authorId" - tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0], - use_alt=False))) - divTag.replaceWith(tag) - except: - self.log( - "ERROR: Problem in Add class=authorId to <div> so we can format with CSS") - return soup - - def populate_article_metadata(self, article, soup, first): - if not first: - return - idxdiv = soup.find('div', attrs={'class': 'articleSpanImage'}) - if idxdiv is not None: - if idxdiv.img: - self.add_toc_thumbnail(article, re.sub( - r'links\\link\d+\\', '', idxdiv.img['src'])) - else: - img = soup.find('body').find('img') - if img is not None: - self.add_toc_thumbnail(article, re.sub( - r'links\\link\d+\\', '', img['src'])) - shortparagraph = "" - try: - if len(article.text_summary.strip()) == 0: - articlebodies = soup.findAll( - 'div', attrs={'class': 'articleBody'}) - if articlebodies: - for articlebody in articlebodies: - if articlebody: - paras = articlebody.findAll('p') - for p in paras: - refparagraph = self.massageNCXText( - self.tag_to_string(p, use_alt=False)).strip() - # account for blank paragraphs and short - # paragraphs by appending them to longer ones - if len(refparagraph) > 0: - if len(refparagraph) > 70: # approximately one line of text - newpara = shortparagraph + refparagraph - newparaDateline, newparaEm, newparaDesc = newpara.partition( - '&mdash;') - if newparaEm == '': - newparaDateline, newparaEm, newparaDesc = newpara.partition( - '&#8212;') - if newparaEm == '': - newparaDesc = newparaDateline - article.summary = article.text_summary = newparaDesc.strip() - return - else: - shortparagraph = refparagraph + " " - if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"): - shortparagraph = shortparagraph + "- " - else: - article.summary = article.text_summary = self.massageNCXText( - article.text_summary) - except: - self.log("Error creating article descriptions") - return + if is_web_edition: + return self.parse_web_sections() + return self.parse_todays_page()
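
The new keep_only_tags above leans on the classes() helper defined at the top of the rewritten recipe: it builds an attrs matcher that accepts a tag whenever the tag's class list shares at least one name with the query string. A self-contained check of that matching rule, run against stock bs4 purely for illustration (inside calibre the soup comes from index_to_soup):

from bs4 import BeautifulSoup

def classes(classes):
    # same shape as the recipe's helper: match on any shared class name
    q = frozenset(classes.split(' '))
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})

soup = BeautifulSoup(
    '<div class="story-body-supplemental extra">'
    '<p class="summary">Teaser text</p></div>', 'html.parser')
div = soup.find(**classes('story-body-supplemental story-interrupter'))
print(div.find(**classes('summary')).get_text())  # Teaser text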
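The cover logic in read_nyt_metadata() above is easy to verify by hand: the todayspaper page carries a pdate meta tag holding a YYYYMMDD string, and the scan of the print front page lives at a date-derived path. A worked example with plain datetime (calibre.utils.date.strptime only adds timezone handling on top of this); the pdate value is hypothetical:

from datetime import datetime

pdate = '20180615'  # hypothetical content of the pdate meta tag
date = datetime.strptime(pdate, '%Y%m%d')
cover_url = 'https://static01.nyt.com/images/{}/nytfrontpage/scan.jpg'.format(
    date.strftime('%Y/%m/%d'))
print(cover_url)
# https://static01.nyt.com/images/2018/06/15/nytfrontpage/scan.jpg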
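The removed decode_url_date() split the URL on '/' and tried fixed positions, while exclude_url() separately rejected URLs with no /YYYY/MM/DD/ component. Both checks collapse into one regex; this is a sketch of that equivalent, not the recipe's own code:

import re
from datetime import date

def decode_url_date(url):
    # NYT article URLs embed the publication date as /YYYY/MM/DD/
    m = re.search(r'/(\d{4})/(\d{2})/(\d{2})/', url)
    if m is None:
        return None
    return date(*map(int, m.groups()))

print(decode_url_date('https://www.nytimes.com/2018/06/15/world/example.html'))
# 2018-06-15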
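The removed fixChars() undoes stray cp1252 control codes one re.sub at a time; the same mapping can be expressed as a single translation table. A sketch of that alternative, not what the recipe shipped:

CP1252_PUNCTUATION = {
    0x91: u'\u2018',  # left single quotation mark
    0x92: u'\u2019',  # right single quotation mark
    0x93: u'\u201c',  # left double quotation mark
    0x94: u'\u201d',  # right double quotation mark
    0x96: u'\u2013',  # en dash
    0x97: u'\u2014',  # em dash
}

def fix_chars(text):
    # unicode.translate accepts an ordinal-to-string mapping
    return text.translate(CP1252_PUNCTUATION)

print(fix_chars(u'\x93quoted\x94 text'))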
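Both the deleted parse_*_index() methods and the new parse_todays_page()/parse_web_sections() feed the same calibre contract: parse_index() returns a list of (section_title, articles) pairs, where each article is a dict carrying at least a title and a url. A hand-built example of that shape, with hypothetical values:

feeds = [
    ('World', [
        {'title': 'An example headline',
         'url': 'https://www.nytimes.com/2018/06/15/world/example.html',
         'description': 'Summary shown in the generated table of contents.'},
    ]),
    ('Technology', [
        {'title': 'Another example headline',
         'url': 'https://www.nytimes.com/2018/06/15/technology/example.html',
         'description': ''},
    ]),
]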
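The removed postprocess_html() repeats one pattern many times: find a tag, build a replacement with the desired name and class, move the text across, and swap it in with replaceWith(). The recipe used calibre's bundled BeautifulSoup 3, where new tags are built with Tag(soup, name); the sketch below shows the same move in stock bs4, purely for illustration:

from bs4 import BeautifulSoup

soup = BeautifulSoup('<h1><nyt_headline>A headline</nyt_headline></h1>',
                     'html.parser')
h1 = soup.find('h1')
headline = h1.find('nyt_headline')
if headline is not None:
    tag = soup.new_tag('h2')          # bs4 equivalent of Tag(soup, 'h2')
    tag['class'] = 'headline'
    tag.string = headline.get_text()
    h1.replace_with(tag)              # bs4 spelling of replaceWith()
print(soup)  # <h2 class="headline">A headline</h2>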
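Finally, the summary fallback in the removed populate_article_metadata() strips a leading wire-style dateline by partitioning the first long paragraph on an em-dash token ('&mdash;', then '&#8212;' as a fallback) and keeping the remainder. Worked on a hypothetical paragraph:

paragraph = 'WASHINGTON &mdash; The committee approved the measure on Tuesday.'
dateline, sep, rest = paragraph.partition('&mdash;')
summary = rest.strip() if sep else paragraph.strip()
print(summary)  # The committee approved the measure on Tuesday.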