diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index 152c58502f..afc74fbdef 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -138,9 +138,16 @@ def get_proxies():
     return proxies
 
 
-def browser(honor_time=False):
+def browser(honor_time=True, max_time=2):
+    '''
+    Create a mechanize browser for web scraping. The browser handles cookies,
+    refresh requests and ignores robots.txt. It also uses a proxy if available.
+
+    :param honor_time: If True, honors the pause time in refresh requests
+    :param max_time: Maximum time in seconds to wait during a refresh request
+    '''
     opener = mechanize.Browser()
-    opener.set_handle_refresh(True, honor_time=honor_time)
+    opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time)
     opener.set_handle_robots(False)
     opener.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; i686 Linux; en_US; rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4')]
     http_proxy = get_proxies().get('http', None)
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index 32b7ee2562..bac117d628 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -493,25 +493,27 @@ class BasicNewsRecipe(object, LoggingInterface):
         @return: Path to index.html
         @rtype: string
         '''
-        res = self.build_index()
-        self.cleanup()
-        self.report_progress(1, _('Download finished'))
-        if self.failed_downloads:
-            self.log_warning(_('Failed to download the following articles:'))
-            for feed, article, debug in self.failed_downloads:
-                self.log_warning(article.title+_(' from ')+feed.title)
-                self.log_debug(article.url)
-                self.log_debug(debug)
-        if self.partial_failures:
-            self.log_warning(_('Failed to download parts of the following articles:'))
-            for feed, atitle, aurl, debug in self.partial_failures:
-                self.log_warning(atitle + _(' from ') + feed)
-                self.log_debug(aurl)
-                self.log_warning(_('\tFailed links:'))
-                for l, tb in debug:
-                    self.log_warning(l)
-                    self.log_debug(tb)
-        return res
+        try:
+            res = self.build_index()
+            self.report_progress(1, _('Download finished'))
+            if self.failed_downloads:
+                self.log_warning(_('Failed to download the following articles:'))
+                for feed, article, debug in self.failed_downloads:
+                    self.log_warning(article.title+_(' from ')+feed.title)
+                    self.log_debug(article.url)
+                    self.log_debug(debug)
+            if self.partial_failures:
+                self.log_warning(_('Failed to download parts of the following articles:'))
+                for feed, atitle, aurl, debug in self.partial_failures:
+                    self.log_warning(atitle + _(' from ') + feed)
+                    self.log_debug(aurl)
+                    self.log_warning(_('\tFailed links:'))
+                    for l, tb in debug:
+                        self.log_warning(l)
+                        self.log_debug(tb)
+            return res
+        finally:
+            self.cleanup()
 
     def feeds2index(self, feeds):
         templ = templates.IndexTemplate()
diff --git a/src/calibre/web/feeds/recipes/wsj.py b/src/calibre/web/feeds/recipes/wsj.py
index b6ab4f6f1a..ec3bc6bb93 100644
--- a/src/calibre/web/feeds/recipes/wsj.py
+++ b/src/calibre/web/feeds/recipes/wsj.py
@@ -4,28 +4,26 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 
 from calibre.web.feeds.news import BasicNewsRecipe
-import re, urlparse
+
+# http://online.wsj.com/page/us_in_todays_paper.html
 
 class WallStreetJournal(BasicNewsRecipe):
     title = 'The Wall Street Journal'
-    __author__ = 'JTravers'
+    __author__ = 'Kovid Goyal'
     description = 'News and current affairs.'
     needs_subscription = True
     max_articles_per_feed = 10
     timefmt = ' [%a, %b %d, %Y]'
     html2lrf_options = ['--ignore-tables']
+    remove_tags_before = dict(name='h1')
+    remove_tags = [
+        dict(id=["articleTabs_tab_article", "articleTabs_tab_comments", "articleTabs_tab_interactive"]),
+        {'class':['more_in', "insetContent", 'articleTools_bottom', 'aTools', "tooltip", "adSummary", "nav-inline"]},
+    ]
+    remove_tags_after = [dict(id="article_story_body"), {'class':"article story"},]
 
-    preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
-        [
-            ## Remove anything before the body of the article.
-            (r'
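
For reference, a minimal usage sketch of the changed browser() helper (not part of the patch; the URL and the max_time override below are illustrative only):

    from calibre import browser

    # With the new defaults, refresh pauses are honored but capped at max_time=2 seconds.
    br = browser()
    # A site with slow meta-refresh redirects could raise the cap explicitly:
    br = browser(honor_time=True, max_time=5)
    raw = br.open('http://example.com/index.html').read()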