diff --git a/src/libprs500/__init__.py b/src/libprs500/__init__.py
index b7b752de3b..5dab02a138 100644
--- a/src/libprs500/__init__.py
+++ b/src/libprs500/__init__.py
@@ -23,13 +23,14 @@ from gettext import GNUTranslations
 from math import floor
 from optparse import OptionParser as _OptionParser
 from optparse import IndentedHelpFormatter
+from logging import Formatter
 
 from ttfquery import findsystem, describe
 
 from libprs500.translations.msgfmt import make
 from libprs500.ebooks.chardet import detect
 from libprs500.terminfo import TerminalController
-terminal_controller = TerminalController()
+terminal_controller = TerminalController(sys.stdout)
 
 iswindows = 'win32' in sys.platform.lower() or 'win64' in sys.platform.lower()
 isosx     = 'darwin' in sys.platform.lower()
@@ -51,6 +52,25 @@ __builtin__.__dict__['_'] = lambda s: s
 class CommandLineError(Exception):
     pass
 
+class ColoredFormatter(Formatter):
+    '''Formatter that wraps the level name of each record in terminal color codes.'''
+
+    #: Level name -> name of the terminal_controller attribute holding its color
+    LEVEL_COLORS = {'CRITICAL': 'YELLOW', 'ERROR': 'RED', 'WARN': 'BLUE',
+                    'WARNING': 'BLUE', 'INFO': 'GREEN', 'DEBUG': 'CYAN'}
+
+    def format(self, record):
+        ln = record.levelname
+        col = getattr(terminal_controller, self.LEVEL_COLORS.get(ln, ''), '')
+        # The record is shared by every handler attached to the logger, so the
+        # colored name is only set for the duration of this call.
+        record.levelname = col + ln + terminal_controller.NORMAL
+        try:
+            return Formatter.format(self, record)
+        finally:
+            record.levelname = ln
+         
+
 def setup_cli_handlers(logger, level):
     logger.setLevel(level)
     if level == logging.WARNING:
@@ -187,9 +207,9 @@ def extract(path, dir):
         raise Exception('Unknown archive type')
     extractor(path, dir)
 
-def browser():
+def browser(honor_time=False):
     opener = mechanize.Browser()
-    opener.set_handle_refresh(True)
+    opener.set_handle_refresh(True, honor_time=honor_time)
     opener.set_handle_robots(False)
     opener.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; i686 Linux; en_US; rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4')]
     return opener
diff --git a/src/libprs500/ebooks/lrf/pylrs/pylrs.py b/src/libprs500/ebooks/lrf/pylrs/pylrs.py
index adc8855a0f..45b141d47a 100644
--- a/src/libprs500/ebooks/lrf/pylrs/pylrs.py
+++ b/src/libprs500/ebooks/lrf/pylrs/pylrs.py
@@ -147,7 +147,7 @@ class Delegator(object):
             d.parent = self
             methods = d.getMethods()
             self.delegatedMethods += methods
-            for m in methods:                
+            for m in methods:
                 setattr(self, m, getattr(d, m))
 
             """
diff --git a/src/libprs500/ebooks/metadata/opf.py b/src/libprs500/ebooks/metadata/opf.py
index e1d078d9a0..833f8ae51f 100644
--- a/src/libprs500/ebooks/metadata/opf.py
+++ b/src/libprs500/ebooks/metadata/opf.py
@@ -595,6 +595,11 @@ class OPFCreator(OPF):
             self.uid = mi.uid    
         
     def create_manifest(self, entries):
+        '''
+        Create <manifest>
+        @param entries: List of (URL, mime-type)
+        @type entries: list of 2-tuples
+        '''
         doc = dom.parseString(self.soup.__str__('UTF-8').strip())
         package = doc.documentElement
         manifest = doc.createElement('manifest')
@@ -616,6 +621,11 @@ class OPFCreator(OPF):
             
             
     def create_spine(self, entries):
+        '''
+        Create the <spine> element. Must first call L{create_manifest}.
+        @param entries: List of paths
+        @type entries: list of strings
+        '''
         doc = dom.parseString(self.soup.__str__('UTF-8').strip())
         package = doc.documentElement
         spine = doc.createElement('spine')
diff --git a/src/libprs500/linux.py b/src/libprs500/linux.py
index baf5c02467..0d06482aee 100644
--- a/src/libprs500/linux.py
+++ b/src/libprs500/linux.py
@@ -74,6 +74,34 @@ def options(option_parser):
         opts.extend(opt._long_opts)
     return opts
 
+def opts_and_words(name, op, words):
+    '''Return bash code for a completion function for C{name}: options of C{op} or one of C{words}.'''
+    opts  = ' '.join(options(op))
+    words = ' '.join([repr(w) for w in words])
+    return '_'+name+'()'+\
+'''
+{
+    local cur prev opts
+    COMPREPLY=()
+    cur="${COMP_WORDS[COMP_CWORD]}"
+    opts="%s"
+    words="%s"
+    
+    case "${cur}" in
+      -* )
+         COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
+         return 0
+         ;;      
+      *  )
+         COMPREPLY=( $(compgen -W "${words}" -- ${cur}) )
+         return 0
+         ;;
+    esac
+
+}
+complete -F _'''%(opts, words) + name + ' ' + name +"\n\n"
+
+
 def opts_and_exts(name, op, exts):
     opts = ' '.join(options(op))
     exts.extend([i.upper() for i in exts])
@@ -135,6 +163,8 @@ def setup_completion(fatal_errors):
         from libprs500.gui2.lrf_renderer.main import option_parser as lrfviewerop
         from libprs500.ebooks.lrf.pdf.reflow import option_parser as pdfhtmlop
         from libprs500.ebooks.mobi.reader import option_parser as mobioeb
+        from libprs500.web.feeds.main import option_parser as feeds2disk
+        from libprs500.web.feeds.recipes import titles as feed_titles
         
         f = open_file('/etc/bash_completion.d/libprs500')
         
@@ -159,6 +189,7 @@ def setup_completion(fatal_errors):
         f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
         f.write(opts_and_exts('pdfrelow', pdfhtmlop, ['pdf']))
         f.write(opts_and_exts('mobi2oeb', mobioeb, ['mobi', 'prc']))
+        f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
         f.write('''
 _prs500_ls()
 {
diff --git a/src/libprs500/parallel.py b/src/libprs500/parallel.py
index 56724220ea..05fcc1c8cf 100644
--- a/src/libprs500/parallel.py
+++ b/src/libprs500/parallel.py
@@ -15,7 +15,7 @@
 '''
 Used to run jobs in parallel in separate processes.
 '''
-import re, sys, tempfile, os, cPickle, cStringIO, traceback, atexit, binascii, time, subprocess
+import re, sys, tempfile, os, cPickle, traceback, atexit, binascii, time, subprocess
 from functools import partial
 
 
diff --git a/src/libprs500/terminfo.py b/src/libprs500/terminfo.py
index 1114f39642..fca163d988 100644
--- a/src/libprs500/terminfo.py
+++ b/src/libprs500/terminfo.py
@@ -106,7 +106,7 @@ class TerminalController:
         except: return
         
         # If the stream isn't a tty, then assume it has no capabilities.
-        if not term_stream.isatty(): return
+        if not hasattr(term_stream, 'isatty') or not term_stream.isatty(): return
         
         # Check the terminal type.  If we fail, then assume that the
         # terminal has no capabilities.
diff --git a/src/libprs500/threadpool.py b/src/libprs500/threadpool.py
index 917665bf92..19b1a7b038 100644
--- a/src/libprs500/threadpool.py
+++ b/src/libprs500/threadpool.py
@@ -97,7 +97,8 @@ class WorkerThread(threading.Thread):
                 )
             except:
                 request.exception = True
-                self.resultQueue.put((request, sys.exc_info()))
+                import traceback
+                self.resultQueue.put((request, traceback.format_exc()))
 
     def dismiss(self):
         """Sets a flag to tell the thread to exit when done with current job.
diff --git a/src/libprs500/web/feeds/__init__.py b/src/libprs500/web/feeds/__init__.py
index f54f3c254a..1a3083131d 100644
--- a/src/libprs500/web/feeds/__init__.py
+++ b/src/libprs500/web/feeds/__init__.py
@@ -27,6 +27,7 @@ class Article(object):
     time_offset = datetime.now() - datetime.utcnow()
 
     def __init__(self, id, title, url, summary, published, content):
+        self.downloaded = False
         self.id = id
         self.title = title
         self.url = url
@@ -103,7 +104,7 @@ class Feed(object):
         if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article:
             self.articles.append(article)
         else:
-            self.logger.debug('Skipping article %s as it is too old.'%title)
+            self.logger.debug('Skipping article %s (%s) from feed %s as it is too old.'%(title, article.localtime.strftime('%a, %d %b, %Y %H:%M'), self.title))
         
     def __iter__(self):
         return iter(self.articles)
@@ -118,6 +119,12 @@ class Feed(object):
     
     def __str__(self):
         return repr(self)
+    
+    def __nonzero__(self):
+        '''Truth value (Python 2 protocol): True if any article was downloaded.'''
+        return any(getattr(a, 'downloaded', False) for a in self)
+
+    __bool__ = __nonzero__ # keep the originally added name available
 
 
 def feed_from_xml(raw_xml, title=None, oldest_article=7, max_articles_per_feed=100):
diff --git a/src/libprs500/web/feeds/main.py b/src/libprs500/web/feeds/main.py
index df5ef3c36b..8e2d7d0701 100644
--- a/src/libprs500/web/feeds/main.py
+++ b/src/libprs500/web/feeds/main.py
@@ -18,21 +18,24 @@ from libprs500.web.feeds.news import BasicNewsRecipe
 ''''''
 
 import sys, os, logging
-from libprs500.web.recipes import get_feed, compile_recipe
+from libprs500.web.feeds.recipes import get_builtin_recipe, compile_recipe, titles
 from libprs500.web.fetch.simple import option_parser as _option_parser
 
 
 def option_parser(usage='''\
-%prog [options] ARG
+%%prog [options] ARG
 
-%prog parsers an online source of articles, like an RSS or ATOM feed and 
+%%prog parsers an online source of articles, like an RSS or ATOM feed and 
 fetches the article contents organized in a nice hierarchy.
 
 ARG can be one of:
-file name            - %prog will try to load a recipe from the file
-builtin recipe title - %prog will load the builtin recipe and use it to fetch the feed. For e.g. Newsweek or "The BBC" or "The New York Times"
-recipe as a string   - $prog will load the recipe directly from the string arg.
-'''):
+file name            - %%prog will try to load a recipe from the file
+builtin recipe title - %%prog will load the builtin recipe and use it to fetch the feed. For e.g. Newsweek or "The BBC" or "The New York Times"
+recipe as a string   - %%prog will load the recipe directly from the string arg.
+
+Available builtin recipes are:
+%s
+'''%(unicode(list(titles))[1:-1])):
     p = _option_parser(usage=usage)
     p.remove_option('--max-recursions')
     p.remove_option('--base-dir')
@@ -86,7 +89,7 @@ def main(args=sys.argv, notification=None, handler=None):
         else:
             notification = no_progress_bar
         
-    if len(args) != 2:
+    if len(args) != 2 and opts.feeds is None:
         p.print_help()
         return 1
     
@@ -96,11 +99,16 @@ def main(args=sys.argv, notification=None, handler=None):
     else:
         try:
             if os.access(args[1], os.R_OK):
-                recipe = compile_recipe(open(args[1]).read())
+                try:
+                    recipe = compile_recipe(open(args[1]).read())
+                except:
+                    import traceback
+                    traceback.print_exc()
+                    return 1
             else:
-                raise Exception('')
+                raise Exception('not file')
         except:
-            recipe = get_feed(args[1])
+            recipe = get_builtin_recipe(args[1])
             if recipe is None:
                 recipe = compile_recipe(args[1])
     
@@ -111,9 +119,10 @@ def main(args=sys.argv, notification=None, handler=None):
         return 1
     
     if handler is None:
+        from libprs500 import ColoredFormatter
         handler = logging.StreamHandler(sys.stdout)
         handler.setLevel(logging.DEBUG if opts.debug else logging.INFO if opts.verbose else logging.WARN)
-        handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
+        handler.setFormatter(ColoredFormatter('%(levelname)s: %(message)s\n')) # The trailing newline is need because of the progress bar
         logging.getLogger('feeds2disk').addHandler(handler)
     
     recipe = recipe(opts, p, notification)
diff --git a/src/libprs500/web/feeds/news.py b/src/libprs500/web/feeds/news.py
index debc6d8a92..46c5549598 100644
--- a/src/libprs500/web/feeds/news.py
+++ b/src/libprs500/web/feeds/news.py
@@ -17,11 +17,13 @@
 The backend to parse feeds and create HTML that can then be converted
 to an ebook.
 '''
-import logging, os, cStringIO, traceback, time
+import logging, os, cStringIO, time, itertools, traceback
 import urlparse
 
-from libprs500 import browser
+from libprs500 import browser, __appname__
 from libprs500.ebooks.BeautifulSoup import BeautifulSoup
+from libprs500.ebooks.metadata.opf import OPFCreator
+from libprs500.ebooks.metadata import MetaInformation
 from libprs500.web.feeds import feed_from_xml, templates
 from libprs500.web.fetch.simple import option_parser as web2disk_option_parser
 from libprs500.web.fetch.simple import RecursiveFetcher
@@ -35,7 +37,10 @@ class BasicNewsRecipe(object):
     
     #: The title to use for the ebook
     #: @type: string    
-    title                 = 'Unknown News Source'    
+    title                 = _('Unknown News Source')
+    
+    #: The author of this recipe
+    __author__            = _('Unknown')    
     
     #: Maximum number of articles to download from each feed
     #: @type: integer
@@ -55,17 +60,18 @@ class BasicNewsRecipe(object):
     delay                 = 0
     
     #: Number of simultaneous downloads. Set to 1 if the server is picky.
+    #: Automatically reduced to 1 if L{delay} > 0
     #: @type: integer
     simultaneous_downloads = 5
     
     #: Timeout for fetching files from server in seconds
     #: @type: integer
-    timeout               = 10
+    timeout               = 120
     
     #: The format string for the date shown on the first page
     #: By default: Day Name Day Number Month Name Year
     #: @type: string
-    timefmt               = ' %a, %d %b %Y'
+    timefmt               = ' [%a, %d %b %Y]'
     
     #: Max number of characters in the short description.
     #: @type: integer
@@ -102,7 +108,7 @@ class BasicNewsRecipe(object):
     
     #: List of options to pass to html2lrf, to customize generation of LRF ebooks.
     #: @type: list of strings
-    html2lrf_options   = []
+    html2lrf_options   = ['--page-break-before', '$']
     
     #: List of tags to be removed. Specified tags are removed from downloaded HTML.
     #: A tag is specified as a dictionary of the form::
@@ -114,9 +120,23 @@ class BasicNewsRecipe(object):
     #: U{http://www.crummy.com/software/BeautifulSoup/documentation.html#The basic find method: findAll(name, attrs, recursive, text, limit, **kwargs)}
     #: A common example::
     #:   remove_tags = [dict(name='div', attrs={'class':'advert'})]
-    #:   This will remove all <div class="advert"> tags and all their children from the downloaded HTML. 
+    #:   This will remove all <div class="advert"> tags and all their children from the downloaded HTML.
+    #: @type: list 
     remove_tags = []
     
+    #: Remove all tags that occur after the specified tag. 
+    #: For the format for specifying a tag see L{remove_tags}.
+    #: For example, C{remove_tags_after = [dict(id='content')]} will remove all
+    #: tags after the element with id C{content}.
+    remove_tags_after = None
+    
+    #: Keep only the specified tags and their children. 
+    #: For the format for specifying tags see L{remove_tags}.
+    #: If this list is not empty, then the <body> element will be emptied and re-filled with
+    #: the tags that match the entries in this list.
+    #: @type: list 
+    keep_only_tags = []
+    
     #: List of regexp substitution rules to run on the downloaded HTML. Each element of the 
     #: list should be a two element tuple. The first element of the tuple should
     #: be a compiled regular expression and the second a callable that takes
@@ -126,6 +146,13 @@ class BasicNewsRecipe(object):
     
     # See the built-in profiles for examples of these settings.
     
+    def get_cover_url(self):
+        '''
+        Return the URL of the cover image for this issue (the C{cover_url} attribute) or None if it is not set.
+        @rtype: string or None
+        '''
+        return getattr(self, 'cover_url', None)
+    
     def get_feeds(self):
         '''
         Return a list of RSS feeds to fetch for this profile. Each element of the list
@@ -156,7 +183,21 @@ class BasicNewsRecipe(object):
     
     def preprocess_html(self, soup):
         '''
-        This function is called with the source of each downloaded HTML file. 
+        This function is called with the source of each downloaded HTML file, before
+        it is parsed for links and images. 
+        It can be used to do arbitrarily powerful pre-processing on the HTML.
+        @param soup: A U{BeautifulSoup<http://www.crummy.com/software/BeautifulSoup/documentation.html>} 
+                     instance containing the downloaded HTML.
+        @type soup: A U{BeautifulSoup<http://www.crummy.com/software/BeautifulSoup/documentation.html>} instance
+        @return: It must return soup (after having done any needed preprocessing)
+        @rtype: A U{BeautifulSoup<http://www.crummy.com/software/BeautifulSoup/documentation.html>} instance 
+        '''
+        return soup
+    
+    def postprocess_html(self, soup):
+        '''
+        This function is called with the source of each downloaded HTML file, after
+        it is parsed for links and images. 
         It can be used to do arbitrarily powerful pre-processing on the HTML.
         @param soup: A U{BeautifulSoup<http://www.crummy.com/software/BeautifulSoup/documentation.html>} 
                      instance containing the downloaded HTML.
@@ -210,6 +251,7 @@ class BasicNewsRecipe(object):
         
         self.browser = self.get_browser()
         self.image_map, self.image_counter = {}, 1
+        self.css_map = {}
         
         web2disk_cmdline = [ 'web2disk', 
             '--timeout', str(self.timeout),
@@ -233,14 +275,18 @@ class BasicNewsRecipe(object):
             web2disk_cmdline.extend(['--filter-regexp', reg])
             
         self.web2disk_options = web2disk_option_parser().parse_args(web2disk_cmdline)[0]
-        self.web2disk_options.remove_tags = self.remove_tags
-        self.web2disk_options.preprocess_regexps = self.preprocess_regexps
-        self.web2disk_options.preprocess_html = self.preprocess_html
+        for extra in ('keep_only_tags', 'remove_tags', 'preprocess_regexps', 
+                      'preprocess_html', 'remove_tags_after', 'postprocess_html'):
+            setattr(self.web2disk_options, extra, getattr(self, extra))
         
         if self.delay > 0:
             self.simultaneous_downloads = 1
             
         self.navbar = templates.NavBarTemplate()
+        self.max_articles_per_feed -= 1
+        self.html2lrf_options = list(self.html2lrf_options) + ['--use-spine'] # new list: appending in place would grow the shared class-level default
+        self.failed_downloads = []
+        self.partial_failures = []
             
     def download(self):
         '''
@@ -250,9 +296,26 @@ class BasicNewsRecipe(object):
         @return: Path to index.html
         @rtype: string
         '''
-        self.report_progress(0, _('Initialized'))
+        self.report_progress(0, _('Trying to download cover...'))
+        self.download_cover()
         res = self.build_index()
         self.cleanup()
+        self.report_progress(1, _('Download finished'))
+        if self.failed_downloads:
+            self.logger.warning(_('Failed to download the following articles:'))
+            for feed_title, article, debug in self.failed_downloads: # entries hold the feed title, not the Feed
+                self.logger.warning(article.title+_(' from ')+feed_title)
+                self.logger.debug(article.url)
+                self.logger.debug(debug)
+        if self.partial_failures:
+            self.logger.warning(_('Failed to download parts of the following articles:'))
+            for feed, atitle, aurl, debug in self.partial_failures:
+                self.logger.warning(atitle + _(' from ') + feed)
+                self.logger.debug(aurl)
+                self.logger.warning(_('\tFailed links:'))
+                for l, tb in debug:
+                    self.logger.warning(l)
+                    self.logger.debug(tb) 
         return res
     
     def feeds2index(self, feeds):
@@ -294,11 +357,14 @@ class BasicNewsRecipe(object):
         return logger, out
     
     def fetch_article(self, url, dir, logger):
-        fetcher = RecursiveFetcher(self.web2disk_options, logger, self.image_map)
+        fetcher = RecursiveFetcher(self.web2disk_options, logger, self.image_map, self.css_map)
         fetcher.base_dir = dir
         fetcher.current_dir = dir
         fetcher.show_progress = False
-        return fetcher.start_fetch(url)
+        res, path, failures = fetcher.start_fetch(url), fetcher.downloaded_paths, fetcher.failed_links
+        if not res:
+            raise Exception(_('Could not fetch article. Run with --debug to see the reason'))
+        return res, path, failures
     
     def build_index(self):
         self.report_progress(0, _('Fetching feeds...'))
@@ -331,58 +397,111 @@ class BasicNewsRecipe(object):
                 req.stream = stream
                 req.feed = feed
                 req.article = article
+                req.feed_dir = feed_dir
                 self.jobs.append(req)
+            
                     
         self.jobs_done = 0
         tp = ThreadPool(self.simultaneous_downloads)
         for req in self.jobs:
             tp.putRequest(req, block=True, timeout=0)
         
+        
         self.report_progress(0, _('Starting download [%d thread(s)]...')%self.simultaneous_downloads)
         while True:
             try:
-                tp.poll(True)
+                tp.poll()
                 time.sleep(0.1)
             except NoResultsPending:
                 break
         
-        html = self.feed2index(feed)
-        open(os.path.join(feed_dir, 'index.html'), 'wb').write(html)
+        for f, feed in enumerate(feeds):
+            html = self.feed2index(feed)
+            feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
+            open(os.path.join(feed_dir, 'index.html'), 'wb').write(html)
+        
+        self.create_opf(feeds)
         self.report_progress(1, _('Feeds downloaded to %s')%index)
         return index
+    
+    def download_cover(self):
+        '''Download the cover returned by L{get_cover_url} into output_dir and set self.cover_path.'''
+        self.cover_path = cu = None
+        try:
+            cu = self.get_cover_url()
+        except Exception, err:
+            self.logger.error(_('Could not download cover: %s')%str(err))
+            self.logger.debug(traceback.format_exc())
+        if cu is not None:
+            ext = cu.rpartition('.')[-1].lower() or 'jpg'
+            self.report_progress(1, _('Downloading cover from %s')%cu)
+            cpath = os.path.join(self.output_dir, 'cover.'+ext)
+            cfile = open(cpath, 'wb')
+            try: cfile.write(self.browser.open(cu).read())
+            finally: cfile.close() # do not leak the handle, even if the fetch fails
+            self.cover_path = cpath
             
-            
+    
+    def create_opf(self, feeds, dir=None):
+        '''Write index.opf (manifest and spine) for the index pages and every downloaded article into C{dir}.'''
+        if dir is None:
+            dir = self.output_dir
+        mi = MetaInformation(self.title + time.strftime(self.timefmt), [__appname__])
+        opf = OPFCreator(mi)
+        opf_path = os.path.join(dir, 'index.opf')
+        cpath = getattr(self, 'cover_path', None)
+        if cpath is not None and os.access(cpath, os.R_OK):
+            opf.cover = cpath
+        entries = ['index.html']
+        for i, f in enumerate(feeds):
+            entries.append('feed_%d/index.html'%i)
+            for j, a in enumerate(f):
+                if getattr(a, 'downloaded', False):
+                    entries.append('feed_%d/article_%d/index.html'%(i, j))
+                    for sp in a.sub_pages:
+                        relp = sp[len(os.path.commonprefix([opf_path, sp])):] # path relative to the OPF file
+                        entries.append(relp.replace(os.sep, '/'))
+        opf.create_manifest(itertools.izip(entries, itertools.repeat('text/html')))
+        opf.create_spine(entries)
+        stream = open(opf_path, 'wb')
+        try:
+            opf.write(stream)
+        finally:
+            stream.close() # the original never closed the file it opened
+        
+    
     def article_downloaded(self, request, result):
-        index = os.path.join(os.path.dirname(result), 'index.html')
-        os.rename(result, index)
+        index = os.path.join(os.path.dirname(result[0]), 'index.html')
+        os.rename(result[0], index)
         src = open(index, 'rb').read().decode('utf-8')
         f, a = request.requestID
         soup = BeautifulSoup(src)
         body = soup.find('body')
         if body is not None:
             top    = self.navbar.generate(False, f, a, len(request.feed), not self.has_single_feed).render(doctype='xhtml')
-            bottom = self.navbar.generate(True,  f, a, len(request.feed), not self.has_single_feed).render(doctype='xhtml')
             top    = BeautifulSoup(top).find('div')
-            bottom = BeautifulSoup(bottom).find('div')
             body.insert(0, top)
-            body.insert(len(body.contents), bottom)
             open(index, 'wb').write(unicode(soup).encode('utf-8'))
         
         article = request.article
-        self.logger.debug(_('\nDownloaded article %s from %s\n%s')%(article.title, article.url, request.stream.getvalue()))
-        article.url = result
+        self.logger.debug(_('\nDownloaded article %s from %s\n%s')%(article.title, article.url, request.stream.getvalue().decode('utf-8', 'ignore')))
+        article.url = result[0]
         article.downloaded = True
+        article.sub_pages  = result[1][1:]
         self.jobs_done += 1
         self.report_progress(float(self.jobs_done)/len(self.jobs), _('Article downloaded: %s')%article.title)
+        if result[2]:
+            self.partial_failures.append((request.feed.title, article.title, article.url, result[2]))
         
-    def error_in_article_download(self, request, exc_info):
+    def error_in_article_download(self, request, traceback):
         self.jobs_done += 1
-        self.logger.error(_('Failed to download article: %s from %s')%(request.article.title, request.article.url))
-        self.logger.debug(traceback.format_exc(*exc_info))
-        self.logger.debug(request.stream.getvalue())
+        self.logger.error(_('Failed to download article: %s from %s\n')%(request.article.title, request.article.url))
+        debug = request.stream.getvalue().decode('utf-8', 'ignore')
+        self.logger.debug(debug)
+        self.logger.debug(traceback)
         self.logger.debug('\n')
         self.report_progress(float(self.jobs_done)/len(self.jobs), _('Article download failed: %s')%request.article.title)
-        
+        self.failed_downloads.append((request.feed.title, request.article, debug))
         
     def parse_feeds(self):
         '''
@@ -404,5 +523,3 @@ class BasicNewsRecipe(object):
                                               max_articles_per_feed=self.max_articles_per_feed))
             
         return parsed_feeds
-    
-               
diff --git a/src/libprs500/web/feeds/recipes/__init__.py b/src/libprs500/web/feeds/recipes/__init__.py
new file mode 100644
index 0000000000..ed7d7a3d9a
--- /dev/null
+++ b/src/libprs500/web/feeds/recipes/__init__.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env  python
+
+##    Copyright (C) 2008 Kovid Goyal kovid@kovidgoyal.net
+##    This program is free software; you can redistribute it and/or modify
+##    it under the terms of the GNU General Public License as published by
+##    the Free Software Foundation; either version 2 of the License, or
+##    (at your option) any later version.
+##
+##    This program is distributed in the hope that it will be useful,
+##    but WITHOUT ANY WARRANTY; without even the implied warranty of
+##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+##    GNU General Public License for more details.
+##
+##    You should have received a copy of the GNU General Public License along
+##    with this program; if not, write to the Free Software Foundation, Inc.,
+##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+'''
+Builtin recipes.
+'''
+recipes = ['newsweek']
+
+import re
+from libprs500.web.feeds.news import BasicNewsRecipe
+from libprs500.ebooks.lrf.web.profiles import DefaultProfile, FullContentProfile
+from libprs500.ebooks.lrf.web import available_profiles
+
+basic_recipes = (BasicNewsRecipe, DefaultProfile, FullContentProfile)
+basic_recipe_names = tuple(i.__name__ for i in basic_recipes) # a tuple: a bare generator would be used up by the first "in" test
+
+
+#: Compiled builtin recipe/profile classes
+def load_recipe(module, package='libprs500.web.feeds.recipes'):
+    '''
+    Import C{package}.C{module} and return its recipe/profile class.
+    @return: First class (in C{dir()} order) directly derived from one of
+             L{basic_recipes}, or None if the module defines no such class
+    '''
+    module = __import__(package+'.'+module, fromlist=[''])
+    for attr in dir(module):
+        obj = getattr(module, attr)
+        if type(obj) is not type or obj in basic_recipes:
+            continue
+        # Only the direct base classes are inspected, not the whole hierarchy
+        if [b for b in obj.__bases__ if b in basic_recipes]:
+            return obj
+    return None
+
+
+recipes = [load_recipe(i) for i in recipes]
+    
+def compile_recipe(src):
+    '''
+    Execute the code in C{src} and return a recipe/profile class it defines (the first one found).
+    C{src} is run as is via C{exec}, so it must come from a trusted source.
+    @return: Recipe/Profile class or None, if no such class was found in C{src}
+    '''
+    namespace = {}
+    exec src in globals(), namespace
+    for obj in namespace.values():
+        if type(obj) is not type or obj.__name__ in basic_recipe_names:
+            continue
+        if [b for b in obj.__bases__ if b in basic_recipes]:
+            return obj
+    return None
+
+
+def get_builtin_recipe(title):
+    '''
+    Find the builtin recipe/profile class whose title equals C{title}.
+    @type title: string
+    @return: The first matching class in L{recipes}, or None if there is no match
+    @rtype: class or None
+    '''
+    for recipe in recipes:
+        if recipe.title == title:
+            return recipe
+    return None
+
+titles = set([r.title for r in recipes])
\ No newline at end of file
diff --git a/src/libprs500/web/feeds/recipes/newsweek.py b/src/libprs500/web/feeds/recipes/newsweek.py
new file mode 100644
index 0000000000..cdee8402e8
--- /dev/null
+++ b/src/libprs500/web/feeds/recipes/newsweek.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env  python
+
+##    Copyright (C) 2008 Kovid Goyal kovid@kovidgoyal.net
+##    This program is free software; you can redistribute it and/or modify
+##    it under the terms of the GNU General Public License as published by
+##    the Free Software Foundation; either version 2 of the License, or
+##    (at your option) any later version.
+##
+##    This program is distributed in the hope that it will be useful,
+##    but WITHOUT ANY WARRANTY; without even the implied warranty of
+##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+##    GNU General Public License for more details.
+##
+##    You should have received a copy of the GNU General Public License along
+##    with this program; if not, write to the Free Software Foundation, Inc.,
+##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+import re
+from libprs500.web.feeds.news import BasicNewsRecipe
+from libprs500.ebooks.BeautifulSoup import BeautifulSoup
+
+class Newsweek(BasicNewsRecipe):
+    # Newsweek recipe: feed list, HTML clean-up rules and cover lookup
+    title      = 'Newsweek'
+    __author__ = 'Kovid Goyal'
+
+    feeds = [
+             ('Top News', 'http://feeds.newsweek.com/newsweek/TopNews',),
+             'http://feeds.newsweek.com/newsweek/columnists/StevenLevy',
+             ('Politics', 'http://feeds.newsweek.com/headlines/politics'),
+             ('Health', 'http://feeds.newsweek.com/headlines/health'),
+             ('Business', 'http://feeds.newsweek.com/headlines/business'),
+             ('Science and Technology', 'http://feeds.newsweek.com/headlines/technology/science'),
+             ('National News', 'http://feeds.newsweek.com/newsweek/NationalNews'),
+             ('World News', 'http://feeds.newsweek.com/newsweek/WorldNews'),
+             'http://feeds.newsweek.com/newsweek/Columnists/ChristopherDickey',
+             'http://feeds.newsweek.com/newsweek/Columnists/FareedZakaria',
+             ('Iraq', 'http://feeds.newsweek.com/newsweek/iraq'),
+             ('Society', 'http://feeds.newsweek.com/newsweek/society'),
+             ('Entertainment', 'http://feeds.newsweek.com/newsweek/entertainment'),
+             'http://feeds.newsweek.com/newsweek/columnists/GeorgeFWill',
+             'http://feeds.newsweek.com/newsweek/columnists/AnnaQuindlen',
+             ]
+    # For testing: restrict to the first two feeds and one article per feed
+    feeds = feeds[:2]
+    max_articles_per_feed = 1
+
+    keep_only_tags = [dict(name='div', id='content')]
+
+    remove_tags = [
+        dict(name=['script',  'noscript']),
+        dict(name='div',  attrs={'class':['ad', 'SocialLinks', 'SocialLinksDiv', 'channel', 'bot', 'nav', 'top', 'EmailArticleBlock']}),
+        dict(name='div',  attrs={'class':re.compile('box')}),
+        dict(id=['ToolBox', 'EmailMain', 'EmailArticle', ])
+    ]
+
+    recursions = 1
+    match_regexps = [r'http://www.newsweek.com/id/\S+/page/\d+']
+
+    def postprocess_html(self,  soup):
+        # Strip up to two pagination divs (a page may have none) and set page-break styles
+        divs = list(soup.findAll('div', 'pagination'))
+        for div in divs[:2]:
+            div.extract()
+        if len(divs) > 1:
+            soup.find('body')['style'] = 'page-break-after:avoid'
+            # Several pagination divs: also drop the h1 and the articleInfo div, if present
+            for spec in ('h1',), ('div', 'articleInfo'):
+                tag = soup.find(*spec)
+                if tag:
+                    tag.extract()
+        else:
+            soup.find('body')['style'] = 'page-break-before:always; page-break-after:avoid;'
+        return soup
+
+    def get_current_issue(self):
+        '''Return the HTML of the page linked from the 'Current Magazine' image on the home page, or None.'''
+        from urllib2 import urlopen # For some reason mechanize fails
+        soup = BeautifulSoup(urlopen('http://www.newsweek.com').read())
+        img  = soup.find('img', alt='Current Magazine')
+        if img and img.parent.has_key('href'):
+            return urlopen(img.parent['href']).read()
+
+    def get_cover_url(self):
+        '''Return the URL of the large cover image found on the current issue page, or None.'''
+        ci = self.get_current_issue()
+        if ci is not None:
+            soup = BeautifulSoup(ci)
+            img = soup.find(alt='Cover')
+            if img is not None and img.has_key('src'):
+                return img['src'].replace('coversmall', 'coverlarge')
\ No newline at end of file
diff --git a/src/libprs500/web/feeds/templates.py b/src/libprs500/web/feeds/templates.py
index d90106a28c..dd12a1b2ff 100644
--- a/src/libprs500/web/feeds/templates.py
+++ b/src/libprs500/web/feeds/templates.py
@@ -104,7 +104,7 @@ class IndexTemplate(Template):
         <p style="text-align:right">${datetime.now().strftime(datefmt)}</p>
         <ul>
             <py:for each="i, feed in enumerate(feeds)">
-            <li id="feed_${str(i)}">
+            <li py:if="feed" id="feed_${str(i)}">
                 <a class="feed" href="${'feed_%d/index.html'%i}">${feed.title}</a>
             </li>
             </py:for>
@@ -136,7 +136,7 @@ class FeedTemplate(Template):
             ${style}
         </style>
     </head>
-    <body>
+    <body style="page-break-before:always">
         <h2>${feed.title}</h2>
         <py:if test="feed.image">
         <div class="feed_image">
@@ -144,7 +144,7 @@ class FeedTemplate(Template):
         </div>
         </py:if>
         <ul>
-            <py:for each="i, article in enumerate(feed)">
+            <py:for each="i, article in enumerate(feed.articles)">
             <li id="${'article_%d'%i}" py:if="getattr(article, 'downloaded', False)">
                 <a class="article" href="${article.url}">${article.title}</a>
                 <span class="article_date">${article.localtime.strftime(" [%a, %d %b %H:%M]")}</span>
diff --git a/src/libprs500/web/fetch/simple.py b/src/libprs500/web/fetch/simple.py
index b4b1d77bdd..644f5bc241 100644
--- a/src/libprs500/web/fetch/simple.py
+++ b/src/libprs500/web/fetch/simple.py
@@ -17,12 +17,12 @@ Fetch a webpage and its links recursively. The webpages are saved to disk in
 UTF-8 encoding with any charset declarations removed.
 '''
 from __future__ import with_statement
-import sys, socket, os, urlparse, codecs, logging, re, time, copy, urllib2, threading
+import sys, socket, os, urlparse, codecs, logging, re, time, copy, urllib2, threading, traceback
 from urllib import url2pathname
 from httplib import responses
 
 from libprs500 import setup_cli_handlers, browser, sanitize_file_name, OptionParser
-from libprs500.ebooks.BeautifulSoup import BeautifulSoup
+from libprs500.ebooks.BeautifulSoup import BeautifulSoup, Tag
 from libprs500.ebooks.chardet import xml_to_unicode
 
 class FetchError(Exception):
@@ -37,10 +37,11 @@ def basename(url):
     return res
 
 def save_soup(soup, target):
+    nm = Tag(soup, 'meta', [('http-equiv', 'Content-Type'), ('content', 'text/html; charset=UTF-8')]) # Tag() takes a tag name plus attrs, not markup
     for meta in soup.findAll('meta', content=True):
         if 'charset' in meta['content']:
-            meta.extract()
-    f = codecs.open(target, 'w', 'utf8')
+            meta.replaceWith(nm) # declare the charset the file is actually written in
+    f = codecs.open(target, 'w', 'utf-8')
     f.write(unicode(soup))
     f.close()
 
@@ -55,7 +56,7 @@ class RecursiveFetcher(object):
     #                       )
     CSS_IMPORT_PATTERN = re.compile(r'\@import\s+url\((.*?)\)', re.IGNORECASE)
     
-    def __init__(self, options, logger, image_map={}):
+    def __init__(self, options, logger, image_map={}, css_map={}):
         self.logger = logger
         self.base_dir = os.path.abspath(os.path.expanduser(options.dir))
         if not os.path.exists(self.base_dir):
@@ -74,20 +75,44 @@ class RecursiveFetcher(object):
         self.filemap = {}
         self.imagemap = image_map
         self.imagemap_lock = threading.RLock()
-        self.stylemap = {}
+        self.stylemap = css_map
+        self.stylemap_lock = threading.RLock()
+        self.downloaded_paths = []
         self.current_dir = self.base_dir
         self.files = 0
         self.preprocess_regexps  = getattr(options, 'preprocess_regexps', [])
         self.remove_tags         = getattr(options, 'remove_tags', [])
+        self.remove_tags_after   = getattr(options, 'remove_tags_after', None)
+        self.keep_only_tags      = getattr(options, 'keep_only_tags', [])
         self.preprocess_html_ext = getattr(options, 'preprocess_html', lambda soup: soup) 
+        self.postprocess_html_ext= getattr(options, 'postprocess_html', lambda soup: soup)
         self.download_stylesheets = not options.no_stylesheets
         self.show_progress = True
+        self.failed_links = []
                
 
     def get_soup(self, src):
         nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
         nmassage.extend(self.preprocess_regexps)
         soup = BeautifulSoup(xml_to_unicode(src, self.verbose)[0], markupMassage=nmassage)
+         
+        if self.keep_only_tags:
+            body = Tag(soup, 'body')
+            for spec in self.keep_only_tags:
+                for tag in soup.find('body').findAll(**spec):
+                    body.insert(len(body.contents), tag)
+            soup.find('body').replaceWith(body)
+            
+        if self.remove_tags_after is not None:
+            tag = soup.find(**self.remove_tags_after)
+            while tag is not None and tag.name != 'body':
+                after = tag.nextSibling
+                while after is not None:
+                    ns = after.nextSibling
+                    after.extract()
+                    after = ns
+                tag = tag.parent
+            
         for kwds in self.remove_tags:
             for tag in soup.findAll(**kwds):
                 tag.extract()
@@ -105,7 +130,12 @@ class RecursiveFetcher(object):
         except urllib2.URLError, err:
             if hasattr(err, 'code') and responses.has_key(err.code):
                 raise FetchError, responses[err.code]
-            raise err
+            if err.reason[0] == 104: # Connection reset by peer
+                self.logger.debug('Connection reset by peer retrying in 1 second.')
+                time.sleep(1)
+                f = self.browser.open(url)
+            else: 
+                raise err
         finally:
             self.last_fetch_at = time.time()
         return f
@@ -146,9 +176,10 @@ class RecursiveFetcher(object):
                 iurl = tag['href']
                 if not urlparse.urlsplit(iurl).scheme:
                     iurl = urlparse.urljoin(baseurl, iurl, False)
-                if self.stylemap.has_key(iurl):
-                    tag['href'] = self.stylemap[iurl]
-                    continue
+                with self.stylemap_lock:
+                    if self.stylemap.has_key(iurl):
+                        tag['href'] = self.stylemap[iurl]
+                        continue
                 try:
                     f = self.fetch_url(iurl)
                 except Exception, err:
@@ -157,7 +188,8 @@ class RecursiveFetcher(object):
                     continue
                 c += 1
                 stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
-                self.stylemap[iurl] = stylepath
+                with self.stylemap_lock:
+                    self.stylemap[iurl] = stylepath
                 open(stylepath, 'wb').write(f.read())
                 tag['href'] = stylepath
             else:
@@ -168,9 +200,10 @@ class RecursiveFetcher(object):
                         iurl = m.group(1)
                         if not urlparse.urlsplit(iurl).scheme:
                             iurl = urlparse.urljoin(baseurl, iurl, False)
-                        if self.stylemap.has_key(iurl):
-                            ns.replaceWith(src.replace(m.group(1), self.stylemap[iurl]))
-                            continue
+                        with self.stylemap_lock:
+                            if self.stylemap.has_key(iurl):
+                                ns.replaceWith(src.replace(m.group(1), self.stylemap[iurl]))
+                                continue
                         try:
                             f = self.fetch_url(iurl)
                         except Exception, err:
@@ -179,7 +212,8 @@ class RecursiveFetcher(object):
                             continue
                         c += 1
                         stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
-                        self.stylemap[iurl] = stylepath
+                        with self.stylemap_lock:
+                            self.stylemap[iurl] = stylepath
                         open(stylepath, 'wb').write(f.read())
                         ns.replaceWith(src.replace(m.group(1), stylepath))
                         
@@ -214,7 +248,7 @@ class RecursiveFetcher(object):
             open(imgpath, 'wb').write(f.read())
             tag['src'] = imgpath
 
-    def absurl(self, baseurl, tag, key): 
+    def absurl(self, baseurl, tag, key, filter=True): 
         iurl = tag[key]
         parts = urlparse.urlsplit(iurl)
         if not parts.netloc and not parts.path:
@@ -224,7 +258,7 @@ class RecursiveFetcher(object):
         if not self.is_link_ok(iurl):
             self.logger.debug('Skipping invalid link: %s', iurl)
             return None
-        if not self.is_link_wanted(iurl):
+        if filter and not self.is_link_wanted(iurl):
             self.logger.debug('Filtered link: '+iurl)
             return None
         return iurl
@@ -256,12 +290,12 @@ class RecursiveFetcher(object):
         prev_dir = self.current_dir
         try:
             self.current_dir = diskpath
-            for tag in soup.findAll(lambda tag: tag.name.lower()=='a' and tag.has_key('href')):
+            for tag in soup.findAll('a', href=True):
                 if self.show_progress:
                     print '.',
                     sys.stdout.flush()
                 sys.stdout.flush()
-                iurl = self.absurl(baseurl, tag, 'href')
+                iurl = self.absurl(baseurl, tag, 'href', filter=recursion_level != 0)
                 if not iurl:
                     continue
                 nurl = self.normurl(iurl)
@@ -293,6 +327,7 @@ class RecursiveFetcher(object):
                         self.process_stylesheets(soup, f.geturl())
                     
                     res = os.path.join(linkdiskpath, basename(iurl))
+                    self.downloaded_paths.append(res)
                     self.filemap[nurl] = res
                     if recursion_level < self.max_recursions:
                         self.logger.debug('Processing links...')
@@ -301,9 +336,11 @@ class RecursiveFetcher(object):
                         self.process_return_links(soup, iurl) 
                         self.logger.debug('Recursion limit reached. Skipping links in %s', iurl)
                     
-                    save_soup(soup, res)
+                    save_soup(self.postprocess_html_ext(soup), res)
+                    
                     self.localize_link(tag, 'href', res)
                 except Exception, err:
+                    self.failed_links.append((iurl, traceback.format_exc()))
                     self.logger.warning('Could not fetch link %s', iurl)
                     self.logger.debug('Error: %s', str(err), exc_info=True)
                 finally:
diff --git a/src/libprs500/web/recipes.py b/src/libprs500/web/recipes.py
deleted file mode 100644
index 7621e6f4c7..0000000000
--- a/src/libprs500/web/recipes.py
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/usr/bin/env  python
-##    Copyright (C) 2008 Kovid Goyal kovid@kovidgoyal.net
-##    This program is free software; you can redistribute it and/or modify
-##    it under the terms of the GNU General Public License as published by
-##    the Free Software Foundation; either version 2 of the License, or
-##    (at your option) any later version.
-##
-##    This program is distributed in the hope that it will be useful,
-##    but WITHOUT ANY WARRANTY; without even the implied warranty of
-##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-##    GNU General Public License for more details.
-##
-##    You should have received a copy of the GNU General Public License along
-##    with this program; if not, write to the Free Software Foundation, Inc.,
-##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-'''
-Contains recipes for various common news sources and websites.
-'''
-import re
-from libprs500.web.feeds.news import BasicNewsRecipe
-
-_basic_recipes = (BasicNewsRecipe,)
-_basic_recipe_names = (i.__name__ for i in _basic_recipes)
-
-def compile_recipe(src):
-    '''
-    Compile the code in src and return the first object that is
-    '''
-    locals = {}
-    exec src in globals(), locals
-    for obj in locals.values():
-        if type(obj) is type and obj.__name__ not in _basic_recipe_names:
-            for base in obj.__bases__:
-                if base in _basic_recipes:
-                    return obj
-    
-    return None
-
-
-def get_feed(title):
-    '''
-    Return a builtin recipe class whoose title == C{title} or None if no such
-    recipe exists.
-    
-    @type title: string
-    @rtype: class or None
-    '''
-    if isinstance(_feeds[0], basestring):
-        for i, val in enumerate(_feeds):
-            recipe = compile_recipe(val)
-            if recipe is None:
-                raise RuntimeError('The builtin Recipe #%d is invalid.'%i)
-            _feeds[i] = recipe
-    
-    for recipe in _feeds:
-        if recipe.title == title:
-            return recipe
-        
-    return None
-    
-
-#: Recipes to be used with feeds2disk
-_feeds = ['class Temp(BasicNewsRecipe):\n\ttitle="temp"']
\ No newline at end of file