diff --git a/src/libprs500/ebooks/lrf/__init__.py b/src/libprs500/ebooks/lrf/__init__.py index 46856516bb..e28d1ca4b6 100644 --- a/src/libprs500/ebooks/lrf/__init__.py +++ b/src/libprs500/ebooks/lrf/__init__.py @@ -229,6 +229,9 @@ def option_parser(usage, gui_mode=False): help='Convert to LRS', default=False) parser.add_option('--minimize-memory-usage', action='store_true', default=False, help=_('Minimize memory usage at the cost of longer processing times. Use this option if you are on a memory constrained machine.')) + parser.add_option('--encoding', default=None, + help='Specify the character encoding of the source file. If the output LRF file contains strange characters, try changing this option. A common encoding for files from windows computers is cp-1252. Another common choice is utf-8. The default is to try and guess the encoding.') + return parser def find_custom_fonts(options, logger): diff --git a/src/libprs500/ebooks/lrf/html/convert_from.py b/src/libprs500/ebooks/lrf/html/convert_from.py index 594aa4824c..67c2c1495d 100644 --- a/src/libprs500/ebooks/lrf/html/convert_from.py +++ b/src/libprs500/ebooks/lrf/html/convert_from.py @@ -359,6 +359,8 @@ class HTMLConverter(object): raw = f.read() if self.pdftohtml: # Bug in pdftohtml that causes it to output invalid UTF-8 files raw = raw.decode('utf-8', 'ignore') + elif self.encoding is not None: + raw = raw.decode(self.encoding, 'ignore') else: raw = xml_to_unicode(raw, self.verbose)[0] f.close() diff --git a/src/libprs500/ebooks/lrf/txt/convert_from.py b/src/libprs500/ebooks/lrf/txt/convert_from.py index 33a55db4e3..4ab6a83d10 100644 --- a/src/libprs500/ebooks/lrf/txt/convert_from.py +++ b/src/libprs500/ebooks/lrf/txt/convert_from.py @@ -74,11 +74,10 @@ def process_file(path, options, logger=None): logger = logging.getLogger('txt2lrf') setup_cli_handlers(logger, level) txt = os.path.abspath(os.path.expanduser(path)) - if not hasattr(options, 'encoding'): - options.encoding = None if not hasattr(options, 'debug_html_generation'): options.debug_html_generation = False htmlfile = generate_html(txt, options.encoding, logger) + options.encoding = 'utf-8' if not options.debug_html_generation: options.force_page_break = 'h2' if not options.output: diff --git a/src/libprs500/ebooks/lrf/web/convert_from.py b/src/libprs500/ebooks/lrf/web/convert_from.py index a82cb64b79..b19ba5d813 100644 --- a/src/libprs500/ebooks/lrf/web/convert_from.py +++ b/src/libprs500/ebooks/lrf/web/convert_from.py @@ -12,7 +12,7 @@ ## You should have received a copy of the GNU General Public License along ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -'''Convert known websites into LRF files.''' +'''Convert websites into LRF files.''' import sys, time, tempfile, shutil, os, logging, imp, inspect, re from urlparse import urlsplit @@ -53,8 +53,7 @@ def option_parser(): parser.add_option('--delay', default=None, dest='delay', type='int', help='Minimum interval in seconds between consecutive fetches. Default is %d s'%DefaultProfile.timeout) parser.add_option('--dont-download-stylesheets', action='store_true', default=None, - help='Do not download CSS stylesheets.', dest='no_stylesheets') - + help='Do not download CSS stylesheets.', dest='no_stylesheets') parser.add_option('--match-regexp', dest='match_regexps', default=[], action='append', help='Only links that match this regular expression will be followed. 
This option can be specified multiple times, in which case as long as a link matches any one regexp, it will be followed. By default all links are followed.') parser.add_option('--filter-regexp', default=[], action='append', dest='filter_regexps', @@ -64,7 +63,7 @@ def option_parser(): return parser def fetch_website(options, logger): - tdir = tempfile.mkdtemp(prefix=__appname__+'_' ) + tdir = tempfile.mkdtemp(prefix=__appname__+'_', suffix='_web2lrf') options.dir = tdir fetcher = create_fetcher(options, logger) fetcher.preprocess_regexps = options.preprocess_regexps @@ -147,10 +146,13 @@ def process_profile(args, options, logger=None): options.preprocess_regexps = profile.preprocess_regexps options.filter_regexps += profile.filter_regexps + options.encoding = profile.encoding if options.encoding is None else options.encoding + if len(args) == 2 and args[1] != 'default': options.anchor_ids = False htmlfile, tdir = fetch_website(options, logger) + options.encoding = 'utf-8' cwd = os.getcwdu() if not options.output: options.output = os.path.join(cwd, options.title+('.lrs' if options.lrs else '.lrf')) diff --git a/src/libprs500/ebooks/lrf/web/profiles/__init__.py b/src/libprs500/ebooks/lrf/web/profiles/__init__.py index 33e9b46c5f..768ce9e45b 100644 --- a/src/libprs500/ebooks/lrf/web/profiles/__init__.py +++ b/src/libprs500/ebooks/lrf/web/profiles/__init__.py @@ -13,6 +13,8 @@ ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ''' +Contains the Base Profiles that can be used to easily create profiles to download +particular websites. ''' import tempfile, time, calendar, re, operator, atexit, shutil, os @@ -24,36 +26,120 @@ from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, class DefaultProfile(object): - url = '' # The URL of the website - title = 'Default Profile' # The title to use for the LRF file - max_articles_per_feed = 10 # Maximum number of articles to download from each feed - html_description = True # If True process the element of the feed as HTML - oldest_article = 7 # How many days old should the oldest article downloaded from the feeds be? - max_recursions = 1 # Number of levels of links to follow - max_files = 3000 # Maximum number of files to download - delay = 0 # Delay between consecutive downloads - timeout = 10 # Timeout for fetching files from server in seconds - timefmt = ' [%a %d %b %Y]' # The format of the date shown on the first page - url_search_order = ['guid', 'link'] # The order of elements to search for a URL when parssing the RSS feed - pubdate_fmt = None # The format string used to parse the publication date in the RSS feed. If set to None some default heuristics are used, these may fail, in which case set this to the correct string or re-implement strptime in your subclass. - use_pubdate = True, # If True will look for a publication date for each article. If False assumes the publication date is the current time. 
- summary_length = 500 # Max number of characters in the short description (ignored in DefaultProfile) - no_stylesheets = False # Download stylesheets only if False - allow_duplicates = False # If False articles with the same title in the same feed are not downloaded multiple times - needs_subscription = False # If True the GUI will ask the userfor a username and password to use while downloading - match_regexps = [] # List of regular expressions that determines which links to follow - filter_regexps = [] # List of regular expressions that determines which links to ignore - # Only one of match_regexps or filter_regexps should be defined + #: The title to use for the LRF file + #: @type: string + title = 'Default Profile' - html2lrf_options = [] # List of options to pass to html2lrf - # List of regexp substitution rules to run on the downloaded HTML. Each element of the - # list should be a two element tuple. The first element of the tuple should - # be a compiled regular expression and the second a callable that takes - # a single match object and returns a string to replace the match. + #: Maximum number of articles to download from each feed + #: @type: integer + max_articles_per_feed = 10 + + #: If True process the element of the feed as HTML + #: @type: boolean + html_description = True + + #: How many days old should the oldest article downloaded from the feeds be + #: @type: integer + oldest_article = 7 + + #: Recommend frequency at which to download this profile. In days. + recommended_frequency = 7 + + #: Number of levels of links to follow + #: @type: integer + max_recursions = 1 + + #: Maximum number of files to download + #: @type: integer + max_files = 3000 + + #: Delay between consecutive downloads in seconds + #: @type: integer + delay = 0 + + #: Timeout for fetching files from server in seconds + #: @type: integer + timeout = 10 + + #: The format string for the date shown on the first page + #: @type: string + timefmt = ' [%a %d %b %Y]' + + #: The order of elements to search for a URL when parsing the RSS feed. You + #: can replace these elements by completely arbitrary elements to customize + #: feed processing. + #: @type: list of strings + url_search_order = ['guid', 'link'] + + #: The format string used to parse the publication date in the RSS feed. + #: If set to None some default heuristics are used, these may fail, + #: in which case set this to the correct string or re-implement + #: L{DefaultProfile.strptime} in your subclass. + #: @type: string or None + pubdate_fmt = None + + #: If True will look for a publication date for each article. + #: If False assumes the publication date is the current time. + #: @type: boolean + use_pubdate = True, + + #: Max number of characters in the short description. + #: Used by L{FullContentProfile} + #: @type: integer + summary_length = 500 + + #: If True stylesheets are not downloaded and processed + #: Convenient flag to disable loading of stylesheets for websites + #: that have overly complex stylesheets unsuitable for conversion + #: to ebooks formats + #: @type: boolean + no_stylesheets = False + + #: If False articles with the same title in the same feed + #: are not downloaded multiple times + #: @type: boolean + allow_duplicates = False + + #: If True the GUI will ask the user for a username and password + #: to use while downloading + #: @type: boolean + needs_subscription = False + + #: Specify an override encoding for sites that have an incorrect + #: charset specification. 
THe most common being specifying latin1 and + #: using cp1252 + encoding = None + + #: List of regular expressions that determines which links to follow + #: If empty, it is ignored. + #: Only one of L{match_regexps} or L{filter_regexps} should be defined + #: @type: list of strings + match_regexps = [] + + #: List of regular expressions that determines which links to ignore + #: If empty it is ignored + #: Only one of L{match_regexps} or L{filter_regexps} should be defined + #: @type: list of strings + filter_regexps = [] + + #: List of options to pass to html2lrf, to customize conversion + #: to LRF + #: @type: list of strings + html2lrf_options = [] + + #: List of regexp substitution rules to run on the downloaded HTML. Each element of the + #: list should be a two element tuple. The first element of the tuple should + #: be a compiled regular expression and the second a callable that takes + #: a single match object and returns a string to replace the match. + #: @type: list of tuples preprocess_regexps = [] # See the built-in profiles for examples of these settings. + #: The URL of the website + #: @type: string + url = '' + feeds = [] CDATA_PAT = re.compile(r'<\!\[CDATA\[(.*?)\]\]>', re.DOTALL) @@ -84,9 +170,7 @@ class DefaultProfile(object): ''' return browser() - ######################################################################## - ###################### End of customizable portion ##################### - ######################################################################## + def __init__(self, logger, verbose=False, username=None, password=None): @@ -105,14 +189,14 @@ class DefaultProfile(object): def build_index(self): '''Build an RSS based index.html''' articles = self.parse_feeds() - + encoding = 'utf-8' if self.encoding is None else self.encoding def build_sub_index(title, items): ilist = '' li = u'
<li><a href="%(url)s">%(title)s</a> [%(date)s]<br/>\n'+\
u'<div>%(description)s</div></li>\n'
for item in items: if not item.has_key('date'): - item['date'] = time.ctime() + item['date'] = time.strftime('%a, %d %b', time.localtime()) ilist += li%item return u'''\ @@ -135,8 +219,8 @@ class DefaultProfile(object): prefix = 'file:' if iswindows else '' clist += u'
<li><a href="%s">%s</a></li>\n'%(prefix+cfile, category) src = build_sub_index(category, articles[category]) - open(cfile, 'wb').write(src.encode('utf-8')) - + open(cfile, 'wb').write(src.encode(encoding)) + src = '''\ @@ -150,7 +234,8 @@ class DefaultProfile(object): '''%dict(date=time.strftime('%a, %d %B, %Y', time.localtime()), categories=clist, title=self.title) index = os.path.join(self.temp_dir, 'index.html') - open(index, 'wb').write(src.encode('utf-8')) + open(index, 'wb').write(src.encode(encoding)) + return index @@ -160,7 +245,9 @@ class DefaultProfile(object): Convenience method to take a BeautifulSoup Tag and extract the text from it recursively, including any CDATA sections and alt tag attributes. @param use_alt: If True try to use the alt attribute for tags that don't have any textual content + @type use_alt: boolean @return: A unicode (possibly empty) object + @rtype: unicode string ''' if not tag: return '' @@ -181,11 +268,13 @@ class DefaultProfile(object): def get_article_url(self, item): ''' Return the article URL given an item Tag from a feed, or None if no valid URL is found - @param: A BeautifulSoup Tag instance corresponding to the tag from a feed. + @type item: BeautifulSoup.Tag + @param item: A BeautifulSoup Tag instance corresponding to the tag from a feed. + @rtype: string or None ''' url = None for element in self.url_search_order: - url = item.find(element) + url = item.find(element.lower()) if url: break return url @@ -195,15 +284,17 @@ class DefaultProfile(object): ''' Create list of articles from a list of feeds. @param require_url: If True skip articles that don't have a link to a HTML page with the full article contents. + @type require_url: boolean + @rtype: dictionary @return: A dictionary whose keys are feed titles and whose values are each - a list of dictionaries. Each list contains dictionaries of the form: - { + a list of dictionaries. Each list contains dictionaries of the form:: + { 'title' : article title, 'url' : URL of print version, 'date' : The publication date of the article as a string, 'description' : A summary of the article - 'content' : The full article (can be an empty string). This is unused in DefaultProfile - } + 'content' : The full article (can be an empty string). This is used by FullContentProfile + } ''' added_articles = {} feeds = self.get_feeds() @@ -299,6 +390,12 @@ class DefaultProfile(object): @classmethod def process_html_description(cls, tag, strip_links=True): + ''' + Process a tag that contains HTML markup, either + entity encoded or escaped in a CDATA section. + @return: HTML + @rtype: string + ''' src = '\n'.join(tag.contents) if hasattr(tag, 'contents') else tag match = cls.CDATA_PAT.match(src.lstrip()) if match: @@ -325,7 +422,13 @@ class DefaultProfile(object): def strptime(cls, src): ''' Take a string and return the date that string represents, in UTC as - an epoch (i.e. number of seconds since Jan 1, 1970) + an epoch (i.e. number of seconds since Jan 1, 1970). This function uses + a bunch of heuristics and is a prime candidate for being overridden in a + subclass. + @param src: Timestamp as a string + @type src: string + @return: time as an epoch + @rtype: number ''' delta = 0 zone = re.search(r'\s*(\+\d\d\:{0,1}\d\d)', src) @@ -376,7 +479,7 @@ class FullContentProfile(DefaultProfile): def build_index(self): - '''Build an RSS based index.html''' + '''Build an RSS based index.html. 
''' articles = self.parse_feeds(require_url=False) def build_sub_index(title, items): @@ -467,4 +570,5 @@ def cleanup(tdir): if os.path.isdir(tdir): shutil.rmtree(tdir) except: - pass \ No newline at end of file + pass + \ No newline at end of file diff --git a/src/libprs500/gui2/device.py b/src/libprs500/gui2/device.py index 22309e0e7b..b6bee50e6d 100644 --- a/src/libprs500/gui2/device.py +++ b/src/libprs500/gui2/device.py @@ -34,6 +34,7 @@ class DeviceDetector(QThread): self.devices = [[d, False] for d in devices()] self.sleep_time = sleep_time QThread.__init__(self) + self.keep_going = True def run(self): _wmi = None @@ -42,7 +43,7 @@ class DeviceDetector(QThread): pythoncom.CoInitialize() _wmi = wmi.WMI() scanner = DeviceScanner(_wmi) - while True: + while self.keep_going: scanner.scan() for device in self.devices: connected = scanner.is_device_connected(device[0]) diff --git a/src/libprs500/gui2/dialogs/lrf_single.py b/src/libprs500/gui2/dialogs/lrf_single.py index 755f95d7d2..f824dc79fd 100644 --- a/src/libprs500/gui2/dialogs/lrf_single.py +++ b/src/libprs500/gui2/dialogs/lrf_single.py @@ -12,7 +12,7 @@ ## You should have received a copy of the GNU General Public License along ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -import os, cPickle +import os, cPickle, codecs from PyQt4.QtCore import QObject, SIGNAL, Qt, QSettings, QVariant, QByteArray from PyQt4.QtGui import QAbstractSpinBox, QLineEdit, QCheckBox, QDialog, \ @@ -315,6 +315,14 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog): elif isinstance(obj, QLineEdit): val = qstring_to_unicode(obj.text()) if val: + if opt == '--encoding': + try: + codecs.getdecoder(val) + except: + d = error_dialog(self, 'Unknown encoding', + '

<p>Unknown encoding: %s<br/>
    For a list of known encodings see http://docs.python.org/lib/standard-encodings.html'%val) + d.exec_() + return cmd.extend([opt, val]) elif isinstance(obj, QTextEdit): val = qstring_to_unicode(obj.toPlainText()) @@ -366,6 +374,8 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog): def accept(self): cmdline = self.build_commandline() + if cmdline is None: + return if self.db: self.cover_file = None self.write_metadata() diff --git a/src/libprs500/gui2/dialogs/lrf_single.ui b/src/libprs500/gui2/dialogs/lrf_single.ui index b20cf9bed8..3f38188893 100644 --- a/src/libprs500/gui2/dialogs/lrf_single.ui +++ b/src/libprs500/gui2/dialogs/lrf_single.ui @@ -559,6 +559,19 @@ + + + + Source en&coding: + + + gui_encoding + + + + + + diff --git a/src/libprs500/gui2/main.py b/src/libprs500/gui2/main.py index 0b7c6e1920..fcde2e6519 100644 --- a/src/libprs500/gui2/main.py +++ b/src/libprs500/gui2/main.py @@ -984,6 +984,10 @@ class Main(MainWindow, Ui_MainWindow): e.ignore() return self.write_settings() + self.detector.keep_going = False + self.hide() + self.detector.wait(2000) + self.detector.terminate() e.accept() def update_found(self, version): diff --git a/src/libprs500/manual/index.html b/src/libprs500/manual/index.html index c4af6acf44..f1a99c9abb 100644 --- a/src/libprs500/manual/index.html +++ b/src/libprs500/manual/index.html @@ -1,7 +1,7 @@ - - - + + diff --git a/src/libprs500/manual/make.py b/src/libprs500/manual/make.py index a6b0cd6020..c6c2ca7512 100644 --- a/src/libprs500/manual/make.py +++ b/src/libprs500/manual/make.py @@ -80,11 +80,11 @@ def clean(): return 0 def compile_help(): - QTDIR = '/usr/local/Trolltech/Qt-4.4.0-tp1' + QTDIR = '/usr/local/Trolltech/Qt-4.4.0-beta1' QTBIN = QTDIR + '/bin' QTLIB = QTDIR + '/lib' QCG = os.path.join(QTBIN, 'qcollectiongenerator') - QTA = os.path.join(QTBIN, 'assistant_new') + QTA = os.path.join(QTBIN, 'assistant') os.environ['LD_LIBRARY_PATH'] = QTLIB subprocess.check_call((QCG, 'libprs500.qhcp')) subprocess.call((QTA, '-collectionFile', 'libprs500.qhc')) @@ -156,7 +156,7 @@ def generate_cli_docs(src='libprs500.qhp'): '

<li><a href="%s">%s</a></li>\n'%(i[0], i[0]) for i in documented_cmds) body = '<h1>The Command Line Interface</h1>\n' - body += '<img src="..."/>' + body += '<img src="..." alt="CLI"/>' body += '<p>%s</p>\n'%'libprs500 has a very comprehensive command line interface to perform most operations that can be performed by the GUI.' body += '<h2>Documented commands</h2>\n'+dc_html body += '<h2>Undocumented commands</h2>
    \n'+uc_html diff --git a/src/libprs500/manual/templates/navtree.html b/src/libprs500/manual/templates/navtree.html index ca314154e6..223676b264 100644 --- a/src/libprs500/manual/templates/navtree.html +++ b/src/libprs500/manual/templates/navtree.html @@ -10,6 +10,8 @@ #browser { font-family: monospace; } + a { color: black; } + a:visited { color: black; } .toplevel { font-weight: bold; } diff --git a/src/libprs500/web/fetch/simple.py b/src/libprs500/web/fetch/simple.py index 64191ce561..50cb53038a 100644 --- a/src/libprs500/web/fetch/simple.py +++ b/src/libprs500/web/fetch/simple.py @@ -13,7 +13,8 @@ ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ''' -Fetch a webpage and its links recursively. +Fetch a webpage and its links recursively. The webpages are saved to disk in +UTF-8 encoding with any charset declarations removed. ''' import sys, socket, os, urlparse, codecs, logging, re, time, copy, urllib2 from urllib import url2pathname @@ -35,6 +36,9 @@ def basename(url): return res def save_soup(soup, target): + for meta in soup.findAll('meta', content=True): + if 'charset' in meta['content']: + meta.extract() f = codecs.open(target, 'w', 'utf8') f.write(unicode(soup)) f.close() @@ -58,6 +62,7 @@ class RecursiveFetcher(object): self.default_timeout = socket.getdefaulttimeout() socket.setdefaulttimeout(options.timeout) self.verbose = options.verbose + self.encoding = options.encoding self.browser = options.browser if hasattr(options, 'browser') else browser() self.max_recursions = options.max_recursions self.match_regexps = [re.compile(i, re.IGNORECASE) for i in options.match_regexps] @@ -262,6 +267,11 @@ class RecursiveFetcher(object): dsrc = f.read() if len(dsrc) == 0: raise Exception('No content') + if self.encoding is not None: + dsrc = dsrc.decode(self.encoding, 'ignore') + else: + dsrc = xml_to_unicode(dsrc) + soup = self.get_soup(dsrc) self.logger.debug('Processing images...') self.process_images(soup, f.geturl()) @@ -305,6 +315,8 @@ def option_parser(usage='%prog URL\n\nWhere URL is for example http://google.com help='The maximum number of files to download. This only applies to files from tags. Default is %default') parser.add_option('--delay', default=0, dest='delay', type='int', help='Minimum interval in seconds between consecutive fetches. Default is %default s') + parser.add_option('--encoding', default=None, + help='The character encoding for the websites you are trying to download. The default is to try and guess the encoding.') parser.add_option('--match-regexp', default=[], action='append', dest='match_regexps', help='Only links that match this regular expression will be followed. This option can be specified multiple times, in which case as long as a link matches any one regexp, it will be followed. 
By default all links are followed.') parser.add_option('--filter-regexp', default=[], action='append', dest='filter_regexps', diff --git a/upload.py b/upload.py index d742c2c48c..734b028a17 100644 --- a/upload.py +++ b/upload.py @@ -10,6 +10,7 @@ import pysvn PREFIX = "/var/www/vhosts/kovidgoyal.net/subdomains/libprs500" DOWNLOADS = PREFIX+"/httpdocs/downloads" DOCS = PREFIX+"/httpdocs/apidocs" +USER_MANUAL = PREFIX+'/httpdocs/user_manual' HTML2LRF = "src/libprs500/ebooks/lrf/html/demo" TXT2LRF = "src/libprs500/ebooks/lrf/txt/demo" check_call = partial(_check_call, shell=True) @@ -108,7 +109,15 @@ def upload_docs(): check_call('''epydoc -v --config epydoc-pdf.conf''') check_call('''scp docs/pdf/api.pdf castalia:%s/'''%(DOCS,)) - +def upload_user_manual(): + cwd = os.getcwdu() + os.chdir('src/libprs500/manual') + try: + check_call('python make.py') + check_call('ssh castalia rm -rf %s/\\*'%USER_MANUAL) + check_call('scp -r *.html styles images castalia:%s/'%USER_MANUAL) + finally: + os.chdir(cwd) def main(): upload = len(sys.argv) < 2 @@ -128,6 +137,7 @@ def main(): print 'Uploading to PyPI' check_call('''python setup.py register bdist_egg --exclude-source-files upload''') upload_docs() + upload_user_manual() check_call('''rm -rf dist/* build/*''') if __name__ == '__main__':
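
The encoding support added above is used in two places: website profiles can declare a fixed character set via the new DefaultProfile.encoding attribute, and the conversion tools accept one directly via the new --encoding option. As a rough illustration (not part of this change set), a downstream profile might use the attribute as in the sketch below; the profile name, feed entry and numeric values are hypothetical placeholders.

    from libprs500.ebooks.lrf.web.profiles import DefaultProfile

    class ExampleNews(DefaultProfile):
        # Hypothetical profile, for illustration only.
        title = 'Example News'          # Title of the generated LRF file
        # Override encoding detection for a site that declares latin1 in its
        # pages but actually serves cp1252 (smart quotes, em dashes, ...).
        encoding = 'cp1252'
        oldest_article = 2              # Skip feed items older than two days
        max_articles_per_feed = 5
        # Placeholder feed entry; see the built-in profiles for the exact
        # format expected by get_feeds()/parse_feeds().
        feeds = [('Front Page', 'http://example.com/rss.xml')]

With encoding set, process_profile() copies it into options.encoding, the RecursiveFetcher decodes every downloaded page with it (save_soup() then re-saves the pages as UTF-8 with charset declarations stripped), and build_index() encodes the generated index pages with it. The same override is available from the command line, e.g. html2lrf --encoding cp1252 book.html (a hypothetical invocation of the option added to option_parser()).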