From 6982652f923b8c0b1bfe9e69ba816733ab9fba21 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Fri, 14 Mar 2008 19:25:48 +0000
Subject: [PATCH] Refactored OPF creation code. Implemented Table of Contents
 support in feeds2disk.

---
 Makefile                                      |   6 +-
 resources.py                                  |  39 +++
 src/libprs500/ebooks/lrf/html/convert_from.py |  17 +-
 src/libprs500/ebooks/lrf/html/convert_to.py   |   4 +-
 src/libprs500/ebooks/metadata/__init__.py     |  16 +-
 src/libprs500/ebooks/metadata/meta.py         |   6 +-
 src/libprs500/ebooks/metadata/ncx.xml         |  27 ++
 src/libprs500/ebooks/metadata/opf.py          | 293 ++++++++----------
 src/libprs500/ebooks/metadata/opf.xml         |  36 +++
 src/libprs500/ebooks/metadata/toc.py          | 154 +++++++++
 src/libprs500/ebooks/mobi/reader.py           |   6 +-
 src/libprs500/library/database.py             |   6 +-
 src/libprs500/linux.py                        |   1 +
 src/libprs500/terminfo.py                     |   1 +
 src/libprs500/web/feeds/news.py               |  44 ++-
 src/libprs500/web/feeds/recipes/newsweek.py   |  12 +-
 src/libprs500/web/feeds/templates.py          |  11 +-
 src/libprs500/web/fetch/simple.py             |  12 +-
 18 files changed, 482 insertions(+), 209 deletions(-)
 create mode 100644 resources.py
 create mode 100644 src/libprs500/ebooks/metadata/ncx.xml
 create mode 100644 src/libprs500/ebooks/metadata/opf.xml
 create mode 100644 src/libprs500/ebooks/metadata/toc.py

diff --git a/Makefile b/Makefile
index c3514fb0de..4b920c6a39 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 PYTHON = python
 
-all : gui2 translations
+all : gui2 translations resources
 
 clean : 
 	cd src/libprs500/gui2 && ${PYTHON} make.py clean
@@ -13,4 +13,8 @@ test : gui2
 
 translations :
 	cd src/libprs500 && ${PYTHON} translations/__init__.py
+
+resources:	
+	${PYTHON} resources.py
+    
     
diff --git a/resources.py b/resources.py
new file mode 100644
index 0000000000..cf5cf58253
--- /dev/null
+++ b/resources.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env  python
+
+##    Copyright (C) 2008 Kovid Goyal kovid@kovidgoyal.net
+##    This program is free software; you can redistribute it and/or modify
+##    it under the terms of the GNU General Public License as published by
+##    the Free Software Foundation; either version 2 of the License, or
+##    (at your option) any later version.
+##
+##    This program is distributed in the hope that it will be useful,
+##    but WITHOUT ANY WARRANTY; without even the implied warranty of
+##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+##    GNU General Public License for more details.
+##
+##    You should have received a copy of the GNU General Public License along
+##    with this program; if not, write to the Free Software Foundation, Inc.,
+##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+'''
+Compile resource files.
+'''
+import os, sys
+sys.path.insert(1, os.path.join(os.getcwd(), 'src'))
+from libprs500 import __appname__
+
+RESOURCES = dict(
+    opf_template = '%p/ebooks/metadata/opf.xml',
+    ncx_template = '%p/ebooks/metadata/ncx.xml',
+                 )
+
+def main(args=sys.argv):
+    '''Embed each file in RESOURCES as a repr()-ed string constant in <package>/resources.py.'''
+    pkg_dir, data = os.path.join('src', __appname__), ''
+    for key, value in RESOURCES.items():
+        raw = repr(open(value.replace('%p', pkg_dir), 'rb').read())
+        data += key + ' = ' + raw + '\n\n'
+    open(os.path.join(pkg_dir, 'resources.py'), 'wb').write(data)
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
\ No newline at end of file
diff --git a/src/libprs500/ebooks/lrf/html/convert_from.py b/src/libprs500/ebooks/lrf/html/convert_from.py
index eb4149c521..6ec3f06c53 100644
--- a/src/libprs500/ebooks/lrf/html/convert_from.py
+++ b/src/libprs500/ebooks/lrf/html/convert_from.py
@@ -60,6 +60,8 @@ def update_css(ncss, ocss):
 def munge_paths(basepath, url):
     purl = urlparse(unquote(url),)
     path, fragment = purl[2], purl[5]
+    if path:
+        path = path.replace('/', os.sep)
     if not path:
         path = basepath
     elif not os.path.isabs(path):
@@ -223,7 +225,6 @@ class HTMLConverter(object):
         self.extra_toc_entries = [] #: TOC entries gleaned from semantic information
         self.image_memory = []
         self.id_counter = 0
-        self.toc_from_metadata = False #: If True means that the toc has been populated from metadata
         self.unused_target_blocks = [] #: Used to remove extra TextBlocks
         self.link_level  = 0    #: Current link level
         self.memory = []        #: Used to ensure that duplicate CSS unhandled erros are not reported
@@ -543,7 +544,7 @@ class HTMLConverter(object):
         
         path, fragment = munge_paths(self.target_prefix, tag['href'])
         return {'para':para, 'text':text, 'path':os.path.abspath(path), 
-                'fragment':fragment, 'in toc': (self.link_level == 0 and not self.toc_from_metadata)}
+                'fragment':fragment, 'in toc': (self.link_level == 0 and not self.use_spine)}
         
     
     def get_text(self, tag, limit=None):
@@ -637,13 +638,12 @@ class HTMLConverter(object):
         return outside_links
             
     def create_toc(self, toc):
-        for (path, fragment, txt) in toc:
-            ascii_text = txt.encode('ascii', 'ignore') # Bug in SONY LRF renderer
-            self.toc_from_metadata = True
-            if not fragment and path in self.tops:
-                self.book.addTocEntry(ascii_text, self.tops[path])                
+        for item in toc.top_level_items():
+            ascii_text = item.text.encode('ascii', 'ignore') # Bug in SONY LRF renderer
+            if not item.fragment and item.abspath in self.tops:
+                self.book.addTocEntry(ascii_text, self.tops[item.abspath])                
             else:
-                url = path+fragment
+                url = item.abspath+item.fragment
                 if url in self.targets:
                     self.book.addTocEntry(ascii_text, self.targets[url])
                     
@@ -1846,6 +1846,7 @@ def try_opf(path, options, logger):
             options.cover = None
             cover = opf.cover            
             if cover:
+                cover = cover.replace('/', os.sep)
                 if not os.path.isabs(cover):
                     cover = os.path.join(dirpath, cover)
                 if os.access(cover, os.R_OK):
diff --git a/src/libprs500/ebooks/lrf/html/convert_to.py b/src/libprs500/ebooks/lrf/html/convert_to.py
index 0e42a4d5b7..242b43d0df 100644
--- a/src/libprs500/ebooks/lrf/html/convert_to.py
+++ b/src/libprs500/ebooks/lrf/html/convert_to.py
@@ -65,7 +65,7 @@ class LRFConverter(object):
     def create_metadata(self):
         self.logger.info('Reading metadata...')
         mi = get_metadata(self.lrf)
-        self.opf = OPFCreator(mi)
+        self.opf = OPFCreator(self.output_dir, mi)
         
     def create_page_styles(self):
         self.page_css = ''
@@ -126,4 +126,4 @@ def main(args=sys.argv):
 
 
 if __name__ == '__main__':
-    sys.exit(main())
\ No newline at end of file
+    sys.exit(main())
diff --git a/src/libprs500/ebooks/metadata/__init__.py b/src/libprs500/ebooks/metadata/__init__.py
index dbd1886f68..544bb6c3d0 100644
--- a/src/libprs500/ebooks/metadata/__init__.py
+++ b/src/libprs500/ebooks/metadata/__init__.py
@@ -45,12 +45,13 @@ class MetaInformation(object):
         ans = MetaInformation(mi.title, mi.authors)
         for attr in ('author_sort', 'title_sort', 'comments', 'category',
                      'publisher', 'series', 'series_index', 'rating',
-                     'isbn', 'tags', 'cover_data', 'libprs_id'):
+                     'isbn', 'tags', 'cover_data', 'application_id',
+                     'manifest', 'spine', 'toc', 'cover'):
             if hasattr(mi, attr):
                 setattr(ans, attr, getattr(mi, attr))
         
     
-    def __init__(self, title, authors):
+    def __init__(self, title, authors=['Unknown']):
         '''
         @param title: title or "Unknown" or a MetaInformation object
         @param authors: List of strings or []
@@ -76,8 +77,11 @@ class MetaInformation(object):
         self.isbn         = None if not mi else mi.isbn
         self.tags         = []  if not mi else mi.tags
         self.cover_data   = mi.cover_data if (mi and hasattr(mi, 'cover_data')) else (None, None)
-        self.libprs_id    = mi.libprs_id  if (mi and hasattr(mi, 'libprs_id')) else None
-         
+        self.application_id    = mi.application_id  if (mi and hasattr(mi, 'application_id')) else None
+        self.manifest = getattr(mi, 'manifest', None) 
+        self.toc      = getattr(mi, 'toc', None)
+        self.spine    = getattr(mi, 'spine', None)
+        self.cover    = getattr(mi, 'cover', None)
     
     def smart_update(self, mi):
         '''
@@ -92,7 +96,7 @@ class MetaInformation(object):
             
         for attr in ('author_sort', 'title_sort', 'comments', 'category',
                      'publisher', 'series', 'series_index', 'rating',
-                     'isbn', 'libprs_id'):
+                     'isbn', 'application_id', 'manifest', 'spine', 'toc', 'cover'):
             if hasattr(mi, attr):
                 val = getattr(mi, attr)
                 if val is not None:
@@ -117,4 +121,4 @@ class MetaInformation(object):
         return ans.strip()
     
     def __nonzero__(self):
-        return bool(self.title or self.author or self.comments or self.category)
\ No newline at end of file
+        return bool(self.title or self.author or self.comments or self.category)
diff --git a/src/libprs500/ebooks/metadata/meta.py b/src/libprs500/ebooks/metadata/meta.py
index 8e2f3e5524..ed78f39a14 100644
--- a/src/libprs500/ebooks/metadata/meta.py
+++ b/src/libprs500/ebooks/metadata/meta.py
@@ -51,7 +51,7 @@ def metadata_from_formats(formats):
         ext = path_to_ext(path)
         stream = open(path, 'rb')
         mi.smart_update(get_metadata(stream, stream_type=ext, use_libprs_metadata=True))
-        if getattr(mi, 'libprs_id', None) is not None:
+        if getattr(mi, 'application_id', None) is not None:
             return mi
     
     return mi
@@ -69,7 +69,7 @@ def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
         if os.access(c, os.R_OK):
             opf = opf_metadata(os.path.abspath(c))
         
-    if use_libprs_metadata and getattr(opf, 'libprs_id', None) is not None:
+    if use_libprs_metadata and getattr(opf, 'application_id', None) is not None:
         return opf
     
     try:
@@ -147,7 +147,7 @@ def opf_metadata(opfpath):
     f = open(opfpath, 'rb')
     opf = OPFReader(f, os.path.dirname(opfpath))
     try:
-        if opf.libprs_id is not None:
+        if opf.application_id is not None:
             mi = MetaInformation(opf, None)
             if hasattr(opf, 'cover') and opf.cover:
                 cpath = os.path.join(os.path.dirname(opfpath), opf.cover)
diff --git a/src/libprs500/ebooks/metadata/ncx.xml b/src/libprs500/ebooks/metadata/ncx.xml
new file mode 100644
index 0000000000..7bcb9ac479
--- /dev/null
+++ b/src/libprs500/ebooks/metadata/ncx.xml
@@ -0,0 +1,27 @@
+<ncx version="2005-1" 
+     xml:lang="en" 
+     xmlns="http://www.daisy.org/z3986/2005/ncx/"
+     xmlns:py="http://genshi.edgewall.org/"
+>
+    <head>
+        <meta name="dtb:uid" content="${uid}"/>
+        <meta name="dtb:depth" content="${toc.depth()}"/>
+        <meta name="dtb:generator" content="${__appname__}"/>
+        <meta name="dtb:totalPageCount" content="0"/>
+        <meta name="dtb:maxPageNumber" content="0"/>
+    </head>
+    <docTitle><text>Table of Contents</text></docTitle>
+    
+    <py:def function="navpoint(np, level)">
+        ${'%*s'%(4*level,'')}<navPoint playOrder="${str(np.play_order)}">
+            ${'%*s'%(4*level,'')}<navLabel>
+                ${'%*s'%(4*level,'')}<text>${np.text}</text>
+            ${'%*s'%(4*level,'')}</navLabel>
+            ${'%*s'%(4*level,'')}<content src="${str(np.href)+(('#' + str(np.fragment)) if np.fragment else '')}" />
+            <py:for each="np2 in np">${navpoint(np2, level+1)}</py:for>
+        ${'%*s'%(4*level,'')}</navPoint>
+    </py:def>
+    <navMap>
+    <py:for each="np in toc">${navpoint(np, 0)}</py:for>
+    </navMap>
+</ncx>
\ No newline at end of file
diff --git a/src/libprs500/ebooks/metadata/opf.py b/src/libprs500/ebooks/metadata/opf.py
index 833f8ae51f..c1d88706da 100644
--- a/src/libprs500/ebooks/metadata/opf.py
+++ b/src/libprs500/ebooks/metadata/opf.py
@@ -12,18 +12,21 @@
 ##    You should have received a copy of the GNU General Public License along
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+import uuid
 '''Read/Write metadata from Open Packaging Format (.opf) files.'''
 
-import sys, re, os, glob
+import sys, re, os, mimetypes
 from urllib import unquote
 from urlparse import urlparse
 import xml.dom.minidom as dom
 from itertools import repeat
 
+from libprs500 import __appname__
 from libprs500.ebooks.metadata import MetaInformation
-from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
+from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup
 from libprs500.ebooks.lrf import entity_to_unicode
 from libprs500.ebooks.metadata import get_parser
+from libprs500.ebooks.metadata.toc import TOC
 
 class ManifestItem(object):
     def __init__(self, item, cwd):
@@ -40,6 +43,14 @@ class ManifestItem(object):
         
     def __unicode__(self):
         return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href, self.media_type)
+    
+    def __getitem__(self, index):
+        # Sequence protocol: index 0 is the href and index 1 the media type,
+        # so an item can be unpacked like a (href, media_type) pair.
+        if index in (0, 1):
+            return self.media_type if index else self.href
+        raise IndexError('%d out of bounds.'%index)
+        
 
 class Manifest(list):
     
@@ -81,85 +92,11 @@ class Spine(object):
     def items(self):
         for i in self.linear_ids + self.nonlinear_ids:
             yield  self.manifest.item(i)
+            
+    def __iter__(self):
+        for idref in self.linear_ids + self.nonlinear_ids:  # linear ids first, then non-linear
+            yield idref
 
-class TOC(list):
-    
-    def __init__(self, opfreader, cwd):
-        self.toc = None
-        toc = opfreader.soup.find('spine', toc=True)
-        if toc is not None:
-            toc = toc['toc']
-        if toc is None:
-            try:
-                toc = opfreader.soup.find('guide').find('reference', attrs={'type':'toc'})['href']
-            except:
-                for item in opfreader.manifest:
-                    if 'toc' in item.href.lower():
-                        toc = item.href
-                        break
-                            
-        if toc is not None:
-            if toc.lower() != 'ncx':
-                toc = urlparse(unquote(toc))[2]
-                if not os.path.isabs(toc):
-                    toc = os.path.join(cwd, toc)
-                try:
-                    if not os.path.exists(toc):
-                        bn  = os.path.basename(toc)
-                        bn  = bn.replace('_top.htm', '_toc.htm') # Bug in BAEN OPF files
-                        toc = os.path.join(os.path.dirname(toc), bn)
-                    
-                    self.read_html_toc(toc, cwd)
-                    self.toc = toc
-                except:
-                    pass
-            else:
-                cwd = os.path.abspath(cwd)
-                m = glob.glob(os.path.join(cwd, '*.ncx'))
-                if m:
-                    toc = m[0]
-                    try:
-                        self.read_ncx_toc(toc)
-                        self.toc = toc
-                    except:
-                        raise
-                        pass
-            
-    def read_ncx_toc(self, toc):
-        bdir = os.path.dirname(toc)
-        soup = BeautifulStoneSoup(open(toc, 'rb').read(),
-                                  convertEntities=BeautifulSoup.HTML_ENTITIES)
-        elems = soup.findAll('navpoint')
-        elems.sort(cmp=lambda x, y: cmp(int(x['playorder']), int(y['playorder'])))
-        
-        for elem in elems:
-            txt = u''
-            for nl in elem.findAll('navlabel'):
-                for text in nl.findAll('text'):
-                    txt += ''.join([unicode(s) for s in text.findAll(text=True)])
-            
-            content = elem.find('content')
-            if content is None or not content.has_key('src') or not txt:
-                continue
-            
-            purl = urlparse(unquote(content['src']))
-            href, fragment = purl[2], purl[5]
-            if not os.path.isabs(href):
-                href = os.path.join(bdir, href)
-            self.append((href, fragment, txt))
-        
-    
-    def read_html_toc(self, toc, cwd):
-        soup = BeautifulSoup(open(toc, 'rb').read(), convertEntities=BeautifulSoup.HTML_ENTITIES)
-        for a in soup.findAll('a'):
-            if not a.has_key('href'):
-                continue
-            purl = urlparse(unquote(a['href']))
-            href, fragment = purl[2], purl[5]
-            if not os.path.isabs(href):
-                href = os.path.join(cwd, href)
-            txt = ''.join([unicode(s).strip() for s in a.findAll(text=True)])
-            self.append((href, fragment, txt))
             
 
 class standard_field(object):
@@ -178,21 +115,21 @@ class OPF(MetaInformation):
     MIMETYPE = 'application/oebps-package+xml'
     ENTITY_PATTERN = re.compile(r'&(\S+?);')
     
-    uid           = standard_field('uid')
-    libprs_id     = standard_field('libprs_id')
-    title         = standard_field('title')
-    authors       = standard_field('authors')
-    title_sort    = standard_field('title_sort')
-    author_sort   = standard_field('author_sort')
-    comments      = standard_field('comments')
-    category      = standard_field('category')
-    publisher     = standard_field('publisher')
-    isbn          = standard_field('isbn')
-    cover         = standard_field('cover')
-    series        = standard_field('series')
-    series_index  = standard_field('series_index')
-    rating        = standard_field('rating')
-    tags          = standard_field('tags')
+    uid            = standard_field('uid')
+    application_id = standard_field('application_id')
+    title          = standard_field('title')
+    authors        = standard_field('authors')
+    title_sort     = standard_field('title_sort')
+    author_sort    = standard_field('author_sort')
+    comments       = standard_field('comments')
+    category       = standard_field('category')
+    publisher      = standard_field('publisher')
+    isbn           = standard_field('isbn')
+    cover          = standard_field('cover')
+    series         = standard_field('series')
+    series_index   = standard_field('series_index')
+    rating         = standard_field('rating')
+    tags           = standard_field('tags')
     
     HEADER = '''\
 <?xml version="1.0" encoding="UTF-8"?>
@@ -207,14 +144,14 @@ class OPF(MetaInformation):
         if not hasattr(self, 'soup'):
             self.soup = BeautifulStoneSoup(u'''\
 %s
-<package unique-identifier="libprs_id">
+<package unique-identifier="%s_id">
     <metadata>
         <dc-metadata
          xmlns:dc="http://purl.org/dc/elements/1.1/"
          xmlns:oebpackage="http://openebook.org/namespaces/oeb-package/1.0/" />
     </metadata>
 </package>
-'''%self.HEADER)
+'''%(self.HEADER, __appname__))
     
     def _commit(self, doc):
         self.soup = BeautifulStoneSoup(doc.toxml('utf-8'), fromEncoding='utf-8')
@@ -403,15 +340,15 @@ class OPF(MetaInformation):
             self._set_metadata_element('dc:identifier', isbn, [('scheme', 'ISBN')], 
                                        replace=True)
         
-    def get_libprs_id(self):
+    def get_application_id(self):
         for item in self.soup.package.metadata.findAll('dc:identifier'):
-            if item.has_key('scheme') and item['scheme'] == 'libprs':
+            if item.has_key('scheme') and item['scheme'] == __appname__:
                 return str(item.string).strip()
         return None
     
-    def set_libprs_id(self, val):
+    def set_application_id(self, val):
         if val:
-            self._set_metadata_element('dc:identifier', str(val), [('scheme', 'libprs'), ('id', 'libprs_id')], 
+            self._set_metadata_element('dc:identifier', str(val), [('scheme', __appname__), ('id', __appname__+'_id')], 
                                        replace=True)
     
     def get_cover(self):
@@ -564,61 +501,72 @@ class OPFReader(OPF):
             stream.close()
         self.manifest = Manifest(self.soup, dir)
         self.spine = Spine(self.soup, self.manifest)
-        self.toc = TOC(self, dir)
+        self.toc = TOC()
+        self.toc.read_from_opf(self)
         self.cover_data = (None, None)
         
-class OPFCreator(OPF):
+class OPFCreator(MetaInformation):
+    
+    def __init__(self, base_path, *args, **kwargs):
+        '''
+        Initialize.
+        @param base_path: An absolute path to the directory in which this OPF file
+        will eventually be. This is used by the L{create_manifest} method
+        to convert paths to files into relative paths.
+        '''
+        MetaInformation.__init__(self, *args, **kwargs)
+        self.base_path = os.path.abspath(base_path)
+        if self.application_id is None:
+            self.application_id = str(uuid.uuid4())
+        self.toc = None
+        if isinstance(self.manifest, Manifest):
+            manifest = []
+            for path, mt in self.manifest:
+                if not path.startswith(self.base_path):
+                    raise ValueError('Invalid manifest item %s for base path %s'%(path, self.base_path))
+                path = path[len(self.base_path)+1:]
+                manifest.append((path, mt))
+            self.manifest = manifest
     
-    def __init__(self, mi):
-        self.title = mi.title
-        self.authors = mi.authors
-        if mi.category:
-            self.category = mi.category
-        if mi.comments:
-            self.comments = mi.comments
-        if mi.publisher:
-            self.publisher = mi.publisher
-        if mi.rating:
-            self.rating = mi.rating
-        if mi.series:
-            self.series = mi.series
-        if mi.series_index:
-            self.series_index = mi.series_index
-        if mi.tags:
-            self.tags = mi.tags
-        if mi.isbn:
-            self.isbn = mi.isbn
-        self.cover_data = mi.cover_data
-        if hasattr(mi, 'libprs_id'):
-            self.libprs_id = mi.libprs_id
-        if hasattr(mi, 'uid'):
-            self.uid = mi.uid    
-        
     def create_manifest(self, entries):
         '''
         Create <manifest>
-        @param entries: List of (URL, mime-type)
+        @param entries: List of (path, mime-type). Each path is made relative to
+        C{self.base_path}; an empty or None mime-type is guessed from the file name.
         @type entries: list of 2-tuples
         '''
-        doc = dom.parseString(self.soup.__str__('UTF-8').strip())
-        package = doc.documentElement
-        manifest = doc.createElement('manifest')
-        package.appendChild(manifest)
-        package.appendChild(doc.createTextNode('\n'))
-        
-        self.href_map = {}
-        
-        for href, media_type in entries:
-            item = doc.createElement('item')
-            item.setAttribute('href', href)
-            item.setAttribute('media-type', media_type)
-            self.href_map[href] = str(hash(href))
-            item.setAttribute('id', self.href_map[href])
-            manifest.appendChild(item)
-            manifest.appendChild(doc.createTextNode('\n'))
-            
-        self._commit(doc)
+        rentries = []
+        base_path = self.base_path
+        mimetypes.init()
+        for href, mt in entries:
+            href = os.path.abspath(href)
+            if not href.startswith(base_path):
+                raise ValueError('OPF should only refer to files below it. %s is above %s'%(href, base_path))
+            href = href[len(base_path)+1:].replace(os.sep, '/')
+            if not mt:
+                mt = mimetypes.guess_type(href)[0]
+                if not mt:
+                    mt = ''
+            rentries.append((href, mt))
             
+        self.manifest = rentries
+        
+    def create_manifest_from_files_in(self, files_and_dirs):
+        entries = []
+        
+        def dodir(dir):
+            for root, dirs, files in os.walk(dir):
+                for name in files:
+                    path = os.path.join(root, name)
+                    entries.append((path, None)) 
+        
+        for i in files_and_dirs:
+            if os.path.isdir(i):
+                dodir(i)
+            else:
+                entries.append((i, None))
+                
+        self.create_manifest(entries)    
             
     def create_spine(self, entries):
         '''
@@ -626,19 +574,43 @@ class OPFCreator(OPF):
         @param: List of paths
         @type param: list of strings
         '''
-        doc = dom.parseString(self.soup.__str__('UTF-8').strip())
-        package = doc.documentElement
-        spine = doc.createElement('spine')
-        package.appendChild(spine)
-        package.appendChild(doc.createTextNode('\n'))
+        self.spine = []
         
-        for href in entries:
-            itemref = doc.createElement('itemref')
-            itemref.setAttribute('idref', self.href_map[href])
-            spine.appendChild(itemref)
-            spine.appendChild(doc.createTextNode('\n'))
+        for path in entries:
+            if not os.path.isabs(path):
+                path = os.path.join(self.base_path, path)
+            if not path.startswith(self.base_path):
+                raise ValueError('Invalid entry %s for base path %s'%(path, self.base_path))
+            href = path[len(self.base_path)+1:].replace(os.sep, '/')  # manifest hrefs use '/'
+            in_manifest = False
+            for i, m in enumerate(self.manifest):
+                if m[0].replace(os.sep, '/') == href:
+                    in_manifest = True
+                    break
+            if not in_manifest:
+                raise ValueError('%s is not in the manifest. (%s)'%(href, path))
+            self.spine.append(i)
+         
             
-        self._commit(doc)
+        
+    def set_toc(self, toc):
+        '''
+        Set the toc. You must call L{create_spine} before calling this
+        method.
+        @param toc: A Table of Contents
+        @type toc: L{TOC}
+        '''
+        self.toc = toc
+        
+    def render(self, opf_stream, ncx_stream=None):
+        from libprs500.resources import opf_template
+        from genshi.template import MarkupTemplate
+        template = MarkupTemplate(opf_template)
+        opf = template.generate(__appname__=__appname__, mi=self).render('xml')
+        opf_stream.write(opf)
+        toc = getattr(self, 'toc', None)
+        if toc is not None and ncx_stream is not None:
+            toc.render(ncx_stream, self.application_id)
     
 def option_parser():
     return get_parser('opf')
@@ -649,7 +621,7 @@ def main(args=sys.argv):
     if len(args) != 2:
         parser.print_help()
         return 1
-    mi = OPFReader(open(args[1], 'rb'))
+    mi = MetaInformation(OPFReader(open(args[1], 'rb')))
     if opts.title is not None:
         mi.title = opts.title.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
     if opts.authors is not None:
@@ -660,7 +632,8 @@ def main(args=sys.argv):
     if opts.comment is not None:
         mi.comments = opts.comment.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
     print mi
-    mi.write(open(args[1], 'wb'))
+    mo = OPFCreator(os.getcwd(), mi)
+    mo.render(open(args[1], 'wb'))
     return 0
 
 if __name__ == '__main__':
diff --git a/src/libprs500/ebooks/metadata/opf.xml b/src/libprs500/ebooks/metadata/opf.xml
new file mode 100644
index 0000000000..a847bae2c8
--- /dev/null
+++ b/src/libprs500/ebooks/metadata/opf.xml
@@ -0,0 +1,36 @@
+<?xml version="1.0"  encoding="UTF-8"?>
+<package version="2.0" 
+         xmlns:opf="http://www.idpf.org/2007/opf" 
+         xmlns:py="http://genshi.edgewall.org/" 
+         unique-identifier="${__appname__}_id"
+         
+>
+    <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
+        <dc:title py:with="attrs={'file-as':mi.title_sort}" py:attrs="attrs">${mi.title}</dc:title>
+        <dc:creator opf:role="aut" py:for="i, author in enumerate(mi.authors)" py:with="attrs={'file-as':mi.author_sort if i==0 else None}" py:attrs="attrs">${author}</dc:creator>
+        <dc:identifier scheme="${__appname__}" id="${__appname__}_id">${mi.application_id}</dc:identifier>
+
+        <dc:type py:if="mi.category">${mi.category}</dc:type>
+        <dc:description py:if="mi.comments">${mi.comments}</dc:description>
+        <dc:publisher py:if="mi.publisher">${mi.publisher}</dc:publisher>
+        <dc:identifier opf:scheme="ISBN" py:if="mi.isbn">${mi.isbn}</dc:identifier>
+        <series py:if="mi.series">${mi.series}</series>
+        <series-index py:if="mi.series_index is not None">${mi.series_index}</series-index>
+        <rating py:if="mi.rating is not None">${mi.rating}</rating>
+        <dc:subject py:for="tag in (mi.tags or [])">${tag}</dc:subject>
+    </metadata>
+    
+    <guide>
+        <reference py:if="mi.cover" type="cover" href="${mi.cover}" /> 
+    </guide>
+    
+    <manifest>
+        <py:for each="i, m in enumerate(mi.manifest)">
+        <item id="${str(i)}" href="${m[0]}" media-type="${m[1]}" /> 
+        </py:for>
+    </manifest>
+    
+    <spine py:with="attrs={'toc':'ncx' if mi.toc else None}" py:attrs="attrs">
+        <itemref py:for="idref in mi.spine" idref="${str(idref)}" />
+    </spine>    
+</package>
diff --git a/src/libprs500/ebooks/metadata/toc.py b/src/libprs500/ebooks/metadata/toc.py
new file mode 100644
index 0000000000..89aaadbe11
--- /dev/null
+++ b/src/libprs500/ebooks/metadata/toc.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env  python
+##    Copyright (C) 2008 Kovid Goyal kovid@kovidgoyal.net
+##    This program is free software; you can redistribute it and/or modify
+##    it under the terms of the GNU General Public License as published by
+##    the Free Software Foundation; either version 2 of the License, or
+##    (at your option) any later version.
+##
+##    This program is distributed in the hope that it will be useful,
+##    but WITHOUT ANY WARRANTY; without even the implied warranty of
+##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+##    GNU General Public License for more details.
+##
+##    You should have received a copy of the GNU General Public License along
+##    with this program; if not, write to the Free Software Foundation, Inc.,
+##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+import os, glob
+from urlparse import urlparse
+from urllib import unquote
+
+from libprs500 import __appname__
+from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
+
+class NCXSoup(BeautifulStoneSoup):
+    'BeautifulStoneSoup preconfigured for parsing NCX files.'
+    NESTABLE_TAGS = {'navpoint':[]}
+
+    def __init__(self, raw):
+        # Convert HTML entities and treat <meta> and <content> as self-closing tags.
+        options = dict(convertEntities=BeautifulSoup.HTML_ENTITIES, selfClosingTags=['meta', 'content'])
+        BeautifulStoneSoup.__init__(self, raw, **options)
+
+class TOC(list):
+    'A table of contents node; the list items are its child TOC nodes.'
+
+    def __init__(self, href=None, fragment=None, text=None, parent=None, play_order=1, 
+                 base_path=None):
+        # base_path=None means the cwd at call time (an os.getcwd() default is frozen at import).
+        self.href = href
+        self.fragment = fragment
+        self.text = text
+        self.parent = parent
+        self.base_path = os.getcwd() if base_path is None else base_path
+        self.play_order = play_order
+
+    def add_item(self, href, fragment, text):
+        'Append a child entry that inherits base_path and return it.'
+        self.append(TOC(href=href, fragment=fragment, text=text, parent=self, base_path=self.base_path))
+        return self[-1]
+
+    def top_level_items(self):
+        'Yield the direct children whose text is not None.'
+        for item in self:
+            if item.text is not None:
+                yield item
+
+    def depth(self):
+        'Return the number of levels in this subtree; a childless node has depth 1.'
+        return 1 + max([0] + [child.depth() for child in self])
+
+    @property
+    def abspath(self):
+        'Return the file this toc entry points to as a absolute path to a file on the system.'
+        path = self.href.replace('/', os.sep)
+        if not os.path.isabs(path):
+            path = os.path.join(self.base_path, path)
+        return path
+
+    def read_from_opf(self, opfreader):
+        '''Load the TOC referenced by opfreader. Candidates, in order: the toc
+        attribute of <spine>, the guide reference of type "toc", the first
+        manifest item with "toc" in its href. Does nothing if none is found.'''
+        toc = opfreader.soup.find('spine', toc=True)
+        if toc is not None:
+            toc = toc['toc']
+        if toc is None:
+            try:
+                toc = opfreader.soup.find('guide').find('reference', attrs={'type':'toc'})['href']
+            except (AttributeError, TypeError, KeyError): # no usable guide reference
+                for item in opfreader.manifest:
+                    if 'toc' in item.href.lower():
+                        toc = item.href
+                        break
+
+        if toc is None:
+            return
+        if toc.lower() == 'ncx':
+            # "ncx" is treated as an id, not a path: use the first *.ncx in base_path.
+            m = glob.glob(os.path.join(os.path.abspath(self.base_path), '*.ncx'))
+            if m:
+                self.read_ncx_toc(m[0])
+            return
+        toc = urlparse(unquote(toc))[2].replace('/', os.sep)
+        if not os.path.isabs(toc):
+            toc = os.path.join(self.base_path, toc)
+        try:
+            if not os.path.exists(toc):
+                bn  = os.path.basename(toc)
+                bn  = bn.replace('_top.htm', '_toc.htm') # Bug in BAEN OPF files
+                toc = os.path.join(os.path.dirname(toc), bn)
+            self.read_html_toc(toc)
+        except Exception:
+            pass # Best effort: an unreadable HTML TOC must not abort OPF processing
+
+    def read_ncx_toc(self, toc):
+        'Append the navpoint tree of the NCX file at path toc; base_path becomes its directory.'
+        self.base_path = os.path.dirname(toc)
+        soup = NCXSoup(open(toc, 'rb').read())
+
+        def process_navpoint(np, dest):
+            # Tags only match lower-cased here ('navpoint'); presumably attributes do too - TODO confirm.
+            play_order = np.get('playorder', np.get('playOrder', 1))
+            href = fragment = text = None
+            nl = np.find('navlabel')
+            if nl is not None:
+                text = u''
+                for txt in nl.findAll('text'):
+                    text += ''.join([unicode(s) for s in txt.findAll(text=True)])
+                content = np.find('content') # of this navpoint, not the outer loop's elem
+                if content is None or not content.has_key('src') or not text:
+                    return # a labelled navpoint needs both label text and a target
+                purl = urlparse(unquote(content['src']))
+                href, fragment = purl[2], purl[5]
+            nd = dest.add_item(href, fragment, text)
+            nd.play_order = play_order
+            for c in np:
+                if getattr(c, 'name', None) == 'navpoint':
+                    process_navpoint(c, nd)
+
+        nm = soup.find('navmap')
+        for elem in ([] if nm is None else nm): # tolerate an NCX without a navmap
+            if getattr(elem, 'name', None) == 'navpoint':
+                process_navpoint(elem, self)
+
+    def read_html_toc(self, toc):
+        'Add one entry per <a href> in the HTML file at path toc; base_path becomes its directory.'
+        self.base_path = os.path.dirname(toc)
+        soup = BeautifulSoup(open(toc, 'rb').read(), convertEntities=BeautifulSoup.HTML_ENTITIES)
+        for a in soup.findAll('a'):
+            if not a.has_key('href'):
+                continue
+            purl = urlparse(unquote(a['href']))
+            href, fragment = purl[2], purl[5]
+            txt = ''.join([unicode(s).strip() for s in a.findAll(text=True)])
+            self.add_item(href, fragment, txt)
+
+    def render(self, stream, uid):
+        'Render this TOC through the NCX template (passing uid) and write the result to stream.'
+        from libprs500.resources import ncx_template
+        from genshi.template import MarkupTemplate
+        doctype = ('ncx', "-//NISO//DTD ncx 2005-1//EN", "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd")
+        template = MarkupTemplate(ncx_template)
+        raw = template.generate(uid=uid, toc=self, __appname__=__appname__)
+        raw = raw.render(doctype=doctype)
+        stream.write(raw)
\ No newline at end of file
diff --git a/src/libprs500/ebooks/mobi/reader.py b/src/libprs500/ebooks/mobi/reader.py
index 71a0c3f026..c89daa1ae8 100644
--- a/src/libprs500/ebooks/mobi/reader.py
+++ b/src/libprs500/ebooks/mobi/reader.py
@@ -186,11 +186,11 @@ class MobiReader(object):
         
         if self.book_header.exth is not None:
             opf = self.create_opf(htmlfile)
-            opf.write(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'))
+            opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'))
         
     def create_opf(self, htmlfile):
         mi = self.book_header.exth.mi
-        opf = OPFCreator(mi)
+        opf = OPFCreator(os.path.dirname(htmlfile), mi)
         if hasattr(self.book_header.exth, 'cover_offset'):
             opf.cover = 'images/%05d.jpg'%(self.book_header.exth.cover_offset+1)
         manifest = [(os.path.basename(htmlfile), 'text/x-oeb1-document')]
@@ -333,4 +333,4 @@ def main(args=sys.argv):
     return 0
 
 if __name__ == '__main__':
-    sys.exit(main())
\ No newline at end of file
+    sys.exit(main())
diff --git a/src/libprs500/library/database.py b/src/libprs500/library/database.py
index e7d67dba91..6d8c3f4be8 100644
--- a/src/libprs500/library/database.py
+++ b/src/libprs500/library/database.py
@@ -1340,7 +1340,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
         mi.rating = self.rating(idx, index_is_id=index_is_id)
         mi.isbn = self.isbn(idx, index_is_id=index_is_id)
         id = idx if index_is_id else self.id(idx)        
-        mi.libprs_id = id
+        mi.application_id = id
         return mi
     
     def vacuum(self):
@@ -1382,7 +1382,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
                 name += '_'+id
                 base  = dir if single_dir else tpath
                 
-                mi = OPFCreator(self.get_metadata(idx, index_is_id=index_is_id))
+                mi = OPFCreator(base, self.get_metadata(idx, index_is_id=index_is_id))
                 cover = self.cover(idx, index_is_id=index_is_id)
                 if cover is not None:
                     cname = name + '.jpg'
@@ -1390,7 +1390,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
                     open(cpath, 'wb').write(cover)
                     mi.cover = cname
                 f = open(os.path.join(base, name+'.opf'), 'wb')
-                mi.write(f)
+                mi.render(f)
                 f.close()
                 
                 for fmt in self.formats(idx, index_is_id=index_is_id).split(','):
diff --git a/src/libprs500/linux.py b/src/libprs500/linux.py
index 0d06482aee..de3846f4a5 100644
--- a/src/libprs500/linux.py
+++ b/src/libprs500/linux.py
@@ -44,6 +44,7 @@ entry_points = {
                              'rtf2lrf   = libprs500.ebooks.lrf.rtf.convert_from:main',
                              'web2disk  = libprs500.web.fetch.simple:main',
                              'feeds2disk = libprs500.web.feeds.main:main',
+                             'feeds2lrf = libprs500.ebooks.lrf.feeds.convert_from:main',
                              'web2lrf   = libprs500.ebooks.lrf.web.convert_from:main',
                              'pdf2lrf   = libprs500.ebooks.lrf.pdf.convert_from:main',
                              'mobi2lrf  = libprs500.ebooks.lrf.mobi.convert_from:main',
diff --git a/src/libprs500/terminfo.py b/src/libprs500/terminfo.py
index fca163d988..2114f8ad7f 100644
--- a/src/libprs500/terminfo.py
+++ b/src/libprs500/terminfo.py
@@ -201,6 +201,7 @@ class ProgressBar:
         self.term.BOL + self.term.UP + self.term.CLEAR_EOL +
         (self.bar % (100*percent, '='*n, '-'*(self.width-10-n))) +
         self.term.CLEAR_EOL + msg)
+        sys.stdout.flush()
     
     def clear(self):
         if not self.cleared:
diff --git a/src/libprs500/web/feeds/news.py b/src/libprs500/web/feeds/news.py
index 46c5549598..98e2405c72 100644
--- a/src/libprs500/web/feeds/news.py
+++ b/src/libprs500/web/feeds/news.py
@@ -17,12 +17,13 @@
 The backend to parse feeds and create HTML that can then be converted
 to an ebook.
 '''
-import logging, os, cStringIO, time, itertools, traceback
+import logging, os, cStringIO, time, traceback
 import urlparse
 
 from libprs500 import browser, __appname__
 from libprs500.ebooks.BeautifulSoup import BeautifulSoup
 from libprs500.ebooks.metadata.opf import OPFCreator
+from libprs500.ebooks.metadata.toc import TOC
 from libprs500.ebooks.metadata import MetaInformation
 from libprs500.web.feeds import feed_from_xml, templates
 from libprs500.web.fetch.simple import option_parser as web2disk_option_parser
@@ -94,6 +95,9 @@ class BasicNewsRecipe(object):
     #: using cp1252. If None, try to detect the encoding. 
     encoding = None
     
+    #: Specify any extra CSS that should be added to downloaded HTML files
+    extra_css = None
+    
     #: List of regular expressions that determines which links to follow
     #: If empty, it is ignored.
     #: Only one of L{match_regexps} or L{filter_regexps} should be defined
@@ -276,8 +280,9 @@ class BasicNewsRecipe(object):
             
         self.web2disk_options = web2disk_option_parser().parse_args(web2disk_cmdline)[0]
         for extra in ('keep_only_tags', 'remove_tags', 'preprocess_regexps', 
-                      'preprocess_html', 'remove_tags_after', 'postprocess_html'):
+                      'preprocess_html', 'remove_tags_after'):
             setattr(self.web2disk_options, extra, getattr(self, extra))
+        self.web2disk_options.postprocess_html = [self._postprocess_html, self.postprocess_html]
         
         if self.delay > 0:
             self.simultaneous_downloads = 1
@@ -288,6 +293,14 @@ class BasicNewsRecipe(object):
         self.failed_downloads = []
         self.partial_failures = []
             
+    def _postprocess_html(self, soup):
+        'Append self.extra_css (when set) to the <head> of soup as a <style> tag; return soup.'
+        head = soup.find('head') if self.extra_css is not None else None
+        if head:
+            css = u'<style type="text/css">%s</style>'%self.extra_css
+            head.insert(len(head.contents), BeautifulSoup(css).find('style'))
+        return soup
+    
     def download(self):
         '''
         Download and pre-process all articles from the feeds in this recipe. 
@@ -297,6 +310,7 @@ class BasicNewsRecipe(object):
         @rtype: string
         '''
         self.report_progress(0, _('Trying to download cover...'))
+        
         self.download_cover()
         res = self.build_index()
         self.cleanup()
@@ -362,7 +376,7 @@ class BasicNewsRecipe(object):
         fetcher.current_dir = dir
         fetcher.show_progress = False
         res, path, failures = fetcher.start_fetch(url), fetcher.downloaded_paths, fetcher.failed_links
-        if not res:
+        if not res or not os.path.exists(res):
             raise Exception(_('Could not fetch article. Run with --debug to see the reason'))
         return res, path, failures
     
@@ -446,28 +460,44 @@ class BasicNewsRecipe(object):
         if dir is None:
             dir = self.output_dir
         mi = MetaInformation(self.title + time.strftime(self.timefmt), [__appname__])
-        opf = OPFCreator(mi)
         opf_path = os.path.join(dir, 'index.opf')
+        ncx_path = os.path.join(dir, 'index.ncx')
+        opf = OPFCreator(dir, mi)
         
+        
+        manifest = ['feed_%d'%i for i in range(len(feeds))]
+        manifest.append('index.html')
         cpath = getattr(self, 'cover_path', None) 
         if cpath is not None and os.access(cpath, os.R_OK):
             opf.cover = cpath
+            manifest.append(cpath)
+        opf.create_manifest_from_files_in(manifest)
         
         entries = ['index.html']
+        toc = TOC(base_path=dir)
         for i, f in enumerate(feeds):
             entries.append('feed_%d/index.html'%i)
+            feed = toc.add_item('feed_%d/index.html'%i, None, f.title)
             for j, a in enumerate(f):
                 if getattr(a, 'downloaded', False):
                     adir = 'feed_%d/article_%d/'%(i, j)
                     entries.append('%sindex.html'%adir)
+                    feed.add_item('%sindex.html'%adir, None, a.title if a.title else 'Untitled article')
                     for sp in a.sub_pages:
                         prefix = os.path.commonprefix([opf_path, sp])
                         relp = sp[len(prefix):]
                         entries.append(relp.replace(os.sep, '/'))
                         
-        opf.create_manifest(itertools.izip(entries, itertools.repeat('text/html')))
         opf.create_spine(entries)
-        opf.write(open(opf_path, 'wb'))
+        opf.set_toc(toc)
+        
+        for i, f in enumerate(feeds):
+            
+            for j, a in enumerate(f):
+                if getattr(a, 'downloaded', False):
+                    adir = 'feed_%d/article_%d/'%(i, j)
+                    
+        opf.render(open(opf_path, 'wb'), open(ncx_path, 'wb'))
         
     
     def article_downloaded(self, request, result):
@@ -516,7 +546,7 @@ class BasicNewsRecipe(object):
                 title, url = None, obj
             else:
                 title, url = obj
-            self.report_progress(0, _('Fetching feed %s...'%(title if title else url)))
+            self.report_progress(0, _('Fetching feed')+' %s...'%(title if title else url))
             parsed_feeds.append(feed_from_xml(self.browser.open(url).read(), 
                                               title=title,
                                               oldest_article=self.oldest_article,
diff --git a/src/libprs500/web/feeds/recipes/newsweek.py b/src/libprs500/web/feeds/recipes/newsweek.py
index 88ca183b08..0313e52f33 100644
--- a/src/libprs500/web/feeds/recipes/newsweek.py
+++ b/src/libprs500/web/feeds/recipes/newsweek.py
@@ -33,15 +33,15 @@ class Newsweek(BasicNewsRecipe):
              ('National News', 'http://feeds.newsweek.com/newsweek/NationalNews'),
              ('World News', 'http://feeds.newsweek.com/newsweek/WorldNews'),
              'http://feeds.newsweek.com/newsweek/Columnists/ChristopherDickey',
-             'http://feeds.newsweek.com/newsweek/Columnists/FareedZakaria', 
+             'http://feeds.newsweek.com/newsweek/Columnists/FareedZakaria',
              ('Iraq', 'http://feeds.newsweek.com/newsweek/iraq'),
              ('Society', 'http://feeds.newsweek.com/newsweek/society'),
              ('Entertainment', 'http://feeds.newsweek.com/newsweek/entertainment'),
-             'http://feeds.newsweek.com/newsweek/columnists/GeorgeFWill', 
+             'http://feeds.newsweek.com/newsweek/columnists/GeorgeFWill',
              'http://feeds.newsweek.com/newsweek/columnists/AnnaQuindlen',
              ]
     
-    extra_css = '#content { font:serif,120%; }'
+    extra_css = '#content { font:serif 1.2em; }'
     keep_only_tags = [dict(name='div', id='content')]
 
     remove_tags = [
@@ -55,8 +55,8 @@ class Newsweek(BasicNewsRecipe):
     match_regexps = [r'http://www.newsweek.com/id/\S+/page/\d+']
     
     # For testing
-    #feeds = feeds[:2]
-    #max_articles_per_feed = 1
+    #feeds = feeds[3:5]
+    #max_articles_per_feed = 2
     
     
     
@@ -91,4 +91,4 @@ class Newsweek(BasicNewsRecipe):
             img = soup.find(alt='Cover')
             if img is not None and img.has_key('src'):
                 small = img['src']
-                return small.replace('coversmall', 'coverlarge')
\ No newline at end of file
+                return small.replace('coversmall', 'coverlarge')
diff --git a/src/libprs500/web/feeds/templates.py b/src/libprs500/web/feeds/templates.py
index dd12a1b2ff..1d1becbb51 100644
--- a/src/libprs500/web/feeds/templates.py
+++ b/src/libprs500/web/feeds/templates.py
@@ -57,16 +57,17 @@ class NavBarTemplate(Template):
     <body>
         <div class="navbar" style="text-align:center">
             <hr py:if="bottom" />
-            <a href="../index.html#article_${str(art)}">Up one level</a> 
+            <py:if test="art != num - 1">
+            | <a href="../article_${str(art+1)}/index.html">Next</a>
+            </py:if>
+            | <a href="../index.html#article_${str(art)}">Up one level</a> 
             <py:if test="two_levels">
             | <a href="../../index.html#_${str(feed)}">Up two levels</a>
             </py:if>
             <py:if test="art != 0">
             | <a href="../article_${str(art-1)}/index.html">Previous</a>
             </py:if>
-            <py:if test="art != num - 1">
-            | <a href="../article_${str(art+1)}/index.html">Next</a>
-            </py:if>
+            |
             <hr py:if="not bottom" />
         </div>
     </body>
@@ -159,4 +160,4 @@ class FeedTemplate(Template):
 ''')
         
     def generate(self, feed):
-        return Template.generate(self, feed=feed)
\ No newline at end of file
+        return Template.generate(self, feed=feed)
diff --git a/src/libprs500/web/fetch/simple.py b/src/libprs500/web/fetch/simple.py
index 644f5bc241..b6622631e9 100644
--- a/src/libprs500/web/fetch/simple.py
+++ b/src/libprs500/web/fetch/simple.py
@@ -38,9 +38,9 @@ def basename(url):
 
 def save_soup(soup, target):
     nm = Tag(soup, '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />')
-    for meta in soup.find('meta', content=True):
-        if 'charset' in meta['content']:
-            meta.replaceWith(nm)
+    meta = soup.find('meta', content=True)
+    if meta and 'charset' in meta['content']:
+        meta.replaceWith(nm)
     f = codecs.open(target, 'w', 'utf-8')
     f.write(unicode(soup))
     f.close()
@@ -85,7 +85,7 @@ class RecursiveFetcher(object):
         self.remove_tags_after   = getattr(options, 'remove_tags_after', None)
         self.keep_only_tags      = getattr(options, 'keep_only_tags', [])
         self.preprocess_html_ext = getattr(options, 'preprocess_html', lambda soup: soup) 
-        self.postprocess_html_ext= getattr(options, 'postprocess_html', lambda soup: soup)
+        self.postprocess_html_ext= getattr(options, 'postprocess_html', [])
         self.download_stylesheets = not options.no_stylesheets
         self.show_progress = True
         self.failed_links = []
@@ -336,7 +336,9 @@ class RecursiveFetcher(object):
                         self.process_return_links(soup, iurl) 
                         self.logger.debug('Recursion limit reached. Skipping links in %s', iurl)
                     
-                    save_soup(self.postprocess_html_ext(soup), res)
+                    for func in self.postprocess_html_ext:
+                        soup = func(soup)
+                    save_soup(soup, res)
                     
                     self.localize_link(tag, 'href', res)
                 except Exception, err: