diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index eb61e6d988..9bab5d6701 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -679,7 +679,7 @@ def create_oebbook(log, path_or_stream, opts, input_plugin, reader=None,
     html_preprocessor = HTMLPreProcessor(input_plugin.preprocess_html,
             opts.preprocess_html)
     oeb = OEBBook(log, html_preprocessor,
-            pretty_print=opts.pretty_print, encoding=encoding)
+            pretty_print=opts.pretty_print, input_encoding=encoding)
     # Read OEB Book into OEBBook
     log('Parsing all content...')
     if reader is None:
diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py
index f880d8731c..f566714878 100644
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@@ -16,7 +16,7 @@ from urlparse import urlparse, urlunparse
 from urllib import unquote
 
 from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
+from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.customize.conversion import OptionRecommendation
 from calibre import unicode_path
@@ -264,7 +264,7 @@ class HTMLInput(InputFormatPlugin):
 
     def convert(self, stream, opts, file_ext, log,
                 accelerators):
-        from calibre.ebooks.metadata.meta import get_metadata
+        from calibre.ebooks.metadata.html import get_metadata_
 
         basedir = os.getcwd()
         self.opts = opts
@@ -275,18 +275,16 @@ class HTMLInput(InputFormatPlugin):
             opfpath = stream.name
         else:
             filelist = get_filelist(stream.name, basedir, opts, log)
-            mi = get_metadata(stream, 'html')
+            mi = get_metadata_(stream.read(), opts.input_encoding)
             mi = OPFCreator(os.getcwdu(), mi)
             mi.guide = None
             entries = [(f.path, 'application/xhtml+xml') for f in filelist]
             mi.create_manifest(entries)
             mi.create_spine([f.path for f in filelist])
 
-            mi.render(open('metadata.opf', 'wb'))
+            mi.render(open('metadata.opf', 'wb'), encoding=opts.input_encoding)
             opfpath = os.path.abspath('metadata.opf')
 
-        opf = OPF(opfpath, os.getcwdu())
-
         if opts.dont_package:
             return opfpath
 
diff --git a/src/calibre/ebooks/metadata/html.py b/src/calibre/ebooks/metadata/html.py
index 9ef578c858..d5aa9b8bef 100644
--- a/src/calibre/ebooks/metadata/html.py
+++ b/src/calibre/ebooks/metadata/html.py
@@ -12,9 +12,18 @@ import re
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.chardet import xml_to_unicode
 
+
 def get_metadata(stream):
-    src = xml_to_unicode(stream.read())[0]
-    
+    src = stream.read()
+    return get_metadata_(src)
+
+def get_metadata_(src, encoding=None):
+    if not isinstance(src, unicode):
+        if not encoding:
+            src = xml_to_unicode(src)[0]
+        else:
+            src = src.decode(encoding, 'replace')
+
     # Title
     title = None
     pat = re.compile(r'<!--.*?TITLE=(?P<q>[\'"])(.+?)(?P=q).*?-->', re.DOTALL)
@@ -26,29 +35,29 @@ def get_metadata(stream):
         match = pat.search(src)
         if match:
             title = match.group(1)
-        
+
     # Author
     author = None
     pat = re.compile(r'<!--.*?AUTHOR=(?P<q>[\'"])(.+?)(?P=q).*?-->', re.DOTALL)
     match = pat.search(src)
     if match:
         author = match.group(2).replace(',', ';')
-        
+
     mi = MetaInformation(title, [author] if author else None)
-    
+
     # Publisher
     pat = re.compile(r'<!--.*?PUBLISHER=(?P<q>[\'"])(.+?)(?P=q).*?-->', re.DOTALL)
     match = pat.search(src)
     if match:
         mi.publisher = match.group(2)
-        
+
     # ISBN
     pat = re.compile(r'<!--.*?ISBN=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL)
     match = pat.search(src)
     if match:
         isbn = match.group(1)
         mi.isbn = re.sub(r'[^0-9xX]', '', isbn)
-        
+
     return mi
-    
-    
\ No newline at end of file
+
+
diff --git a/src/calibre/ebooks/metadata/opf.xml b/src/calibre/ebooks/metadata/opf.xml
index 619fb3301c..027d560ffa 100644
--- a/src/calibre/ebooks/metadata/opf.xml
+++ b/src/calibre/ebooks/metadata/opf.xml
@@ -1,4 +1,3 @@
-<?xml version="1.0"  encoding="UTF-8"?>
 <package version="2.0" 
          xmlns="http://www.idpf.org/2007/opf" 
          xmlns:py="http://genshi.edgewall.org/" 
diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py
index 4918e2a583..7dc4c67d17 100644
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@@ -924,9 +924,11 @@ class OPFCreator(MetaInformation):
         self.guide.set_basedir(self.base_path)
 
     def render(self, opf_stream=sys.stdout, ncx_stream=None,
-               ncx_manifest_entry=None):
+               ncx_manifest_entry=None, encoding=None):
         from calibre.resources import opf_template
         from calibre.utils.genshi.template import MarkupTemplate
+        if encoding is None:
+            encoding = 'utf-8'
         template = MarkupTemplate(opf_template)
         toc = getattr(self, 'toc', None)
         if self.manifest:
@@ -948,7 +950,11 @@ class OPFCreator(MetaInformation):
                 cover = os.path.abspath(os.path.join(self.base_path, cover))
             self.guide.set_cover(cover)
         self.guide.set_basedir(self.base_path)
-        opf = template.generate(__appname__=__appname__, mi=self, __version__=__version__).render('xml')
+        opf = template.generate(
+                __appname__=__appname__, mi=self,
+                __version__=__version__).render('xml', encoding=encoding)
+        opf_stream.write('<?xml version="1.0" encoding="%s" ?>\n'
+                %encoding.upper())
         opf_stream.write(opf)
         opf_stream.flush()
         if toc is not None and ncx_stream is not None:
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index 55cc2f926b..e2a4875399 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -1516,7 +1516,8 @@ class OEBBook(object):
     def __init__(self, logger,
             html_preprocessor,
             css_preprocessor=CSSPreProcessor(),
-            encoding='utf-8', pretty_print=False):
+            encoding='utf-8', pretty_print=False,
+            input_encoding='utf-8'):
         """Create empty book.  Arguments:
 
         :param:`encoding`: Default encoding for textual content read
@@ -1549,6 +1550,7 @@ class OEBBook(object):
         """
         _css_log_handler.log = logger
         self.encoding = encoding
+        self.input_encoding = input_encoding
         self.html_preprocessor = html_preprocessor
         self.css_preprocessor = css_preprocessor
         self.pretty_print = pretty_print
@@ -1588,9 +1590,9 @@ class OEBBook(object):
                 return fix_data(data.decode('utf-16'))
             except UnicodeDecodeError:
                 pass
-        if self.encoding is not None:
+        if self.input_encoding is not None:
             try:
-                return fix_data(data.decode(self.encoding, 'replace'))
+                return fix_data(data.decode(self.input_encoding, 'replace'))
             except UnicodeDecodeError:
                 pass
         try:
diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py
index 46b3e64644..6653240629 100644
--- a/src/calibre/ebooks/oeb/iterator.py
+++ b/src/calibre/ebooks/oeb/iterator.py
@@ -130,6 +130,9 @@ class EbookIterator(object):
             plumber.opts.dont_package = True
         if hasattr(plumber.opts, 'no_process'):
             plumber.opts.no_process = True
+        if hasattr(plumber.input_plugin, '_preprocess_html_for_viewer'):
+            plumber.input_plugin._preprocess_html_for_viewer = True
+
         self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
                 plumber.opts, plumber.input_fmt, self.log,
                 {}, self.base)
diff --git a/src/calibre/ebooks/oeb/output.py b/src/calibre/ebooks/oeb/output.py
index 6f141f7e5e..2cb513293c 100644
--- a/src/calibre/ebooks/oeb/output.py
+++ b/src/calibre/ebooks/oeb/output.py
@@ -30,6 +30,7 @@ class OEBOutput(OutputFormatPlugin):
                     raw = etree.tostring(root, pretty_print=True,
                             encoding='utf-8')
                     with open(href, 'wb') as f:
+                        f.write('<?xml version="1.0" encoding="UTF-8" ?>\n')
                         f.write(raw)
 
             for item in oeb_book.manifest:
diff --git a/src/calibre/ebooks/pdf/input.py b/src/calibre/ebooks/pdf/input.py
index 6aa695c912..d6e66ebd74 100644
--- a/src/calibre/ebooks/pdf/input.py
+++ b/src/calibre/ebooks/pdf/input.py
@@ -17,10 +17,18 @@ class PDFInput(InputFormatPlugin):
     description = 'Convert PDF files to HTML'
     file_types  = set(['pdf'])
 
+    _preprocess_html_for_viewer = False
+
     def convert(self, stream, options, file_ext, log,
                 accelerators):
         html = pdftohtml(stream.name)
 
+        if self._preprocess_html_for_viewer:
+            from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
+            prepro = HTMLPreProcessor(lambda x:x, False)
+            html = prepro(html.decode('utf-8')).encode('utf-8')
+
+
         with open('index.html', 'wb') as index:
             index.write(html)
 
diff --git a/src/calibre/utils/ipc/worker.py b/src/calibre/utils/ipc/worker.py
index 8898d753a2..de220340db 100644
--- a/src/calibre/utils/ipc/worker.py
+++ b/src/calibre/utils/ipc/worker.py
@@ -30,6 +30,13 @@ PARALLEL_FUNCS = {
       'read_metadata' :
       ('calibre.ebooks.metadata.worker', 'read_metadata_', 'notification'),
 
+      'read_pdf_metadata' :
+      ('calibre.utils.podofo.__init__', 'get_metadata_', None),
+
+      'write_pdf_metadata' :
+      ('calibre.utils.podofo.__init__', 'set_metadata_', None),
+
+
       'save_book' :
       ('calibre.ebooks.metadata.worker', 'save_book', 'notification'),
 }
diff --git a/src/calibre/utils/podofo/__init__.py b/src/calibre/utils/podofo/__init__.py
index 3db6699dfe..8654a95c04 100644
--- a/src/calibre/utils/podofo/__init__.py
+++ b/src/calibre/utils/podofo/__init__.py
@@ -6,11 +6,14 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import os
+import os, time
 
 from calibre.constants import plugins, preferred_encoding
 from calibre.ebooks.metadata import MetaInformation, string_to_authors, \
     authors_to_string
+from calibre.utils.ipc.job import ParallelJob
+from calibre.utils.ipc.server import Server
+from calibre.ptempfile import PersistentTemporaryFile
 
 podofo, podofo_err = plugins['podofo']
 
@@ -19,22 +22,71 @@ class Unavailable(Exception): pass
 def get_metadata(stream):
+    """
+    Read the title, authors and producer of the PDF in ``stream``.
+
+    The stream contents are copied to a temporary file whose path is handed
+    to a ``read_pdf_metadata`` ParallelJob, which runs :func:`get_metadata_`.
+    The temporary file is removed whether or not the job succeeds.
+
+    :param stream: file-like object; read from its current position
+    :return: a ``MetaInformation`` built from the job result
+    :raises Unavailable: if the podofo plugin could not be loaded
+    :raises ValueError: if the job finished without a result
+    """
     if not podofo:
         raise Unavailable(podofo_err)
-    raw = stream.read()
-    stream.seek(0)
-    p = podofo.PDFDoc()
-    p.load(raw)
-    title = p.title
-    if not title:
+    pt = PersistentTemporaryFile('_podofo.pdf')
+    pt.write(stream.read())
+    pt.close()
+    server = Server(pool_size=1)
+    try:
+        job = ParallelJob('read_pdf_metadata', 'Read pdf metadata',
+            lambda x,y:x,  args=[pt.name])
+        server.add_job(job)
+        while not job.is_finished:
+            time.sleep(0.1)
+            job.update()
+
+        job.update()
+    finally:
+        # Clean up on every path, including when the job fails or the
+        # wait loop raises, so the temporary copy is never left behind.
+        server.close()
+        if os.path.exists(pt.name):
+            os.remove(pt.name)
+    if job.result is None:
+        raise ValueError('Failed to read metadata: ' + job.details)
+    title, authors, creator = job.result
+    # '_' is the placeholder get_metadata_ returns for an untitled document
+    if title == '_':
         title = getattr(stream, 'name', _('Unknown'))
         title = os.path.splitext(os.path.basename(title))[0]
-    author = p.author
-    authors = string_to_authors(author) if author else  [_('Unknown')]
+
     mi = MetaInformation(title, authors)
-    creator = p.creator
     if creator:
         mi.book_producer = creator
     return mi
 
+def get_metadata_(path):
+    """
+    Read title, authors and creator from the PDF file at ``path``.
+
+    Registered as the ``read_pdf_metadata`` parallel job function.
+
+    :param path: filesystem path of the PDF, passed to ``PDFDoc.open``
+    :return: ``(title, authors, creator)``; ``title`` is ``'_'`` when the
+             document has none, ``authors`` falls back to ``[_('Unknown')]``
+    """
+    p = podofo.PDFDoc()
+    p.open(path)
+    title = p.title
+    if not title:
+        # Placeholder that get_metadata() replaces with the file name
+        title = '_'
+    author = p.author
+    authors = string_to_authors(author) if author else  [_('Unknown')]
+    creator = p.creator
+    return (title, authors, creator)
+
 def prep(val):
     if not val:
         return u''
@@ -45,21 +69,63 @@ def prep(val):
 def set_metadata(stream, mi):
+    """
+    Write the title, authors and book producer from ``mi`` into the PDF
+    held in ``stream``.
+
+    The stream contents are copied to a temporary file whose path is handed
+    to a ``write_pdf_metadata`` ParallelJob (see :func:`set_metadata_`). If
+    the job returns data, ``stream`` is truncated and overwritten with it
+    and left positioned at offset 0; otherwise its contents are untouched.
+
+    :param stream: seekable, writable file-like object containing the PDF
+    :param mi: object providing ``title``, ``authors`` and ``book_producer``
+    :raises Unavailable: if the podofo plugin could not be loaded
+    """
     if not podofo:
         raise Unavailable(podofo_err)
-    raw = stream.read()
+    pt = PersistentTemporaryFile('_podofo.pdf')
+    pt.write(stream.read())
+    pt.close()
+    server = Server(pool_size=1)
+    try:
+        job = ParallelJob('write_pdf_metadata', 'Write pdf metadata',
+            lambda x,y:x,
+            args=[pt.name, mi.title, mi.authors, mi.book_producer])
+        server.add_job(job)
+        while not job.is_finished:
+            time.sleep(0.1)
+            job.update()
+
+        job.update()
+    finally:
+        # Always shut the server down and delete the temporary copy, so
+        # the persistent temporary file does not accumulate on disk.
+        server.close()
+        if os.path.exists(pt.name):
+            os.remove(pt.name)
+    if job.result is not None:
+        stream.seek(0)
+        stream.truncate()
+        stream.write(job.result)
+        stream.flush()
+        stream.seek(0)
+
+
+
+def set_metadata_(path, title, authors, bkp):
     p = podofo.PDFDoc()
-    p.load(raw)
-    title = prep(mi.title)
+    p.open(path)
+    title = prep(title)
     touched = False
     if title:
         p.title = title
         touched = True
 
-    author = prep(authors_to_string(mi.authors))
+    author = prep(authors_to_string(authors))
     if author:
         p.author = author
         touched = True
 
-    bkp = prep(mi.book_producer)
+    bkp = prep(bkp)
     if bkp:
         p.creator = bkp
         touched = True
@@ -68,12 +114,7 @@ def set_metadata(stream, mi):
         from calibre.ptempfile import TemporaryFile
         with TemporaryFile('_pdf_set_metadata.pdf') as f:
             p.save(f)
-            raw = open(f, 'rb').read()
-            stream.seek(0)
-            stream.truncate()
-            stream.write(raw)
-            stream.flush()
-            stream.seek(0)
+            return open(f, 'rb').read()
 
 if __name__ == '__main__':
     f = '/tmp/t.pdf'
diff --git a/src/calibre/utils/podofo/podofo.cpp b/src/calibre/utils/podofo/podofo.cpp
index fd5cc6bc32..e81cf0b475 100644
--- a/src/calibre/utils/podofo/podofo.cpp
+++ b/src/calibre/utils/podofo/podofo.cpp
@@ -64,6 +64,24 @@ podofo_PDFDoc_load(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
     return Py_None;
 }
 
+// Load a PDF document from the file path given as a single string argument.
+// Returns None, or NULL if argument parsing fails or Load() throws PdfError.
+static PyObject *
+podofo_PDFDoc_open(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
+    char *fname;
+
+    if (!PyArg_ParseTuple(args, "s", &fname)) return NULL;
+    try {
+        self->doc->Load(fname);
+    } catch(const PdfError & err) {
+        podofo_set_exception(err);
+        return NULL;
+    }
+
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
 static PyObject *
 podofo_PDFDoc_save(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
     char *buffer;
@@ -232,6 +250,9 @@ static PyMethodDef podofo_PDFDoc_methods[] = {
     {"load", (PyCFunction)podofo_PDFDoc_load, METH_VARARGS,
      "Load a PDF document from a byte buffer (string)"
     },
+    {"open", (PyCFunction)podofo_PDFDoc_open, METH_VARARGS,
+     "Load a PDF document from a file path (string)"
+    },
     {"save", (PyCFunction)podofo_PDFDoc_save, METH_VARARGS,
      "Save the PDF document to a path on disk"
     },