Merge pluginize

2025-11-13 10:06:59 -05:00 · 2009-05-31 08:49:17 -07:00 · 2009-05-31 08:49:17 -07:00 · c3db153421
commit c3db153421
parent f91c1e0cc4 f4ab41dc6e
352 changed files with 55719 additions and 18591 deletions
--- a/.bzrignore
+++ b/.bzrignore
@ -13,6 +13,7 @@ src/calibre/manual/cli/
 build
 dist
 docs
+nbproject/
 src/calibre/gui2/pictureflow/Makefile.Debug
 src/calibre/gui2/pictureflow/Makefile.Release
 src/calibre/gui2/pictureflow/debug/
--- a/.pydevproject
+++ b/.pydevproject
@ -2,9 +2,9 @@
 <?eclipse-pydev version="1.0"?>

 <pydev_project>
-<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.5</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property>
 <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
-<path>/calibre/src</path>
+<path>/calibre-pluginize/src</path>
 </pydev_pathproperty>
 <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
 </pydev_project>
--- a/setup.py
+++ b/setup.py
@ -72,6 +72,9 @@ if __name__ == '__main__':
                        library_dirs=[os.environ.get('PODOFO_LIB_DIR', podofo_lib)],
                        include_dirs=\
                        [os.environ.get('PODOFO_INC_DIR', podofo_inc)]))
+    else:
+        print 'WARNING: PoDoFo not found on your system. Various PDF related',
+        print 'functionality will not work.'

    ext_modules = optional + [

@ -88,6 +91,9 @@ if __name__ == '__main__':
                                      'src/calibre/utils/msdes/des.c'],
                             include_dirs=['src/calibre/utils/msdes']),

+                    Extension('calibre.plugins.cPalmdoc',
+                        sources=['src/calibre/ebooks/compression/palmdoc.c']),
+
                    PyQtExtension('calibre.plugins.pictureflow',
                                  ['src/calibre/gui2/pictureflow/pictureflow.cpp',
                                   'src/calibre/gui2/pictureflow/pictureflow.h'],
--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -2,11 +2,11 @@
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-
-import sys, os, re, logging, time, subprocess, atexit, mimetypes, warnings
+import sys, os, re, logging, time, mimetypes, \
+       __builtin__, warnings, multiprocessing
+__builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
 from htmlentitydefs import name2codepoint
 from math import floor
-from logging import Formatter

 warnings.simplefilter('ignore', DeprecationWarning)

@ -45,6 +45,13 @@ def to_unicode(raw, encoding='utf-8', errors='strict'):
        return raw
    return raw.decode(encoding, errors)

+def patheq(p1, p2):
+    '''
+    Return True if p1 and p2 refer to the same filesystem location.
+
+    Each path is normalized, resolved with os.path.realpath, normalized
+    again and passed through os.path.normcase before the comparison. If
+    either path is empty or None the result is always False.
+    '''
+    if not (p1 and p2):
+        return False
+
+    def canonical(path):
+        resolved = os.path.realpath(os.path.normpath(path))
+        return os.path.normcase(os.path.normpath(resolved))
+
+    return canonical(p1) == canonical(p2)
+
 def unicode_path(path, abs=False):
    if not isinstance(path, unicode):
        path = path.decode(sys.getfilesystemencoding())
@ -83,26 +90,33 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
    return one.replace('..', '_')


+def prints(*args, **kwargs):
+    '''
+    Print unicode arguments safely by encoding them to preferred_encoding
+    Has the same signature as the print function from Python 3.
+
+    Recognised keyword arguments are ``file`` (default ``sys.stdout``),
+    ``sep`` (default a single space) and ``end`` (default a newline).
+    Characters that cannot be represented in the output encoding are
+    replaced instead of raising UnicodeEncodeError.
+    '''
+    file = kwargs.get('file', sys.stdout)
+    sep  = kwargs.get('sep', ' ')
+    end  = kwargs.get('end', '\n')
+    enc = preferred_encoding
+    # When CALIBRE_WORKER is set in the environment, UTF-8 is used instead
+    # of preferred_encoding.
+    if 'CALIBRE_WORKER' in os.environ:
+        enc = 'utf-8'
+    for i, arg in enumerate(args):
+        if isinstance(arg, unicode):
+            # 'replace' keeps a single unencodable character from aborting
+            # the whole call.
+            arg = arg.encode(enc, 'replace')
+        elif not isinstance(arg, str):
+            # str() always yields a byte string here; it is decoded as
+            # preferred_encoding and re-encoded to the output encoding.
+            arg = str(arg).decode(preferred_encoding, 'replace')
+            arg = arg.encode(enc, 'replace')
+        file.write(arg)
+        if i != len(args)-1:
+            file.write(sep)
+    file.write(end)
+
 class CommandLineError(Exception):
    pass

-class ColoredFormatter(Formatter):

-    def format(self, record):
-        ln = record.__dict__['levelname']
-        col = ''
-        if ln == 'CRITICAL':
-            col = terminal_controller.YELLOW
-        elif ln == 'ERROR':
-            col = terminal_controller.RED
-        elif ln in ['WARN', 'WARNING']:
-            col = terminal_controller.BLUE
-        elif ln == 'INFO':
-            col = terminal_controller.GREEN
-        elif ln == 'DEBUG':
-            col = terminal_controller.CYAN
-        record.__dict__['levelname'] = col + record.__dict__['levelname'] + terminal_controller.NORMAL
-        return Formatter.format(self, record)


 def setup_cli_handlers(logger, level):
@ -244,7 +258,7 @@ class CurrentDir(object):
        os.chdir(self.cwd)


-class FileWrapper(object):
+class StreamReadWrapper(object):
    '''
    Used primarily with pyPdf to ensure the stream is properly closed.
    '''
@ -263,40 +277,7 @@ class FileWrapper(object):

 def detect_ncpus():
    """Detects the number of effective CPUs in the system"""
-    try:
-        from PyQt4.QtCore import QThread
-        ans = QThread.idealThreadCount()
-        if ans > 0:
-            return ans
-    except:
-        pass
-    #for Linux, Unix and MacOS
-    if hasattr(os, "sysconf"):
-        if os.sysconf_names.has_key("SC_NPROCESSORS_ONLN"):
-            #Linux and Unix
-            ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
-            if isinstance(ncpus, int) and ncpus > 0:
-                return ncpus
-        else:
-            #MacOS X
-            try:
-                return int(subprocess.Popen(('sysctl', '-n', 'hw.cpu'), stdout=subprocess.PIPE).stdout.read())
-            except IOError: # Occassionally the system call gets interrupted
-                try:
-                    return int(subprocess.Popen(('sysctl', '-n', 'hw.cpu'), stdout=subprocess.PIPE).stdout.read())
-                except IOError:
-                    return 1
-            except ValueError: # On some systems the sysctl call fails
-                return 1
-
-    #for Windows
-    if os.environ.has_key("NUMBER_OF_PROCESSORS"):
-        ncpus = int(os.environ["NUMBER_OF_PROCESSORS"]);
-        if ncpus > 0:
-            return ncpus
-    #return the default value
-    return 1
-
+    try:
+        return multiprocessing.cpu_count()
+    except NotImplementedError:
+        # cpu_count() raises when the count cannot be determined; report a
+        # single CPU in that case, as the previous implementation did.
+        return 1

 def launch(path_or_url):
    if os.path.exists(path_or_url):
@ -343,67 +324,6 @@ def english_sort(x, y):
    '''
    return cmp(_spat.sub('', x), _spat.sub('', y))

-class LoggingInterface:
-
-    def __init__(self, logger):
-        self.__logger = self.logger = logger
-        
-    def setup_cli_handler(self, verbosity):
-        for handler in self.__logger.handlers:
-            if isinstance(handler, logging.StreamHandler):
-                return
-        if os.environ.get('CALIBRE_WORKER', None) is not None and self.__logger.handlers:
-            return
-        stream    = sys.stdout
-        formatter = logging.Formatter()
-        level     = logging.INFO
-        if verbosity > 0:
-            formatter = ColoredFormatter('[%(levelname)s] %(message)s') if verbosity > 1 else \
-                        ColoredFormatter('%(levelname)s: %(message)s')
-            level     = logging.DEBUG
-            if verbosity > 1:
-                stream = sys.stderr
-        
-        handler = logging.StreamHandler(stream)
-        handler.setFormatter(formatter)
-        handler.setLevel(level)
-        self.__logger.addHandler(handler)
-        self.__logger.setLevel(level)
-
-
-    def ___log(self, func, msg, args, kwargs):
-        args = [msg] + list(args)
-        for i in range(len(args)):
-            if not isinstance(args[i], basestring):
-                continue
-            if sys.version_info[:2] > (2, 5):
-                if not isinstance(args[i], unicode):
-                    args[i] = args[i].decode(preferred_encoding, 'replace')
-            elif isinstance(args[i], unicode):
-                args[i] = args[i].encode(preferred_encoding, 'replace')
-        func(*args, **kwargs)
-
-    def log_debug(self, msg, *args, **kwargs):
-        self.___log(self.__logger.debug, msg, args, kwargs)
-
-    def log_info(self, msg, *args, **kwargs):
-        self.___log(self.__logger.info, msg, args, kwargs)
-
-    def log_warning(self, msg, *args, **kwargs):
-        self.___log(self.__logger.warning, msg, args, kwargs)
-
-    def log_warn(self, msg, *args, **kwargs):
-        self.___log(self.__logger.warning, msg, args, kwargs)
-
-    def log_error(self, msg, *args, **kwargs):
-        self.___log(self.__logger.error, msg, args, kwargs)
-
-    def log_critical(self, msg, *args, **kwargs):
-        self.___log(self.__logger.critical, msg, args, kwargs)
-
-    def log_exception(self, msg, *args):
-        self.___log(self.__logger.exception, msg, args, {})
-
 def walk(dir):
    ''' A nice interface to os.walk '''
    for record in os.walk(dir):
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -53,7 +53,7 @@ if plugins is None:
            plugin_path = getattr(pkg_resources, 'resource_filename')('calibre', 'plugins')
            sys.path.insert(0, plugin_path)

-        for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo'] + \
+        for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc'] + \
                    (['winutil'] if iswindows else []) + \
                    (['usbobserver'] if isosx else []):
            try:
--- a/src/calibre/customize/init.py
+++ b/src/calibre/customize/init.py
@ -221,3 +221,4 @@ class MetadataWriterPlugin(Plugin):
        '''
        pass

+   
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -1,8 +1,9 @@
-from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

-import textwrap, os
+import textwrap
+import os
+import glob
 from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWriterPlugin
 from calibre.constants import __version__

@ -20,30 +21,55 @@ every time you add an HTML file to the library.\
    on_import = True

    def run(self, htmlfile):
-        of = self.temporary_file('_plugin_html2zip.zip')
-        from calibre.ebooks.html import gui_main as html2oeb
-        html2oeb(htmlfile, of)
+        from calibre.ptempfile import TemporaryDirectory
+        from calibre.gui2.convert.gui_conversion import gui_convert
+        from calibre.customize.conversion import OptionRecommendation
+        from calibre.ebooks.epub import initialize_container
+
+        with TemporaryDirectory('_plugin_html2zip') as tdir:
+            gui_convert(htmlfile, tdir, [('debug_input', tdir,
+                OptionRecommendation.HIGH)])
+            of = self.temporary_file('_plugin_html2zip.zip')
+            opf = glob.glob(os.path.join(tdir, '*.opf'))[0]
+            ncx = glob.glob(os.path.join(tdir, '*.ncx'))
+            if ncx:
+                os.remove(ncx[0])
+            epub = initialize_container(of.name, os.path.basename(opf))
+            epub.add_dir(tdir)
+            epub.close()
+
        return of.name

-class OPFMetadataReader(MetadataReaderPlugin):

-    name        = 'Read OPF metadata'
-    file_types  = set(['opf'])
-    description = _('Read metadata from %s files')%'OPF'
+class ComicMetadataReader(MetadataReaderPlugin):
+
+    name = 'Read comic metadata'
+    file_types = set(['cbr', 'cbz'])
+    description = _('Extract cover from comic files')

    def get_metadata(self, stream, ftype):
-        from calibre.ebooks.metadata.opf2 import OPF
+        if ftype == 'cbr':
+            from calibre.libunrar import extract_member as extract_first
+            extract_first
+        else:
+            from calibre.libunzip import extract_member as extract_first
        from calibre.ebooks.metadata import MetaInformation
-        return MetaInformation(OPF(stream, os.getcwd()))
+        ret = extract_first(stream)
+        mi = MetaInformation(None, None)
+        if ret is not None:
+            path, data = ret
+            ext = os.path.splitext(path)[1][1:]
+            mi.cover_data = (ext.lower(), data)
+        return mi

-class RTFMetadataReader(MetadataReaderPlugin):
+class EPUBMetadataReader(MetadataReaderPlugin):

-    name        = 'Read RTF metadata' 
-    file_types  = set(['rtf'])
-    description = _('Read metadata from %s files')%'RTF'
+    name        = 'Read EPUB metadata'
+    file_types  = set(['epub'])
+    description = _('Read metadata from %s files')%'EPUB'

    def get_metadata(self, stream, ftype):
-        from calibre.ebooks.metadata.rtf import get_metadata
+        from calibre.ebooks.metadata.epub import get_metadata
        return get_metadata(stream)

 class FB2MetadataReader(MetadataReaderPlugin):
@ -56,35 +82,14 @@ class FB2MetadataReader(MetadataReaderPlugin):
        from calibre.ebooks.metadata.fb2 import get_metadata
        return get_metadata(stream)

+class HTMLMetadataReader(MetadataReaderPlugin):

-class LRFMetadataReader(MetadataReaderPlugin):
-    
-    name        = 'Read LRF metadata'
-    file_types  = set(['lrf'])
-    description = _('Read metadata from %s files')%'LRF'
+    name        = 'Read HTML metadata'
+    file_types  = set(['html'])
+    description = _('Read metadata from %s files')%'HTML'

    def get_metadata(self, stream, ftype):
-        from calibre.ebooks.lrf.meta import get_metadata
-        return get_metadata(stream)
-
-class PDFMetadataReader(MetadataReaderPlugin):
-    
-    name        = 'Read PDF metadata'
-    file_types  = set(['pdf'])
-    description = _('Read metadata from %s files')%'PDF'
-    
-    def get_metadata(self, stream, ftype):
-        from calibre.ebooks.metadata.pdf import get_metadata
-        return get_metadata(stream)
-
-class LITMetadataReader(MetadataReaderPlugin):
-    
-    name        = 'Read LIT metadata'
-    file_types  = set(['lit'])
-    description = _('Read metadata from %s files')%'LIT'
-    
-    def get_metadata(self, stream, ftype):
-        from calibre.ebooks.metadata.lit import get_metadata
+        from calibre.ebooks.metadata.html import get_metadata
        return get_metadata(stream)

 class IMPMetadataReader(MetadataReaderPlugin):
@ -98,66 +103,24 @@ class IMPMetadataReader(MetadataReaderPlugin):
        from calibre.ebooks.metadata.imp import get_metadata
        return get_metadata(stream)

-class RBMetadataReader(MetadataReaderPlugin):
+class LITMetadataReader(MetadataReaderPlugin):

-    name        = 'Read RB metadata'
-    file_types  = set(['rb'])
-    description = _('Read metadata from %s files')%'RB'
-    author      = 'Ashish Kulkarni'
+    name        = 'Read LIT metadata'
+    file_types  = set(['lit'])
+    description = _('Read metadata from %s files')%'LIT'

    def get_metadata(self, stream, ftype):
-        from calibre.ebooks.metadata.rb import get_metadata
+        from calibre.ebooks.metadata.lit import get_metadata
        return get_metadata(stream)

-class EPUBMetadataReader(MetadataReaderPlugin):
+class LRFMetadataReader(MetadataReaderPlugin):

-    name        = 'Read EPUB metadata'
-    file_types  = set(['epub'])
-    description = _('Read metadata from %s files')%'EPUB'
+    name        = 'Read LRF metadata'
+    file_types  = set(['lrf'])
+    description = _('Read metadata from %s files')%'LRF'

    def get_metadata(self, stream, ftype):
-        from calibre.ebooks.metadata.epub import get_metadata
-        return get_metadata(stream)
-
-class HTMLMetadataReader(MetadataReaderPlugin):
-    
-    name        = 'Read HTML metadata'
-    file_types  = set(['html'])
-    description = _('Read metadata from %s files')%'HTML'
-    
-    def get_metadata(self, stream, ftype):
-        from calibre.ebooks.metadata.html import get_metadata
-        return get_metadata(stream)
-
-class MOBIMetadataReader(MetadataReaderPlugin):
-    
-    name        = 'Read MOBI metadata'
-    file_types  = set(['mobi', 'prc', 'azw'])
-    description = _('Read metadata from %s files')%'MOBI'
-    
-    def get_metadata(self, stream, ftype):
-        from calibre.ebooks.mobi.reader import get_metadata
-        return get_metadata(stream)
-
-
-class TOPAZMetadataReader(MetadataReaderPlugin):
-    
-    name        = 'Read Topaz metadata'
-    file_types  = set(['tpz', 'azw1'])
-    description = _('Read metadata from %s files')%'MOBI'
-    
-    def get_metadata(self, stream, ftype):
-        from calibre.ebooks.metadata.topaz import get_metadata
-        return get_metadata(stream)
-
-class ODTMetadataReader(MetadataReaderPlugin):
-    
-    name        = 'Read ODT metadata'
-    file_types  = set(['odt'])
-    description = _('Read metadata from %s files')%'ODT'
-    
-    def get_metadata(self, stream, ftype):
-        from calibre.ebooks.metadata.odt import get_metadata
+        from calibre.ebooks.lrf.meta import get_metadata
        return get_metadata(stream)

 class LRXMetadataReader(MetadataReaderPlugin):
@ -170,34 +133,56 @@ class LRXMetadataReader(MetadataReaderPlugin):
        from calibre.ebooks.metadata.lrx import get_metadata
        return get_metadata(stream)

-class ComicMetadataReader(MetadataReaderPlugin):
+class MOBIMetadataReader(MetadataReaderPlugin):

-    name = 'Read comic metadata'
-    file_types = set(['cbr', 'cbz'])
-    description = _('Extract cover from comic files')
+    name        = 'Read MOBI metadata'
+    file_types  = set(['mobi', 'prc', 'azw'])
+    description = _('Read metadata from %s files')%'MOBI'

    def get_metadata(self, stream, ftype):
-        if ftype == 'cbr':
-            from calibre.libunrar import extract_member as extract_first
-        else:
-            from calibre.libunzip import extract_member as extract_first
+        from calibre.ebooks.mobi.reader import get_metadata
+        return get_metadata(stream)
+
+class ODTMetadataReader(MetadataReaderPlugin):
+
+    name        = 'Read ODT metadata'
+    file_types  = set(['odt'])
+    description = _('Read metadata from %s files')%'ODT'
+
+    def get_metadata(self, stream, ftype):
+        from calibre.ebooks.metadata.odt import get_metadata
+        return get_metadata(stream)
+
+class OPFMetadataReader(MetadataReaderPlugin):
+
+    name        = 'Read OPF metadata'
+    file_types  = set(['opf'])
+    description = _('Read metadata from %s files')%'OPF'
+
+    def get_metadata(self, stream, ftype):
+        from calibre.ebooks.metadata.opf2 import OPF
        from calibre.ebooks.metadata import MetaInformation
-        ret = extract_first(stream)
-        mi = MetaInformation(None, None)
-        if ret is not None:
-            path, data = ret
-            ext = os.path.splitext(path)[1][1:]
-            mi.cover_data = (ext.lower(), data)
-        return mi
+        return MetaInformation(OPF(stream, os.getcwd()))

-class ZipMetadataReader(MetadataReaderPlugin):
+class PDBMetadataReader(MetadataReaderPlugin):

-    name = 'Read ZIP metadata'
-    file_types = set(['zip', 'oebzip'])
-    description = _('Read metadata from ebooks in ZIP archives')
+    name        = 'Read PDB metadata'
+    file_types  = set(['pdb'])
+    description = _('Read metadata from %s files') % 'PDB'
+    author      = 'John Schember'

    def get_metadata(self, stream, ftype):
-        from calibre.ebooks.metadata.zip import get_metadata
+        from calibre.ebooks.metadata.pdb import get_metadata
+        return get_metadata(stream)
+
+class PDFMetadataReader(MetadataReaderPlugin):
+
+    name        = 'Read PDF metadata'
+    file_types  = set(['pdf'])
+    description = _('Read metadata from %s files')%'PDF'
+
+    def get_metadata(self, stream, ftype):
+        from calibre.ebooks.metadata.pdf import get_metadata
        return get_metadata(stream)

 class RARMetadataReader(MetadataReaderPlugin):
@ -210,6 +195,58 @@ class RARMetadataReader(MetadataReaderPlugin):
        from calibre.ebooks.metadata.rar import get_metadata
        return get_metadata(stream)

+class RBMetadataReader(MetadataReaderPlugin):
+    # Metadata reader for 'rb' files.
+
+    name = 'Read RB metadata'
+    author = 'Ashish Kulkarni'
+    description = _('Read metadata from %s files') % 'RB'
+    file_types = set(['rb'])
+
+    def get_metadata(self, stream, ftype):
+        '''
+        Return whatever calibre.ebooks.metadata.rb.get_metadata produces for
+        ``stream``. ``ftype`` is not used.
+        '''
+        # Imported inside the method, so the module is only loaded on use.
+        from calibre.ebooks.metadata.rb import get_metadata as read_rb
+        return read_rb(stream)
+
+class RTFMetadataReader(MetadataReaderPlugin):
+    # Metadata reader for 'rtf' files.
+
+    name = 'Read RTF metadata'
+    description = _('Read metadata from %s files') % 'RTF'
+    file_types = set(['rtf'])
+
+    def get_metadata(self, stream, ftype):
+        '''
+        Return whatever calibre.ebooks.metadata.rtf.get_metadata produces
+        for ``stream``. ``ftype`` is not used.
+        '''
+        # Imported inside the method, so the module is only loaded on use.
+        from calibre.ebooks.metadata.rtf import get_metadata as read_rtf
+        return read_rtf(stream)
+
+class TOPAZMetadataReader(MetadataReaderPlugin):
+    # Metadata reader for 'tpz' and 'azw1' files.
+
+    name = 'Read Topaz metadata'
+    # NOTE(review): the description names 'MOBI' although the file types are
+    # tpz/azw1 -- confirm whether that is intended.
+    description = _('Read metadata from %s files') % 'MOBI'
+    file_types = set(['tpz', 'azw1'])
+
+    def get_metadata(self, stream, ftype):
+        '''
+        Return whatever calibre.ebooks.metadata.topaz.get_metadata produces
+        for ``stream``. ``ftype`` is not used.
+        '''
+        # Imported inside the method, so the module is only loaded on use.
+        from calibre.ebooks.metadata.topaz import get_metadata as read_topaz
+        return read_topaz(stream)
+
+class TXTMetadataReader(MetadataReaderPlugin):
+    # Metadata reader for 'txt' files.
+
+    name = 'Read TXT metadata'
+    author = 'John Schember'
+    description = _('Read metadata from %s files') % 'TXT'
+    file_types = set(['txt'])
+
+    def get_metadata(self, stream, ftype):
+        '''
+        Return whatever calibre.ebooks.metadata.txt.get_metadata produces
+        for ``stream``. ``ftype`` is not used.
+        '''
+        # Imported inside the method, so the module is only loaded on use.
+        from calibre.ebooks.metadata.txt import get_metadata as read_txt
+        return read_txt(stream)
+
+class ZipMetadataReader(MetadataReaderPlugin):
+    # Metadata reader for 'zip' and 'oebzip' files.
+
+    name = 'Read ZIP metadata'
+    description = _('Read metadata from ebooks in ZIP archives')
+    file_types = set(['zip', 'oebzip'])
+
+    def get_metadata(self, stream, ftype):
+        '''
+        Return whatever calibre.ebooks.metadata.zip.get_metadata produces
+        for ``stream``. ``ftype`` is not used.
+        '''
+        # Imported inside the method, so the module is only loaded on use.
+        from calibre.ebooks.metadata.zip import get_metadata as read_zip
+        return read_zip(stream)
+

 class EPUBMetadataWriter(MetadataWriterPlugin):

@ -231,16 +268,6 @@ class LRFMetadataWriter(MetadataWriterPlugin):
        from calibre.ebooks.lrf.meta import set_metadata
        set_metadata(stream, mi)

-class RTFMetadataWriter(MetadataWriterPlugin):
-    
-    name = 'Set RTF metadata'
-    file_types = set(['rtf'])
-    description = _('Set metadata in %s files')%'RTF'
-    
-    def set_metadata(self, stream, mi, type):
-        from calibre.ebooks.metadata.rtf import set_metadata
-        set_metadata(stream, mi)
-
 class MOBIMetadataWriter(MetadataWriterPlugin):

    name        = 'Set MOBI metadata'
@ -252,20 +279,128 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
        from calibre.ebooks.metadata.mobi import set_metadata
        set_metadata(stream, mi)

+class PDBMetadataWriter(MetadataWriterPlugin):
+    # Metadata writer for 'pdb' files.
+
+    name        = 'Set PDB metadata'
+    file_types  = set(['pdb'])
+    # Worded like the other writer plugins in this file ("Set metadata in
+    # ..."), so the same translatable string is used for all of them.
+    description = _('Set metadata in %s files') % 'PDB'
+    author      = 'John Schember'
+
+    def set_metadata(self, stream, mi, type):
+        '''
+        Write the metadata ``mi`` to ``stream`` by delegating to
+        calibre.ebooks.metadata.pdb.set_metadata. ``type`` is not used.
+        '''
+        from calibre.ebooks.metadata.pdb import set_metadata
+        set_metadata(stream, mi)
+
 class PDFMetadataWriter(MetadataWriterPlugin):

    name        = 'Set PDF metadata'
    file_types  = set(['pdf'])
    description = _('Set metadata in %s files') % 'PDF'
-    author      = 'John Schember'
+    author      = 'Kovid Goyal'

    def set_metadata(self, stream, mi, type):
        from calibre.ebooks.metadata.pdf import set_metadata
        set_metadata(stream, mi)

+class RTFMetadataWriter(MetadataWriterPlugin):

-plugins = [HTML2ZIP]
+    name = 'Set RTF metadata'
+    file_types = set(['rtf'])
+    description = _('Set metadata in %s files')%'RTF'
+
+    def set_metadata(self, stream, mi, type):
+        from calibre.ebooks.metadata.rtf import set_metadata
+        set_metadata(stream, mi)
+
+
+from calibre.ebooks.comic.input import ComicInput
+from calibre.ebooks.epub.input import EPUBInput
+from calibre.ebooks.fb2.input import FB2Input
+from calibre.ebooks.html.input import HTMLInput
+from calibre.ebooks.lit.input import LITInput
+from calibre.ebooks.mobi.input import MOBIInput
+from calibre.ebooks.odt.input import ODTInput
+from calibre.ebooks.pdb.input import PDBInput
+from calibre.ebooks.pdf.input import PDFInput
+from calibre.ebooks.pml.input import PMLInput
+from calibre.ebooks.rb.input import RBInput
+from calibre.web.feeds.input import RecipeInput
+from calibre.ebooks.rtf.input import RTFInput
+from calibre.ebooks.txt.input import TXTInput
+
+from calibre.ebooks.epub.output import EPUBOutput
+from calibre.ebooks.fb2.output import FB2Output
+from calibre.ebooks.lit.output import LITOutput
+from calibre.ebooks.lrf.output import LRFOutput
+from calibre.ebooks.mobi.output import MOBIOutput
+from calibre.ebooks.oeb.output import OEBOutput
+from calibre.ebooks.pdb.output import PDBOutput
+from calibre.ebooks.pdf.output import PDFOutput
+from calibre.ebooks.pml.output import PMLOutput
+from calibre.ebooks.rb.output import RBOutput
+from calibre.ebooks.rtf.output import RTFOutput
+from calibre.ebooks.txt.output import TXTOutput
+
+from calibre.customize.profiles import input_profiles, output_profiles
+
+
+from calibre.devices.bebook.driver import BEBOOK, BEBOOK_MINI
+from calibre.devices.blackberry.driver import BLACKBERRY
+from calibre.devices.cybookg3.driver import CYBOOKG3
+from calibre.devices.eb600.driver import EB600
+from calibre.devices.jetbook.driver import JETBOOK
+from calibre.devices.kindle.driver import KINDLE
+from calibre.devices.kindle.driver import KINDLE2
+from calibre.devices.prs500.driver import PRS500
+from calibre.devices.prs505.driver import PRS505
+from calibre.devices.prs700.driver import PRS700
+
+
+plugins = []
+plugins += [
+    ComicInput,
+    EPUBInput,
+    FB2Input,
+    HTMLInput,
+    LITInput,
+    MOBIInput,
+    ODTInput,
+    PDBInput,
+    PDFInput,
+    PMLInput,
+    RBInput,
+    RecipeInput,
+    RTFInput,
+    TXTInput,
+]
+plugins += [
+    EPUBOutput,
+    FB2Output,
+    LITOutput,
+    LRFOutput,
+    MOBIOutput,
+    OEBOutput,
+    PDBOutput,
+    PDFOutput,
+    PMLOutput,
+    RBOutput,
+    RTFOutput,
+    TXTOutput,
+]
+plugins += [
+    BEBOOK,
+    BEBOOK_MINI,
+    BLACKBERRY,
+    CYBOOKG3,
+    EB600,
+    JETBOOK,
+    KINDLE,
+    KINDLE2,
+    PRS500,
+    PRS505,
+    PRS700,
+]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataWriter')]
+plugins += input_profiles + output_profiles
--- a/src/calibre/customize/conversion.py
+++ b/src/calibre/customize/conversion.py
@ -0,0 +1,293 @@
+from __future__ import with_statement
+'''
+Defines the plugin system for conversions.
+'''
+import re, os, shutil
+
+from calibre import CurrentDir
+from calibre.customize import Plugin
+
+class ConversionOption(object):
+
+    '''
+    Class representing conversion options
+
+    :param name: Option name; must be a valid Python identifier.
+    :param help: Help text for the option; must be non-empty.
+    :param long_switch: Long switch for the option. Defaults to ``name``
+                        with underscores replaced by hyphens.
+    :param short_switch: Optional short switch.
+    :param choices: Optional collection of allowed values; it is stored as
+                    given and not checked by this class.
+    '''
+
+    def __init__(self, name=None, help=None, long_switch=None,
+                 short_switch=None, choices=None):
+        self.name = name
+        self.help = help
+        self.long_switch = long_switch
+        self.short_switch = short_switch
+        self.choices = choices
+
+        # Only derive the switch when there is a name to derive it from; a
+        # missing name is reported by validate_parameters() below.
+        if self.long_switch is None and self.name:
+            self.long_switch = self.name.replace('_', '-')
+
+        self.validate_parameters()
+
+    def validate_parameters(self):
+        '''
+        Validate the parameters passed to :method:`__init__`.
+
+        Raises ValueError if the name is missing or is not a valid Python
+        identifier, or if no help text was given.
+        '''
+        # \Z anchors the match at the end of the string, so a name such as
+        # 'foo-bar' is rejected instead of matching on its 'foo' prefix.
+        if not self.name or \
+                re.match(r'[a-zA-Z_][a-zA-Z0-9_]*\Z', self.name) is None:
+            raise ValueError('%s is not a valid Python identifier' %
+                             (self.name,))
+        if not self.help:
+            raise ValueError('You must set the help text')
+
+    def __hash__(self):
+        # Options hash by name only.
+        return hash(self.name)
+
+    def __eq__(self, other):
+        # Equality is decided purely by hash, so an option compares equal to
+        # any object whose hash equals that of its name, including the name
+        # string itself.
+        return hash(self) == hash(other)
+
+    def clone(self):
+        '''
+        Return a new ConversionOption built from this option's attributes.
+        '''
+        return ConversionOption(name=self.name, help=self.help,
+                long_switch=self.long_switch, short_switch=self.short_switch,
+                choices=self.choices)
+
+class OptionRecommendation(object):
+    # Recommendation levels.
+    LOW, MED, HIGH = 1, 2, 3
+
+    def __init__(self, recommended_value=None, level=LOW, **kwargs):
+        '''
+        An option recommendation. That is, an option as well as its recommended
+        value and the level of the recommendation.
+
+        If no ``option`` keyword is given, the remaining keyword arguments
+        are used to construct a new :class:`ConversionOption`.
+        '''
+        self.level = level
+        self.recommended_value = recommended_value
+        option = kwargs.pop('option', None)
+        if option is None:
+            option = ConversionOption(**kwargs)
+        self.option = option
+
+        self.validate_parameters()
+
+    @property
+    def help(self):
+        # Help text of the wrapped option.
+        return self.option.help
+
+    def clone(self):
+        '''
+        Return a new OptionRecommendation with the same value and level and
+        a clone of the wrapped option.
+        '''
+        duplicate = self.option.clone()
+        return OptionRecommendation(recommended_value=self.recommended_value,
+                level=self.level, option=duplicate)
+
+    def validate_parameters(self):
+        '''
+        Raise ValueError if the recommended value is not among the option's
+        choices (when it has any), or is neither None nor an
+        int/float/str/unicode instance.
+        '''
+        value, option = self.recommended_value, self.option
+        if option.choices and value not in option.choices:
+            raise ValueError('OpRec: %s: Recommended value not in choices'%
+                             option.name)
+        is_scalar = isinstance(value, (int, float, str, unicode))
+        if value is not None and not is_scalar:
+            raise ValueError('OpRec: %s:'%option.name + repr(value) +
+                             ' is not a string or a number')
+
+class DummyReporter(object):
+    '''
+    Progress reporter that ignores every report.
+    '''
+
+    def __call__(self, percent, msg=''):
+        # No-op: both arguments are discarded.
+        return None
+
+class InputFormatPlugin(Plugin):
+    '''
+    InputFormatPlugins are responsible for converting a document into
+    HTML+OPF+CSS+etc.
+    The results of the conversion *must* be encoded in UTF-8.
+    The main action happens in :method:`convert`.
+    '''
+
+    type = _('Conversion Input')
+    can_be_disabled = False
+    supported_platforms = ['windows', 'osx', 'linux']
+
+    #: Set of file types for which this plugin should be run
+    #: For example: ``set(['azw', 'mobi', 'prc'])``
+    file_types     = set([])
+
+    #: If True, this input plugin generates a collection of images,
+    #: one per HTML file. You can obtain access to the images via
+    #: convenience method, :method:`get_image_collection`.
+    is_image_collection = False
+
+    #: Options shared by all Input format plugins. Do not override
+    #: in sub-classes. Use :member:`options` instead. Every option must be an
+    #: instance of :class:`OptionRecommendation`.
+    common_options = set([
+        OptionRecommendation(name='debug_input',
+            recommended_value=None, level=OptionRecommendation.LOW,
+            help=_('Save the output from the input plugin to the specified '
+                   'directory. Useful if you are unsure at which stage '
+                   'of the conversion process a bug is occurring. '
+                   'WARNING: This completely deletes the contents of '
+                   'the specified directory.')
+        ),
+
+        OptionRecommendation(name='input_encoding',
+            recommended_value=None, level=OptionRecommendation.LOW,
+            help=_('Specify the character encoding of the input document. If '
+                   'set this option will override any encoding declared by the '
+                   'document itself. Particularly useful for documents that '
+                   'do not declare an encoding or that have erroneous '
+                   'encoding declarations.')
+        ),
+
+    ])
+
+    #: Options to customize the behavior of this plugin. Every option must be an
+    #: instance of :class:`OptionRecommendation`.
+    options = set([])
+
+    #: A set of 3-tuples of the form
+    #: (option_name, recommended_value, recommendation_level)
+    recommendations = set([])
+
+    def __init__(self, *args):
+        Plugin.__init__(self, *args)
+        self.report_progress = DummyReporter()
+
+    def get_images(self):
+        '''
+        Return a list of absolute paths to the images, if this input plugin
+        represents an image collection. The list of images is in the same order
+        as the spine and the TOC.
+        '''
+        raise NotImplementedError()
+
+    def preprocess_html(self, html):
+        '''
+        This method is called by the conversion pipeline on all HTML before it
+        is parsed. It is meant to be used to do any required preprocessing on
+        the HTML, like removing hard line breaks, etc.
+
+        :param html: A unicode string
+        :return: A unicode string
+        '''
+        return html
+
+
+    def convert(self, stream, options, file_ext, log, accelerators):
+        '''
+        This method must be implemented in sub-classes. It must return
+        the path to the created OPF file or an :class:`OEBBook` instance.
+        All output should be contained in the current directory.
+        If this plugin creates files outside the current
+        directory they must be deleted/marked for deletion before this method
+        returns.
+
+        :param stream:   A file like object that contains the input file.
+
+        :param options:  Options to customize the conversion process.
+                         Guaranteed to have attributes corresponding
+                         to all the options declared by this plugin. In
+                         addition, it will have a verbose attribute that
+                         takes integral values from zero upwards. Higher numbers
+                         mean more verbose output. Another useful attribute is
+                         ``input_profile`` that is an instance of
+                         :class:`calibre.customize.profiles.InputProfile`.
+
+        :param file_ext: The extension (without the .) of the input file. It
+                         is guaranteed to be one of the `file_types` supported
+                         by this plugin.
+
+        :param log: A :class:`calibre.utils.logging.Log` object. All output
+                    should use this object.
+
+        :param accelerators: A dictionary of various information that the input
+                             plugin can get easily that would speed up the
+                             subsequent stages of the conversion.
+
+        '''
+        raise NotImplementedError
+
+    def __call__(self, stream, options, file_ext, log,
+                 accelerators, output_dir):
+        log('InputFormatPlugin: %s running'%self.name, end=' ')
+        if hasattr(stream, 'name'):
+            log('on', stream.name)
+
+        with CurrentDir(output_dir):
+            for x in os.listdir('.'):
+                shutil.rmtree(x) if os.path.isdir(x) else os.remove(x)
+
+            ret = self.convert(stream, options, file_ext,
+                               log, accelerators)
+
+        if options.debug_input is not None:
+            options.debug_input = os.path.abspath(options.debug_input)
+            if not os.path.exists(options.debug_input):
+                os.makedirs(options.debug_input)
+            if isinstance(ret, basestring):
+                shutil.rmtree(options.debug_input)
+                shutil.copytree(output_dir, options.debug_input)
+            else:
+                from calibre.ebooks.oeb.writer import OEBWriter
+                w = OEBWriter(pretty_print=options.pretty_print)
+                w(ret, options.debug_input)
+
+            log.info('Input debug saved to:', options.debug_input)
+
+        return ret
+
+
+class OutputFormatPlugin(Plugin):
+    '''
+    OutputFormatPlugins are responsible for converting an OEB document
+    (OPF+HTML) into an output ebook.
+
+    The OEB document can be assumed to be encoded in UTF-8.
+    The main action happens in :method:`convert`.
+    '''
+
+    type = _('Conversion Output')
+    can_be_disabled = False
+    supported_platforms = ['windows', 'osx', 'linux']
+
+    #: The file type (extension without leading period) that this
+    #: plugin outputs
+    file_type     = None
+
+    #: Options shared by all Output format plugins. Do not override
+    #: in sub-classes. Use :member:`options` instead. Every option must be an
+    #: instance of :class:`OptionRecommendation`.
+    common_options = set([
+        OptionRecommendation(name='pretty_print',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('If specified, the output plugin will try to create output '
+            'that is as human readable as possible. May not have any effect '
+            'for some output plugins.')
+        ),
+        ])
+
+    #: Options to customize the behavior of this plugin. Every option must be an
+    #: instance of :class:`OptionRecommendation`.
+    options = set([])
+
+    #: A set of 3-tuples of the form
+    #: (option_name, recommended_value, recommendation_level)
+    recommendations = set([])
+
+    def __init__(self, *args):
+        Plugin.__init__(self, *args)
+        self.report_progress = DummyReporter()
+
+
+    def convert(self, oeb_book, output, input_plugin, opts, log):
+        '''
+        Render the contents of `oeb_book` (which is an instance of
+        :class:`calibre.ebooks.oeb.OEBBook`) to the file specified by output.
+
+        :param output: Either a file like object or a string. If it is a string
+        it is the path to a directory that may or may not exist. The output
+        plugin should write its output into that directory. If it is a file like
+        object, the output plugin should write its output into the file.
+
+        :param input_plugin: The input plugin that was used at the beginning of
+        the conversion pipeline.
+
+        :param opts: Conversion options. Guaranteed to have attributes
+        corresponding to the OptionRecommendations of this plugin.
+
+        :param log: The logger. Print debug/info messages etc. using this.
+        '''
+        raise NotImplementedError
+
--- a/src/calibre/customize/profiles.py
+++ b/src/calibre/customize/profiles.py
@ -0,0 +1,241 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from itertools import izip
+
+from calibre.customize import Plugin as _Plugin
+
+FONT_SIZES = [('xx-small', 1),
+              ('x-small',  None),
+              ('small',    2),
+              ('medium',   3),
+              ('large',    4),
+              ('x-large',  5),
+              ('xx-large', 6),
+              (None,       7)]
+
+
+class Plugin(_Plugin):
+
+    fbase  = 12
+    fsizes = [5, 7, 9, 12, 13.5, 17, 20, 22, 24]
+    screen_size = (1600, 1200)
+    dpi = 100
+
+    def __init__(self, *args, **kwargs):
+        _Plugin.__init__(self, *args, **kwargs)
+        self.width, self.height = self.screen_size
+        fsizes = list(self.fsizes)
+        self.fkey = list(self.fsizes)
+        self.fsizes = []
+        for (name, num), size in izip(FONT_SIZES, fsizes):
+            self.fsizes.append((name, num, float(size)))
+        self.fnames = dict((name, sz) for name, _, sz in self.fsizes if name)
+        self.fnums = dict((num, sz) for _, num, sz in self.fsizes if num)
+
+
+class InputProfile(Plugin):
+
+    author = 'Kovid Goyal'
+    supported_platforms = set(['windows', 'osx', 'linux'])
+    can_be_disabled = False
+    type = _('Input profile')
+
+    name        = 'Default Input Profile'
+    short_name  = 'default' # Used in the CLI so don't use spaces etc. in it
+    description = _('This profile tries to provide sane defaults and is useful '
+                    'if you know nothing about the input document.')
+
+
+class SonyReaderInput(InputProfile):
+
+    name        = 'Sony Reader'
+    short_name  = 'sony'
+    description = _('This profile is intended for the SONY PRS line. '
+                    'The 500/505/700 etc.')
+
+    screen_size               = (584, 754)
+    dpi                       = 168.451
+    fbase                     = 12
+    fsizes                    = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
+
+
+class MSReaderInput(InputProfile):
+
+    name        = 'Microsoft Reader'
+    short_name  = 'msreader'
+    description = _('This profile is intended for the Microsoft Reader.')
+
+    screen_size               = (480, 652)
+    dpi                       = 96
+    fbase                     = 13
+    fsizes                    = [10, 11, 13, 16, 18, 20, 22, 26]
+
+class MobipocketInput(InputProfile):
+
+    name        = 'Mobipocket Books'
+    short_name  = 'mobipocket'
+    description = _('This profile is intended for the Mobipocket books.')
+
+    # Unfortunately MOBI books are not narrowly targeted, so this information is
+    # quite likely to be spurious
+    screen_size               = (600, 800)
+    dpi                       = 96
+    fbase                     = 18
+    fsizes                    = [14, 14, 16, 18, 20, 22, 24, 26]
+
+class HanlinV3Input(InputProfile):
+
+    name        = 'Hanlin V3'
+    short_name  = 'hanlinv3'
+    description = _('This profile is intended for the Hanlin V3 and its clones.')
+
+    # Screen size is a best guess
+    screen_size               = (584, 754)
+    dpi                       = 168.451
+    fbase                     = 16
+    fsizes                    = [12, 12, 14, 16, 18, 20, 22, 24]
+
+class CybookG3Input(InputProfile):
+
+    name        = 'Cybook G3'
+    short_name  = 'cybookg3'
+    description = _('This profile is intended for the Cybook G3.')
+
+    # Screen size is a best guess
+    screen_size               = (600, 800)
+    dpi                       = 168.451
+    fbase                     = 16
+    fsizes                    = [12, 12, 14, 16, 18, 20, 22, 24]
+
+class KindleInput(InputProfile):
+
+    name        = 'Kindle'
+    short_name  = 'kindle'
+    description = _('This profile is intended for the Amazon Kindle.')
+
+    # Screen size is a best guess
+    screen_size               = (525, 640)
+    dpi                       = 168.451
+    fbase                     = 16
+    fsizes                    = [12, 12, 14, 16, 18, 20, 22, 24]
+
+
+input_profiles = [InputProfile, SonyReaderInput, MSReaderInput,
+        MobipocketInput, HanlinV3Input, CybookG3Input, KindleInput]
+
+
+class OutputProfile(Plugin):
+
+    author = 'Kovid Goyal'
+    supported_platforms = set(['windows', 'osx', 'linux'])
+    can_be_disabled = False
+    type = _('Output profile')
+
+    name        = 'Default Output Profile'
+    short_name  = 'default' # Used in the CLI so don't use spaces etc. in it
+    description = _('This profile tries to provide sane defaults and is useful '
+                    'if you want to produce a document intended to be read at a '
+                    'computer or on a range of devices.')
+
+    # The image size for comics
+    comic_screen_size = (584, 754)
+
+    @classmethod
+    def tags_to_string(cls, tags):
+        return ', '.join(tags)
+
+class SonyReaderOutput(OutputProfile):
+
+    name        = 'Sony Reader'
+    short_name  = 'sony'
+    description = _('This profile is intended for the SONY PRS line. '
+                    'The 500/505/700 etc.')
+
+    screen_size               = (600, 775)
+    dpi                       = 168.451
+    fbase                     = 12
+    fsizes                    = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
+
+class SonyReaderLandscapeOutput(SonyReaderOutput):
+
+    name        = 'Sony Reader Landscape'
+    short_name  = 'sony-landscape'
+    description = _('This profile is intended for the SONY PRS line. '
+                    'The 500/505/700 etc, in landscape mode. Mainly useful '
+                    'for comics.')
+
+    screen_size               = (784, 1012)
+    comic_screen_size         = (784, 1012)
+
+
+class MSReaderOutput(OutputProfile):
+
+    name        = 'Microsoft Reader'
+    short_name  = 'msreader'
+    description = _('This profile is intended for the Microsoft Reader.')
+
+    screen_size               = (480, 652)
+    dpi                       = 96
+    fbase                     = 13
+    fsizes                    = [10, 11, 13, 16, 18, 20, 22, 26]
+
+class MobipocketOutput(OutputProfile):
+
+    name        = 'Mobipocket Books'
+    short_name  = 'mobipocket'
+    description = _('This profile is intended for the Mobipocket books.')
+
+    # Unfortunately MOBI books are not narrowly targeted, so this information is
+    # quite likely to be spurious
+    screen_size               = (600, 800)
+    dpi                       = 96
+    fbase                     = 18
+    fsizes                    = [14, 14, 16, 18, 20, 22, 24, 26]
+
+class HanlinV3Output(OutputProfile):
+
+    name        = 'Hanlin V3'
+    short_name  = 'hanlinv3'
+    description = _('This profile is intended for the Hanlin V3 and its clones.')
+
+    # Screen size is a best guess
+    screen_size               = (584, 754)
+    dpi                       = 168.451
+    fbase                     = 16
+    fsizes                    = [12, 12, 14, 16, 18, 20, 22, 24]
+
+class CybookG3Output(OutputProfile):
+
+    name        = 'Cybook G3'
+    short_name  = 'cybookg3'
+    description = _('This profile is intended for the Cybook G3.')
+
+    # Screen size is a best guess
+    screen_size               = (600, 800)
+    dpi                       = 168.451
+    fbase                     = 16
+    fsizes                    = [12, 12, 14, 16, 18, 20, 22, 24]
+
+class KindleOutput(OutputProfile):
+
+    name        = 'Kindle'
+    short_name  = 'kindle'
+    description = _('This profile is intended for the Amazon Kindle.')
+
+    # Screen size is a best guess
+    screen_size               = (525, 640)
+    dpi                       = 168.451
+    fbase                     = 16
+    fsizes                    = [12, 12, 14, 16, 18, 20, 22, 24]
+
+    @classmethod
+    def tags_to_string(cls, tags):
+        return 'ttt '.join(tags)+'ttt '
+
+
+output_profiles = [OutputProfile, SonyReaderOutput, MSReaderOutput,
+        MobipocketOutput, HanlinV3Output, CybookG3Output, KindleOutput,
+        SonyReaderLandscapeOutput]
--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@ -6,13 +6,15 @@ import os, shutil, traceback, functools, sys, re

 from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \
                              MetadataWriterPlugin
+from calibre.customize.conversion import InputFormatPlugin, OutputFormatPlugin
+from calibre.customize.profiles import InputProfile, OutputProfile
 from calibre.customize.builtins import plugins as builtin_plugins
 from calibre.constants import __version__, iswindows, isosx
+from calibre.devices.interface import DevicePlugin
 from calibre.ebooks.metadata import MetaInformation
 from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
                                 plugin_dir, OptionParser

-
 version = tuple([int(x) for x in __version__.split('.')])

 platform = 'linux'
@ -47,7 +49,7 @@ def load_plugin(path_to_zip_file):

    :return: A :class:`Plugin` instance.
    '''
-    print 'Loading plugin from', path_to_zip_file
+    #print 'Loading plugin from', path_to_zip_file
    if not os.access(path_to_zip_file, os.R_OK):
        raise PluginNotFound
    zf = ZipFile(path_to_zip_file)
@ -77,6 +79,15 @@ _on_import           = {}
 _on_preprocess       = {}
 _on_postprocess      = {}

+def input_profiles():
+    for plugin in _initialized_plugins:
+        if isinstance(plugin, InputProfile):
+            yield plugin
+
+def output_profiles():
+    for plugin in _initialized_plugins:
+        if isinstance(plugin, OutputProfile):
+            yield plugin


 def reread_filetype_plugins():
@ -121,7 +132,19 @@ def reread_metadata_plugins():
                    _metadata_writers[ft] = []
                _metadata_writers[ft].append(plugin)

+def metadata_readers():
+    ans = set([])
+    for plugins in _metadata_readers.values():
+        for plugin in plugins:
+            ans.add(plugin)
+    return ans

+def metadata_writers():
+    ans = set([])
+    for plugins in _metadata_writers.values():
+        for plugin in plugins:
+            ans.add(plugin)
+    return ans

 def get_file_type_metadata(stream, ftype):
    mi = MetaInformation(None, None)
@ -229,6 +252,47 @@ def find_plugin(name):
        if plugin.name == name:
            return plugin

+def input_format_plugins():
+    for plugin in _initialized_plugins:
+        if isinstance(plugin, InputFormatPlugin):
+            yield plugin
+
+def plugin_for_input_format(fmt):
+    for plugin in input_format_plugins():
+        if fmt.lower() in plugin.file_types:
+            return plugin
+
+def available_input_formats():
+    formats = set([])
+    for plugin in input_format_plugins():
+        if not is_disabled(plugin):
+            for format in plugin.file_types:
+                formats.add(format)
+    return formats
+
+def output_format_plugins():
+    for plugin in _initialized_plugins:
+        if isinstance(plugin, OutputFormatPlugin):
+            yield plugin
+
+def plugin_for_output_format(fmt):
+    for plugin in output_format_plugins():
+        if fmt.lower() == plugin.file_type:
+            return plugin
+
+def available_output_formats():
+    formats = set([])
+    for plugin in output_format_plugins():
+        if not is_disabled(plugin):
+            formats.add(plugin.file_type)
+    return formats
+
+def device_plugins():
+    for plugin in _initialized_plugins:
+        if isinstance(plugin, DevicePlugin):
+            if not is_disabled(plugin):
+                yield plugin
+
 def disable_plugin(plugin_or_name):
    x = getattr(plugin_or_name, 'name', plugin_or_name)
    plugin = find_plugin(x)
--- a/src/calibre/devices/init.py
+++ b/src/calibre/devices/init.py
@ -5,21 +5,6 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 Device drivers.
 '''

-def devices():
-    from calibre.devices.prs500.driver import PRS500
-    from calibre.devices.prs505.driver import PRS505
-    from calibre.devices.prs700.driver import PRS700
-    from calibre.devices.cybookg3.driver import CYBOOKG3
-    from calibre.devices.kindle.driver import KINDLE
-    from calibre.devices.kindle.driver import KINDLE2
-    from calibre.devices.bebook.driver import BEBOOK
-    from calibre.devices.bebook.driver import BEBOOKMINI
-    from calibre.devices.blackberry.driver import BLACKBERRY
-    from calibre.devices.eb600.driver import EB600
-    from calibre.devices.jetbook.driver import JETBOOK
-    return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2,
-            BEBOOK, BEBOOKMINI, BLACKBERRY, EB600, JETBOOK)
-
 import time

 DAY_MAP   = dict(Sun=0, Mon=1, Tue=2, Wed=3, Thu=4, Fri=5, Sat=6)
--- a/src/calibre/devices/bebook/driver.py
+++ b/src/calibre/devices/bebook/driver.py
@ -7,19 +7,25 @@ Device driver for BeBook
 from calibre.devices.usbms.driver import USBMS

 class BEBOOK(USBMS):
+    name           = 'BeBook driver'
+    description    = _('Communicate with the BeBook eBook reader.')
+    author         = _('Tijmen Ruizendaal')
+    supported_platforms = ['windows', 'osx', 'linux']
+
+
    # Ordered list of supported formats
    FORMATS     = ['mobi', 'epub', 'pdf', 'txt']

    VENDOR_ID   = [0x0525]
    PRODUCT_ID  = [0x8803, 0x6803]
-    BCD		= [0x312]
+    BCD         = [0x312]

-    VENDOR_NAME = 'LINUX'
+    VENDOR_NAME      = 'LINUX'
    WINDOWS_MAIN_MEM = 'FILE-STOR_GADGET'
    WINDOWS_CARD_MEM = 'FILE-STOR_GADGET'

    OSX_MAIN_MEM = 'BeBook Internal Memory'
-    OSX_CARD_MEM = 'BeBook Storage Card'
+    OSX_CARD_A_MEM = 'BeBook Storage Card'

    MAIN_MEMORY_VOLUME_LABEL  = 'BeBook Internal Memory'
    STORAGE_CARD_VOLUME_LABEL = 'BeBook Storage Card'
@ -30,20 +36,22 @@ class BEBOOK(USBMS):

    def windows_sort_drives(self, drives):
        main = drives.get('main', None)
-        card = drives.get('card', None)
+        card = drives.get('carda', None)
        if card and main and card < main:
            drives['main'] = card
-            drives['card'] = main
+            drives['carda'] = main

        return drives


+class BEBOOK_MINI(BEBOOK):
+    name           = 'BeBook Mini driver'
+    description    = _('Communicate with the BeBook Mini eBook reader.')

-class BEBOOKMINI(BEBOOK):

    VENDOR_ID	= [0x0492]
    PRODUCT_ID	= [0x8813]
-    BCD		= [0x319]
+    BCD         = [0x319]

    OSX_MAIN_MEM = 'BeBook Mini Internal Memory'
    OSX_CARD_MEM = 'BeBook Mini Storage Card'
--- a/src/calibre/devices/blackberry/driver.py
+++ b/src/calibre/devices/blackberry/driver.py
@ -7,6 +7,12 @@ __docformat__ = 'restructuredtext en'
 from calibre.devices.usbms.driver import USBMS

 class BLACKBERRY(USBMS):
+
+    name           = 'Blackberry Device Interface'
+    description    = _('Communicate with the Blackberry smart phone.')
+    author         = _('Kovid Goyal')
+    supported_platforms = ['windows', 'linux']
+
    # Ordered list of supported formats
    FORMATS     = ['mobi', 'prc']
    
@ -16,15 +22,11 @@ class BLACKBERRY(USBMS):
    
    VENDOR_NAME = 'RIM'
    WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
-    #WINDOWS_CARD_MEM = 'CARD_STORAGE'
    
    #OSX_MAIN_MEM = 'Kindle Internal Storage Media'
-    #OSX_CARD_MEM = 'Kindle Card Storage Media'
    
    MAIN_MEMORY_VOLUME_LABEL  = 'Blackberry Main Memory'
-    #STORAGE_CARD_VOLUME_LABEL = 'Kindle Storage Card'
    
    EBOOK_DIR_MAIN = 'ebooks'
-    #EBOOK_DIR_CARD = "documents"
    SUPPORTS_SUB_DIRS = True

--- a/src/calibre/devices/cybookg3/driver.py
+++ b/src/calibre/devices/cybookg3/driver.py
@ -7,11 +7,17 @@ Device driver for Bookeen's Cybook Gen 3
 import os, shutil
 from itertools import cycle

-from calibre.devices.errors import FreeSpaceError
+from calibre.devices.errors import DeviceError, FreeSpaceError
 from calibre.devices.usbms.driver import USBMS
 import calibre.devices.cybookg3.t2b as t2b

 class CYBOOKG3(USBMS):
+    name           = 'Cybook Gen 3 Device Interface'
+    description    = _('Communicate with the Cybook eBook reader.')
+    author         = _('John Schember')
+    supported_platforms = ['windows', 'osx', 'linux']
+
+
    # Ordered list of supported formats
    # Be sure these have an entry in calibre.devices.mime
    FORMATS     = ['mobi', 'prc', 'html', 'pdf', 'rtf', 'txt']
@ -22,60 +28,45 @@ class CYBOOKG3(USBMS):

    VENDOR_NAME = 'BOOKEEN'
    WINDOWS_MAIN_MEM = 'CYBOOK_GEN3__-FD'
-    WINDOWS_CARD_MEM = 'CYBOOK_GEN3__-SD'
+    WINDOWS_CARD_A_MEM = 'CYBOOK_GEN3__-SD'

    OSX_MAIN_MEM = 'Bookeen Cybook Gen3 -FD Media'
-    OSX_CARD_MEM = 'Bookeen Cybook Gen3 -SD Media'
+    OSX_CARD_A_MEM = 'Bookeen Cybook Gen3 -SD Media'

    MAIN_MEMORY_VOLUME_LABEL  = 'Cybook Gen 3 Main Memory'
    STORAGE_CARD_VOLUME_LABEL = 'Cybook Gen 3 Storage Card'

    EBOOK_DIR_MAIN = "eBooks"
-    EBOOK_DIR_CARD = "eBooks"
+    EBOOK_DIR_CARD_A = "eBooks"
    THUMBNAIL_HEIGHT = 144
    SUPPORTS_SUB_DIRS = True

-    def upload_books(self, files, names, on_card=False, end_session=True,
+    def upload_books(self, files, names, on_card=None, end_session=True,
                     metadata=None):
-        if on_card and not self._card_prefix:
-            raise ValueError(_('The reader has no storage card connected.'))
-
-        if not on_card:
-            path = os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN)
-        else:
-            path = os.path.join(self._card_prefix, self.EBOOK_DIR_CARD)
-
-        def get_size(obj):
-            if hasattr(obj, 'seek'):
-                obj.seek(0, os.SEEK_END)
-                size = obj.tell()
-                obj.seek(0)
-                return size
-            return os.path.getsize(obj)
-
-        sizes = [get_size(f) for f in files]
-        size = sum(sizes)
-
-        if on_card and size > self.free_space()[2] - 1024*1024:
-            raise FreeSpaceError(_("There is insufficient free space on the storage card"))
-        if not on_card and size > self.free_space()[0] - 2*1024*1024:
-            raise FreeSpaceError(_("There is insufficient free space in main memory"))
+        path = self._sanity_check(on_card, files)

        paths = []
        names = iter(names)
        metadata = iter(metadata)

-        for infile in files:
+        for i, infile in enumerate(files):
            newpath = path
            mdata = metadata.next()

-            if self.SUPPORTS_SUB_DIRS:
-                if 'tags' in mdata.keys():
-                    for tag in mdata['tags']:
-                        if tag.startswith('/'):
-                            newpath += tag
-                            newpath = os.path.normpath(newpath)
-                            break
+            if 'tags' in mdata.keys():
+                for tag in mdata['tags']:
+                    if tag.startswith(_('News')):
+                        newpath = os.path.join(newpath, 'news')
+                        newpath = os.path.join(newpath, mdata.get('title', ''))
+                        newpath = os.path.join(newpath, mdata.get('timestamp', ''))
+                    elif tag.startswith('/'):
+                        newpath += tag
+                        newpath = os.path.normpath(newpath)
+                        break
+
+            if newpath == path:
+                newpath = os.path.join(newpath, mdata.get('authors', _('Unknown')))
+                newpath = os.path.join(newpath, mdata.get('title', _('Unknown')))

            if not os.path.exists(newpath):
                os.makedirs(newpath)
@ -103,10 +94,15 @@ class CYBOOKG3(USBMS):
            t2b.write_t2b(t2bfile, coverdata)
            t2bfile.close()

+            self.report_progress(i / float(len(files)), _('Transferring books to device...'))
+
+        self.report_progress(1.0, _('Transferring books to device...'))
+        
        return zip(paths, cycle([on_card]))

    def delete_books(self, paths, end_session=True):
-        for path in paths:
+        for i, path in enumerate(paths):
+            self.report_progress((i+1) / float(len(paths)), _('Removing books from device...'))
            if os.path.exists(path):
                os.unlink(path)

@ -115,6 +111,8 @@ class CYBOOKG3(USBMS):
                # Delete the ebook auxiliary file
                if os.path.exists(filepath + '.mbp'):
                    os.unlink(filepath + '.mbp')
+                if os.path.exists(filepath + '.dat'):
+                    os.unlink(filepath + '.dat')

                # Delete the thumbnails file auto generated for the ebook
                if os.path.exists(filepath + '_6090.t2b'):
@ -124,4 +122,4 @@ class CYBOOKG3(USBMS):
                    os.removedirs(os.path.dirname(path))
                except:
                    pass
-
+        self.report_progress(1.0, _('Removing books from device...'))
--- a/src/calibre/devices/eb600/driver.py
+++ b/src/calibre/devices/eb600/driver.py
@ -14,6 +14,11 @@ Windows PNP strings:
 from calibre.devices.usbms.driver import USBMS

 class EB600(USBMS):
+    name           = 'Netronix EB600 Device Interface'
+    description    = _('Communicate with the EB600 eBook reader.')
+    author         = _('Kovid Goyal')
+    supported_platforms = ['windows', 'osx', 'linux']
+
    # Ordered list of supported formats
    FORMATS     = ['epub', 'prc', 'chm', 'djvu', 'html', 'rtf', 'txt', 'pdf']
    DRM_FORMATS = ['prc', 'mobi', 'html', 'pdf', 'txt']
@ -24,24 +29,24 @@ class EB600(USBMS):

    VENDOR_NAME      = 'NETRONIX'
    WINDOWS_MAIN_MEM = 'EBOOK'
-    WINDOWS_CARD_MEM = 'EBOOK'
+    WINDOWS_CARD_A_MEM = 'EBOOK'

    OSX_MAIN_MEM = 'EB600 Internal Storage Media'
-    OSX_CARD_MEM = 'EB600 Card Storage Media'
+    OSX_CARD_A_MEM = 'EB600 Card Storage Media'

    MAIN_MEMORY_VOLUME_LABEL  = 'EB600 Main Memory'
    STORAGE_CARD_VOLUME_LABEL = 'EB600 Storage Card'

    EBOOK_DIR_MAIN = ''
-    EBOOK_DIR_CARD = ''
+    EBOOK_DIR_CARD_A = ''
    SUPPORTS_SUB_DIRS = True

    def windows_sort_drives(self, drives):
        main = drives.get('main', None)
-        card = drives.get('card', None)
+        card = drives.get('carda', None)
        if card and main and card < main:
            drives['main'] = card
-            drives['card'] = main
+            drives['carda'] = main

        return drives

--- a/src/calibre/devices/interface.py
+++ b/src/calibre/devices/interface.py
@ -6,8 +6,9 @@ the GUI. A device backend must subclass the L{Device} class. See prs500.py for
 a backend that implement the Device interface for the SONY PRS500 Reader.
 """

+from calibre.customize import Plugin

-class Device(object):
+class DevicePlugin(Plugin):
    """
    Defines the interface that should be implemented by backends that
    communicate with an ebook reader.
@ -16,6 +17,8 @@ class Device(object):
    the front-end needs to call several methods one after another, in which case
    the USB session should not be closed after each method call.
    """
+    type = _('Device Interface')
+
    # Ordered list of supported formats
    FORMATS     = ["lrf", "rtf", "pdf", "txt"]
    VENDOR_ID   = 0x0000
@ -27,7 +30,7 @@ class Device(object):
    # Whether the metadata on books can be set via the GUI.
    CAN_SET_METADATA = True

-    def __init__(self, key='-1', log_packets=False, report_progress=None) :
+    def reset(self, key='-1', log_packets=False, report_progress=None) :
        """
        @param key: The key to unlock the device
        @param log_packets: If true the packet stream to/from the device is logged
@ -87,7 +90,13 @@ class Device(object):

    def card_prefix(self, end_session=True):
        '''
-        Return prefix to paths on the card or '' if no cards present.
+        Return a 2 element list of the prefix to paths on the cards.
+        If no card is present None is set for the card's prefix.
+        E.G.
+        ('/place', '/place2')
+        (None, '/place2')
+        ('/place', None)
+        (None, None)
        '''
        raise NotImplementedError()

@ -95,8 +104,8 @@ class Device(object):
        """
        Get total space available on the mountpoints:
            1. Main memory
-            2. Memory Stick
-            3. SD Card
+            2. Memory Card A
+            3. Memory Card B

        @return: A 3 element list with total space in bytes of (1, 2, 3). If a
        particular device doesn't have any of these locations it should return 0.
@ -115,24 +124,25 @@ class Device(object):
        """
        raise NotImplementedError()

-    def books(self, oncard=False, end_session=True):
+    def books(self, oncard=None, end_session=True):
        """
        Return a list of ebooks on the device.
-        @param oncard:  If True return a list of ebooks on the storage card, 
-                        otherwise return list of ebooks in main memory of device.
-                        If True and no books on card return empty list. 
+        @param oncard:  If 'carda' or 'cardb' return a list of ebooks on the
+                        specific storage card, otherwise return list of ebooks
+                        in main memory of device. If a card is specified and no
+                        books are on the card return empty list.
        @return: A BookList.
        """
        raise NotImplementedError()

-    def upload_books(self, files, names, on_card=False, end_session=True,
+    def upload_books(self, files, names, on_card=None, end_session=True,
                     metadata=None):
        '''
        Upload a list of books to the device. If a file already
        exists on the device, it should be replaced.
        This method should raise a L{FreeSpaceError} if there is not enough
        free space on the device. The text of the FreeSpaceError must contain the
-        word "card" if C{on_card} is True otherwise it must contain the word "memory".
+        word "card" if C{on_card} is not None otherwise it must contain the word "memory".
        @param files: A list of paths and/or file-like objects.
        @param names: A list of file names that the books should have
        once uploaded to the device. len(names) == len(files)
@ -163,7 +173,8 @@ class Device(object):
        another dictionary that maps tag names to lists of book ids. The ids are
        ids from the book database.
        @param booklists: A tuple containing the result of calls to
-                                (L{books}(oncard=False), L{books}(oncard=True)).
+                                (L{books}(oncard=None), L{books}(oncard='carda'),
+                                L{books}(oncard='cardb')).
        '''
        raise NotImplementedError

@ -180,7 +191,8 @@ class Device(object):
        with the device.
        @param paths: paths to books on the device.
        @param booklists:  A tuple containing the result of calls to
-                                (L{books}(oncard=False), L{books}(oncard=True)).
+                                (L{books}(oncard=None), L{books}(oncard='carda'),
+                                L{books}(oncard='cardb')).
        '''
        raise NotImplementedError()

@ -188,7 +200,8 @@ class Device(object):
        '''
        Update metadata on device.
        @param booklists: A tuple containing the result of calls to
-                                (L{books}(oncard=False), L{books}(oncard=True)).
+                                (L{books}(oncard=None), L{books}(oncard='carda'),
+                                L{books}(oncard='cardb')).
        '''
        raise NotImplementedError()

@ -199,6 +212,30 @@ class Device(object):
        '''
        raise NotImplementedError()

+    @classmethod
+    def config_widget(cls):
+        '''
+        Should return a QWidget. The QWidget contains the settings for the device interface
+        '''
+        raise NotImplementedError()
+
+    @classmethod
+    def save_settings(cls, settings_widget):
+        '''
+        Should save settings to disk. Takes the widget created in config_widget
+        and saves all settings to disk.
+        '''
+        raise NotImplementedError()
+
+    @classmethod
+    def settings(cls):
+        '''
+        Should return an opts object. The opts object should have one attribute
+        `format_map` which is an ordered list of formats for the device.
+        '''
+        raise NotImplementedError()
+
+


 class BookList(list):
--- a/src/calibre/devices/jetbook/driver.py
+++ b/src/calibre/devices/jetbook/driver.py
@ -7,10 +7,16 @@ Device driver for Ectaco Jetbook firmware >= JL04_v030e
 import os, re, sys, shutil
 from itertools import cycle

-from calibre.devices.usbms.driver import USBMS, metadata_from_formats
+from calibre.devices.usbms.driver import USBMS
 from calibre import sanitize_file_name as sanitize

 class JETBOOK(USBMS):
+    name           = 'Ectaco JetBook Device Interface'
+    description    = _('Communicate with the JetBook eBook reader.')
+    author         = _('James Ralston')
+    supported_platforms = ['windows', 'osx', 'linux']
+
+
    # Ordered list of supported formats
    # Be sure these have an entry in calibre.devices.mime
    FORMATS     = ['epub', 'mobi', 'prc', 'txt', 'rtf', 'pdf']
@ -46,27 +52,34 @@ class JETBOOK(USBMS):
        names = iter(names)
        metadata = iter(metadata)

-        for infile in files:
+        for i, infile in enumerate(files):
            newpath = path

-            if self.SUPPORTS_SUB_DIRS:
-                mdata = metadata.next()
+            mdata = metadata.next()

-                if 'tags' in mdata.keys():
-                    for tag in mdata['tags']:
-                        if tag.startswith('/'):
-                            newpath += tag
-                            newpath = os.path.normpath(newpath)
-                            break
-
-            if not os.path.exists(newpath):
-                os.makedirs(newpath)
+            if 'tags' in mdata.keys():
+                for tag in mdata['tags']:
+                    if tag.startswith(_('News')):
+                        newpath = os.path.join(newpath, 'news')
+                        newpath = os.path.join(newpath, mdata.get('title', ''))
+                        newpath = os.path.join(newpath, mdata.get('timestamp', ''))
+                        break
+                    elif tag.startswith('/'):
+                        newpath += tag
+                        newpath = os.path.normpath(newpath)
+                        break

            author = sanitize(mdata.get('authors','Unknown')).replace(' ', '_')
            title = sanitize(mdata.get('title', 'Unknown')).replace(' ', '_')
            fileext = os.path.splitext(os.path.basename(names.next()))[1]
            fname = '%s#%s%s' % (author, title, fileext)

+            if newpath == path:
+                newpath = os.path.join(newpath, author, title)
+
+            if not os.path.exists(newpath):
+                os.makedirs(newpath)
+
            filepath = os.path.join(newpath, fname)
            paths.append(filepath)

@ -81,6 +94,10 @@ class JETBOOK(USBMS):
            else:
                shutil.copy2(infile, filepath)

+            self.report_progress((i+1) / float(len(files)), _('Transferring books to device...'))
+
+        self.report_progress(1.0, _('Transferring books to device...'))
+
        return zip(paths, cycle([on_card]))

    @classmethod
@ -93,6 +110,7 @@ class JETBOOK(USBMS):

            return txt

+        from calibre.ebooks.metadata.meta import metadata_from_formats
        mi = metadata_from_formats([path])

        if (mi.title==_('Unknown') or mi.authors==[_('Unknown')]) \
@ -108,10 +126,10 @@ class JETBOOK(USBMS):

    def windows_sort_drives(self, drives):
        main = drives.get('main', None)
-        card = drives.get('card', None)
+        card = drives.get('carda', None)
        if card and main and card < main:
            drives['main'] = card
-            drives['card'] = main
+            drives['carda'] = main

        return drives

--- a/src/calibre/devices/kindle/driver.py
+++ b/src/calibre/devices/kindle/driver.py
@ -6,9 +6,14 @@ Device driver for Amazon's Kindle

 import os, re, sys

-from calibre.devices.usbms.driver import USBMS, metadata_from_formats
+from calibre.devices.usbms.driver import USBMS

 class KINDLE(USBMS):
+    name           = 'Kindle Device Interface'
+    description    = _('Communicate with the Kindle eBook reader.')
+    author         = _('John Schember')
+    supported_platforms = ['windows', 'osx', 'linux']
+
    # Ordered list of supported formats
    FORMATS     = ['azw', 'mobi', 'prc', 'azw1', 'tpz', 'txt']

@ -18,23 +23,24 @@ class KINDLE(USBMS):

    VENDOR_NAME = 'KINDLE'
    WINDOWS_MAIN_MEM = 'INTERNAL_STORAGE'
-    WINDOWS_CARD_MEM = 'CARD_STORAGE'
+    WINDOWS_CARD_A_MEM = 'CARD_STORAGE'

    OSX_MAIN_MEM = 'Kindle Internal Storage Media'
-    OSX_CARD_MEM = 'Kindle Card Storage Media'
+    OSX_CARD_A_MEM = 'Kindle Card Storage Media'

    MAIN_MEMORY_VOLUME_LABEL  = 'Kindle Main Memory'
    STORAGE_CARD_VOLUME_LABEL = 'Kindle Storage Card'

    EBOOK_DIR_MAIN = "documents"
-    EBOOK_DIR_CARD = "documents"
+    EBOOK_DIR_CARD_A = "documents"
    SUPPORTS_SUB_DIRS = True

    WIRELESS_FILE_NAME_PATTERN = re.compile(
    r'(?P<title>[^-]+)-asin_(?P<asin>[a-zA-Z\d]{10,})-type_(?P<type>\w{4})-v_(?P<index>\d+).*')

    def delete_books(self, paths, end_session=True):
-        for path in paths:
+        for i, path in enumerate(paths):
+            self.report_progress((i+1) / float(len(paths)), _('Removing books from device...'))
            if os.path.exists(path):
                os.unlink(path)

@ -43,9 +49,11 @@ class KINDLE(USBMS):
                # Delete the ebook auxiliary file
                if os.path.exists(filepath + '.mbp'):
                    os.unlink(filepath + '.mbp')
+        self.report_progress(1.0, _('Removing books from device...'))

    @classmethod
    def metadata_from_path(cls, path):
+        from calibre.ebooks.metadata.meta import metadata_from_formats
        mi = metadata_from_formats([path])
        if mi.title == _('Unknown') or ('-asin' in mi.title and '-type' in mi.title):
            match = cls.WIRELESS_FILE_NAME_PATTERN.match(os.path.basename(path))
@ -58,6 +66,10 @@ class KINDLE(USBMS):


 class KINDLE2(KINDLE):
+    name           = 'Kindle 2 Device Interface'
+    description    = _('Communicate with the Kindle 2 eBook reader.')
+    author         = _('John Schember')
+    supported_platforms = ['windows', 'osx', 'linux']

    PRODUCT_ID = [0x0002]
    BCD        = [0x0100]
--- a/src/calibre/devices/libusb.py
+++ b/src/calibre/devices/libusb.py
@ -116,8 +116,8 @@ class Device(Structure):
            raise Error("Cannot open device")
        return handle.contents    
    
-    @apply
-    def configurations():
+    @dynamic_property
+    def configurations(self):
        doc = """ List of device configurations. See L{ConfigDescriptor} """
        def fget(self):
            ans = []
@ -127,8 +127,8 @@ class Device(Structure):
        return property(doc=doc, fget=fget)

 class Bus(Structure):
-    @apply
-    def device_list():
+    @dynamic_property
+    def device_list(self):
        doc = \
        """ 
        Flat list of devices on this bus. 
--- a/src/calibre/devices/prs500/books.py
+++ b/src/calibre/devices/prs500/books.py
@ -55,8 +55,8 @@ class Book(object):
    size         = book_metadata_field("size", formatter=int)
    # When setting this attribute you must use an epoch
    datetime     = book_metadata_field("date", formatter=strptime, setter=strftime)
-    @apply
-    def title_sorter():
+    @dynamic_property
+    def title_sorter(self):
        doc = '''String to sort the title. If absent, title is returned'''
        def fget(self):
            src = self.elem.getAttribute('titleSorter').strip()
@ -67,8 +67,8 @@ class Book(object):
            self.elem.setAttribute('titleSorter', sortable_title(unicode(val)))
        return property(doc=doc, fget=fget, fset=fset)

-    @apply
-    def thumbnail():
+    @dynamic_property
+    def thumbnail(self):
        doc = \
        """
        The thumbnail. Should be a height 68 image.
@ -88,15 +88,15 @@ class Book(object):
                return decode(rc)
        return property(fget=fget, doc=doc)

-    @apply
-    def path():
+    @dynamic_property
+    def path(self):
        doc = """ Absolute path to book on device. Setting not supported. """
        def fget(self):
            return self.root + self.rpath
        return property(fget=fget, doc=doc)

-    @apply
-    def db_id():
+    @dynamic_property
+    def db_id(self):
        doc = '''The database id in the application database that this file corresponds to'''
        def fget(self):
            match = re.search(r'_(\d+)$', self.rpath.rpartition('.')[0])
@ -116,7 +116,7 @@ class Book(object):
               self.authors.encode('utf-8') + " at " + self.path.encode('utf-8')


-def fix_ids(media, cache):
+def fix_ids(media, cache, *args):
    '''
    Adjust ids in cache to correspond with media.
    '''
--- a/src/calibre/devices/prs500/cli/main.py
+++ b/src/calibre/devices/prs500/cli/main.py
@ -13,7 +13,7 @@ from calibre import __version__, iswindows, __appname__
 from calibre.devices.errors import PathError 
 from calibre.utils.terminfo import TerminalController
 from calibre.devices.errors import ArgumentError, DeviceError, DeviceLocked
-from calibre.devices import devices
+from calibre.customize.ui import device_plugins
 from calibre.devices.scanner import DeviceScanner

 MINIMUM_COL_WIDTH = 12 #: Minimum width of columns in ls output
@ -39,8 +39,8 @@ class FileFormatter(object):
        self.name        = file.name
        self.path        = file.path
    
-    @apply
-    def mode_string():
+    @dynamic_property
+    def mode_string(self):
        doc=""" The mode string for this file. There are only two modes read-only and read-write """
        def fget(self):
            mode, x = "-", "-"      
@ -50,8 +50,8 @@ class FileFormatter(object):
            return mode
        return property(doc=doc, fget=fget)
    
-    @apply
-    def isdir_name():
+    @dynamic_property
+    def isdir_name(self):
        doc='''Return self.name + '/' if self is a directory'''
        def fget(self):
            name = self.name
@ -61,8 +61,8 @@ class FileFormatter(object):
        return property(doc=doc, fget=fget)
            
    
-    @apply
-    def name_in_color():
+    @dynamic_property
+    def name_in_color(self):
        doc=""" The name in ANSI text. Directories are blue, ebooks are green """
        def fget(self):
            cname = self.name
@ -75,22 +75,22 @@ class FileFormatter(object):
            return cname
        return property(doc=doc, fget=fget)
    
-    @apply
-    def human_readable_size():
+    @dynamic_property
+    def human_readable_size(self):
        doc=""" File size in human readable form """
        def fget(self):
            return human_readable(self.size)
        return property(doc=doc, fget=fget)
    
-    @apply
-    def modification_time():
+    @dynamic_property
+    def modification_time(self):
        doc=""" Last modified time in the Linux ls -l format """
        def fget(self):
            return time.strftime("%Y-%m-%d %H:%M", time.localtime(self.wtime))
        return property(doc=doc, fget=fget)
    
-    @apply
-    def creation_time():
+    @dynamic_property
+    def creation_time(self):
        doc=""" Last modified time in the Linux ls -l format """
        def fget(self):
            return time.strftime("%Y-%m-%d %H:%M", time.localtime(self.ctime))
@ -203,9 +203,10 @@ def main():
        _wmi = wmi.WMI()
    scanner = DeviceScanner(_wmi)
    scanner.scan()
-    for d in devices():
+    for d in device_plugins():
        if scanner.is_device_connected(d):
-            dev = d(log_packets=options.log_packets)
+            dev = d
+            dev.reset(log_packets=options.log_packets)
    
    if dev is None:
        print >>sys.stderr, 'Unable to find a connected ebook reader.'
--- a/src/calibre/devices/prs500/driver.py
+++ b/src/calibre/devices/prs500/driver.py
@ -40,13 +40,14 @@ from array import array
 from functools import wraps
 from StringIO import StringIO

-from calibre.devices.interface import Device
+from calibre.devices.interface import DevicePlugin
 from calibre.devices.libusb import Error as USBError
 from calibre.devices.libusb import get_device_by_id
 from calibre.devices.prs500.prstypes import *
 from calibre.devices.errors import *
 from calibre.devices.prs500.books import BookList, fix_ids
 from calibre import __author__, __appname__
+from calibre.devices.usbms.deviceconfig import DeviceConfig

 # Protocol versions this driver has been tested with
 KNOWN_USB_PROTOCOL_VERSIONS = [0x3030303030303130L]
@ -76,12 +77,16 @@ class File(object):
        return self.name


-class PRS500(Device):
+class PRS500(DeviceConfig, DevicePlugin):

    """
    Implements the backend for communication with the SONY Reader.
    Each method decorated by C{safe} performs a task.
    """
+    name           = 'PRS-500 Device Interface'
+    description    = _('Communicate with the Sony PRS-500 eBook reader.')
+    author         = _('Kovid Goyal')
+    supported_platforms = ['windows', 'osx', 'linux']

    VENDOR_ID    = 0x054c #: SONY Vendor Id
    PRODUCT_ID   = 0x029b #: Product Id for the PRS-500
@ -181,7 +186,7 @@ class PRS500(Device):

        return run_session

-    def __init__(self, key='-1', log_packets=False, report_progress=None) :
+    def reset(self, key='-1', log_packets=False, report_progress=None) :
        """
        @param key: The key to unlock the device
        @param log_packets: If true the packet stream to/from the device is logged
@ -620,6 +625,8 @@ class PRS500(Device):
                data_type=FreeSpaceAnswer, \
                command_number=FreeSpaceQuery.NUMBER)[0]
            data.append( pkt.free )
+        data = [x for x in data if x != 0]
+        data = (data + [0, 0, 0])[:3]  # always exactly 3 entries: (main, card A, card B)
        return data

    def _exists(self, path):
--- a/src/calibre/devices/prs500/prstypes.py
+++ b/src/calibre/devices/prs500/prstypes.py
@ -284,8 +284,8 @@ class Command(TransferBuffer):
    # Length of the data part of this packet
    length = field(start=12, fmt=DWORD) 
    
-    @apply
-    def data():
+    @dynamic_property
+    def data(self):
        doc = \
        """ 
        The data part of this command. Returned/set as/by a TransferBuffer. 
@ -447,8 +447,8 @@ class LongCommand(Command):
        self.length  = 16
        self.command = command
    
-    @apply
-    def command():
+    @dynamic_property
+    def command(self):
        doc = \
        """ 
        Usually carries extra information needed for the command
@ -568,8 +568,8 @@ class FileOpen(PathCommand):
        PathCommand.__init__(self, path, FileOpen.NUMBER, path_len_at_byte=20)
        self.mode = mode
    
-    @apply
-    def mode():
+    @dynamic_property
+    def mode(self):
        doc = \
                    """ 
                    The file open mode. Is either L{FileOpen.READ} 
@ -651,8 +651,8 @@ class Response(Command):
            raise PacketError("Response packets must have their number set to " \
            + hex(0x00001000))
    
-    @apply
-    def data():
+    @dynamic_property
+    def data(self):
        doc = \
                  """ 
                  The last 3 DWORDs (12 bytes) of data in this 
@ -681,43 +681,43 @@ class ListResponse(Response):
    PATH_NOT_FOUND = 0xffffffd7 #: Queried path is not found 
    PERMISSION_DENIED = 0xffffffd6 #: Permission denied
    
-    @apply
-    def is_file():
+    @dynamic_property
+    def is_file(self):
        doc = """ True iff queried path is a file """
        def fget(self):      
            return self.code == ListResponse.IS_FILE
        return property(doc=doc, fget=fget)
    
-    @apply
-    def is_invalid():
+    @dynamic_property
+    def is_invalid(self):
        doc = """ True iff queried path is invalid """
        def fget(self):    
            return self.code == ListResponse.IS_INVALID
        return property(doc=doc, fget=fget)
    
-    @apply
-    def path_not_found():
+    @dynamic_property
+    def path_not_found(self):
        doc = """ True iff queried path is not found """
        def fget(self):    
            return self.code == ListResponse.PATH_NOT_FOUND
        return property(doc=doc, fget=fget)
    
-    @apply
-    def permission_denied():
+    @dynamic_property
+    def permission_denied(self):
        doc = """ True iff permission is denied for path operations """
        def fget(self):    
            return self.code == ListResponse.PERMISSION_DENIED
        return property(doc=doc, fget=fget)
    
-    @apply
-    def is_unmounted():
+    @dynamic_property
+    def is_unmounted(self):
        doc = """ True iff queried path is unmounted (i.e. removed storage card) """
        def fget(self):
            return self.code == ListResponse.IS_UNMOUNTED
        return property(doc=doc, fget=fget)
    
-    @apply
-    def is_eol():
+    @dynamic_property
+    def is_eol(self):
        doc = """ True iff there are no more items in the list """
        def fget(self):
            return self.code == ListResponse.IS_EOL
@ -759,8 +759,8 @@ class FileProperties(Answer):
    # 0 = default permissions, 4 = read only
    permissions = field(start=36, fmt=DWORD)  
    
-    @apply
-    def is_dir():
+    @dynamic_property
+    def is_dir(self):
        doc = """True if path points to a directory, False if it points to a file."""    
        
        def fget(self):
@ -776,8 +776,8 @@ class FileProperties(Answer):
        return property(doc=doc, fget=fget, fset=fset)
    
    
-    @apply
-    def is_readonly():
+    @dynamic_property
+    def is_readonly(self):
        doc = """ Whether this file is readonly."""
        
        def fget(self):
@ -801,8 +801,8 @@ class IdAnswer(Answer):
    
    """ Defines the structure of packets that contain identifiers for queries. """
    
-    @apply
-    def id():
+    @dynamic_property
+    def id(self):
        doc = \
        """ 
        The identifier. C{unsigned int} stored in 4 bytes 
@ -841,8 +841,8 @@ class ListAnswer(Answer):
    name_length = field(start=20, fmt=DWORD)
    name        = stringfield(name_length, start=24)
    
-    @apply
-    def is_dir():
+    @dynamic_property
+    def is_dir(self):
        doc = \
        """ 
        True if list item points to a directory, False if it points to a file.
@ -859,4 +859,3 @@ class ListAnswer(Answer):
        
        return property(doc=doc, fget=fget, fset=fset)

-
--- a/src/calibre/devices/prs505/books.py
+++ b/src/calibre/devices/prs505/books.py
@ -64,8 +64,8 @@ class Book(object):
    # When setting this attribute you must use an epoch
    datetime     = book_metadata_field("date", formatter=strptime, setter=strftime)

-    @apply
-    def title_sorter():
+    @dynamic_property
+    def title_sorter(self):
        doc = '''String to sort the title. If absent, title is returned'''
        def fget(self):
            src = self.elem.getAttribute('titleSorter').strip()
@ -76,8 +76,8 @@ class Book(object):
            self.elem.setAttribute('titleSorter', sortable_title(unicode(val)))
        return property(doc=doc, fget=fget, fset=fset)

-    @apply
-    def thumbnail():
+    @dynamic_property
+    def thumbnail(self):
        doc = \
        """
        The thumbnail. Should be a height 68 image.
@ -99,15 +99,15 @@ class Book(object):
                return decode(rc)
        return property(fget=fget, doc=doc)

-    @apply
-    def path():
+    @dynamic_property
+    def path(self):
        doc = """ Absolute path to book on device. Setting not supported. """
        def fget(self):
            return self.mountpath + self.rpath
        return property(fget=fget, doc=doc)

-    @apply
-    def db_id():
+    @dynamic_property
+    def db_id(self):
        doc = '''The database id in the application database that this file corresponds to'''
        def fget(self):
            match = re.search(r'_(\d+)$', self.rpath.rpartition('.')[0])
@ -129,7 +129,7 @@ class Book(object):

 class BookList(_BookList):

-    def __init__(self, xml_file, mountpath):
+    def __init__(self, xml_file, mountpath, report_progress=None):
        _BookList.__init__(self)
        xml_file.seek(0)
        self.document = dom.parse(xml_file)
@ -144,7 +144,10 @@ class BookList(_BookList):
        else:
            self.prefix = ''

-        for book in self.root_element.childNodes:
+        nodes = self.root_element.childNodes
+        for i, book in enumerate(nodes):
+            if report_progress:
+                report_progress((i+1) / float(len(nodes)), _('Getting list of books on device...'))
            if hasattr(book, 'tagName') and book.tagName.endswith('text'):
                tags = [i.getAttribute('title') for i in self.get_playlists(book.getAttribute('id'))]
                self.append(Book(book, mountpath, tags, prefix=self.prefix))
@ -380,14 +383,16 @@ class BookList(_BookList):
                item.setAttribute('id', str(map[id]))
                pl.appendChild(item)

-def fix_ids(main, card):
+def fix_ids(main, carda, cardb):
    '''
    Adjust ids the XML databases.
    '''
    if hasattr(main, 'purge_empty_playlists'):
        main.purge_empty_playlists()
-    if hasattr(card, 'purge_empty_playlists'):
-        card.purge_empty_playlists()
+    if hasattr(carda, 'purge_empty_playlists'):
+        carda.purge_empty_playlists()
+    if hasattr(cardb, 'purge_empty_playlists'):
+        cardb.purge_empty_playlists()

    def regen_ids(db):
        if not hasattr(db, 'root_element'):
@ -413,6 +418,7 @@ def fix_ids(main, card):
        db.reorder_playlists()

    regen_ids(main)
-    regen_ids(card)
+    regen_ids(carda)
+    regen_ids(cardb)

    main.set_next_id(str(main.max_id()+1))
--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@ -1,399 +1,120 @@
 __license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
+                '2009, John Schember <john at nachtimwald.com>'
 '''
 Device driver for the SONY PRS-505
 '''
-import sys, os, shutil, time, subprocess, re
+import os, time
 from itertools import cycle

-from calibre.devices.interface import Device
+from calibre.devices.usbms.cli import CLI
+from calibre.devices.usbms.device import Device
 from calibre.devices.errors import DeviceError, FreeSpaceError
 from calibre.devices.prs505.books import BookList, fix_ids
-from calibre import iswindows, islinux, isosx, __appname__
-from calibre.devices.errors import PathError
+from calibre import __appname__

-class File(object):
-    def __init__(self, path):
-        stats = os.stat(path)
-        self.is_dir = os.path.isdir(path)
-        self.is_readonly = not os.access(path, os.W_OK)
-        self.ctime = stats.st_ctime
-        self.wtime = stats.st_mtime
-        self.size  = stats.st_size
-        if path.endswith(os.sep):
-            path = path[:-1]
-        self.path = path
-        self.name = os.path.basename(path)
+class PRS505(CLI, Device):

+    name           = 'PRS-505 Device Interface'
+    description    = _('Communicate with the Sony PRS-505 eBook reader.')
+    author         = _('Kovid Goyal and John Schember')
+    supported_platforms = ['windows', 'osx', 'linux']

-class PRS505(Device):
-    VENDOR_ID    = 0x054c   #: SONY Vendor Id
-    PRODUCT_ID   = 0x031e   #: Product Id for the PRS-505
-    BCD          = [0x229]  #: Needed to disambiguate 505 and 700 on linux
-    PRODUCT_NAME = 'PRS-505'
-    VENDOR_NAME  = 'SONY'
    FORMATS      = ['epub', 'lrf', 'lrx', 'rtf', 'pdf', 'txt']

-    MEDIA_XML    = 'database/cache/media.xml'
-    CACHE_XML    = 'Sony Reader/database/cache.xml'
+    VENDOR_ID    = [0x054c]   #: SONY Vendor Id
+    PRODUCT_ID   = [0x031e]   #: Product Id for the PRS-505
+    BCD          = [0x229]  #: Needed to disambiguate 505 and 700 on linux
+
+    VENDOR_NAME  = 'SONY'
+    WINDOWS_MAIN_MEM = 'PRS-505'
+    WINDOWS_CARD_A_MEM = 'PRS-505/UC:MS'
+    WINDOWS_CARD_B_MEM = 'PRS-505/UC:SD'
+
+    OSX_MAIN_MEM = 'Sony PRS-505/UC Media'
+    OSX_CARD_A_MEM = 'Sony PRS-505/UC:MS Media'
+    OSX_CARD_B_MEM = 'Sony PRS-505/UC:SD'

    MAIN_MEMORY_VOLUME_LABEL  = 'Sony Reader Main Memory'
    STORAGE_CARD_VOLUME_LABEL = 'Sony Reader Storage Card'

-    OSX_NAME                  = 'Sony PRS-505'
+    MEDIA_XML    = 'database/cache/media.xml'
+    CACHE_XML    = 'Sony Reader/database/cache.xml'

    CARD_PATH_PREFIX          = __appname__

-    FDI_TEMPLATE = \
-'''
-  <device>
-      <match key="info.category" string="volume">
-          <match key="@info.parent:@info.parent:@info.parent:@info.parent:usb.vendor_id" int="%(vendor_id)s">
-              <match key="@info.parent:@info.parent:@info.parent:@info.parent:usb.product_id" int="%(product_id)s">
-                  <match key="@info.parent:@info.parent:@info.parent:@info.parent:usb.device_revision_bcd" int="%(bcd)s">
-                      <match key="volume.is_partition" bool="false">
-                          <merge key="volume.label" type="string">%(main_memory)s</merge>
-                          <merge key="%(app)s.mainvolume" type="string">%(deviceclass)s</merge>
-                      </match>
-                  </match>
-              </match>
-          </match>
-      </match>
-  </device>
-  <device>
-      <match key="info.category" string="volume">
-          <match key="@info.parent:@info.parent:@info.parent:@info.parent:usb.vendor_id" int="%(vendor_id)s">
-              <match key="@info.parent:@info.parent:@info.parent:@info.parent:usb.product_id" int="%(product_id)s">
-                  <match key="@info.parent:@info.parent:@info.parent:@info.parent:usb.device_revision_bcd" int="%(bcd)s">
-                      <match key="volume.is_partition" bool="true">
-                          <merge key="volume.label" type="string">%(storage_card)s</merge>
-                          <merge key="%(app)s.cardvolume" type="string">%(deviceclass)s</merge>
-                      </match>
-                  </match>
-              </match>
-          </match>
-      </match>
-  </device>
-'''.replace('%(app)s', __appname__)
-
-
-    def __init__(self, log_packets=False):
-        self._main_prefix = self._card_prefix = None
-
-    @classmethod
-    def get_fdi(cls):
-        return cls.FDI_TEMPLATE%dict(
-                                     deviceclass=cls.__name__,
-                                     vendor_id=hex(cls.VENDOR_ID),
-                                     product_id=hex(cls.PRODUCT_ID),
-                                     bcd=hex(cls.BCD[0]),
-                                     main_memory=cls.MAIN_MEMORY_VOLUME_LABEL,
-                                     storage_card=cls.STORAGE_CARD_VOLUME_LABEL,
-                                     )
-
-    @classmethod
-    def is_device(cls, device_id):
-        device_id = device_id.upper()
-        if 'VEN_'+cls.VENDOR_NAME in device_id and \
-               'PROD_'+cls.PRODUCT_NAME in device_id:
-            return True
-        vid, pid = hex(cls.VENDOR_ID)[2:], hex(cls.PRODUCT_ID)[2:]
-        if len(vid) < 4: vid = '0'+vid
-        if len(pid) < 4: pid = '0'+pid
-        if 'VID_'+vid in device_id and \
-               'PID_'+pid in device_id:
-            return True
-        return False
-
-    @classmethod
-    def get_osx_mountpoints(cls, raw=None):
-        if raw is None:
-            ioreg = '/usr/sbin/ioreg'
-            if not os.access(ioreg, os.X_OK):
-                ioreg = 'ioreg'
-            raw = subprocess.Popen((ioreg+' -w 0 -S -c IOMedia').split(),
-                                   stdout=subprocess.PIPE).communicate()[0]
-        lines = raw.splitlines()
-        names = {}
-        for i, line in enumerate(lines):
-            if line.strip().endswith('<class IOMedia>') and cls.OSX_NAME in line:
-                loc = 'stick' if ':MS' in line else 'card' if ':SD' in line else 'main'
-                for line in lines[i+1:]:
-                    line = line.strip()
-                    if line.endswith('}'):
-                        break
-                    match = re.search(r'"BSD Name"\s+=\s+"(.*?)"', line)
-                    if match is not None:
-                        names[loc] = match.group(1)
-                        break
-            if len(names.keys()) == 3:
-                break
-        return names
-
-
-    def open_osx(self):
-        mount = subprocess.Popen('mount', shell=True,
-                                 stdout=subprocess.PIPE).stdout.read()
-        names = self.get_osx_mountpoints()
-        dev_pat = r'/dev/%s(\w*)\s+on\s+([^\(]+)\s+'
-        if 'main' not in names.keys():
-            raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.')%self.__class__.__name__)
-        main_pat = dev_pat%names['main']
-        self._main_prefix = re.search(main_pat, mount).group(2) + os.sep
-        card_pat = names['stick'] if 'stick' in names.keys() else names['card'] if 'card' in names.keys() else None
-        if card_pat is not None:
-            card_pat = dev_pat%card_pat
-            self._card_prefix = re.search(card_pat, mount).group(2) + os.sep
-
-
-    def open_windows(self):
-        time.sleep(6)
-        drives = []
-        wmi = __import__('wmi', globals(), locals(), [], -1)
-        c = wmi.WMI(find_classes=False)
-        for drive in c.Win32_DiskDrive():
-            if self.__class__.is_device(str(drive.PNPDeviceID)):
-                if drive.Partitions == 0:
-                    continue
-                try:
-                    partition = drive.associators("Win32_DiskDriveToDiskPartition")[0]
-                    logical_disk = partition.associators('Win32_LogicalDiskToPartition')[0]
-                    prefix = logical_disk.DeviceID+os.sep
-                    drives.append((drive.Index, prefix))
-                except IndexError:
-                    continue
-
-
-        if not drives:
-            raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.')%self.__class__.__name__)
-
-        drives.sort(cmp=lambda a, b: cmp(a[0], b[0]))
-        self._main_prefix = drives[0][1]
-        if len(drives) > 1:
-            self._card_prefix = drives[1][1]
-
-
-    def open_linux(self):
-        import dbus
-        bus = dbus.SystemBus()
-        hm  = dbus.Interface(bus.get_object("org.freedesktop.Hal", "/org/freedesktop/Hal/Manager"), "org.freedesktop.Hal.Manager")
-
-        def conditional_mount(dev, main_mem=True):
-            mmo = bus.get_object("org.freedesktop.Hal", dev)
-            label = mmo.GetPropertyString('volume.label', dbus_interface='org.freedesktop.Hal.Device')
-            is_mounted = mmo.GetPropertyString('volume.is_mounted', dbus_interface='org.freedesktop.Hal.Device')
-            mount_point = mmo.GetPropertyString('volume.mount_point', dbus_interface='org.freedesktop.Hal.Device')
-            fstype = mmo.GetPropertyString('volume.fstype', dbus_interface='org.freedesktop.Hal.Device')
-            if is_mounted:
-                return str(mount_point)
-            mmo.Mount(label, fstype, ['umask=077', 'uid='+str(os.getuid()), 'sync'],
-                          dbus_interface='org.freedesktop.Hal.Device.Volume')
-            return os.path.normpath('/media/'+label)+'/'
-
-
-        mm = hm.FindDeviceStringMatch(__appname__+'.mainvolume', self.__class__.__name__)
-        if not mm:
-            raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.')%(self.__class__.__name__,))
-        self._main_prefix = None
-        for dev in mm:
-            try:
-                self._main_prefix = conditional_mount(dev)+os.sep
-                break
-            except dbus.exceptions.DBusException:
-                continue
-
-
-        if not self._main_prefix:
-            raise DeviceError('Could not open device for reading. Try a reboot.')
-
-        self._card_prefix = None
-        cards = hm.FindDeviceStringMatch(__appname__+'.cardvolume', self.__class__.__name__)
-        keys = []
-        for card in cards:
-            keys.append(int('UC_SD' in bus.get_object("org.freedesktop.Hal", card).GetPropertyString('info.parent', dbus_interface='org.freedesktop.Hal.Device')))
-
-        cards = zip(cards, keys)
-        cards.sort(cmp=lambda x, y: cmp(x[1], y[1]))
-        cards = [i[0] for i in cards]
-
-        for dev in cards:
-            try:
-                self._card_prefix = conditional_mount(dev, False)+os.sep
-                break
-            except:
-                import traceback
-                print traceback
-                continue
-
-
    def open(self):
-        time.sleep(5)
-        self._main_prefix = self._card_prefix = None
-        if islinux:
+        Device.open(self)
+
+        def write_cache(prefix):
            try:
-                self.open_linux()
-            except DeviceError:
-                time.sleep(3)
-                self.open_linux()
-        if iswindows:
-            try:
-                self.open_windows()
-            except DeviceError:
-                time.sleep(3)
-                self.open_windows()
-        if isosx:
-            try:
-                self.open_osx()
-            except DeviceError:
-                time.sleep(3)
-                self.open_osx()
-        if self._card_prefix is not None:
-            try:
-                cachep = os.path.join(self._card_prefix, self.CACHE_XML)
+                cachep = os.path.join(prefix, self.CACHE_XML)
                if not os.path.exists(cachep):
                    try:
                        os.makedirs(os.path.dirname(cachep), mode=0777)
                    except:
                        time.sleep(5)
                        os.makedirs(os.path.dirname(cachep), mode=0777)
-                    f = open(cachep, 'wb')
-                    f.write(u'''<?xml version="1.0" encoding="UTF-8"?>
-<cache xmlns="http://www.kinoma.com/FskCache/1">
-</cache>
-'''.encode('utf8'))
-                    f.close()
+                    with open(cachep, 'wb') as f:
+                        f.write(u'''<?xml version="1.0" encoding="UTF-8"?>
+                            <cache xmlns="http://www.kinoma.com/FskCache/1">
+                            </cache>
+                            '''.encode('utf8'))
+                    return True
            except:
                self._card_prefix = None
                import traceback
                traceback.print_exc()
+            return False

-    def set_progress_reporter(self, pr):
-        self.report_progress = pr
+        if self._card_a_prefix is not None:
+            if not write_cache(self._card_a_prefix):
+                self._card_a_prefix = None
+        if self._card_b_prefix is not None:
+            if not write_cache(self._card_b_prefix):
+                self._card_b_prefix = None

    def get_device_information(self, end_session=True):
+        self.report_progress(1.0, _('Get device information...'))
        return (self.__class__.__name__, '', '', '')

-    def card_prefix(self, end_session=True):
-        return self._card_prefix
-
-    @classmethod
-    def _windows_space(cls, prefix):
-        if prefix is None:
-            return 0, 0
-        win32file = __import__('win32file', globals(), locals(), [], -1)
-        try:
-            sectors_per_cluster, bytes_per_sector, free_clusters, total_clusters = \
-                win32file.GetDiskFreeSpace(prefix[:-1])
-        except Exception, err:
-            if getattr(err, 'args', [None])[0] == 21: # Disk not ready
-                time.sleep(3)
-                sectors_per_cluster, bytes_per_sector, free_clusters, total_clusters = \
-                    win32file.GetDiskFreeSpace(prefix[:-1])
-            else: raise
-        mult = sectors_per_cluster * bytes_per_sector
-        return total_clusters * mult, free_clusters * mult
-
-    def total_space(self, end_session=True):
-        msz = csz = 0
-        if not iswindows:
-            if self._main_prefix is not None:
-                stats = os.statvfs(self._main_prefix)
-                msz = stats.f_frsize * (stats.f_blocks + stats.f_bavail - stats.f_bfree)
-            if self._card_prefix is not None:
-                stats = os.statvfs(self._card_prefix)
-                csz = stats.f_frsize * (stats.f_blocks + stats.f_bavail - stats.f_bfree)
-        else:
-            msz = self._windows_space(self._main_prefix)[0]
-            csz = self._windows_space(self._card_prefix)[0]
-
-        return (msz, 0, csz)
-
-    def free_space(self, end_session=True):
-        msz = csz = 0
-        if not iswindows:
-            if self._main_prefix is not None:
-                stats = os.statvfs(self._main_prefix)
-                msz = stats.f_frsize * stats.f_bavail
-            if self._card_prefix is not None:
-                stats = os.statvfs(self._card_prefix)
-                csz = stats.f_frsize * stats.f_bavail
-        else:
-            msz = self._windows_space(self._main_prefix)[1]
-            csz = self._windows_space(self._card_prefix)[1]
-
-        return (msz, 0, csz)
-
-    def books(self, oncard=False, end_session=True):
-        if oncard and self._card_prefix is None:
+    def books(self, oncard=None, end_session=True):
+        if oncard == 'carda' and not self._card_a_prefix:
+            self.report_progress(1.0, _('Getting list of books on device...'))
            return []
+        elif oncard == 'cardb' and not self._card_b_prefix:
+            self.report_progress(1.0, _('Getting list of books on device...'))
+            return []
+        elif oncard and oncard != 'carda' and oncard != 'cardb':
+            self.report_progress(1.0, _('Getting list of books on device...'))
+            return []
+
        db = self.__class__.CACHE_XML if oncard else self.__class__.MEDIA_XML
-        prefix = self._card_prefix if oncard else self._main_prefix
-        bl = BookList(open(prefix + db, 'rb'), prefix)
+        prefix = self._card_a_prefix if oncard == 'carda' else self._card_b_prefix if oncard == 'cardb' else self._main_prefix
+        bl = BookList(open(prefix + db, 'rb'), prefix, self.report_progress)
        paths = bl.purge_corrupted_files()
        for path in paths:
-            path = os.path.join(self._card_prefix if oncard else self._main_prefix, path)
+            path = os.path.join(prefix, path)
            if os.path.exists(path):
                os.unlink(path)
+        self.report_progress(1.0, _('Getting list of books on device...'))
        return bl

-    def munge_path(self, path):
-        if path.startswith('/') and not (path.startswith(self._main_prefix) or \
-            (self._card_prefix and path.startswith(self._card_prefix))):
-            path = self._main_prefix + path[1:]
-        elif path.startswith('card:'):
-            path = path.replace('card:', self._card_prefix[:-1])
-        return path
-
-    def mkdir(self, path, end_session=True):
-        """ Make directory """
-        path = self.munge_path(path)
-        os.mkdir(path)
-
-    def list(self, path, recurse=False, end_session=True, munge=True):
-        if munge:
-            path = self.munge_path(path)
-        if os.path.isfile(path):
-            return [(os.path.dirname(path), [File(path)])]
-        entries = [File(os.path.join(path, f)) for f in os.listdir(path)]
-        dirs = [(path, entries)]
-        for _file in entries:
-            if recurse and _file.is_dir:
-                dirs[len(dirs):] = self.list(_file.path, recurse=True, munge=False)
-        return dirs
-
-    def get_file(self, path, outfile, end_session=True):
-        path = self.munge_path(path)
-        src = open(path, 'rb')
-        shutil.copyfileobj(src, outfile, 10*1024*1024)
-
-    def put_file(self, infile, path, replace_file=False, end_session=True):
-        path = self.munge_path(path)
-        if os.path.isdir(path):
-            path = os.path.join(path, infile.name)
-        if not replace_file and os.path.exists(path):
-            raise PathError('File already exists: '+path)
-        dest = open(path, 'wb')
-        shutil.copyfileobj(infile, dest, 10*1024*1024)
-        dest.flush()
-        dest.close()
-
-    def rm(self, path, end_session=True):
-        path = self.munge_path(path)
-        os.unlink(path)
-
-    def touch(self, path, end_session=True):
-        path = self.munge_path(path)
-        if not os.path.exists(path):
-            open(path, 'w').close()
-        if not os.path.isdir(path):
-            os.utime(path, None)
-
-    def upload_books(self, files, names, on_card=False, end_session=True,
+    def upload_books(self, files, names, on_card=None, end_session=True,
                     metadata=None):
-        if on_card and not self._card_prefix:
-            raise ValueError(_('The reader has no storage card connected.'))
-        path = os.path.join(self._card_prefix, self.CARD_PATH_PREFIX) if on_card \
-               else os.path.join(self._main_prefix, 'database', 'media', 'books')
+        if on_card == 'carda' and not self._card_a_prefix:
+            raise ValueError(_('The reader has no storage card in this slot.'))
+        elif on_card == 'cardb' and not self._card_b_prefix:
+            raise ValueError(_('The reader has no storage card in this slot.'))
+        elif on_card and on_card not in ('carda', 'cardb'):
+            raise DeviceError(_('The reader has no storage card in this slot.'))
+
+        if on_card == 'carda':
+            path = os.path.join(self._card_a_prefix, self.CARD_PATH_PREFIX)
+        elif on_card == 'cardb':
+            path = os.path.join(self._card_b_prefix, self.CARD_PATH_PREFIX)
+        else:
+            path = os.path.join(self._main_prefix, 'database', 'media', 'books')

        def get_size(obj):
            if hasattr(obj, 'seek'):
@ -403,34 +124,61 @@ class PRS505(Device):
                return size
            return os.path.getsize(obj)

-        sizes = map(get_size, files)
+        sizes = [get_size(f) for f in files]
        size = sum(sizes)
-        space = self.free_space()
-        mspace = space[0]
-        cspace = space[2]
-        if on_card and size > cspace - 1024*1024:
-            raise FreeSpaceError("There is insufficient free space "+\
-                                          "on the storage card")
-        if not on_card and size > mspace - 2*1024*1024:
-            raise FreeSpaceError("There is insufficient free space " +\
-                                         "in main memory")
+
+        if not on_card and size > self.free_space()[0] - 2*1024*1024:
+            raise FreeSpaceError(_("There is insufficient free space in main memory"))
+        if on_card == 'carda' and size > self.free_space()[1] - 1024*1024:
+            raise FreeSpaceError(_("There is insufficient free space on the storage card"))
+        if on_card == 'cardb' and size > self.free_space()[2] - 1024*1024:
+            raise FreeSpaceError(_("There is insufficient free space on the storage card"))

        paths, ctimes = [], []

        names = iter(names)
-        for infile in files:
+        metadata = iter(metadata)
+        for i, infile in enumerate(files):
            close = False
            if not hasattr(infile, 'read'):
                infile, close = open(infile, 'rb'), True
            infile.seek(0)
-            name = names.next()
-            paths.append(os.path.join(path, name))
-            if not os.path.exists(os.path.dirname(paths[-1])):
-                os.makedirs(os.path.dirname(paths[-1]))
+
+            newpath = path
+            mdata = metadata.next()
+
+            if 'tags' in mdata.keys():
+                for tag in mdata['tags']:
+                    if tag.startswith(_('News')):
+                        newpath = os.path.join(newpath, 'news')
+                        newpath = os.path.join(newpath, mdata.get('title', ''))
+                        newpath = os.path.join(newpath, mdata.get('timestamp', ''))
+                    elif tag.startswith('/'):
+                        newpath = path
+                        newpath += tag
+                        newpath = os.path.normpath(newpath)
+                        break
+
+            if newpath == path:
+                newpath = os.path.join(newpath, mdata.get('authors', _('Unknown')))
+                newpath = os.path.join(newpath, mdata.get('title', _('Unknown')))
+
+            if not os.path.exists(newpath):
+                os.makedirs(newpath)
+
+            filepath = os.path.join(newpath, names.next())
+            paths.append(filepath)
+
            self.put_file(infile, paths[-1], replace_file=True)
+
            if close:
                infile.close()
            ctimes.append(os.path.getctime(paths[-1]))
+
+            self.report_progress((i+1) / float(len(files)), _('Transferring books to device...'))
+
+        self.report_progress(1.0, _('Transferring books to device...'))
+
        return zip(paths, sizes, ctimes, cycle([on_card]))

    @classmethod
@ -439,17 +187,19 @@ class PRS505(Device):
        for location in locations:
            info = metadata.next()
            path = location[0]
-            on_card = 1 if location[3] else 0
+            blist = 2 if location[3] == 'cardb' else 1 if location[3] == 'carda' else 0
            name = path.rpartition(os.sep)[2]
-            name = (cls.CARD_PATH_PREFIX+'/' if on_card else 'database/media/books/') + name
+            name = (cls.CARD_PATH_PREFIX+'/' if blist else 'database/media/books/') + name
            name = name.replace('//', '/')
-            booklists[on_card].add_book(info, name, *location[1:-1])
+            booklists[blist].add_book(info, name, *location[1:-1])
        fix_ids(*booklists)

    def delete_books(self, paths, end_session=True):
-        for path in paths:
+        for i, path in enumerate(paths):
+            self.report_progress((i+1) / float(len(paths)), _('Removing books from device...'))
            if os.path.exists(path):
                os.unlink(path)
+        self.report_progress(1.0, _('Removing books from device...'))

    @classmethod
    def remove_books_from_metadata(cls, paths, booklists):
@ -466,18 +216,15 @@ class PRS505(Device):
        f = open(self._main_prefix + self.__class__.MEDIA_XML, 'wb')
        booklists[0].write(f)
        f.close()
-        if self._card_prefix is not None and hasattr(booklists[1], 'write'):
-            if not os.path.exists(self._card_prefix):
-                os.makedirs(self._card_prefix)
-            f = open(self._card_prefix + self.__class__.CACHE_XML, 'wb')
-            booklists[1].write(f)
-            f.close()

+        def write_card_prefix(prefix, listid):
+            if prefix is not None and hasattr(booklists[listid], 'write'):
+                if not os.path.exists(prefix):
+                    os.makedirs(prefix)
+                f = open(prefix + self.__class__.CACHE_XML, 'wb')
+                booklists[listid].write(f)
+                f.close()
+        write_card_prefix(self._card_a_prefix, 1)
+        write_card_prefix(self._card_b_prefix, 2)
        
-
-
-def main(args=sys.argv):
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
+        self.report_progress(1.0, _('Sending metadata to device...'))
--- a/src/calibre/devices/prs700/driver.py
+++ b/src/calibre/devices/prs700/driver.py
@ -9,7 +9,18 @@ from calibre.devices.prs505.driver import PRS505

 class PRS700(PRS505):

-    BCD          = [0x31a]
-    PRODUCT_NAME = 'PRS-700'
-    OSX_NAME     = 'Sony PRS-700'
+    name           = 'PRS-700 Device Interface'
+    description    = _('Communicate with the Sony PRS-700 eBook reader.')
+    author         = _('Kovid Goyal and John Schember')
+    supported_platforms = ['windows', 'osx', 'linux']
+    
+    BCD          = [0x31a]
+    
+    WINDOWS_MAIN_MEM = 'PRS-700'
+    WINDOWS_CARD_A_MEM = 'PRS-700/UC:MS'
+    WINDOWS_CARD_B_MEM = 'PRS-700/UC:SD'
+
+    OSX_MAIN_MEM = 'Sony PRS-700/UC Media'
+    OSX_CARD_A_MEM = 'Sony PRS-700/UC:MS Media'
+    OSX_CARD_B_MEM = 'Sony PRS-700/UC:SD'

--- a/src/calibre/devices/usbms/books.py
+++ b/src/calibre/devices/usbms/books.py
@ -21,15 +21,15 @@ class Book(object):
    def __eq__(self, other):
        return self.path == other.path
        
-    @apply
-    def title_sorter():
+    @dynamic_property
+    def title_sorter(self):
        doc = '''String to sort the title. If absent, title is returned'''
        def fget(self):
            return re.sub('^\s*A\s+|^\s*The\s+|^\s*An\s+', '', self.title).rstrip()
        return property(doc=doc, fget=fget)
    
-    @apply
-    def thumbnail():
+    @dynamic_property
+    def thumbnail(self):
        return None
        
    def __str__(self):
@ -44,4 +44,3 @@ class BookList(_BookList):
    def set_tags(self, book, tags):
        pass

-
--- a/src/calibre/devices/usbms/cli.py
+++ b/src/calibre/devices/usbms/cli.py
@ -0,0 +1,82 @@
+# -*- coding: utf-8 -*-
+from __future__ import with_statement
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os, shutil
+
+from calibre.devices.errors import PathError
+
+class File(object):
+
+    def __init__(self, path):
+        stats = os.stat(path)
+        self.is_dir = os.path.isdir(path)
+        self.is_readonly = not os.access(path, os.W_OK)
+        self.ctime = stats.st_ctime
+        self.wtime = stats.st_mtime
+        self.size  = stats.st_size
+        if path.endswith(os.sep):
+            path = path[:-1]
+        self.path = path
+        self.name = os.path.basename(path)
+
+
+class CLI(object):
+
+    def get_file(self, path, outfile, end_session=True):
+        path = self.munge_path(path)
+        with open(path, 'rb') as src:
+            shutil.copyfileobj(src, outfile, 10*1024*1024)
+
+    def put_file(self, infile, path, replace_file=False, end_session=True):
+        path = self.munge_path(path)
+        if os.path.isdir(path):
+            path = os.path.join(path, infile.name)
+        if not replace_file and os.path.exists(path):
+            raise PathError('File already exists: ' + path)
+        dest = open(path, 'wb')
+        shutil.copyfileobj(infile, dest, 10*1024*1024)
+        dest.flush()
+        dest.close()
+
+    def munge_path(self, path):
+        if path.startswith('/') and not (path.startswith(self._main_prefix) or \
+            (self._card_a_prefix and path.startswith(self._card_a_prefix)) or \
+            (self._card_b_prefix and path.startswith(self._card_b_prefix))):
+            path = self._main_prefix + path[1:]
+        elif path.startswith('carda:'):
+            path = path.replace('carda:', self._card_prefix[:-1])
+        elif path.startswith('cardb:'):
+            path = path.replace('cardb:', self._card_prefix[:-1])
+        return path
+
+    def list(self, path, recurse=False, end_session=True, munge=True):
+        if munge:
+            path = self.munge_path(path)
+        if os.path.isfile(path):
+            return [(os.path.dirname(path), [File(path)])]
+        entries = [File(os.path.join(path, f)) for f in os.listdir(path)]
+        dirs = [(path, entries)]
+        for _file in entries:
+            if recurse and _file.is_dir:
+                dirs[len(dirs):] = self.list(_file.path, recurse=True, munge=False)
+        return dirs
+
+    def mkdir(self, path, end_session=True):
+        if self.SUPPORTS_SUB_DIRS:
+            path = self.munge_path(path)
+            os.mkdir(path)
+
+    def rm(self, path, end_session=True):
+        path = self.munge_path(path)
+        self.delete_books([path])
+
+    def touch(self, path, end_session=True):
+        path = self.munge_path(path)
+        if not os.path.exists(path):
+            open(path, 'w').close()
+        if not os.path.isdir(path):
+            os.utime(path, None)
--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@ -8,11 +8,12 @@ device. This class handles device detection.

 import os, subprocess, time, re

-from calibre.devices.interface import Device as _Device
+from calibre.devices.interface import DevicePlugin
 from calibre.devices.errors import DeviceError
+from calibre.devices.usbms.deviceconfig import DeviceConfig
 from calibre import iswindows, islinux, isosx, __appname__

-class Device(_Device):
+class Device(DeviceConfig, DevicePlugin):
    '''
    This class provides logic common to all drivers for devices that export themselves
    as USB Mass Storage devices. If you are writing such a driver, inherit from this
@ -25,10 +26,12 @@ class Device(_Device):

    VENDOR_NAME = None
    WINDOWS_MAIN_MEM = None
-    WINDOWS_CARD_MEM = None
+    WINDOWS_CARD_A_MEM = None
+    WINDOWS_CARD_B_MEM = None

    OSX_MAIN_MEM = None
-    OSX_CARD_MEM = None
+    OSX_CARD_A_MEM = None
+    OSX_CARD_B_MEM = None

    MAIN_MEMORY_VOLUME_LABEL  = ''
    STORAGE_CARD_VOLUME_LABEL = ''
@ -63,18 +66,30 @@ class Device(_Device):
          </match>
      </match>
  </device>
+  <device>
+      <match key="info.category" string="volume">
+          <match key="@info.parent:@info.parent:@info.parent:@info.parent:usb.vendor_id" int="%(vendor_id)s">
+              <match key="@info.parent:@info.parent:@info.parent:@info.parent:usb.product_id" int="%(product_id)s">
+                %(BCD_start)s
+                  <match key="@info.parent:storage.lun" int="%(lun2)d">
+                          <merge key="volume.label" type="string">%(storage_card)s</merge>
+                          <merge key="%(app)s.cardvolume" type="string">%(deviceclass)s</merge>
+                  </match>
+                %(BCD_end)s
+              </match>
+          </match>
+      </match>
+  </device>
 '''
-    FDI_BCD_TEMPLATE = '<match key="@info.parent:@info.parent:@info.parent:@info.parent:usb.device_revision_bcd" int="%(bcd)s">'
    FDI_LUNS = {'lun0':0, 'lun1':1, 'lun2':2}
+    FDI_BCD_TEMPLATE = '<match key="@info.parent:@info.parent:@info.parent:@info.parent:usb.device_revision_bcd" int="%(bcd)s">'

-
-    def __init__(self, key='-1', log_packets=False, report_progress=None) :
-        self._main_prefix = self._card_prefix = None
+    def reset(self, key='-1', log_packets=False, report_progress=None) :
+        self._main_prefix = self._card_a_prefix = self._card_b_prefix = None

    @classmethod
    def get_fdi(cls):
        fdi = ''
-
        for vid in cls.VENDOR_ID:
            for pid in cls.PRODUCT_ID:
                fdi_base_values = dict(
@ -85,7 +100,6 @@ class Device(_Device):
                                       main_memory=cls.MAIN_MEMORY_VOLUME_LABEL,
                                       storage_card=cls.STORAGE_CARD_VOLUME_LABEL,
                                  )
-
                fdi_base_values.update(cls.FDI_LUNS)

                if cls.BCD is None:
@ -105,7 +119,7 @@ class Device(_Device):
        self.report_progress = report_progress

    def card_prefix(self, end_session=True):
-        return self._card_prefix
+        return (self._card_a_prefix, self._card_b_prefix)

    @classmethod
    def _windows_space(cls, prefix):
@ -125,34 +139,41 @@ class Device(_Device):
        return total_clusters * mult, free_clusters * mult

    def total_space(self, end_session=True):
-        msz = csz = 0
+        msz = casz = cbsz = 0
        if not iswindows:
            if self._main_prefix is not None:
                stats = os.statvfs(self._main_prefix)
                msz = stats.f_frsize * (stats.f_blocks + stats.f_bavail - stats.f_bfree)
-            if self._card_prefix is not None:
-                stats = os.statvfs(self._card_prefix)
-                csz = stats.f_frsize * (stats.f_blocks + stats.f_bavail - stats.f_bfree)
+            if self._card_a_prefix is not None:
+                stats = os.statvfs(self._card_a_prefix)
+                casz = stats.f_frsize * (stats.f_blocks + stats.f_bavail - stats.f_bfree)
+            if self._card_b_prefix is not None:
+                stats = os.statvfs(self._card_b_prefix)
+                cbsz = stats.f_frsize * (stats.f_blocks + stats.f_bavail - stats.f_bfree)
        else:
            msz = self._windows_space(self._main_prefix)[0]
-            csz = self._windows_space(self._card_prefix)[0]
+            casz = self._windows_space(self._card_a_prefix)[0]
+            cbsz = self._windows_space(self._card_b_prefix)[0]

-        return (msz, 0, csz)
+        return (msz, casz, cbsz)

    def free_space(self, end_session=True):
-        msz = csz = 0
+        msz = casz = cbsz = 0
        if not iswindows:
            if self._main_prefix is not None:
                stats = os.statvfs(self._main_prefix)
                msz = stats.f_frsize * stats.f_bavail
-            if self._card_prefix is not None:
-                stats = os.statvfs(self._card_prefix)
-                csz = stats.f_frsize * stats.f_bavail
+            if self._card_a_prefix is not None:
+                stats = os.statvfs(self._card_a_prefix)
+                casz = stats.f_frsize * stats.f_bavail
+            if self._card_b_prefix is not None:
+                stats = os.statvfs(self._card_b_prefix)
+                cbsz = stats.f_frsize * stats.f_bavail
        else:
            msz = self._windows_space(self._main_prefix)[1]
            csz = self._windows_space(self._card_prefix)[1]

-        return (msz, 0, csz)
+        return (msz, casz, cbsz)

    def windows_match_device(self, pnp_id, device_id):
        pnp_id = pnp_id.upper()
@ -193,10 +214,12 @@ class Device(_Device):
        for drive in c.Win32_DiskDrive():
            if self.windows_match_device(str(drive.PNPDeviceID), self.WINDOWS_MAIN_MEM):
                drives['main'] = self.windows_get_drive_prefix(drive)
-            elif self.windows_match_device(str(drive.PNPDeviceID), self.WINDOWS_CARD_MEM):
-                drives['card'] = self.windows_get_drive_prefix(drive)
+            elif self.windows_match_device(str(drive.PNPDeviceID), self.WINDOWS_CARD_A_MEM):
+                drives['carda'] = self.windows_get_drive_prefix(drive)
+            elif self.windows_match_device(str(drive.PNPDeviceID), self.WINDOWS_CARD_B_MEM):
+                drives['cardb'] = self.windows_get_drive_prefix(drive)

-            if 'main' in drives.keys() and 'card' in drives.keys():
+            if 'main' in drives.keys() and 'carda' in drives.keys() and 'cardb' in drives.keys():
                break

        if 'main' not in drives:
@ -206,7 +229,8 @@ class Device(_Device):

        drives = self.windows_sort_drives(drives)
        self._main_prefix = drives.get('main')
-        self._card_prefix = drives.get('card', None)
+        self._card_a_prefix = drives.get('carda', None)
+        self._card_b_prefix = drives.get('cardb', None)

    @classmethod
    def run_ioreg(cls, raw=None):
@ -237,9 +261,11 @@ class Device(_Device):
        for i, line in enumerate(lines):
            if self.OSX_MAIN_MEM is not None and line.strip().endswith('<class IOMedia>') and self.OSX_MAIN_MEM in line:
                get_dev_node(lines[i+1:], 'main')
-            if self.OSX_CARD_MEM is not None and line.strip().endswith('<class IOMedia>') and self.OSX_CARD_MEM in line:
-                get_dev_node(lines[i+1:], 'card')
-            if len(names.keys()) == 2:
+            if self.OSX_CARD_A_MEM is not None and line.strip().endswith('<class IOMedia>') and self.OSX_CARD_A_MEM in line:
+                get_dev_node(lines[i+1:], 'carda')
+            if self.OSX_CARD_B_MEM is not None and line.strip().endswith('<class IOMedia>') and self.OSX_CARD_B_MEM in line:
+                get_dev_node(lines[i+1:], 'cardb')
+            if len(names.keys()) == 3:
                break
        return names

@ -251,10 +277,18 @@ class Device(_Device):
            raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.')%self.__class__.__name__)
        main_pat = dev_pat % names['main']
        self._main_prefix = re.search(main_pat, mount).group(2) + os.sep
-        card_pat = names['card'] if 'card' in names.keys() else None
-        if card_pat is not None:
-            card_pat = dev_pat % card_pat
-            self._card_prefix = re.search(card_pat, mount).group(2) + os.sep
+        card_a_pat = names['carda'] if 'carda' in names.keys() else None
+        card_b_pat = names['cardb'] if 'cardb' in names.keys() else None
+
+        def get_card_prefix(pat):
+            if pat is not None:
+                pat = dev_pat % pat
+                return re.search(pat, mount).group(2) + os.sep
+            else:
+                return None
+
+        self._card_a_prefix = get_card_prefix(card_a_pat)
+        self._card_b_prefix = get_card_prefix(card_b_pat)

    def open_linux(self):
        import dbus
@ -287,21 +321,24 @@ class Device(_Device):
        if not self._main_prefix:
            raise DeviceError('Could not open device for reading. Try a reboot.')

-        self._card_prefix = None
+        self._card_a_prefix = self._card_b_prefix = None
        cards = hm.FindDeviceStringMatch(__appname__+'.cardvolume', self.__class__.__name__)

-        for dev in cards:
+        def mount_card(dev):
            try:
-                self._card_prefix = conditional_mount(dev)+os.sep
-                break
+                return conditional_mount(dev)+os.sep
            except:
                import traceback
                print traceback
-                continue
+
+        if len(cards) >= 1:
+            self._card_a_prefix = mount_card(cards[0])
+        if len(cards) >=2:
+            self._card_b_prefix = mount_card(cards[1])

    def open(self):
        time.sleep(5)
-        self._main_prefix = self._card_prefix = None
+        self._main_prefix = self._card_a_prefix = self._card_b_prefix = None
        if islinux:
            try:
                self.open_linux()
--- a/src/calibre/devices/usbms/deviceconfig.py
+++ b/src/calibre/devices/usbms/deviceconfig.py
@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.utils.config import Config, ConfigProxy
+
+class DeviceConfig(object):
+    '''
+    Adds a per-driver ``format_map`` setting to a device driver class.
+
+    The concrete class is expected to define ``FORMATS``; it is used as the
+    default value of ``format_map`` and is handed to the configuration
+    widget.
+    '''
+
+    HELP_MESSAGE = _('Ordered list of formats the device will accept')
+
+    @classmethod
+    def _config(cls):
+        '''
+        Build the :class:`Config` object for this driver. The settings are
+        named after the concrete driver class.
+        '''
+        klass = cls if isinstance(cls, type) else cls.__class__
+        c = Config('device_drivers_%s' % klass.__name__, _('settings for device drivers'))
+        c.add_opt('format_map', default=cls.FORMATS,  help=cls.HELP_MESSAGE)
+        return c
+
+    @classmethod
+    def _configProxy(cls):
+        '''Return a :class:`ConfigProxy` wrapping :meth:`_config`.'''
+        return ConfigProxy(cls._config())
+
+    @classmethod
+    def config_widget(cls):
+        '''Return a widget for editing the current settings.'''
+        # Imported here so that the GUI is only loaded when a widget is
+        # actually requested.
+        from calibre.gui2.device_drivers.configwidget import ConfigWidget
+        cw = ConfigWidget(cls.settings(), cls.FORMATS)
+        return cw
+
+    @classmethod
+    def save_settings(cls, config_widget):
+        '''Store the ``format_map`` chosen in `config_widget`.'''
+        cls._configProxy()['format_map'] = config_widget.format_map()
+
+    @classmethod
+    def settings(cls):
+        '''Return the parsed settings of this driver.'''
+        return cls._config().parse()
+
+    # A classmethod like its siblings: it only reads class state, and this
+    # way it can be called on the class as well as on an instance.
+    @classmethod
+    def customization_help(cls, gui=False):
+        '''Return the help text for the driver settings. `gui` is unused.'''
+        return cls.HELP_MESSAGE
+
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@ -10,71 +10,89 @@ for a particular device.
 import os, fnmatch, shutil
 from itertools import cycle

-from calibre.ebooks.metadata.meta import metadata_from_formats, path_to_ext
 from calibre.ebooks.metadata import authors_to_string
+from calibre.devices.usbms.cli import CLI
 from calibre.devices.usbms.device import Device
 from calibre.devices.usbms.books import BookList, Book
-from calibre.devices.errors import FreeSpaceError, PathError
+from calibre.devices.errors import DeviceError, FreeSpaceError
 from calibre.devices.mime import mime_type_ext

-class File(object):
-    def __init__(self, path):
-        stats = os.stat(path)
-        self.is_dir = os.path.isdir(path)
-        self.is_readonly = not os.access(path, os.W_OK)
-        self.ctime = stats.st_ctime
-        self.wtime = stats.st_mtime
-        self.size  = stats.st_size
-        if path.endswith(os.sep):
-            path = path[:-1]
-        self.path = path
-        self.name = os.path.basename(path)
+# CLI must come before Device as it implements the CLI functions that
+# are inherited from the device interface in Device.
+class USBMS(CLI, Device):
+
+    name           = 'USBMS Base Device Interface'
+    description    = _('Communicate with an eBook reader.')
+    author         = _('John Schember')
+    supported_platforms = ['windows', 'osx', 'linux']

-class USBMS(Device):
    FORMATS = []
    EBOOK_DIR_MAIN = ''
-    EBOOK_DIR_CARD = ''
+    EBOOK_DIR_CARD_A = ''
+    EBOOK_DIR_CARD_B = ''
    SUPPORTS_SUB_DIRS = False
    CAN_SET_METADATA = False

-    def __init__(self, key='-1', log_packets=False, report_progress=None):
-        Device.__init__(self, key=key, log_packets=log_packets,
+    def reset(self, key='-1', log_packets=False, report_progress=None):
+        Device.reset(self, key=key, log_packets=log_packets,
                        report_progress=report_progress)

    def get_device_information(self, end_session=True):
+        self.report_progress(1.0, _('Get device information...'))
        return (self.__class__.__name__, '', '', '')

-    def books(self, oncard=False, end_session=True):
+    def books(self, oncard=None, end_session=True):
+        from calibre.ebooks.metadata.meta import path_to_ext
        bl = BookList()

-        if oncard and self._card_prefix is None:
+        if oncard == 'carda' and not self._card_a_prefix:
+            self.report_progress(1.0, _('Getting list of books on device...'))
+            return bl
+        elif oncard == 'cardb' and not self._card_b_prefix:
+            self.report_progress(1.0, _('Getting list of books on device...'))
+            return bl
+        elif oncard and oncard != 'carda' and oncard != 'cardb':
+            self.report_progress(1.0, _('Getting list of books on device...'))
            return bl

-        prefix = self._card_prefix if oncard else self._main_prefix
-        ebook_dir = self.EBOOK_DIR_CARD if oncard else self.EBOOK_DIR_MAIN
+        prefix = self._card_a_prefix if oncard == 'carda' else self._card_b_prefix if oncard == 'cardb' else self._main_prefix
+        ebook_dir = self.EBOOK_DIR_CARD_A if oncard == 'carda' else self.EBOOK_DIR_CARD_B if oncard == 'cardb' else self.EBOOK_DIR_MAIN

        # Get all books in the ebook_dir directory
        if self.SUPPORTS_SUB_DIRS:
            for path, dirs, files in os.walk(os.path.join(prefix, ebook_dir)):
                # Filter out anything that isn't in the list of supported ebook types
                for book_type in self.FORMATS:
-                    for filename in fnmatch.filter(files, '*.%s' % (book_type)):
+                    match = fnmatch.filter(files, '*.%s' % (book_type))
+                    for i, filename in enumerate(match):
+                        self.report_progress((i+1) / float(len(match)), _('Getting list of books on device...'))
                        bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
        else:
            path = os.path.join(prefix, ebook_dir)
-            for filename in os.listdir(path):
+            paths = os.listdir(path)
+            for i, filename in enumerate(paths):
+                self.report_progress((i+1) / float(len(paths)), _('Getting list of books on device...'))
                if path_to_ext(filename) in self.FORMATS:
                    bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
+                    
+        self.report_progress(1.0, _('Getting list of books on device...'))
+        
        return bl

    def _sanity_check(self, on_card, files):
-        if on_card and not self._card_prefix:
-            raise ValueError(_('The reader has no storage card connected.'))
+        if on_card == 'carda' and not self._card_a_prefix:
+            raise ValueError(_('The reader has no storage card in this slot.'))
+        elif on_card == 'cardb' and not self._card_b_prefix:
+            raise ValueError(_('The reader has no storage card in this slot.'))
+        elif on_card and on_card not in ('carda', 'cardb'):
+            raise DeviceError(_('The reader has no storage card in this slot.'))

-        if not on_card:
-            path = os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN)
+        if on_card == 'carda':
+            path = os.path.join(self._card_a_prefix, self.EBOOK_DIR_CARD_A)
+        elif on_card == 'cardb':
+            path = os.path.join(self._card_b_prefix, self.EBOOK_DIR_CARD_B)
        else:
-            path = os.path.join(self._card_prefix, self.EBOOK_DIR_CARD)
+            path = os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN)

        def get_size(obj):
            if hasattr(obj, 'seek'):
@ -87,13 +105,15 @@ class USBMS(Device):
        sizes = [get_size(f) for f in files]
        size = sum(sizes)

-        if on_card and size > self.free_space()[2] - 1024*1024:
-            raise FreeSpaceError(_("There is insufficient free space on the storage card"))
        if not on_card and size > self.free_space()[0] - 2*1024*1024:
            raise FreeSpaceError(_("There is insufficient free space in main memory"))
+        if on_card == 'carda' and size > self.free_space()[1] - 1024*1024:
+            raise FreeSpaceError(_("There is insufficient free space on the storage card"))
+        if on_card == 'cardb' and size > self.free_space()[2] - 1024*1024:
+            raise FreeSpaceError(_("There is insufficient free space on the storage card"))
        return path

-    def upload_books(self, files, names, on_card=False, end_session=True,
+    def upload_books(self, files, names, on_card=None, end_session=True,
                     metadata=None):

        path = self._sanity_check(on_card, files)
@ -102,7 +122,7 @@ class USBMS(Device):
        names = iter(names)
        metadata = iter(metadata)

-        for infile in files:
+        for i, infile in enumerate(files):
            newpath = path

            if self.SUPPORTS_SUB_DIRS:
@ -110,11 +130,21 @@ class USBMS(Device):

                if 'tags' in mdata.keys():
                    for tag in mdata['tags']:
-                        if tag.startswith('/'):
+                        if tag.startswith(_('News')):
+                            newpath = os.path.join(newpath, 'news')
+                            newpath = os.path.join(newpath, mdata.get('title', ''))
+                            newpath = os.path.join(newpath, mdata.get('timestamp', ''))
+                            break
+                        elif tag.startswith('/'):
                            newpath += tag
                            newpath = os.path.normpath(newpath)
                            break

+                if newpath == path:
+                    newpath = os.path.join(newpath,
+                        mdata.get('authors', _('Unknown')),
+                        mdata.get('title', _('Unknown')))
+
            if not os.path.exists(newpath):
                os.makedirs(newpath)

@ -132,22 +162,28 @@ class USBMS(Device):
            else:
                shutil.copy2(infile, filepath)

+            self.report_progress((i+1) / float(len(files)), _('Transferring books to device...'))
+
+        self.report_progress(1.0, _('Transferring books to device...'))
+        
        return zip(paths, cycle([on_card]))

-    @classmethod
-    def add_books_to_metadata(cls, locations, metadata, booklists):
-        for location in locations:
+    def add_books_to_metadata(self, locations, metadata, booklists):
+        for i, location in enumerate(locations):
+            self.report_progress((i+1) / float(len(locations)), _('Adding books to device metadata listing...'))
            path = location[0]
-            on_card = 1 if location[1] else 0
+            blist = 2 if location[1] == 'cardb' else 1 if location[1] == 'carda' else 0

-            book = cls.book_from_path(path)
+            book = self.book_from_path(path)

-            if not book in booklists[on_card]:
-                booklists[on_card].append(book)
+            if not book in booklists[blist]:
+                booklists[blist].append(book)
+        self.report_progress(1.0, _('Adding books to device metadata listing...'))


    def delete_books(self, paths, end_session=True):
-        for path in paths:
+        for i, path in enumerate(paths):
+            self.report_progress((i+1) / float(len(paths)), _('Removing books from device...'))
            if os.path.exists(path):
                # Delete the ebook
                os.unlink(path)
@ -156,79 +192,31 @@ class USBMS(Device):
                        os.removedirs(os.path.dirname(path))
                    except:
                        pass
+        self.report_progress(1.0, _('Removing books from device...'))

-    @classmethod
-    def remove_books_from_metadata(cls, paths, booklists):
-        for path in paths:
+    def remove_books_from_metadata(self, paths, booklists):
+        for i, path in enumerate(paths):
+            self.report_progress((i+1) / float(len(paths)), _('Removing books from device metadata listing...'))
            for bl in booklists:
                for book in bl:
                    if path.endswith(book.path):
                        bl.remove(book)
+        self.report_progress(1.0, _('Removing books from device metadata listing...'))

    def sync_booklists(self, booklists, end_session=True):
        # There is no meta data on the device to update. The device is treated
        # as a mass storage device and does not use a meta data xml file like
        # the Sony Readers.
-        pass
-
-    def get_file(self, path, outfile, end_session=True):
-        path = self.munge_path(path)
-        with open(path, 'rb') as src:
-            shutil.copyfileobj(src, outfile, 10*1024*1024)
-
-    def put_file(self, infile, path, replace_file=False, end_session=True):
-        path = self.munge_path(path)
-        if os.path.isdir(path):
-            path = os.path.join(path, infile.name)
-        if not replace_file and os.path.exists(path):
-            raise PathError('File already exists: ' + path)
-        dest = open(path, 'wb')
-        shutil.copyfileobj(infile, dest, 10*1024*1024)
-        dest.flush()
-        dest.close()
-
-    def munge_path(self, path):
-        if path.startswith('/') and not (path.startswith(self._main_prefix) or \
-            (self._card_prefix and path.startswith(self._card_prefix))):
-            path = self._main_prefix + path[1:]
-        elif path.startswith('card:'):
-            path = path.replace('card:', self._card_prefix[:-1])
-        return path
-
-    def list(self, path, recurse=False, end_session=True, munge=True):
-        if munge:
-            path = self.munge_path(path)
-        if os.path.isfile(path):
-            return [(os.path.dirname(path), [File(path)])]
-        entries = [File(os.path.join(path, f)) for f in os.listdir(path)]
-        dirs = [(path, entries)]
-        for _file in entries:
-            if recurse and _file.is_dir:
-                dirs[len(dirs):] = self.list(_file.path, recurse=True, munge=False)
-        return dirs
-
-    def mkdir(self, path, end_session=True):
-        if self.SUPPORTS_SUB_DIRS:
-            path = self.munge_path(path)
-            os.mkdir(path)
-
-    def rm(self, path, end_session=True):
-        path = self.munge_path(path)
-        self.delete_books([path])
-
-    def touch(self, path, end_session=True):
-        path = self.munge_path(path)
-        if not os.path.exists(path):
-            open(path, 'w').close()
-        if not os.path.isdir(path):
-            os.utime(path, None)
+        self.report_progress(1.0, _('Sending metadata to device...'))

    @classmethod
    def metadata_from_path(cls, path):
+        from calibre.ebooks.metadata.meta import metadata_from_formats
        return metadata_from_formats([path])

    @classmethod
    def book_from_path(cls, path):
+        from calibre.ebooks.metadata.meta import path_to_ext
        fileext = path_to_ext(path)
        mi = cls.metadata_from_path(path)
        mime = mime_type_ext(fileext)
--- a/src/calibre/ebooks/init.py
+++ b/src/calibre/ebooks/init.py
@ -60,6 +60,8 @@ class HTMLRenderer(object):
 def render_html(path_to_html, width=590, height=750):
    from PyQt4.QtWebKit import QWebPage
    from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize
+    from calibre.gui2 import is_ok_to_use_qt
+    if not is_ok_to_use_qt(): return None
    path_to_html = os.path.abspath(path_to_html)
    with CurrentDir(os.path.dirname(path_to_html)):
        page = QWebPage()
--- a/src/calibre/ebooks/lrf/comic/init.py
+++ b/src/calibre/ebooks/lrf/comic/init.py
--- a/src/calibre/ebooks/comic/input.py
+++ b/src/calibre/ebooks/comic/input.py
@ -0,0 +1,473 @@
+from __future__ import with_statement
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+'''
+Based on ideas from comiclrf created by FangornUK.
+'''
+
+import os, shutil, traceback, textwrap, time
+from Queue import Empty
+
+from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
+from calibre import extract, CurrentDir, prints
+from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre.utils.ipc.server import Server
+from calibre.utils.ipc.job import ParallelJob
+
+def extract_comic(path_to_comic_file):
+    '''
+    Unpack the comic archive into a freshly created temporary directory and
+    return what :class:`PersistentTemporaryDirectory` produced for it.
+    '''
+    destination = PersistentTemporaryDirectory(suffix='_comic_extract')
+    extract(path_to_comic_file, destination)
+    return destination
+
+def find_pages(dir, sort_on_mtime=False, verbose=False):
+    '''
+    Collect the page images of a previously un-archived comic.
+
+    :param dir: Directory in which extracted comic lives
+    :param sort_on_mtime: If True order pages by their last modified time.
+                          Otherwise, order them by file name.
+    :param verbose: If True print the names of the pages that were found.
+    :return: Sorted list of paths to the page images.
+    '''
+    suffixes = ('.jpeg', '.jpg', '.gif', '.png')
+    pages = []
+    for root, _subdirs, names in os.walk(dir):
+        for name in names:
+            candidate = os.path.join(root, name)
+            # Ignore anything that lives under a __MACOSX path
+            if '__MACOSX' in candidate:
+                continue
+            if candidate.lower().endswith(suffixes):
+                pages.append(candidate)
+
+    if sort_on_mtime:
+        sort_key = lambda x: os.stat(x).st_mtime
+    else:
+        sort_key = os.path.basename
+    pages.sort(key=sort_key)
+
+    if verbose:
+        prints('Found comic pages...')
+        prints('\t'+'\n\t'.join([os.path.basename(p) for p in pages]))
+    return pages
+
+class PageProcessor(list):
+    '''
+    Contains the actual image rendering logic. See :method:`render` and
+    :method:`process_pages`.
+    '''
+
+    def __init__(self, path_to_page, dest, opts, num):
+        '''
+        Remember the settings for one page and render it right away. The
+        paths of the rendered images end up as the items of this list.
+        '''
+        list.__init__(self)
+        self.rotate = False
+        self.dest, self.num = dest, num
+        self.opts, self.path_to_page = opts, path_to_page
+        self.render()
+
+
+    def render(self):
+        '''
+        Load the page image and hand it to :meth:`process_pages`.
+
+        For the first page (``num == 0``) a 60x80 ``thumbnail.png`` is
+        written to ``self.dest``. An image that is wider than it is high is
+        either marked for rotation (``opts.landscape``) or split into two
+        halves, ordered according to ``opts.right2left``.
+
+        :raises RuntimeError: If a wand cannot be created.
+        :raises IOError: If the image cannot be read.
+        '''
+        import calibre.utils.PythonMagickWand as pw
+        img = pw.NewMagickWand()
+        if img < 0:
+            raise RuntimeError('Cannot create wand.')
+        if not pw.MagickReadImage(img, self.path_to_page):
+            # The format string used to end in a bare '%', which raised
+            # ValueError instead of the intended IOError.
+            raise IOError('Failed to read image from: %s'%self.path_to_page)
+        width  = pw.MagickGetImageWidth(img)
+        height = pw.MagickGetImageHeight(img)
+        if self.num == 0: # First image so create a thumbnail from it
+            thumb = pw.CloneMagickWand(img)
+            if thumb < 0:
+                raise RuntimeError('Cannot create wand.')
+            pw.MagickThumbnailImage(thumb, 60, 80)
+            pw.MagickWriteImage(thumb, os.path.join(self.dest, 'thumbnail.png'))
+            pw.DestroyMagickWand(thumb)
+        self.pages = [img]
+        if width > height:
+            if self.opts.landscape:
+                self.rotate = True
+            else:
+                # Work on two clones and drop the original; each clone is
+                # cropped to one half of the image.
+                split1, split2 = map(pw.CloneMagickWand, (img, img))
+                pw.DestroyMagickWand(img)
+                if split1 < 0 or split2 < 0:
+                    raise RuntimeError('Cannot create wand.')
+                pw.MagickCropImage(split1, (width/2)-1, height, 0, 0)
+                pw.MagickCropImage(split2, (width/2)-1, height, width/2, 0 )
+                self.pages = [split2, split1] if self.opts.right2left else [split1, split2]
+        self.process_pages()
+
+    def process_pages(self):
+        '''
+        Post-process every wand in ``self.pages`` and write it to disk.
+
+        Each image is optionally rotated, trimmed and normalized, resized
+        according to ``opts.output_profile.comic_screen_size``, optionally
+        sharpened, converted to grayscale, optionally despeckled and
+        quantized to ``opts.colors`` colors. The result is written to
+        ``<num>_<i>.png`` in ``self.dest`` and that path is appended to this
+        list. The pixel wand and the image wand are destroyed afterwards,
+        whether or not processing succeeded.
+        '''
+        import calibre.utils.PythonMagickWand as p
+        for i, wand in enumerate(self.pages):
+            pw = p.NewPixelWand()
+            try:
+                if pw < 0:
+                    raise RuntimeError('Cannot create wand.')
+                # White is used for the border color set below
+                p.PixelSetColor(pw, 'white')
+
+                p.MagickSetImageBorderColor(wand, pw)
+                if self.rotate:
+                    p.MagickRotateImage(wand, pw, -90)
+
+                # 25 percent fuzzy trim?
+                if not self.opts.disable_trim:
+                    p.MagickTrimImage(wand, 25*65535/100)
+                p.MagickSetImagePage(wand, 0,0,0,0)   #Clear page after trim, like a "+repage"
+                # Do the Photoshop "Auto Levels" equivalent
+                if not self.opts.dont_normalize:
+                    p.MagickNormalizeImage(wand)
+                sizex = p.MagickGetImageWidth(wand)
+                sizey = p.MagickGetImageHeight(wand)
+
+                SCRWIDTH, SCRHEIGHT = self.opts.output_profile.comic_screen_size
+
+                if self.opts.keep_aspect_ratio:
+                    # Preserve the aspect ratio by adding border
+                    aspect = float(sizex) / float(sizey)
+                    if aspect <= (float(SCRWIDTH) / float(SCRHEIGHT)):
+                        # Image is relatively narrower than the screen: fit
+                        # the height and pad left/right
+                        newsizey = SCRHEIGHT
+                        newsizex = int(newsizey * aspect)
+                        deltax = (SCRWIDTH - newsizex) / 2
+                        deltay = 0
+                    else:
+                        # Image is relatively wider than the screen: fit the
+                        # width and pad top/bottom
+                        newsizex = SCRWIDTH
+                        newsizey = int(newsizex / aspect)
+                        deltax = 0
+                        deltay = (SCRHEIGHT - newsizey) / 2
+                    p.MagickResizeImage(wand, newsizex, newsizey, p.CatromFilter, 1.0)
+                    p.MagickSetImageBorderColor(wand, pw)
+                    p.MagickBorderImage(wand, pw, deltax, deltay)
+                elif self.opts.wide:
+                    # Keep aspect and Use device height as scaled image width so landscape mode is clean
+                    aspect = float(sizex) / float(sizey)
+                    screen_aspect = float(SCRWIDTH) / float(SCRHEIGHT)
+                    # Get dimensions of the landscape mode screen
+                    # Add 25px back to height for the battery bar.
+                    wscreenx = SCRHEIGHT + 25
+                    wscreeny = int(wscreenx / screen_aspect)
+                    if aspect <= screen_aspect:
+                        newsizey = wscreeny
+                        newsizex = int(newsizey * aspect)
+                        deltax = (wscreenx - newsizex) / 2
+                        deltay = 0
+                    else:
+                        newsizex = wscreenx
+                        newsizey = int(newsizex / aspect)
+                        deltax = 0
+                        deltay = (wscreeny - newsizey) / 2
+                    p.MagickResizeImage(wand, newsizex, newsizey, p.CatromFilter, 1.0)
+                    p.MagickSetImageBorderColor(wand, pw)
+                    p.MagickBorderImage(wand, pw, deltax, deltay)
+                else:
+                    # Default: stretch the image to fill the whole screen
+                    p.MagickResizeImage(wand, SCRWIDTH, SCRHEIGHT, p.CatromFilter, 1.0)
+
+                if not self.opts.dont_sharpen:
+                    p.MagickSharpenImage(wand, 0.0, 1.0)
+
+                p.MagickSetImageType(wand, p.GrayscaleType)
+
+                if self.opts.despeckle:
+                    p.MagickDespeckleImage(wand)
+
+                p.MagickQuantizeImage(wand, self.opts.colors, p.RGBColorspace, 0, 1, 0)
+                dest = '%d_%d.png'%(self.num, i)
+                dest = os.path.join(self.dest, dest)
+                # The image is written to '<name>.png8' and then renamed.
+                # NOTE(review): presumably the '8' suffix makes ImageMagick
+                # pick its 8-bit PNG writer -- confirm.
+                p.MagickWriteImage(wand, dest+'8')
+                os.rename(dest+'8', dest)
+                self.append(dest)
+            finally:
+                if pw > 0:
+                    p.DestroyPixelWand(pw)
+                p.DestroyMagickWand(wand)
+
+def render_pages(tasks, dest, opts, notification=lambda x, y: x):
+    '''
+    Entry point for the job server.
+
+    Renders every ``(num, path)`` pair of `tasks` into `dest`. A page that
+    cannot be rendered is recorded instead of aborting the run. After each
+    page the outcome is printed and passed to `notification`.
+
+    :return: ``(pages, failures)``, the paths of the rendered images and the
+             source paths that could not be rendered.
+    '''
+    from calibre.utils.PythonMagickWand import ImageMagick
+    rendered, failed = [], []
+    with ImageMagick():
+        for num, path in tasks:
+            try:
+                rendered.extend(PageProcessor(path, dest, opts, num))
+                msg = _('Rendered %s')%path
+            except:
+                failed.append(path)
+                msg = _('Failed %s')%path
+                if opts.verbose:
+                    msg = msg + '\n' + traceback.format_exc()
+            prints(msg)
+            notification(0.5, msg)
+
+    return rendered, failed
+
+
+class Progress(object):
+    '''
+    Callable that counts its own invocations and reports the fraction
+    ``calls so far / total`` to `update`.
+    '''
+
+    def __init__(self, total, update):
+        self.done   = 0
+        self.update = update
+        self.total  = total
+
+    def __call__(self, percent, msg=''):
+        # `percent` is not used; progress is derived from the call count.
+        self.done = self.done + 1
+        fraction = float(self.done)/self.total
+        self.update(fraction, msg)
+
+def process_pages(pages, opts, update, tdir):
+    '''
+    Render all identified comic pages.
+
+    The pages are split into tasks by the job server, every task is run as
+    a ``render_pages`` job and the job notifications are forwarded to
+    `update` through a :class:`Progress` object.
+
+    :param pages: Paths of the page images to render.
+    :param opts: Conversion options, passed on to the jobs.
+    :param update: Callable taking ``(fraction, msg)``.
+    :param tdir: Directory the rendered pages are written to.
+    :return: ``(rendered_pages, failures)`` collected from all jobs.
+    :raises Exception: If any job failed; the message contains its log.
+    '''
+    # NOTE(review): the import result is not used here; presumably it is
+    # kept so that a missing ImageMagick fails before any job is started.
+    from calibre.utils.PythonMagickWand import ImageMagick
+    ImageMagick
+
+    progress = Progress(len(pages), update)
+    server = Server()
+    jobs = []
+    try:
+        for task in server.split(pages):
+            job = ParallelJob('render_pages', '', progress,
+                              args=[task, tdir, opts])
+            jobs.append(job)
+            server.add_job(job)
+        while True:
+            time.sleep(1)
+            running = False
+            for job in jobs:
+                # Drain all pending notifications of this job
+                while True:
+                    try:
+                        x = job.notifications.get_nowait()
+                        progress(*x)
+                    except Empty:
+                        break
+                job.update()
+                if not job.is_finished:
+                    running = True
+            if not running:
+                break
+    finally:
+        # Shut the server down even if polling was interrupted
+        server.close()
+
+    ans, failures = [], []
+    for job in jobs:
+        if job.failed:
+            raise Exception(_('Failed to process comic: \n\n%s')%
+                    job.log_file.read())
+        rendered, failures_ = job.result
+        ans += rendered
+        failures += failures_
+    return ans, failures
+
+
+class ComicInput(InputFormatPlugin):
+
+    name        = 'Comic Input'
+    author      = 'Kovid Goyal'
+    description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
+    file_types  = set(['cbz', 'cbr', 'cbc'])
+    is_image_collection = True
+
+    # Options specific to comic conversion. The names are read back as
+    # attributes of `opts`, e.g. PageProcessor.render checks opts.landscape.
+    options = set([
+        OptionRecommendation(name='colors', recommended_value=64,
+            help=_('Number of colors for grayscale image conversion. Default: %default')),
+        OptionRecommendation(name='dont_normalize', recommended_value=False,
+            help=_('Disable normalize (improve contrast) color range '
+            'for pictures. Default: False')),
+        OptionRecommendation(name='keep_aspect_ratio', recommended_value=False,
+            help=_('Maintain picture aspect ratio. Default is to fill the screen.')),
+        OptionRecommendation(name='dont_sharpen', recommended_value=False,
+            help=_('Disable sharpening.')),
+        OptionRecommendation(name='disable_trim', recommended_value=False,
+            help=_('Disable trimming of comic pages. For some comics, '
+                     'trimming might remove content as well as borders.')),
+        OptionRecommendation(name='landscape', recommended_value=False,
+            help=_("Don't split landscape images into two portrait images")),
+        OptionRecommendation(name='wide', recommended_value=False,
+            help=_("Keep aspect ratio and scale image using screen height as "
+            "image width for viewing in landscape mode.")),
+        OptionRecommendation(name='right2left', recommended_value=False,
+              help=_('Used for right-to-left publications like manga. '
+              'Causes landscape pages to be split into portrait pages '
+              'from right to left.')),
+        OptionRecommendation(name='despeckle', recommended_value=False,
+              help=_('Enable Despeckle. Reduces speckle noise. '
+              'May greatly increase processing time.')),
+        OptionRecommendation(name='no_sort', recommended_value=False,
+              help=_("Don't sort the files found in the comic "
+              "alphabetically by name. Instead use the order they were "
+              "added to the comic.")),
+        OptionRecommendation(name='no_process', recommended_value=False,
+              help=_("Apply no processing to the image")),
+        ])
+
+    # Values recommended for the general conversion options when the input
+    # is a comic, as (option name, value, recommendation level) tuples.
+    recommendations = set([
+        ('margin_left', 0, OptionRecommendation.HIGH),
+        ('margin_top',  0, OptionRecommendation.HIGH),
+        ('margin_right', 0, OptionRecommendation.HIGH),
+        ('margin_bottom', 0, OptionRecommendation.HIGH),
+        ('insert_blank_line', False, OptionRecommendation.HIGH),
+        ('remove_paragraph_spacing',  False, OptionRecommendation.HIGH),
+        ('dont_justify', True, OptionRecommendation.HIGH),
+        ('dont_split_on_pagebreaks', True, OptionRecommendation.HIGH),
+        ('chapter', None, OptionRecommendation.HIGH),
+        ('use_auto_toc', False, OptionRecommendation.HIGH),
+        ('page_breaks_before', None, OptionRecommendation.HIGH),
+        ('disable_font_rescaling', True, OptionRecommendation.HIGH),
+        ('linearize_tables', False, OptionRecommendation.HIGH),
+        ])
+
+    def get_comics_from_collection(self, stream):
+        '''
+        Unzip a comic collection and return the comics listed in it.
+
+        The archive must contain a UTF-8 encoded ``comics.txt`` with one
+        ``filename:title`` entry per line; the title is optional and
+        defaults to the file name without its extension. Entries whose file
+        is not readable are ignored.
+
+        :return: List of ``[title, path]`` pairs.
+        :raises ValueError: If ``comics.txt`` is missing or lists no
+                            readable comic.
+        '''
+        from calibre.libunzip import extract as zipextract
+        tdir = PersistentTemporaryDirectory('_comic_collection')
+        zipextract(stream, tdir)
+        comics = []
+        with CurrentDir(tdir):
+            if not os.path.exists('comics.txt'):
+                raise ValueError('%s is not a valid comic collection'
+                        %stream.name)
+            with open('comics.txt', 'rb') as index:
+                raw = index.read()
+            for line in raw.decode('utf-8').splitlines():
+                # An empty line would otherwise resolve to tdir itself,
+                # which is readable, and be registered as a comic.
+                if not line.strip():
+                    continue
+                fname, _sep, title = line.partition(':')
+                fname = os.path.join(tdir, *fname.split('/'))
+                if not title:
+                    title = os.path.basename(fname).rpartition('.')[0]
+                if os.access(fname, os.R_OK):
+                    comics.append([title, fname])
+        if not comics:
+            raise ValueError('%s has no comics'%stream.name)
+        return comics
+
+    def get_pages(self, comic, tdir2):
+        '''
+        Extract `comic` and return the paths of its pages inside `tdir2`.
+
+        With ``opts.no_process`` the page images are copied unchanged,
+        otherwise they are rendered by :func:`process_pages`. Pages that
+        fail to render are reported as warnings.
+
+        :raises ValueError: If the comic contains no pages, or none of them
+                            could be rendered.
+        '''
+        tdir  = extract_comic(comic)
+        new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort,
+                verbose=self.opts.verbose)
+        if not new_pages:
+            raise ValueError('Could not find any pages in the comic: %s'
+                    %comic)
+        if self.opts.no_process:
+            n2 = []
+            for page in new_pages:
+                n2.append(os.path.join(tdir2, os.path.basename(page)))
+                shutil.copyfile(page, n2[-1])
+            new_pages = n2
+        else:
+            new_pages, failures = process_pages(new_pages, self.opts,
+                    self.report_progress, tdir2)
+            if not new_pages:
+                raise ValueError('Could not find any valid pages in comic: %s'
+                        % comic)
+            if failures:
+                self.log.warning('Could not process the following pages '
+                '(run with --verbose to see why):')
+                for f in failures:
+                    self.log.warning('\t', f)
+        return new_pages
+
+    def get_images(self):
+        '''
+        Return the page image paths collected by the last call to
+        :meth:`convert`.
+        '''
+        images = self._images
+        return images
+
+    def convert(self, stream, opts, file_ext, log, accelerators):
+        '''
+        Convert a comic, or a collection of comics when `file_ext` is
+        ``'cbc'``, into page images with XHTML wrappers, and write
+        ``metadata.opf`` and ``toc.ncx`` describing them into the current
+        directory.
+
+        `stream` is closed once the comics have been located. The page
+        images are afterwards available through :meth:`get_images`.
+
+        :return: Absolute path of the generated ``metadata.opf``.
+        :raises ValueError: If no comic pages were found.
+        '''
+        from calibre.ebooks.metadata import MetaInformation
+        from calibre.ebooks.metadata.opf2 import OPFCreator
+        from calibre.ebooks.metadata.toc import TOC
+
+        self.opts, self.log= opts, log
+        if file_ext == 'cbc':
+            comics_ = self.get_comics_from_collection(stream)
+        else:
+            comics_ = [['Comic', os.path.abspath(stream.name)]]
+        stream.close()
+        comics = []
+        for i, x in enumerate(comics_):
+            title, fname = x
+            # A single comic goes into the current directory, the comics
+            # of a collection get one sub-directory each
+            cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.'
+            cdir = os.path.abspath(cdir)
+            if not os.path.exists(cdir):
+                os.makedirs(cdir)
+            pages = self.get_pages(fname, cdir)
+            if not pages: continue
+            wrappers = self.create_wrappers(pages)
+            comics.append((title, pages, wrappers))
+
+        if not comics:
+            raise ValueError('No comic pages found in %s'%stream.name)
+
+        mi  = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
+            [_('Unknown')])
+        opf = OPFCreator(os.path.abspath('.'), mi)
+        entries = []
+
+        def href(x):
+            # File name only for a single comic, otherwise 'directory/file'
+            if len(comics) == 1: return os.path.basename(x)
+            return '/'.join(x.split(os.sep)[-2:])
+
+        for comic in comics:
+            pages, wrappers = comic[1:]
+            entries += [(w, None) for w in map(href, wrappers)] + \
+                    [(x, None) for x in map(href, pages)]
+        opf.create_manifest(entries)
+        spine = []
+        for comic in comics:
+            spine.extend(map(href, comic[2]))
+        self._images = []
+        for comic in comics:
+            self._images.extend(comic[1])
+        opf.create_spine(spine)
+        toc = TOC()
+        if len(comics) == 1:
+            wrappers = comics[0][2]
+            for i, x in enumerate(wrappers):
+                toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
+                        play_order=i)
+        else:
+            po = 0
+            for comic in comics:
+                po += 1
+                wrappers = comic[2]
+                stoc = toc.add_item(href(wrappers[0]),
+                        None, comic[0], play_order=po)
+                for i, x in enumerate(wrappers):
+                    stoc.add_item(href(x), None,
+                            _('Page')+' %d'%(i+1), play_order=po)
+                    po += 1
+        opf.set_toc(toc)
+        # Close both files before returning so that their content is on
+        # disk when the caller reads the returned path
+        with open('metadata.opf', 'wb') as m:
+            with open('toc.ncx', 'wb') as n:
+                opf.render(m, n, 'toc.ncx')
+        return os.path.abspath('metadata.opf')
+
+    def create_wrappers(self, pages):
+        '''
+        Write one XHTML file per page image that displays that image.
+
+        The files are named ``page_<n>.xhtml`` (counting from 1) and are
+        all created in the directory of the first page.
+
+        :param pages: Non-empty list of page image paths.
+        :return: List of the paths of the created XHTML files.
+        '''
+        from calibre.ebooks.oeb.base import XHTML_NS
+        wrappers = []
+        WRAPPER = textwrap.dedent('''\
+        <html xmlns="%s">
+            <head>
+                <title>Page #%d</title>
+                <style type="text/css">
+                    @page { margin:0pt; padding: 0pt}
+                    body { margin: 0pt; padding: 0pt}
+                    div { text-align: center }
+                </style>
+            </head>
+            <body>
+                <div>
+                    <img src="%s" alt="comic page #%d" />
+                </div>
+            </body>
+        </html>
+        ''')
+        base = os.path.dirname(pages[0])
+        for i, page in enumerate(pages):
+            wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1)
+            wrapper_path = os.path.join(base, 'page_%d.xhtml'%(i+1))
+            with open(wrapper_path, 'wb') as f:
+                f.write(wrapper)
+            wrappers.append(wrapper_path)
+        return wrappers
+
--- a/src/calibre/ebooks/compression/init.py
+++ b/src/calibre/ebooks/compression/init.py
@ -0,0 +1,5 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
--- a/src/calibre/ebooks/compression/palmdoc.c
+++ b/src/calibre/ebooks/compression/palmdoc.c
@ -0,0 +1,204 @@
+/*
+:mod:`cPalmdoc` -- Palmdoc compression/decompression
+=====================================================
+
+.. module:: cPalmdoc
+    :platform: All
+    :synopsis: Compression and decompression of Palmdoc data, implemented in C for speed
+
+.. moduleauthor:: Kovid Goyal <kovid@kovidgoyal.net> Copyright 2009
+
+*/
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <stdio.h>
+
+#define DELTA sizeof(Byte)*4096
+
+#define BUFFER 6000
+
+#define MIN(x, y) ( ((x) < (y)) ? (x) : (y) )
+
+typedef unsigned short int Byte;
+typedef struct {
+	Byte	*data;
+	Py_ssize_t len;
+} buffer;
+
+#ifdef	bool
+#undef	bool
+#endif
+#define	bool		int
+
+#ifdef	false
+#undef	false
+#endif
+#define	false		0
+
+#ifdef	true
+#undef	true
+#endif
+#define	true		1
+
+#define CHAR(x) (( (x) > 127 ) ? (x)-256 : (x))
+
+/*
+ * decompress(bytestring) -> decompressed bytestring
+ *
+ * Decode a PalmDoc compressed byte string. The token types are:
+ *   0x01-0x08   copy that many following bytes verbatim
+ *   0x00, 0x09-0x7F   the byte stands for itself
+ *   0xC0-0xFF   a space followed by (byte ^ 0x80)
+ *   0x80-0xBF   first byte of a two byte repeat sequence: 11 bits of
+ *               distance back into the output and 3 bits of (length - 3)
+ *
+ * A literal run that is cut short by the end of the input copies only the
+ * bytes that are present. A repeat sequence that is cut short, or whose
+ * distance does not point into the already decoded output, raises
+ * ValueError instead of reading outside the buffers.
+ */
+static PyObject *
+cpalmdoc_decompress(PyObject *self, PyObject *args) {
+    const char *_input = NULL; Py_ssize_t input_len = 0;
+    const unsigned char *input = NULL;
+    char *output = NULL;
+    Py_ssize_t i = 0, o = 0, di, n;
+    unsigned int c;
+    PyObject *ans = NULL;
+
+    if (!PyArg_ParseTuple(args, "t#", &_input, &input_len))
+        return NULL;
+    // View the input as unsigned bytes (0-255); no widened copy is needed
+    input = (const unsigned char *)_input;
+
+    // No token expands by more than a factor of five (a two byte repeat
+    // sequence yields at most ten bytes), so a buffer of five times the
+    // input length can never overflow.
+    if (input_len > PY_SSIZE_T_MAX / 5) return PyErr_NoMemory();
+    output = (char *)PyMem_Malloc(input_len > 0 ? 5 * (size_t)input_len : 1);
+    if (output == NULL) return PyErr_NoMemory();
+
+    while (i < input_len) {
+        c = input[i++];
+        if (c >= 1 && c <= 8) { // copy 'c' bytes
+            while (c-- && i < input_len) output[o++] = (char)input[i++];
+        }
+        else if (c <= 0x7F)  // 0, 09-7F = self
+            output[o++] = (char)c;
+
+        else if (c >= 0xC0) { // space + ASCII char
+            output[o++] = ' ';
+            output[o++] = (char)(c ^ 0x80);
+        }
+        else { // 80-BF repeat sequences
+            if (i >= input_len) {
+                PyErr_SetString(PyExc_ValueError,
+                        "Truncated repeat sequence in palmdoc data");
+                PyMem_Free(output);
+                return NULL;
+            }
+            c = (c << 8) + input[i++];
+            di = (c & 0x3FFF) >> 3;
+            // The distance must point at bytes that were already decoded
+            if (di < 1 || di > o) {
+                PyErr_SetString(PyExc_ValueError,
+                        "Invalid repeat distance in palmdoc data");
+                PyMem_Free(output);
+                return NULL;
+            }
+            // Copy byte by byte: source and destination may overlap
+            for ( n = (c & 7) + 3; n--; ++o )
+                output[o] = output[o - di];
+        }
+    }
+    ans = Py_BuildValue("s#", output, o);
+    PyMem_Free(output);
+    return ans;
+}
+
+// Return true when the first `len` elements of `a` and `b` are equal and
+// false otherwise. Note that this is the opposite convention to the C
+// library memcmp(), which returns 0 for equal ranges.
+static bool 
+cpalmdoc_memcmp( Byte *a, Byte *b, Py_ssize_t len) {
+    while (len-- > 0) {
+        if (*a++ != *b++) return false;
+    }
+    return true;
+}
+
+// Search backwards for the closest earlier, non-overlapping occurrence of
+// the `chunk_length` elements that start at data[pos]. Returns the start
+// index of that occurrence, or `pos` itself when there is none.
+static Py_ssize_t
+cpalmdoc_rfind(Byte *data, Py_ssize_t pos, Py_ssize_t chunk_length) {
+    Py_ssize_t start = pos - chunk_length;
+    while (start >= 0) {
+        if (cpalmdoc_memcmp(data+start, data+pos, chunk_length))
+            return start;
+        start--;
+    }
+    return pos;
+}
+
+
+// Palmdoc compress the bytes in `b` into `output` and return the number of
+// bytes written (0 only when `b` is empty).
+//
+// The output can be larger than the input: a lone binary byte (1-8 or
+// >= 0x80) is written as a count byte followed by the byte itself, i.e. two
+// bytes for one. `output` must therefore have room for 2 * b->len bytes.
+static Py_ssize_t
+cpalmdoc_do_compress(buffer *b, char *output) {
+    Py_ssize_t i = 0, j, chunk_len, dist;
+    unsigned compound;
+    Byte c, n;
+    bool found;
+    char *head = output;
+    // Scratch space for one run of binary data (at most 8 bytes). It lives
+    // on the stack, so it cannot fail to allocate and cannot be leaked.
+    Byte run[8];
+    Py_ssize_t run_len;
+
+    while (i < b->len) {
+        c = b->data[i];
+        //do repeats
+        if ( i > 10 && (b->len - i) > 10) {
+            found = false;
+            // Prefer the longest chunk (10 down to 3) seen at most 2047
+            // bytes back
+            for (chunk_len = 10; chunk_len > 2; chunk_len--) {
+                j = cpalmdoc_rfind(b->data, i, chunk_len);
+                dist = i - j;
+                if (j < i && dist <= 2047) {
+                    found = true;
+                    // 11 bits of distance, 3 bits of (length - 3)
+                    compound = (dist << 3) + chunk_len-3;
+                    *(output++) = CHAR(0x80 + (compound >> 8 ));
+                    *(output++) = CHAR(compound & 0xFF);
+                    i += chunk_len;
+                    break;
+                }
+            }
+            if (found) continue;
+        }
+
+        //write single character
+        i++;
+        if (c == 32 && i < b->len) {
+            // A space followed by 0x40-0x7F is folded into a single byte
+            n = b->data[i];
+            if ( n >= 0x40 && n <= 0x7F) {
+                *(output++) = CHAR(n^0x80); i++; continue;
+            }
+        }
+        if (c == 0 || (c > 8 && c < 0x80))
+            *(output++) = CHAR(c);
+        else { // Write binary data
+            j = i;
+            run[0] = c; run_len = 1;
+            while (j < b->len && run_len < 8) {
+                c = b->data[j];
+                if (c == 0 || (c > 8 && c < 0x80)) break;
+                run[run_len++] = c; j++;
+            }
+            i += run_len - 1;
+            *(output++) = (char)run_len;
+            for (j=0; j < run_len; j++) *(output++) = (char)run[j];
+        }
+    }
+    return output - head;
+}
+
+/*
+ * compress(bytestring) -> compressed bytestring
+ *
+ * Palmdoc compress a byte string. An empty input gives an empty result.
+ * Raises MemoryError if the working buffers cannot be allocated.
+ */
+static PyObject *
+cpalmdoc_compress(PyObject *self, PyObject *args) {
+    const char *_input = NULL; Py_ssize_t input_len = 0;
+    Py_ssize_t j = 0;
+    buffer b;
+    char *output = NULL;
+    PyObject *ans = NULL;
+
+    if (!PyArg_ParseTuple(args, "t#", &_input, &input_len))
+        return NULL;
+    // Nothing to compress. Handling this here also keeps a zero length
+    // result from being mistaken for a failure further down.
+    if (input_len == 0) return Py_BuildValue("s#", "", (Py_ssize_t)0);
+    // Guard the size computations of both allocations against overflow
+    if (input_len > PY_SSIZE_T_MAX / (Py_ssize_t)(2 * sizeof(Byte)))
+        return PyErr_NoMemory();
+
+    b.data = (Byte *)PyMem_Malloc(sizeof(Byte)*input_len);
+    if (b.data == NULL) return PyErr_NoMemory();
+    // Map chars to bytes
+    for (j = 0; j < input_len; j++)
+        b.data[j] = (unsigned char)_input[j];
+    b.len = input_len;
+
+    // Worst case every input byte is written as a count byte plus the byte
+    // itself, so the output needs room for twice the input length.
+    output = (char *)PyMem_Malloc(sizeof(char) * 2 * b.len);
+    if (output == NULL) {
+        PyMem_Free(b.data);
+        return PyErr_NoMemory();
+    }
+    j = cpalmdoc_do_compress(&b, output);
+    if (j == 0) {
+        // Non-empty input never compresses to nothing, so a zero length
+        // can only be a failure reported by the helper
+        PyMem_Free(output);
+        PyMem_Free(b.data);
+        return PyErr_NoMemory();
+    }
+    ans = Py_BuildValue("s#", output, j);
+    PyMem_Free(output);
+    PyMem_Free(b.data);
+    return ans;
+}
+
+// Method table of the cPalmdoc module: maps the Python level names to the C
+// functions above. Both take positional arguments only (METH_VARARGS). The
+// all-NULL entry is the sentinel that terminates the table.
+static PyMethodDef cPalmdocMethods[] = {
+    {"decompress", cpalmdoc_decompress, METH_VARARGS,
+    "decompress(bytestring) -> decompressed bytestring\n\n"
+    		"Decompress a palmdoc compressed byte string. "
+    },
+
+    {"compress", cpalmdoc_compress, METH_VARARGS,
+    "compress(bytestring) -> compressed bytestring\n\n"
+    		"Palmdoc compress a byte string. "
+    },
+    {NULL, NULL, 0, NULL}
+};
+
+// Module initialisation function: registers the cPalmdoc module together
+// with its method table and module docstring.
+PyMODINIT_FUNC
+initcPalmdoc(void) {
+    // There is nothing further to set up whether or not the call succeeds,
+    // so its result is deliberately not inspected.
+    (void)Py_InitModule3("cPalmdoc", cPalmdocMethods,
+    "Compress and decompress palmdoc strings."
+    );
+}
+
--- a/src/calibre/ebooks/compression/palmdoc.py
+++ b/src/calibre/ebooks/compression/palmdoc.py
@ -2,41 +2,46 @@
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

 __license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
-    'and Marshall T. Vandegrift <llasram@gmail.com>'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

 from cStringIO import StringIO
 from struct import pack

-COUNT_BITS = 3
+from calibre.constants import plugins
+cPalmdoc = plugins['cPalmdoc'][0]
+if not cPalmdoc:
+    raise RuntimeError(('Failed to load required cPalmdoc module: '
+            '%s')%plugins['cPalmdoc'][1])

 def decompress_doc(data):
-    buffer = [ord(i) for i in data]
-    res = []
-    i = 0
-    while i < len(buffer):
-        c = buffer[i]
-        i += 1
-        if c >= 1 and c <= 8:
-            res.extend(buffer[i:i+c])
-            i += c
-        elif c <= 0x7f:
-            res.append(c)
-        elif c >= 0xc0:
-            res.extend( (ord(' '), c^0x80) )
-        else:
-            c = (c << 8) + buffer[i]
-            i += 1
-            di = (c & 0x3fff) >> COUNT_BITS
-            j = len(res)
-            num = (c & ((1 << COUNT_BITS) - 1)) + 3
-
-            for k in range( num ):
-                res.append(res[j - di+k])
-
-    return ''.join([chr(i) for i in res])
+    # Decompress a Palmdoc compressed byte string by delegating to the
+    # cPalmdoc C extension loaded at import time
+    return cPalmdoc.decompress(data)

 def compress_doc(data):
+    # Palmdoc compress a byte string by delegating to the cPalmdoc C
+    # extension loaded at import time
+    return cPalmdoc.compress(data)
+
+def test():
+    '''
+    Self test: for a set of sample strings, check that the C compressor
+    produces the same bytes as py_compress_doc and that decompressing the
+    result gives back the original string. Progress is printed to stdout;
+    a mismatch raises AssertionError.
+    '''
+    TESTS = [
+            'abc\x03\x04\x05\x06ms', # Test binary writing
+            'a b c \xfed ', # Test encoding of spaces
+            '0123456789axyz2bxyz2cdfgfo9iuyerh',
+            '0123456789asd0123456789asd|yyzzxxffhhjjkk',
+            ('ciewacnaq eiu743 r787q 0w%  ; sa fd\xef\ffdxosac wocjp acoiecowei '
+            'owaic jociowapjcivcjpoivjporeivjpoavca; p9aw8743y6r74%$^$^%8 ')
+            ]
+    for sample in TESTS:
+        print 'Test:', repr(sample)
+        print '\tTesting compression...'
+        # Reference result from the pure python implementation
+        expected = py_compress_doc(sample)
+        actual = compress_doc(sample)
+        print '\t\tgood:',  repr(expected)
+        print '\t\tx   :',  repr(actual)
+        assert actual == expected
+        print '\tTesting decompression...'
+        print '\t\t', repr(decompress_doc(actual))
+        assert decompress_doc(actual) == sample
+        print
+
+def py_compress_doc(data):
    out = StringIO()
    i = 0
    ldata = len(data)
--- a/src/calibre/ebooks/conversion/init.py
+++ b/src/calibre/ebooks/conversion/init.py
@ -0,0 +1,4 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@ -0,0 +1,224 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Command line interface to conversion sub-system
+'''
+
+# Usage message handed to the option parser. The body is passed through _()
+# for translation; the program name placeholder and the manual URL are
+# concatenated around it untranslated.
+USAGE = '%prog ' + _('''\
+input_file output_file [options]
+
+Convert an ebook from one format to another.
+
+input_file is the input and output_file is the output. Both must be \
+specified as the first two arguments to the command.
+
+The output ebook format is guessed from the file extension of \
+output_file. output_file can also be of the special format .EXT where \
+EXT is the output file extension. In this case, the name of the output \
+file is derived from the name of the input file. Note that the filenames must \
+not start with a hyphen. Finally, if output_file has no extension, then \
+it is treated as a directory and an "open ebook" (OEB) consisting of HTML \
+files is written to that directory. These files are the files that would \
+normally have been passed to the output plugin.
+
+After specifying the input \
+and output file you can customize the conversion by specifying various \
+options. The available options depend on the input and output file types. \
+To get help on them specify the input and output file and then use the -h \
+option.
+
+For full documentation of the conversion system see
+''') + 'http://calibre.kovidgoyal.net/user_manual/conversion.html'
+
+import sys, os
+from optparse import OptionGroup, Option
+
+from calibre.utils.config import OptionParser
+from calibre.utils.logging import Log
+from calibre.constants import preferred_encoding
+from calibre.customize.conversion import OptionRecommendation
+
+def print_help(parser, log):
+    '''
+    Send the complete help text of `parser` to `log`, encoded in the
+    preferred encoding with unencodable characters replaced.
+    '''
+    text = parser.format_help()
+    log(text.encode(preferred_encoding, 'replace'))
+
+def check_command_line_options(parser, args, log):
+    '''
+    Validate the two positional arguments and turn them into absolute paths.
+
+    :param parser: Option parser, used to print the help on bad usage.
+    :param args: Full argument list; args[1] is the input, args[2] the output.
+    :param log: Log used to report errors.
+    :return: The tuple ``(input, output)`` of absolute paths.
+
+    Exits with status 1 (SystemExit) if either argument is missing or starts
+    with a hyphen, or if the input is neither a ``.recipe`` nor readable.
+    '''
+    if len(args) < 3 or any(a.startswith('-') for a in args[1:3]):
+        print_help(parser, log)
+        log.error('\n\nYou must specify the input AND output files')
+        raise SystemExit(1)
+
+    src = os.path.abspath(args[1])
+    # Inputs ending in .recipe are exempt from the readability check
+    readable = src.endswith('.recipe') or os.access(src, os.R_OK)
+    if not readable:
+        log.error('Cannot read from', src)
+        raise SystemExit(1)
+
+    dest = args[2]
+    if dest != '.' and dest.startswith('.'):
+        # The special form .EXT: reuse the base name of the input file
+        stem = os.path.splitext(os.path.basename(src))[0]
+        dest = stem + dest
+
+    return src, os.path.abspath(dest)
+
+def option_recommendation_to_cli_option(add_option, rec):
+    '''
+    Build an optparse Option from the recommendation `rec` and register it
+    by calling `add_option`.
+
+    The recommended value becomes the default. For boolean recommendations
+    the option is a flag that switches to the opposite of that default.
+    '''
+    opt = rec.option
+    switches = []
+    if opt.short_switch:
+        switches.append('-'+opt.short_switch)
+    switches.append('--'+opt.long_switch)
+    attrs = {
+        'dest': opt.name,
+        'help': opt.help,
+        'choices': opt.choices,
+        'default': rec.recommended_value,
+    }
+    if isinstance(rec.recommended_value, bool):
+        if rec.recommended_value:
+            attrs['action'] = 'store_false'
+        else:
+            attrs['action'] = 'store_true'
+    add_option(Option(*switches, **attrs))
+
+def add_input_output_options(parser, plumber):
+    '''
+    Add the options of the plumber's input and output plugins to `parser`,
+    each set in its own option group. A group is only created when the
+    corresponding plugin has options.
+    '''
+    input_options = plumber.input_options
+    output_options = plumber.output_options
+
+    def add_group(title, description, recs):
+        group = OptionGroup(parser, title, description)
+        for rec in recs:
+            option_recommendation_to_cli_option(group.add_option, rec)
+        parser.add_option_group(group)
+
+    if input_options:
+        add_group(_('INPUT OPTIONS'),
+                  _('Options to control the processing'
+                          ' of the input %s file')%plumber.input_fmt,
+                  input_options)
+
+    if output_options:
+        add_group(_('OUTPUT OPTIONS'),
+                  _('Options to control the processing'
+                          ' of the output %s')%plumber.output_fmt,
+                  output_options)
+
+def add_pipeline_options(parser, plumber):
+    '''
+    Add the format independent pipeline options to `parser`.
+
+    The options are arranged in titled groups; the section with an empty
+    title is added directly to the parser. Options whose recommendation
+    level is HIGH or above are left out. The `list_recipes` option is always
+    added to the parser itself.
+    '''
+    # (group title, group description, option names), in display order
+    sections = [
+        ('', '',
+            [
+                'input_profile',
+                'output_profile',
+            ]),
+
+        ('LOOK AND FEEL',
+            _('Options to control the look and feel of the output'),
+            [
+                'base_font_size', 'disable_font_rescaling',
+                'font_size_mapping',
+                'line_height',
+                'linearize_tables',
+                'extra_css',
+                'margin_top', 'margin_left', 'margin_right',
+                'margin_bottom', 'dont_justify',
+                'insert_blank_line', 'remove_paragraph_spacing',
+            ]),
+
+        ('STRUCTURE DETECTION',
+            _('Control auto-detection of document structure.'),
+            [
+                'chapter', 'chapter_mark',
+                'prefer_metadata_cover', 'remove_first_image',
+                'insert_metadata', 'page_breaks_before',
+                'preprocess_html',
+            ]),
+
+        ('TABLE OF CONTENTS',
+            _('Control the automatic generation of a Table of Contents. By '
+            'default, if the source file has a Table of Contents, it will '
+            'be used in preference to the automatically generated one.'),
+            [
+                'level1_toc', 'level2_toc', 'level3_toc',
+                'toc_threshold', 'max_toc_links', 'no_chapters_in_toc',
+                'use_auto_toc', 'toc_filter',
+            ]),
+
+        ('METADATA',
+            _('Options to set metadata in the output'),
+            plumber.metadata_option_names),
+
+        ('DEBUG',
+            _('Options to help with debugging the conversion'),
+            [
+                'verbose',
+            ]),
+    ]
+
+    for title, desc, names in sections:
+        if title:
+            group = OptionGroup(parser, title, desc)
+            parser.add_option_group(group)
+            add_option = group.add_option
+        else:
+            add_option = parser.add_option
+
+        for name in names:
+            rec = plumber.get_option_by_name(name)
+            if rec.level < rec.HIGH:
+                option_recommendation_to_cli_option(add_option, rec)
+
+    list_recipes = plumber.get_option_by_name('list_recipes')
+    option_recommendation_to_cli_option(parser.add_option, list_recipes)
+
+def option_parser():
+    '''Return a new option parser that carries only the usage message.'''
+    parser = OptionParser(usage=USAGE)
+    return parser
+
+
+class ProgressBar(object):
+    '''
+    Progress callback that writes lines of the form "<percent>% <message>"
+    to a log.
+    '''
+
+    def __init__(self, log):
+        self.log = log
+
+    def __call__(self, frac, msg=''):
+        # Updates without a message produce no output
+        if not msg:
+            return
+        self.log('%d%% %s'%(int(frac*100), msg))
+
+def create_option_parser(args, log):
+    '''
+    Create the option parser for a conversion together with its Plumber.
+
+    :param args: Full argument list (program name, input, output, options).
+    :param log: Log used for the help output and handed to the Plumber.
+    :return: The tuple ``(parser, plumber)``.
+
+    Prints the help and exits with status 1 if fewer than three arguments
+    were given.
+    '''
+    parser = option_parser()
+    if len(args) < 3:
+        print_help(parser, log)
+        raise SystemExit(1)
+
+    input, output = check_command_line_options(parser, args, log)
+
+    # Imported here rather than at module level, as in the original layout
+    from calibre.ebooks.conversion.plumber import Plumber
+
+    plumber = Plumber(input, output, log, ProgressBar(log))
+    add_input_output_options(parser, plumber)
+    add_pipeline_options(parser, plumber)
+
+    return parser, plumber
+
+def main(args=sys.argv):
+    '''
+    Command line entry point: parse `args`, pass the chosen option values to
+    the Plumber as HIGH level recommendations and run the conversion.
+
+    :return: 0
+    '''
+    log = Log()
+    parser, plumber = create_option_parser(args, log)
+    opts = parser.parse_args(args)[0]
+
+    def normalize(path):
+        return os.path.abspath(os.path.expanduser(path))
+
+    # Options holding file paths are made absolute, with ~ expanded
+    for attr in ('read_metadata_from_opf', 'cover'):
+        value = getattr(opts, attr, None)
+        if value is not None:
+            setattr(opts, attr, normalize(value))
+
+    recommendations = []
+    for option in parser.options_iter():
+        if option.dest:
+            recommendations.append((option.dest, getattr(opts, option.dest),
+                                    OptionRecommendation.HIGH))
+    plumber.merge_ui_recommendations(recommendations)
+
+    plumber.run()
+
+    if plumber.opts.debug_input is None:
+        log(_('Output saved to'), ' ', plumber.output)
+
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
--- a/src/calibre/ebooks/conversion/config.py
+++ b/src/calibre/ebooks/conversion/config.py
@ -0,0 +1,96 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.utils.config import config_dir
+from calibre.utils.lock import ExclusiveFile
+from calibre import sanitize_file_name
+from calibre.customize.conversion import OptionRecommendation
+
+
+# Directory that holds the saved conversion defaults, created on first use.
+config_dir = os.path.join(config_dir, 'conversion')
+try:
+    os.makedirs(config_dir)
+except OSError:
+    # Either the directory already existed or another process created it
+    # between a check and the call; only a path that is still missing is a
+    # real error.
+    if not os.path.exists(config_dir):
+        raise
+
+def name_to_path(name):
+    '''
+    Return the path of the file in the conversion config directory that
+    stores the defaults called `name` (sanitized name plus ``.py``).
+    '''
+    fname = sanitize_file_name(name) + '.py'
+    return os.path.join(config_dir, fname)
+
+def save_defaults(name, recs):
+    '''
+    Store ``str(recs)`` in the defaults file for `name`, replacing any
+    previous content.
+    '''
+    path = name_to_path(name)
+    raw = str(recs)
+    # Create the file if needed and empty it before taking the lock
+    open(path, 'wb').close()
+    with ExclusiveFile(path) as dest:
+        dest.write(raw)
+
+def load_defaults(name):
+    '''
+    Read the defaults stored for `name`.
+
+    The defaults file is created empty if it does not exist yet.
+
+    :return: A GuiRecommendations object, empty if nothing was stored.
+    '''
+    path = name_to_path(name)
+    if not os.path.exists(path):
+        with open(path, 'wb'):
+            pass
+    with ExclusiveFile(path) as src:
+        raw = src.read()
+    recs = GuiRecommendations()
+    if raw:
+        recs.from_string(raw)
+    return recs
+
+def save_specifics(db, book_id, recs):
+    '''
+    Store ``str(recs)`` in `db` as the 'PIPE' conversion options of the
+    book `book_id`.
+    '''
+    db.set_conversion_options(book_id, 'PIPE', str(recs))
+
+def load_specifics(db, book_id):
+    '''
+    Read the 'PIPE' conversion options stored in `db` for the book
+    `book_id`.
+
+    :return: A GuiRecommendations object, empty if nothing was stored.
+    '''
+    recs = GuiRecommendations()
+    raw = db.conversion_options(book_id, 'PIPE')
+    if raw:
+        recs.from_string(raw)
+    return recs
+
+class GuiRecommendations(dict):
+    '''
+    Mapping of option name to value, as edited in a user interface.
+
+    `disabled_options` is the set of option names that were fixed by a HIGH
+    level recommendation in :meth:`merge_recommendations`.
+    '''
+
+    def __new__(cls, *args):
+        # Set up in __new__ so that the attribute exists on every instance
+        obj = super(GuiRecommendations, cls).__new__(cls, *args)
+        obj.disabled_options = set()
+        return obj
+
+    def to_recommendations(self, level=OptionRecommendation.LOW):
+        '''Return the items as a list of ``(name, value, level)`` tuples.'''
+        return [(key, val, level) for key, val in self.items()]
+
+    def __str__(self):
+        # One "key : value," line per item between braces. The result is a
+        # dict literal that from_string() can read back.
+        ans = ['{']
+        for key, val in self.items():
+            ans.append('\t'+repr(key)+' : '+repr(val)+',')
+        ans.append('}')
+        return '\n'.join(ans)
+
+    def from_string(self, raw):
+        '''
+        Update this mapping from `raw`, text in the format produced by
+        ``str(self)``. Text that is not valid Python syntax is ignored.
+        '''
+        # NOTE(security): eval() executes arbitrary expressions. `raw` comes
+        # from config files and the database, so anything able to write
+        # there can run code here. Review whether a literal-only parser can
+        # be used instead.
+        try:
+            d = eval(raw)
+        except SyntaxError:
+            d = None
+        if d:
+            self.update(d)
+
+    def merge_recommendations(self, get_option, level, options,
+            only_existing=False):
+        '''
+        Merge the recommended values of the named options into this mapping.
+
+        :param get_option: Callable that maps an option name to an object
+                           with `level` and `recommended_value`, or None.
+        :param level: Stored values are overridden by recommendations with
+                      a level above this one.
+        :param options: Names of the options to merge.
+        :param only_existing: If True, names not already present are skipped.
+
+        HIGH level recommendations always override the stored value and the
+        name is added to `disabled_options`.
+        '''
+        for name in options:
+            if only_existing and name not in self:
+                continue
+            opt = get_option(name)
+            if opt is None: continue
+            if opt.level == OptionRecommendation.HIGH:
+                self[name] = opt.recommended_value
+                self.disabled_options.add(name)
+            elif opt.level > level or name not in self:
+                self[name] = opt.recommended_value
+
+
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -0,0 +1,690 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os, re
+
+from calibre.customize.conversion import OptionRecommendation, DummyReporter
+from calibre.customize.ui import input_profiles, output_profiles, \
+        plugin_for_input_format, plugin_for_output_format
+from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
+from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre import extract, walk
+
+def supported_input_formats():
+    '''
+    Return the formats reported by available_input_formats() with 'zip',
+    'rar' and 'oebzip' added.
+    '''
+    from calibre.customize.ui import available_input_formats
+    formats = available_input_formats()
+    extra = ('zip', 'rar', 'oebzip')
+    for fmt in extra:
+        formats.add(fmt)
+    return formats
+
+INPUT_FORMAT_PREFERENCES = ['cbr', 'cbz', 'cbc', 'lit', 'mobi', 'prc', 'azw', 'fb2', 'html',
+        'rtf', 'pdf', 'txt', 'pdb']
+OUTPUT_FORMAT_PREFERENCES = ['epub', 'mobi', 'lit', 'pdf', 'pdb', 'txt']
+
+class OptionValues(object):
+    '''Plain attribute container; defines no behaviour of its own.'''
+
+class CompositeProgressReporter(object):
+    '''
+    Progress callback for one stage of a larger job: maps the stage's own
+    fraction linearly onto the interval [global_min, global_max] and passes
+    the result to `global_reporter`.
+    '''
+
+    def __init__(self, global_min, global_max, global_reporter):
+        self.global_min = global_min
+        self.global_max = global_max
+        self.global_reporter = global_reporter
+
+    def __call__(self, fraction, msg=''):
+        span = self.global_max - self.global_min
+        self.global_reporter(self.global_min + fraction * span, msg)
+
+class Plumber(object):
+    '''
+    The `Plumber` manages the conversion pipeline. A UI should call the methods
+    :method:`merge_ui_recommendations` and then :method:`run`. The plumber will
+    take care of the rest.
+    '''
+
+    metadata_option_names = [
+        'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments',
+        'publisher', 'series', 'series_index', 'rating', 'isbn',
+        'tags', 'book_producer', 'language'
+        ]
+
+    def __init__(self, input, output, log, report_progress=DummyReporter()):
+        '''
+        :param input: Path to input file.
+        :param output: Path to output file/directory
+        '''
+        self.input = os.path.abspath(input)
+        self.output = os.path.abspath(output)
+        self.log = log
+        self.ui_reporter = report_progress
+
+        # Initialize the conversion options that are independent of input and
+        # output formats. The input and output plugins can still disable these
+        # options via recommendations.
+        self.pipeline_options = [
+
+OptionRecommendation(name='verbose',
+            recommended_value=0, level=OptionRecommendation.LOW,
+            short_switch='v',
+            help=_('Level of verbosity. Specify multiple times for greater '
+                   'verbosity.')
+        ),
+
+OptionRecommendation(name='input_profile',
+            recommended_value='default', level=OptionRecommendation.LOW,
+            choices=[x.short_name for x in input_profiles()],
+            help=_('Specify the input profile. The input profile gives the '
+                   'conversion system information on how to interpret '
+                   'various information in the input document. For '
+                   'example resolution dependent lengths (i.e. lengths in '
+                   'pixels). Choices are:')+\
+                        ', '.join([x.short_name for x in input_profiles()])
+        ),
+
+OptionRecommendation(name='output_profile',
+            recommended_value='default', level=OptionRecommendation.LOW,
+            choices=[x.short_name for x in output_profiles()],
+            help=_('Specify the output profile. The output profile '
+                   'tells the conversion system how to optimize the '
+                   'created document for the specified device. In some cases, '
+                   'an output profile is required to produce documents that '
+                   'will work on a device. For example EPUB on the SONY reader. '
+                   'Choices are:') + \
+                           ', '.join([x.short_name for x in output_profiles()])
+        ),
+
+OptionRecommendation(name='base_font_size',
+            recommended_value=0, level=OptionRecommendation.LOW,
+            help=_('The base font size in pts. All font sizes in the produced book '
+                   'will be rescaled based on this size. By choosing a larger '
+                   'size you can make the fonts in the output bigger and vice '
+                   'versa. By default, the base font size is chosen based on '
+                   'the output profile you chose.'
+                   )
+        ),
+
+OptionRecommendation(name='font_size_mapping',
+            recommended_value=None, level=OptionRecommendation.LOW,
+            help=_('Mapping from CSS font names to font sizes in pts. '
+                   'An example setting is 12,12,14,16,18,20,22,24. '
+                   'These are the mappings for the sizes xx-small to xx-large, '
+                   'with the final size being for huge fonts. The font '
+                   'rescaling algorithm uses these sizes to intelligently '
+                   'rescale fonts. The default is to use a mapping based on '
+                   'the output profile you chose.'
+                   )
+        ),
+
+OptionRecommendation(name='disable_font_rescaling',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Disable all rescaling of font sizes.'
+                   )
+        ),
+
+
+OptionRecommendation(name='line_height',
+            recommended_value=0, level=OptionRecommendation.LOW,
+            help=_('The line height in pts. Controls spacing between consecutive '
+                   'lines of text. By default no line height manipulation is '
+                   'performed.'
+                   )
+        ),
+
+OptionRecommendation(name='linearize_tables',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Some badly designed documents use tables to control the '
+                'layout of text on the page. When converted these documents '
+                'often have text that runs off the page and other artifacts. '
+                'This option will extract the content from the tables and '
+                'present it in a linear fashion.'
+                )
+        ),
+
+OptionRecommendation(name='level1_toc',
+            recommended_value=None, level=OptionRecommendation.LOW,
+            help=_('XPath expression that specifies all tags that '
+            'should be added to the Table of Contents at level one. If '
+            'this is specified, it takes precedence over other forms '
+            'of auto-detection.'
+                )
+        ),
+
+OptionRecommendation(name='level2_toc',
+            recommended_value=None, level=OptionRecommendation.LOW,
+            help=_('XPath expression that specifies all tags that should be '
+            'added to the Table of Contents at level two. Each entry is added '
+            'under the previous level one entry.'
+                )
+        ),
+
+OptionRecommendation(name='level3_toc',
+            recommended_value=None, level=OptionRecommendation.LOW,
+            help=_('XPath expression that specifies all tags that should be '
+                'added to the Table of Contents at level three. Each entry '
+                'is added under the previous level two entry.'
+                )
+        ),
+
+OptionRecommendation(name='use_auto_toc',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Normally, if the source file already has a Table of '
+            'Contents, it is used in preference to the auto-generated one. '
+            'With this option, the auto-generated one is always used.'
+                )
+        ),
+
+OptionRecommendation(name='no_chapters_in_toc',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_("Don't add auto-detected chapters to the Table of "
+            'Contents.'
+                )
+        ),
+
+OptionRecommendation(name='toc_threshold',
+            recommended_value=6, level=OptionRecommendation.LOW,
+            help=_(
+        'If fewer than this number of chapters is detected, then links '
+        'are added to the Table of Contents. Default: %default')
+        ),
+
+OptionRecommendation(name='max_toc_links',
+            recommended_value=50, level=OptionRecommendation.LOW,
+            help=_('Maximum number of links to insert into the TOC. Set to 0 '
+               'to disable. Default is: %default. Links are only added to the '
+            'TOC if less than the threshold number of chapters were detected.'
+                )
+        ),
+
+OptionRecommendation(name='toc_filter',
+            recommended_value=None, level=OptionRecommendation.LOW,
+            help=_('Remove entries from the Table of Contents whose titles '
+            'match the specified regular expression. Matching entries and all '
+            'their children are removed.'
+                )
+        ),
+
+
+OptionRecommendation(name='chapter',
+        recommended_value="//*[((name()='h1' or name()='h2') and "
+              r"re:test(., 'chapter|book|section|part\s+', 'i')) or @class "
+              "= 'chapter']", level=OptionRecommendation.LOW,
+            help=_('An XPath expression to detect chapter titles. The default '
+                'is to consider <h1> or <h2> tags that contain the words '
+                '"chapter","book","section" or "part" as chapter titles as '
+                'well as any tags that have class="chapter". The expression '
+                'used must evaluate to a list of elements. To disable chapter '
+                'detection, use the expression "/". See the XPath Tutorial '
+                'in the calibre User Manual for further help on using this '
+                'feature.'
+                )
+        ),
+
+OptionRecommendation(name='chapter_mark',
+            recommended_value='pagebreak', level=OptionRecommendation.LOW,
+            choices=['pagebreak', 'rule', 'both', 'none'],
+            help=_('Specify how to mark detected chapters. A value of '
+                    '"pagebreak" will insert page breaks before chapters. '
+                    'A value of "rule" will insert a line before chapters. '
+                    'A value of "none" will disable chapter marking and a '
+                    'value of "both" will use both page breaks and lines '
+                    'to mark chapters.')
+        ),
+
+OptionRecommendation(name='extra_css',
+            recommended_value=None, level=OptionRecommendation.LOW,
+            help=_('Either the path to a CSS stylesheet or raw CSS. '
+                'This CSS will be appended to the style rules from '
+                'the source file, so it can be used to override those '
+                'rules.')
+        ),
+
+OptionRecommendation(name='page_breaks_before',
+            recommended_value="//*[name()='h1' or name()='h2']",
+            level=OptionRecommendation.LOW,
+            help=_('An XPath expression. Page breaks are inserted '
+            'before the specified elements.')
+        ),
+
+OptionRecommendation(name='margin_top',
+        recommended_value=5.0, level=OptionRecommendation.LOW,
+        help=_('Set the top margin in pts. Default is %default. '
+            'Note: 72 pts equals 1 inch')),
+
+OptionRecommendation(name='margin_bottom',
+        recommended_value=5.0, level=OptionRecommendation.LOW,
+        help=_('Set the bottom margin in pts. Default is %default. '
+            'Note: 72 pts equals 1 inch')),
+
+OptionRecommendation(name='margin_left',
+        recommended_value=5.0, level=OptionRecommendation.LOW,
+        help=_('Set the left margin in pts. Default is %default. '
+            'Note: 72 pts equals 1 inch')),
+
+OptionRecommendation(name='margin_right',
+        recommended_value=5.0, level=OptionRecommendation.LOW,
+        help=_('Set the right margin in pts. Default is %default. '
+            'Note: 72 pts equals 1 inch')),
+
+OptionRecommendation(name='dont_justify',
+        recommended_value=False, level=OptionRecommendation.LOW,
+        help=_('Do not force text to be justified in output. Whether text '
+            'is actually displayed justified or not depends on whether '
+            'the ebook format and reading device support justification.')
+        ),
+
+OptionRecommendation(name='remove_paragraph_spacing',
+        recommended_value=False, level=OptionRecommendation.LOW,
+        help=_('Remove spacing between paragraphs. Also sets an indent on '
+        'paragraphs of 1.5em. Spacing removal will not work '
+        'if the source file does not use paragraphs (<p> or <div> tags).')
+        ),
+
+OptionRecommendation(name='prefer_metadata_cover',
+        recommended_value=False, level=OptionRecommendation.LOW,
+        help=_('Use the cover detected from the source file in preference '
+        'to the specified cover.')
+        ),
+
+OptionRecommendation(name='insert_blank_line',
+        recommended_value=False, level=OptionRecommendation.LOW,
+        help=_('Insert a blank line between paragraphs. Will not work '
+            'if the source file does not use paragraphs (<p> or <div> tags).'
+            )
+        ),
+
+OptionRecommendation(name='remove_first_image',
+        recommended_value=False, level=OptionRecommendation.LOW,
+        help=_('Remove the first image from the input ebook. Useful if the '
+        'first image in the source file is a cover and you are specifying '
+        'an external cover.'
+            )
+        ),
+
+OptionRecommendation(name='insert_metadata',
+        recommended_value=False, level=OptionRecommendation.LOW,
+        help=_('Insert the book metadata at the start of '
+            'the book. This is useful if your ebook reader does not support '
+            'displaying/searching metadata directly.'
+            )
+        ),
+
+OptionRecommendation(name='preprocess_html',
+        recommended_value=False, level=OptionRecommendation.LOW,
+        help=_('Attempt to detect and correct hard line breaks and other '
+            'problems in the source file. This may make things worse, so use '
+            'with care.'
+            )
+        ),
+
+
+OptionRecommendation(name='read_metadata_from_opf',
+            recommended_value=None, level=OptionRecommendation.LOW,
+            short_switch='m',
+            help=_('Read metadata from the specified OPF file. Metadata read '
+                   'from this file will override any metadata in the source '
+                   'file.')
+        ),
+
+
+OptionRecommendation(name='title',
+    recommended_value=None, level=OptionRecommendation.LOW,
+    help=_('Set the title.')),
+
+OptionRecommendation(name='authors',
+    recommended_value=None, level=OptionRecommendation.LOW,
+    help=_('Set the authors. Multiple authors should be separated by '
+    'ampersands.')),
+
+OptionRecommendation(name='title_sort',
+    recommended_value=None, level=OptionRecommendation.LOW,
+    help=_('The version of the title to be used for sorting. ')),
+
+OptionRecommendation(name='author_sort',
+    recommended_value=None, level=OptionRecommendation.LOW,
+    help=_('String to be used when sorting by author. ')),
+
+OptionRecommendation(name='cover',
+    recommended_value=None, level=OptionRecommendation.LOW,
+    help=_('Set the cover to the specified file.')),
+
+OptionRecommendation(name='comments',
+    recommended_value=None, level=OptionRecommendation.LOW,
+    help=_('Set the ebook description.')),
+
+OptionRecommendation(name='publisher',
+    recommended_value=None, level=OptionRecommendation.LOW,
+    help=_('Set the ebook publisher.')),
+
+OptionRecommendation(name='series',
+    recommended_value=None, level=OptionRecommendation.LOW,
+    help=_('Set the series this ebook belongs to.')),
+
+OptionRecommendation(name='series_index',
+    recommended_value=None, level=OptionRecommendation.LOW,
+    help=_('Set the index of the book in this series.')),
+
+OptionRecommendation(name='rating',
+    recommended_value=None, level=OptionRecommendation.LOW,
+    help=_('Set the rating. Should be a number between 1 and 5.')),
+
+OptionRecommendation(name='isbn',
+    recommended_value=None, level=OptionRecommendation.LOW,
+    help=_('Set the ISBN of the book.')),
+
+OptionRecommendation(name='tags',
+    recommended_value=None, level=OptionRecommendation.LOW,
+    help=_('Set the tags for the book. Should be a comma separated list.')),
+
+OptionRecommendation(name='book_producer',
+    recommended_value=None, level=OptionRecommendation.LOW,
+    help=_('Set the book producer.')),
+
+OptionRecommendation(name='language',
+    recommended_value=None, level=OptionRecommendation.LOW,
+    help=_('Set the language.')),
+
+OptionRecommendation(name='list_recipes',
+    recommended_value=False, help=_('List available recipes.')),
+
+]
+
+        input_fmt = os.path.splitext(self.input)[1]
+        if not input_fmt:
+            raise ValueError('Input file must have an extension')
+        input_fmt = input_fmt[1:].lower()
+        if input_fmt in ('zip', 'rar', 'oebzip'):
+            self.log('Processing archive...')
+            tdir = PersistentTemporaryDirectory('_plumber')
+            self.input, input_fmt = self.unarchive(self.input, tdir)
+
+        if os.path.exists(self.output) and os.path.isdir(self.output):
+            output_fmt = 'oeb'
+        else:
+            output_fmt = os.path.splitext(self.output)[1]
+            if not output_fmt:
+                output_fmt = '.oeb'
+            output_fmt = output_fmt[1:].lower()
+
+        self.input_plugin  = plugin_for_input_format(input_fmt)
+        self.output_plugin = plugin_for_output_format(output_fmt)
+
+        if self.input_plugin is None:
+            raise ValueError('No plugin to handle input format: '+input_fmt)
+
+        if self.output_plugin is None:
+            raise ValueError('No plugin to handle output format: '+output_fmt)
+
+        self.input_fmt = input_fmt
+        self.output_fmt = output_fmt
+
+        # Build set of all possible options. Two options are equal if their
+        # names are the same.
+        self.input_options  = self.input_plugin.options.union(
+                                    self.input_plugin.common_options)
+        self.output_options = self.output_plugin.options.union(
+                                    self.output_plugin.common_options)
+
+        # Remove the options that have been disabled by recommendations from the
+        # plugins.
+        self.merge_plugin_recommendations()
+
+    @classmethod
+    def unarchive(self, path, tdir):
+        '''
+        Extract the archive at `path` into `tdir` and choose the file to
+        convert.
+
+        Returns a `(file_path, extension)` tuple. All non-HTML input formats
+        are tried first; if none of them matches an extracted file, the
+        choice is delegated to `find_html_index`.
+        '''
+        extract(path, tdir)
+        extracted = list(walk(tdir))
+        from calibre.customize.ui import available_input_formats
+        candidates = available_input_formats()
+        # HTML variants are only considered as a last resort (see below)
+        for html_ext in ('htm', 'html', 'xhtm', 'xhtml'):
+            candidates.remove(html_ext)
+
+        for ext in candidates:
+            suffix = '.'+ext
+            for entry in extracted:
+                if not entry.lower().endswith(suffix):
+                    continue
+                # Text/RTF files smaller than 2048 bytes are passed over
+                if ext in ['txt', 'rtf'] and os.stat(entry).st_size < 2048:
+                    continue
+                return entry, ext
+        return self.find_html_index(extracted)
+
+    @classmethod
+    def find_html_index(self, files):
+        '''
+        Given a list of files, find the most likely root HTML file in the
+        list.
+
+        A file whose base name is `toc`, then one whose base name is `index`
+        (case-insensitive, smallest file first) is preferred; otherwise the
+        largest (X)HTML file is chosen.
+
+        :param files: Paths of existing files (every HTML candidate is
+                      `os.stat`-ed).
+        :return: A `(path, extension)` tuple; the extension is lower-cased
+                 and has no leading dot.
+        :raises ValueError: If `files` contains no (X)HTML file.
+        '''
+        html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
+        html_files = [f for f in files if html_pat.search(f) is not None]
+        if not html_files:
+            raise ValueError(_('Could not find an ebook inside the archive'))
+        # Order by size, smallest first. `key=` replaces the old `cmp=`
+        # comparison function, which Python 3 no longer accepts; the sort is
+        # stable in both forms, so the resulting order is unchanged.
+        html_files.sort(key=lambda f: os.stat(f).st_size)
+        for q in ('toc', 'index'):
+            for f in html_files:
+                if os.path.splitext(os.path.basename(f))[0].lower() == q:
+                    return f, os.path.splitext(f)[1].lower()[1:]
+        # No conventional root file: fall back to the largest HTML file
+        largest = html_files[-1]
+        return largest, os.path.splitext(largest)[1].lower()[1:]
+
+
+
+    def get_option_by_name(self, name):
+        '''
+        Return the first recommendation whose option equals `name`, searching
+        the input, pipeline and output option groups in that order. Returns
+        None when nothing matches.
+        '''
+        groups = (self.input_options, self.pipeline_options,
+                  self.output_options)
+        for options in groups:
+            for candidate in options:
+                if candidate.option == name:
+                    return candidate
+        return None
+
+    def get_option_help(self, name):
+        '''
+        Return the help text of the option called `name` with the
+        `%default` placeholder replaced by the option's recommended value.
+        Returns None if the option is unknown or has no help text.
+        '''
+        rec = self.get_option_by_name(name)
+        text = getattr(rec, 'help', None)
+        if text is None:
+            return None
+        default = str(rec.recommended_value)
+        return text.replace('%default', default)
+
+    def merge_plugin_recommendations(self):
+        '''
+        Apply the recommendations of the input plugin, then those of the
+        output plugin. A recommendation replaces the stored value and level
+        only if the option exists and its current level is not greater than
+        the recommended level.
+        '''
+        for plugin in (self.input_plugin, self.output_plugin):
+            for name, value, level in plugin.recommendations:
+                target = self.get_option_by_name(name)
+                if target is None or not (target.level <= level):
+                    continue
+                target.recommended_value = value
+                target.level = level
+
+    def merge_ui_recommendations(self, recommendations):
+        '''
+        Merge `(name, value, level)` recommendations coming from the UI.
+
+        A UI value is adopted when its level is at least the level already
+        stored on the option, unless the stored level is `HIGH` (or above),
+        in which case the baseline always wins. Unknown option names are
+        ignored.
+        '''
+        for name, value, level in recommendations:
+            target = self.get_option_by_name(name)
+            if target is None:
+                continue
+            overridable = target.level <= level and target.level < target.HIGH
+            if overridable:
+                target.recommended_value = value
+                target.level = level
+
+    def read_user_metadata(self):
+        '''
+        Read all metadata specified by the user. Command line options override
+        metadata from a specified OPF file.
+
+        The result is stored in `self.user_metadata`. If the metadata names a
+        cover file, its contents are loaded into `cover_data` and the
+        `cover` path is reset to None.
+        '''
+        from calibre.ebooks.metadata import MetaInformation, string_to_authors
+        from calibre.ebooks.metadata.opf2 import OPF
+        mi = MetaInformation(None, [])
+        if self.opts.read_metadata_from_opf is not None:
+            self.opts.read_metadata_from_opf = os.path.abspath(
+                                            self.opts.read_metadata_from_opf)
+            # NOTE(review): this stream is never closed explicitly. Whether
+            # OPF consumes it completely in its constructor is not visible
+            # here, so it is deliberately left untouched - confirm.
+            opf = OPF(open(self.opts.read_metadata_from_opf, 'rb'),
+                      os.path.dirname(self.opts.read_metadata_from_opf))
+            mi = MetaInformation(opf)
+        for x in self.metadata_option_names:
+            val = getattr(self.opts, x, None)
+            if val is not None:
+                # Option values are converted to the type used on `mi`
+                if x == 'authors':
+                    val = string_to_authors(val)
+                elif x == 'tags':
+                    val = [i.strip() for i in val.split(',')]
+                elif x in ('rating', 'series_index'):
+                    val = float(val)
+                setattr(mi, x, val)
+        if mi.cover:
+            # Close the cover file deterministically instead of leaving the
+            # handle to the garbage collector.
+            cover_file = open(mi.cover, 'rb')
+            try:
+                mi.cover_data = ('', cover_file.read())
+            finally:
+                cover_file.close()
+            mi.cover = None
+        self.user_metadata = mi
+
+    def setup_options(self):
+        '''
+        Setup the `self.opts` object.
+
+        Every known option is initialised to its recommended value, the
+        input/output profile names are swapped for the profile objects with
+        the same `short_name`, and finally the user metadata is read.
+        '''
+        self.opts = OptionValues()
+        opts = self.opts
+        all_groups = (self.input_options, self.pipeline_options,
+                  self.output_options)
+        for options in all_groups:
+            for rec in options:
+                setattr(opts, rec.option.name, rec.recommended_value)
+
+        for profile in input_profiles():
+            if profile.short_name == opts.input_profile:
+                opts.input_profile = profile
+                break
+
+        for profile in output_profiles():
+            if profile.short_name == opts.output_profile:
+                opts.output_profile = profile
+                break
+
+        self.read_user_metadata()
+
+    def run(self):
+        '''
+        Run the conversion pipeline
+
+        Stages, in order: option setup, preprocess plugins, the input plugin
+        (progress 0.01 - 0.34), the OEB transforms (0.34 - 0.67) and the
+        output plugin (0.67 - 1.0).
+
+        Raises SystemExit(0) after listing the recipes when the
+        `list_recipes` option is set. Returns right after the input plugin
+        has run, without writing any output, when the `debug_input` option
+        is set.
+        '''
+        # Setup baseline option values
+        self.setup_options()
+        if self.opts.verbose:
+            self.log.filter_level = self.log.DEBUG
+        if self.opts.list_recipes:
+            from calibre.web.feeds.recipes import titles
+            self.log('Available recipes:')
+            for title in sorted(titles):
+                self.log('\t'+title)
+            self.log('%d recipes available'%len(titles))
+            raise SystemExit(0)
+
+        # Run any preprocess plugins
+        from calibre.customize.ui import run_plugins_on_preprocess
+        self.input = run_plugins_on_preprocess(self.input)
+
+        # Create an OEBBook from the input file. The input plugin does all the
+        # heavy lifting.
+        accelerators = {}
+
+        tdir = PersistentTemporaryDirectory('_plumber')
+        # For the 'recipe' format the input itself is handed to the plugin;
+        # every other format is opened as a binary file.
+        # NOTE(review): the opened file is not closed anywhere in this method.
+        stream = self.input if self.input_fmt == 'recipe' else \
+                open(self.input, 'rb')
+
+        # Flag LRF output on the options object, if it has such an option
+        if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf':
+            self.opts.lrf = True
+
+        self.ui_reporter(0.01, _('Converting input to HTML...'))
+        ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
+        self.input_plugin.report_progress = ir
+        self.oeb = self.input_plugin(stream, self.opts,
+                                    self.input_fmt, self.log,
+                                    accelerators, tdir)
+        if self.opts.debug_input is not None:
+            self.log('Debug input called, aborting the rest of the pipeline.')
+            return
+        # A result without a `manifest` attribute is not an OEBBook yet, so
+        # it is passed to create_oebbook() to build one.
+        if not hasattr(self.oeb, 'manifest'):
+            self.oeb = create_oebbook(self.log, self.oeb, self.opts,
+                    self.input_plugin)
+        pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
+        pr(0., _('Running transforms on ebook...'))
+
+        from calibre.ebooks.oeb.transforms.guide import Clean
+        Clean()(self.oeb, self.opts)
+        pr(0.1)
+
+        # The transforms below read the profiles from opts.source/opts.dest
+        self.opts.source = self.opts.input_profile
+        self.opts.dest = self.opts.output_profile
+
+        from calibre.ebooks.oeb.transforms.metadata import MergeMetadata
+        MergeMetadata()(self.oeb, self.user_metadata,
+                self.opts.prefer_metadata_cover)
+        pr(0.2)
+
+        from calibre.ebooks.oeb.transforms.structure import DetectStructure
+        DetectStructure()(self.oeb, self.opts)
+        pr(0.35)
+
+        from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
+        # A base font size of (effectively) zero means "use the value of the
+        # output profile".
+        fbase = self.opts.base_font_size
+        if fbase < 1e-4:
+            fbase = float(self.opts.dest.fbase)
+        # The font size mapping is either taken from the output profile or
+        # parsed from a comma separated string of numbers.
+        fkey = self.opts.font_size_mapping
+        if fkey is None:
+            fkey = self.opts.dest.fkey
+        else:
+            fkey = map(float, fkey.split(','))
+
+        from calibre.ebooks.oeb.transforms.jacket import Jacket
+        Jacket()(self.oeb, self.opts, self.user_metadata)
+        pr(0.4)
+
+        # extra_css may be a path to a stylesheet; if that path exists, the
+        # option is replaced by the file's contents.
+        if self.opts.extra_css and os.path.exists(self.opts.extra_css):
+            self.opts.extra_css = open(self.opts.extra_css, 'rb').read()
+
+        # For LRF output the two paragraph options are switched off while
+        # the CSS is flattened; the saved values are restored afterwards.
+        oibl = self.opts.insert_blank_line
+        orps  = self.opts.remove_paragraph_spacing
+        if self.output_plugin.file_type == 'lrf':
+            self.opts.insert_blank_line = False
+            self.opts.remove_paragraph_spacing = False
+        # A line height of (effectively) zero is passed on as None
+        line_height = self.opts.line_height
+        if line_height < 1e-4:
+            line_height = None
+        flattener = CSSFlattener(fbase=fbase, fkey=fkey,
+                lineh=line_height,
+                untable=self.output_plugin.file_type in ('mobi','lit'),
+                unfloat=self.output_plugin.file_type in ('mobi', 'lit'))
+        flattener(self.oeb, self.opts)
+        self.opts.insert_blank_line = oibl
+        self.opts.remove_paragraph_spacing = orps
+
+        # Table linearization is skipped for MOBI and LRF output
+        if self.opts.linearize_tables and \
+                self.output_plugin.file_type not in ('mobi', 'lrf'):
+            from calibre.ebooks.oeb.transforms.linearize_tables import LinearizeTables
+            LinearizeTables()(self.oeb, self.opts)
+        pr(0.9)
+
+        from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
+
+        self.log.info('Cleaning up manifest...')
+        trimmer = ManifestTrimmer()
+        trimmer(self.oeb, self.opts)
+
+        self.oeb.toc.rationalize_play_orders()
+        pr(1.)
+
+        # Hand the finished book to the output plugin
+        self.log.info('Creating %s...'%self.output_plugin.name)
+        our = CompositeProgressReporter(0.67, 1., self.ui_reporter)
+        self.output_plugin.report_progress = our
+        our(0., _('Creating')+' %s'%self.output_plugin.name)
+        self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
+                self.opts, self.log)
+        self.ui_reporter(1.)
+        self.log(self.output_fmt.upper(), 'output written to', self.output)
+
+def create_oebbook(log, path_or_stream, opts, input_plugin, reader=None,
+        encoding='utf-8'):
+    '''
+    Create an OEBBook.
+
+    The book is built with an HTML preprocessor configured from
+    `input_plugin.preprocess_html` and `opts.preprocess_html`, then filled
+    by calling an instance of `reader` (default: `OEBReader`) with the book
+    and `path_or_stream`. Returns the populated book.
+    '''
+    from calibre.ebooks.oeb.base import OEBBook
+    preprocessor = HTMLPreProcessor(input_plugin.preprocess_html,
+            opts.preprocess_html)
+    book = OEBBook(log, preprocessor,
+            pretty_print=opts.pretty_print, input_encoding=encoding)
+    # Read OEB Book into OEBBook
+    log('Parsing all content...')
+    reader_class = reader
+    if reader_class is None:
+        from calibre.ebooks.oeb.reader import OEBReader
+        reader_class = OEBReader
+
+    reader_class()(book, path_or_stream)
+    return book
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -0,0 +1,202 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import re, functools
+
+from calibre import entity_to_unicode
+
+# Matches an XML declaration (<?xml ... ?>) at the very start of a document,
+# optionally preceded by whitespace.
+XMLDECL_RE    = re.compile(r'^\s*<[?]xml.*?[?]>')
+# Namespace URIs declared on <html> when a document uses the svg:/xlink:
+# prefixes without declaring them.
+SVG_NS       = 'http://www.w3.org/2000/svg'
+XLINK_NS     = 'http://www.w3.org/1999/xlink'
+
+# entity_to_unicode with the five XML predefined entities passed as
+# `exceptions`.
+convert_entities = functools.partial(entity_to_unicode, exceptions=['quot', 'apos', 'lt', 'gt', 'amp'])
+# Matches a <span>...</span> run, across lines and in any letter case.
+_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
+
+
+def sanitize_head(match):
+    '''
+    Regex substitution callback: rebuild a <head> element from group 1 of
+    `match`, with every <span>...</span> run removed from its contents.
+    '''
+    cleaned = _span_pat.sub('', match.group(1))
+    return '<head>\n' + cleaned + '\n</head>'
+
+def chap_head(match):
+    '''
+    Regex substitution callback: turn a detected chapter heading into an
+    <h1> element. The `title` group, when non-empty, is placed on a second
+    line inside the heading, below the `chap` group.
+    '''
+    chap = match.group('chap')
+    title = match.group('title')
+    if title:
+        return '<h1>'+chap+'<br/>\n'+title+'</h1><br/>\n'
+    return '<h1>'+chap+'</h1><br/>\n'
+
+def wrap_lines(match):
+    '''
+    Regex substitution callback used when unwrapping lines: the matched
+    break is replaced by a single space, preceded by the `ital` group when
+    that group matched something.
+    '''
+    ital = match.group('ital')
+    return ital+' ' if ital else ' '
+
+def line_length(raw, percent):
+    '''
+    Estimate the line length to use for unwrapping the text in `raw`.
+
+    Only text enclosed between two literal `<br>` tags is measured. Lines
+    longer than twice the average length are discarded as outliers and the
+    remaining lengths are sorted in ascending order.
+
+    :param raw: The raw markup to analyse.
+    :param percent: A decimal number, 0 - 1 (values outside that range are
+                    clamped), which determines how far into the sorted list
+                    of line lengths to look.
+    :return: The selected line length, or 0 if `raw` contains no measurable
+             lines.
+    '''
+    raw = raw.replace('&nbsp;', ' ')
+    linere = re.compile('(?<=<br>).*?(?=<br>)', re.DOTALL)
+    lengths = [len(line) for line in linere.findall(raw) if line]
+    if not lengths:
+        # Nothing to measure: avoid dividing by zero below
+        return 0
+
+    # Floor division keeps the integer average the Python 2 code computed
+    avg = sum(lengths) // len(lengths)
+    max_line = avg * 2
+    lengths = sorted(l for l in lengths if l <= max_line)
+
+    percent = min(max(percent, 0), 1)
+
+    # Clamp at 0: a small percentage must select the shortest line rather
+    # than wrap around to index -1, i.e. the longest one.
+    index = max(int(len(lengths) * percent) - 1, 0)
+
+    return lengths[index]
+
+
+class CSSPreProcessor(object):
+    '''
+    Callable that removes every `@page ... { ... }` rule from a string of
+    CSS and returns the rest of the text.
+    '''
+
+    PAGE_PAT   = re.compile(r'@page[^{]*?{[^}]*?}')
+
+    def __call__(self, data):
+        return self.PAGE_PAT.sub('', data)
+
+class HTMLPreProcessor(object):
+    '''
+    Callable that cleans up raw HTML text before it is parsed.
+
+    The `PREPROCESS` rules are applied to every document. The `PDFTOHTML`
+    and `BOOK_DESIGNER` rules are applied only to documents recognised by
+    `is_pdftohtml` and `is_book_designer` respectively. Documents
+    recognised by `is_baen` get no extra rules. Each rule is a
+    `(compiled pattern, replacement callable)` pair.
+    '''
+
+    PREPROCESS = [
+                  # Some idiotic HTML generators (Frontpage I'm looking at you)
+                  # Put all sorts of crap into <head>. This messes up lxml
+                  (re.compile(r'<head[^>]*>(.*?)</head>', re.IGNORECASE|re.DOTALL),
+                   sanitize_head),
+                  # Convert all entities, since lxml doesn't handle them well
+                  (re.compile(r'&(\S+?);'), convert_entities),
+                  # Remove the <![if/endif tags inserted by everybody's darling, MS Word
+                  (re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
+                   lambda match: ''),
+                  ]
+
+    # Fix pdftohtml markup
+    PDFTOHTML  = [
+                  # Fix umlauts
+                  (re.compile(u'¨\s*(<br.*?>)*\s*o', re.UNICODE), lambda match: u'ö'),
+                  (re.compile(u'¨\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ö'),
+                  (re.compile(u'¨\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ü'),
+                  (re.compile(u'¨\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ü'),
+                  (re.compile(u'¨\s*(<br.*?>)*\s*e', re.UNICODE), lambda match: u'ë'),
+                  (re.compile(u'¨\s*(<br.*?>)*\s*E', re.UNICODE), lambda match: u'Ë'),
+                  (re.compile(u'¨\s*(<br.*?>)*\s*i', re.UNICODE), lambda match: u'ï'),
+                  (re.compile(u'¨\s*(<br.*?>)*\s*I', re.UNICODE), lambda match: u'Ï'),
+                  (re.compile(u'¨\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'ä'),
+                  (re.compile(u'¨\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Ä'),
+
+                  # Remove page links
+                  (re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),
+                  # Remove <hr> tags
+                  (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<br />'),
+                  # Replace <br><br> with <p>
+                  (re.compile(r'<br.*?>\s*<br.*?>', re.IGNORECASE), lambda match: '<p>'),
+
+                  # Remove hyphenation
+                  (re.compile(r'-<br.*?>\n\r?'), lambda match: ''),
+
+                  # Remove gray background
+                  (re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>'),
+
+                  # Remove non breaking spaces
+                  (re.compile(ur'\u00a0'), lambda match : ' '),
+
+                  # Detect Chapters to match default XPATH in GUI
+                  (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P<chap>(<i><b>|<i>|<b>)?(Chapter|Epilogue|Prologue|Book|Part)\s*(\d+|\w+)?(</i></b>|</i>|</b>)?)(</?p[^>]*>|<br[^>]*>)\n?((?=(<i>)?\s*\w+(\s+\w+)?(</i>)?(<br[^>]*>|</?p[^>]*>))((?P<title>(<i>)?\s*\w+(\s+\w+)?(</i>)?)(<br[^>]*>|</?p[^>]*>)))?', re.IGNORECASE), chap_head),
+                  (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P<chap>([A-Z \'"!]{5,})\s*(\d+|\w+)?)(</?p[^>]*>|<br[^>]*>)\n?((?=(<i>)?\s*\w+(\s+\w+)?(</i>)?(<br[^>]*>|</?p[^>]*>))((?P<title>.*)(<br[^>]*>|</?p[^>]*>)))?'), chap_head),
+
+                  # Have paragraphs show better
+                  (re.compile(r'<br.*?>'), lambda match : '<p>'),
+                  # Clean up spaces
+                  (re.compile(u'(?<=[\.,:;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '),
+                  # Add space before and after italics
+                  (re.compile(u'(?<!“)<i>'), lambda match: ' <i>'),
+                  (re.compile(r'</i>(?=\w)'), lambda match: '</i> '),
+                 ]
+
+    # Fix Book Designer markup
+    BOOK_DESIGNER = [
+                     # HR
+                     (re.compile('<hr>', re.IGNORECASE),
+                      lambda match : '<span style="page-break-after:always"> </span>'),
+                     # Create header tags. Group 2 is the align value (if
+                     # any) and group 3 the heading text; the text must be
+                     # captured, otherwise match.group(3) raises IndexError.
+                     (re.compile('<h2[^><]*?id=BookTitle[^><]*?(align=)*(?(1)(\w+))*[^><]*?>([^><]*?)</h2>', re.IGNORECASE),
+                      lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
+                     (re.compile('<h2[^><]*?id=BookAuthor[^><]*?(align=)*(?(1)(\w+))*[^><]*?>([^><]*?)</h2>', re.IGNORECASE),
+                      lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
+                     (re.compile('<span[^><]*?id=title[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
+                      lambda match : '<h2 class="title">%s</h2>'%(match.group(1),)),
+                     (re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
+                      lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
+                     ]
+
+    def __init__(self, input_plugin_preprocess, plugin_preprocess):
+        '''
+        :param input_plugin_preprocess: Callable taking the HTML text and
+            returning the processed text.
+        :param plugin_preprocess: If true, `input_plugin_preprocess` is
+            applied as the last step of `__call__`.
+        '''
+        self.input_plugin_preprocess = input_plugin_preprocess
+        self.plugin_preprocess = plugin_preprocess
+
+    def is_baen(self, src):
+        '''Return True if `src` has a Publisher <meta> tag mentioning Baen.'''
+        return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"',
+                          re.IGNORECASE).search(src) is not None
+
+    def is_book_designer(self, raw):
+        '''Return True if `raw` contains an <H2> tag with id=BookTitle.'''
+        return re.search('<H2[^><]*id=BookTitle', raw) is not None
+
+    def is_pdftohtml(self, src):
+        '''
+        Return True if the pdftohtml marker comment occurs within the first
+        1000 characters of `src`.
+        '''
+        return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
+
+    def __call__(self, html, remove_special_chars=None):
+        '''
+        Apply the preprocessing rules to `html` and return the result.
+
+        :param html: The HTML text to clean up.
+        :param remove_special_chars: Optional object with a
+            `sub(repl, string)` method (e.g. a compiled regular
+            expression); everything it matches is deleted first.
+        '''
+        if remove_special_chars is not None:
+            html = remove_special_chars.sub('', html)
+        html = html.replace('\0', '')
+        if self.is_baen(html):
+            rules = []
+        elif self.is_book_designer(html):
+            rules = self.BOOK_DESIGNER
+        elif self.is_pdftohtml(html):
+            # The unwrap rule depends on the line length measured in this
+            # particular document, so it has to be built per call.
+            line_length_rules = [
+                # Un wrap using punctuation
+                (re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % line_length(html, .3), re.UNICODE), wrap_lines),
+            ]
+
+            rules = self.PDFTOHTML + line_length_rules
+        else:
+            rules = []
+        for rule in self.PREPROCESS + rules:
+            html = rule[0].sub(rule[1], html)
+
+        # Handle broken XHTML w/ SVG (ugh)
+        if 'svg:' in html and SVG_NS not in html:
+            html = html.replace(
+                '<html', '<html xmlns:svg="%s"' % SVG_NS, 1)
+        if 'xlink:' in html and XLINK_NS not in html:
+            html = html.replace(
+                '<html', '<html xmlns:xlink="%s"' % XLINK_NS, 1)
+
+        html = XMLDECL_RE.sub('', html)
+
+        if self.plugin_preprocess:
+            html = self.input_plugin_preprocess(html)
+
+        return html
+
--- a/src/calibre/ebooks/epub/init.py
+++ b/src/calibre/ebooks/epub/init.py
@ -6,32 +6,7 @@ __docformat__ = 'restructuredtext en'
 '''
 Conversion to EPUB.
 '''
-import sys, textwrap, re, os, uuid
-from itertools import cycle
-from calibre.utils.config import Config, StringConfig
 from calibre.utils.zipfile import ZipFile, ZIP_STORED
-from calibre.ebooks.html import config as common_config, tostring
-from lxml import etree
-
-class DefaultProfile(object):
-    
-    flow_size            = sys.maxint
-    screen_size          = None
-    remove_special_chars = False
-    remove_object_tags   = False
-    
-class PRS505(DefaultProfile):
-    
-    flow_size            = 270000
-    screen_size          = (590, 765)
-    remove_special_chars = re.compile(u'[\u200b\u00ad]')
-    remove_object_tags   = True
-        
-
-PROFILES = {
-            'PRS505' : PRS505,
-            'None'   : DefaultProfile,
-            }

 def rules(stylesheets):
    for s in stylesheets:
@ -40,38 +15,6 @@ def rules(stylesheets):
                if r.type == r.STYLE_RULE:
                    yield r

-def decrypt_font(key, path):
-    raw = open(path, 'rb').read()
-    crypt = raw[:1024]
-    key = cycle(iter(key))
-    decrypt = ''.join([chr(ord(x)^key.next()) for x in crypt])
-    with open(path, 'wb') as f:
-        f.write(decrypt)
-        f.write(raw[1024:])
-
-def process_encryption(encfile, opf):
-    key = None
-    m = re.search(r'(?i)(urn:uuid:[0-9a-f-]+)', open(opf, 'rb').read())
-    if m:
-        key = m.group(1)
-        key = list(map(ord, uuid.UUID(key).bytes))
-    try:
-        root = etree.parse(encfile)
-        for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
-            algorithm = em.get('Algorithm', '')
-            if algorithm != 'http://ns.adobe.com/pdf/enc#RC':
-                return False
-            cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
-            uri = cr.get('URI')
-            path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
-            if os.path.exists(path):
-                decrypt_font(key, path)
-        return True
-    except:
-        import traceback
-        traceback.print_exc()
-    return False
-
 def initialize_container(path_to_container, opf_name='metadata.opf'):
    '''
    Create an empty EPUB document, with a default skeleton.
@ -90,152 +33,4 @@ def initialize_container(path_to_container, opf_name='metadata.opf'):
    zf.writestr('META-INF/container.xml', CONTAINER)
    return zf

-def config(defaults=None, name='epub'):
-    desc = _('Options to control the conversion to EPUB')
-    if defaults is None:
-        c = Config(name, desc)
-    else:
-        c = StringConfig(defaults, desc)

-    c.update(common_config())
-    c.remove_opt('output')
-    c.remove_opt('zip')
-    
-    c.add_opt('output', ['-o', '--output'], default=None,
-             help=_('The output EPUB file. If not specified, it is '
-                    'derived from the input file name.'))
-    c.add_opt('profile', ['--profile'], default='PRS505', choices=list(PROFILES.keys()),
-              help=_('Profile of the target device this EPUB is meant for. '
-                     'Set to None to create a device independent EPUB. '
-                     'The profile is used for device specific restrictions '
-                     'on the EPUB. Choices are: ')+str(list(PROFILES.keys())))
-    c.add_opt('override_css', ['--override-css'], default=None,
-              help=_('Either the path to a CSS stylesheet or raw CSS. '
-                     'This CSS will override any existing CSS '
-                     'declarations in the source files.'))
-    structure = c.add_group('structure detection', 
-                            _('Control auto-detection of document structure.'))
-    structure('chapter', ['--chapter'], 
-              default="//*[re:match(name(), 'h[1-2]') and "
-              "re:test(., 'chapter|book|section|part', 'i')] | "
-              "//*[@class = 'chapter']",
-            help=_('''\
-An XPath expression to detect chapter titles. The default is to consider <h1> or
-<h2> tags that contain the words "chapter","book","section" or "part" as chapter titles as 
-well as any tags that have class="chapter". 
-The expression used must evaluate to a list of elements. To disable chapter detection,
-use the expression "/". See the XPath Tutorial in the calibre User Manual for further
-help on using this feature.
-''').replace('\n', ' '))
-    structure('chapter_mark', ['--chapter-mark'], choices=['pagebreak', 'rule', 'both', 'none'],
-              default='pagebreak', 
-              help=_('Specify how to mark detected chapters. A value of '
-                     '"pagebreak" will insert page breaks before chapters. '
-                     'A value of "rule" will insert a line before chapters. '
-                     'A value of "none" will disable chapter marking and a '
-                     'value of "both" will use both page breaks and lines '
-                     'to mark chapters.'))
-    structure('cover', ['--cover'], default=None,
-              help=_('Path to the cover to be used for this book'))
-    structure('prefer_metadata_cover', ['--prefer-metadata-cover'], default=False,
-              action='store_true',
-              help=_('Use the cover detected from the source file in preference '
-                     'to the specified cover.'))
-    structure('remove_first_image', ['--remove-first-image'], default=False,
-              help=_('Remove the first image from the input ebook. Useful if '
-                     'the first image in the source file is a cover and you '
-                     'are specifying an external cover.'))
-    structure('dont_split_on_page_breaks', ['--dont-split-on-page-breaks'], default=False,
-              help=_('Turn off splitting at page breaks. Normally, input files '
-                     'are automatically split at every page break into '
-                     'two files. This gives an output ebook that can be parsed '
-                     'faster and with less resources. However, splitting is '
-                     'slow and if your source file contains a very large '
-                     'number of page breaks, you should turn off splitting '
-                     'on page breaks.'))
-    structure('page', ['--page'], default=None,
-              help=_('XPath expression to detect page boundaries for building '
-                     'a custom pagination map, as used by AdobeDE. Default is '
-                     'not to build an explicit pagination map.'))
-    structure('page_names', ['--page-names'], default=None,
-              help=_('XPath expression to find the name of each page in the '
-                     'pagination map relative to its boundary element. '
-                     'Default is to number all pages staring with 1.'))
-    toc = c.add_group('toc', 
-        _('''\
-Control the automatic generation of a Table of Contents. If an OPF file is detected
-and it specifies a Table of Contents, then that will be used rather than trying
-to auto-generate a Table of Contents.
-''').replace('\n', ' '))
-    toc('max_toc_links', ['--max-toc-links'], default=50, 
-        help=_('Maximum number of links to insert into the TOC. Set to 0 '
-               'to disable. Default is: %default. Links are only added to the '
-               'TOC if less than the --toc-threshold number of chapters were detected.'))
-    toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
-        help=_("Don't add auto-detected chapters to the Table of Contents."))
-    toc('toc_threshold', ['--toc-threshold'], default=6,
-        help=_('If fewer than this number of chapters is detected, then links '
-               'are added to the Table of Contents. Default: %default'))
-    toc('level1_toc', ['--level1-toc'], default=None,
-        help=_('XPath expression that specifies all tags that should be added '
-               'to the Table of Contents at level one. If this is specified, '
-               'it takes precedence over other forms of auto-detection.'))
-    toc('level2_toc', ['--level2-toc'], default=None,
-        help=_('XPath expression that specifies all tags that should be added '
-               'to the Table of Contents at level two. Each entry is added '
-               'under the previous level one entry.'))
-    toc('level3_toc', ['--level3-toc'], default=None,
-        help=_('XPath expression that specifies all tags that should be added '
-               'to the Table of Contents at level three. Each entry is added '
-               'under the previous level two entry.'))
-    toc('from_ncx', ['--from-ncx'], default=None,
-        help=_('Path to a .ncx file that contains the table of contents to use '
-               'for this ebook. The NCX file should contain links relative to '
-               'the directory it is placed in. See '
-               'http://www.niso.org/workrooms/daisy/Z39-86-2005.html#NCX for '
-               'an overview of the NCX format.'))
-    toc('use_auto_toc', ['--use-auto-toc'], default=False,
-        help=_('Normally, if the source file already has a Table of Contents, '
-               'it is used in preference to the auto-generated one. '
-               'With this option, the auto-generated one is always used.'))
-    
-    layout = c.add_group('page layout', _('Control page layout'))
-    layout('margin_top', ['--margin-top'], default=5.0, 
-           help=_('Set the top margin in pts. Default is %default'))
-    layout('margin_bottom', ['--margin-bottom'], default=5.0, 
-           help=_('Set the bottom margin in pts. Default is %default'))
-    layout('margin_left', ['--margin-left'], default=5.0, 
-           help=_('Set the left margin in pts. Default is %default'))
-    layout('margin_right', ['--margin-right'], default=5.0, 
-           help=_('Set the right margin in pts. Default is %default'))
-    layout('base_font_size2', ['--base-font-size'], default=12.0,
-           help=_('The base font size in pts. Default is %defaultpt. '
-                  'Set to 0 to disable rescaling of fonts.'))
-    layout('remove_paragraph_spacing', ['--remove-paragraph-spacing'], default=False,
-           help=_('Remove spacing between paragraphs. '
-                  'Also sets a indent on paragraphs of 1.5em. '
-                  'You can override this by adding p {text-indent: 0cm} to '
-                  '--override-css. Spacing removal will not work if the source '
-                  'file forces inter-paragraph spacing.'))
-    layout('no_justification', ['--no-justification'], default=False,
-           help=_('Do not force text to be justified in output.'))
-    layout('linearize_tables', ['--linearize-tables'], default=False,
-           help=_('Remove table markup, converting it into paragraphs. '
-                  'This is useful if your source file uses a table to manage layout.'))
-    layout('preserve_tag_structure', ['--preserve-tag-structure'], default=False,
-           help=_('Preserve the HTML tag structure while splitting large HTML files. '
-                  'This is only neccessary if the HTML files contain CSS that '
-                  'uses sibling selectors. Enabling this greatly slows down '
-                  'processing of large HTML files.'))
-    
-    c.add_opt('show_opf', ['--show-opf'], default=False, group='debug',
-              help=_('Print generated OPF file to stdout'))
-    c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug',
-              help=_('Print generated NCX file to stdout'))
-    c.add_opt('keep_intermediate', ['--keep-intermediate-files'], group='debug', 
-              default=False,
-              help=_('Keep intermediate files during processing by html2epub'))
-    c.add_opt('extract_to', ['--extract-to'], group='debug', default=None,
-              help=_('Extract the contents of the produced EPUB file to the '
-                     'specified directory.'))
-    return c
--- a/src/calibre/ebooks/epub/fonts.py
+++ b/src/calibre/ebooks/epub/fonts.py
@ -1,300 +0,0 @@
-#!/usr/bin/env  python
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Font size rationalization. See :function:`relativize`.
-'''
-
-import logging, re, operator, functools, collections, unittest, copy, sys
-from xml.dom import SyntaxErr
-
-from lxml.cssselect import CSSSelector
-from lxml import etree
-from lxml.html import HtmlElement
-
-from calibre.ebooks.html import fromstring
-from calibre.ebooks.epub import rules
-from cssutils import CSSParser
-
-num           = r'[-]?\d+|[-]?\d*\.\d+'
-length        = r'(?P<zero>0)|(?P<num>{num})(?P<unit>%|em|ex|px|in|cm|mm|pt|pc)'.replace('{num}', num)
-absolute_size = r'(?P<abs>(x?x-)?(small|large)|medium)'
-relative_size = r'(?P<rel>smaller|larger)'
-
-font_size_pat   = re.compile('|'.join((relative_size, absolute_size, length)), re.I)
-line_height_pat = re.compile(r'({num})(px|in|cm|mm|pt|pc)'.replace('{num}', num))  
-
-PTU = {
-       'in' : 72.,
-       'cm' : 72/2.54,
-       'mm' : 72/25.4,
-       'pt' : 1.0,
-       'pc' : 1/12.,
-       }
-
-DEFAULT_FONT_SIZE = 12
-
-class Rationalizer(object):
-    
-    @classmethod
-    def specificity(cls, s):
-        '''Map CSS specificity tuple to a single integer'''
-        return sum([10**(4-i) + x for i,x in enumerate(s)]) 
-        
-    @classmethod
-    def compute_font_size(cls, elem):
-        '''
-        Calculate the effective font size of an element traversing its ancestors as far as
-        neccessary.
-        '''
-        cfs = elem.computed_font_size
-        if cfs is not None:
-            return
-        sfs = elem.specified_font_size
-        if callable(sfs):
-            parent = elem.getparent()
-            cls.compute_font_size(parent)
-            elem.computed_font_size = sfs(parent.computed_font_size)
-        else:
-            elem.computed_font_size = sfs
-        
-    @classmethod
-    def calculate_font_size(cls, style):
-        'Return font size in pts from style object. For relative units returns a callable'
-        match = font_size_pat.search(style.font)
-        fs = ''
-        if match:
-            fs = match.group()
-        if style.fontSize:
-            fs = style.fontSize
-            
-        match = font_size_pat.search(fs)
-        if match is None:
-            return None
-        match = match.groupdict()
-        unit = match.get('unit', '')
-        if unit: unit = unit.lower()
-        if unit in PTU.keys():
-            return PTU[unit] * float(match['num'])
-        if unit in ('em', 'ex'):
-            return functools.partial(operator.mul, float(match['num']))
-        if unit == '%':
-            return functools.partial(operator.mul, float(match['num'])/100.)
-        abs = match.get('abs', '')
-        if abs: abs = abs.lower()
-        if abs:
-            x = (1.2)**(abs.count('x') * (-1 if 'small' in abs else 1))
-            return 12 * x
-        if match.get('zero', False):
-            return 0.
-        return functools.partial(operator.mul, 1.2) if 'larger' in fs.lower() else functools.partial(operator.mul, 0.8) 
-        
-    @classmethod
-    def resolve_rules(cls, stylesheets):
-        for sheet in stylesheets:
-            if hasattr(sheet, 'fs_rules'):
-                continue
-            sheet.fs_rules = []
-            sheet.lh_rules = []
-            for r in sheet:
-                if r.type == r.STYLE_RULE:
-                    font_size = cls.calculate_font_size(r.style)
-                    if font_size is not None:
-                        for s in r.selectorList:
-                            sheet.fs_rules.append([CSSSelector(s.selectorText), font_size])
-                    orig = line_height_pat.search(r.style.lineHeight) 
-                    if orig is not None:
-                        for s in r.selectorList:
-                            sheet.lh_rules.append([CSSSelector(s.selectorText), float(orig.group(1)) * PTU[orig.group(2).lower()]])
-    
-        
-    @classmethod
-    def apply_font_size_rules(cls, stylesheets, root):
-        'Add a ``specified_font_size`` attribute to every element that has a specified font size'
-        cls.resolve_rules(stylesheets)
-        for sheet in stylesheets:
-            for selector, font_size in sheet.fs_rules:
-                elems = selector(root)
-                for elem in elems:
-                    elem.specified_font_size = font_size
-    
-    @classmethod
-    def remove_font_size_information(cls, stylesheets):
-        for r in rules(stylesheets):
-            r.style.removeProperty('font-size')
-            try:
-                new = font_size_pat.sub('', r.style.font).strip()
-                if new:
-                    r.style.font = new
-                else:
-                    r.style.removeProperty('font')
-            except SyntaxErr:
-                r.style.removeProperty('font')
-            if line_height_pat.search(r.style.lineHeight) is not None:
-                r.style.removeProperty('line-height')
-    
-    @classmethod
-    def compute_font_sizes(cls, root, stylesheets, base=12):
-        stylesheets = [s for s in stylesheets if hasattr(s, 'cssText')]
-        cls.apply_font_size_rules(stylesheets, root)
-        
-        # Compute the effective font size of all tags
-        root.computed_font_size = DEFAULT_FONT_SIZE
-        for elem in root.iter(etree.Element):
-            cls.compute_font_size(elem)
-        
-        extra_css = {}
-        if base > 0:
-            # Calculate the "base" (i.e. most common) font size
-            font_sizes = collections.defaultdict(lambda : 0)
-            body = root.xpath('//body')[0]
-            IGNORE = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
-            for elem in body.iter(etree.Element):
-                if elem.tag not in IGNORE:
-                    t = getattr(elem, 'text', '')
-                    if t: t = t.strip()
-                    if t:
-                        font_sizes[elem.computed_font_size] += len(t)
-                    
-                t = getattr(elem, 'tail', '')
-                if t: t = t.strip()
-                if t:
-                    parent = elem.getparent()
-                    if parent.tag not in IGNORE:
-                        font_sizes[parent.computed_font_size] += len(t)
-                
-            try:
-                most_common = max(font_sizes.items(), key=operator.itemgetter(1))[0]
-                scale = base/most_common if most_common > 0 else 1.
-            except ValueError:
-                scale = 1.
-            
-            # rescale absolute line-heights
-            counter = 0
-            for sheet in stylesheets:
-                for selector, lh in sheet.lh_rules:
-                    for elem in selector(root):
-                        elem.set('id', elem.get('id', 'cfs_%d'%counter))
-                        counter += 1
-                        if not extra_css.has_key(elem.get('id')):
-                            extra_css[elem.get('id')] = []
-                        extra_css[elem.get('id')].append('line-height:%fpt'%(lh*scale))
-            
-        
-            
-            # Rescale all computed font sizes
-            for elem in body.iter(etree.Element):
-                if isinstance(elem, HtmlElement):
-                    elem.computed_font_size *= scale
-        
-        # Remove all font size specifications from the last stylesheet 
-        cls.remove_font_size_information(stylesheets[-1:])
-                    
-        # Create the CSS to implement the rescaled font sizes
-        for elem in body.iter(etree.Element):
-            cfs, pcfs = map(operator.attrgetter('computed_font_size'), (elem, elem.getparent()))
-            if abs(cfs-pcfs) > 1/12. and abs(pcfs) > 1/12.:
-                elem.set('id', elem.get('id', 'cfs_%d'%counter))
-                counter += 1
-                if not extra_css.has_key(elem.get('id')):
-                    extra_css[elem.get('id')] = []
-                extra_css[elem.get('id')].append('font-size: %f%%'%(100*(cfs/pcfs)))
-                
-        css = CSSParser(loglevel=logging.ERROR).parseString('')
-        for id, r in extra_css.items():
-            css.add('#%s {%s}'%(id, ';'.join(r)))
-        return css
-    
-    @classmethod
-    def rationalize(cls, stylesheets, root, opts):
-        logger     = logging.getLogger('html2epub')
-        logger.info('\t\tRationalizing fonts...')
-        extra_css = None
-        if opts.base_font_size2 > 0:
-            try:
-                extra_css = cls.compute_font_sizes(root, stylesheets, base=opts.base_font_size2)
-            except:
-                logger.warning('Failed to rationalize font sizes.')
-                if opts.verbose > 1:
-                    logger.exception('')
-            finally:
-                root.remove_font_size_information()
-        logger.debug('\t\tDone rationalizing')
-        return extra_css
-
-################################################################################
-############## Testing
-################################################################################
-
-class FontTest(unittest.TestCase):
-    
-    def setUp(self):
-        from calibre.ebooks.epub import config
-        self.opts = config(defaults='').parse()
-        self.html = '''
-        <html>
-            <head>
-                <title>Test document</title>
-            </head>
-            <body>
-                <div id="div1">
-                <!-- A comment -->
-                    <p id="p1">Some <b>text</b></p>
-                </div>
-                <p id="p2">Some other <span class="it">text</span>.</p>
-                <p id="longest">The longest piece of single font size text in this entire file. Used to test resizing.</p>
-            </body>
-        </html> 
-        '''
-        self.root = fromstring(self.html)
-        
-    def do_test(self, css, base=DEFAULT_FONT_SIZE, scale=1):
-        root1 = copy.deepcopy(self.root)
-        root1.computed_font_size = DEFAULT_FONT_SIZE
-        stylesheet = CSSParser(loglevel=logging.ERROR).parseString(css)
-        stylesheet2 = Rationalizer.compute_font_sizes(root1, [stylesheet], base)
-        root2 = copy.deepcopy(root1)
-        root2.remove_font_size_information()
-        root2.computed_font_size = DEFAULT_FONT_SIZE
-        Rationalizer.apply_font_size_rules([stylesheet2], root2)
-        for elem in root2.iter(etree.Element):
-            Rationalizer.compute_font_size(elem)
-        for e1, e2 in zip(root1.xpath('//body')[0].iter(etree.Element), root2.xpath('//body')[0].iter(etree.Element)):
-            self.assertAlmostEqual(e1.computed_font_size, e2.computed_font_size, 
-                msg='Computed font sizes for %s not equal. Original: %f Processed: %f'%\
-                (root1.getroottree().getpath(e1), e1.computed_font_size, e2.computed_font_size))
-        return stylesheet2.cssText
-        
-    def testStripping(self):
-        'Test that any original entries are removed from the CSS'
-        css = 'p { font: bold 10px italic smaller; font-size: x-large} \na { font-size: 0 }'
-        css = CSSParser(loglevel=logging.ERROR).parseString(css)
-        Rationalizer.compute_font_sizes(copy.deepcopy(self.root), [css])
-        self.assertEqual(css.cssText.replace(' ', '').replace('\n', ''), 
-                         'p{font:bolditalic}')
-    
-    def testIdentity(self):
-        'Test that no unnecessary font size changes are made'
-        extra_css = self.do_test('div {font-size:12pt} \nspan {font-size:100%}')
-        self.assertEqual(extra_css.strip(), '')
-        
-    def testRelativization(self):
-        'Test conversion of absolute to relative sizes'
-        self.do_test('#p1 {font: 24pt} b {font: 12pt} .it {font: 48pt} #p2 {font: 100%}')
-        
-    def testResizing(self):
-        'Test resizing of fonts'
-        self.do_test('#longest {font: 24pt} .it {font:20pt; line-height:22pt}')
-        
-
-def suite():
-    return unittest.TestLoader().loadTestsFromTestCase(FontTest)
-    
-def test():
-    unittest.TextTestRunner(verbosity=2).run(suite())
-
-if __name__ == '__main__':
-    sys.exit(test())    
-        
--- a/src/calibre/ebooks/epub/from_any.py
+++ b/src/calibre/ebooks/epub/from_any.py
@ -1,207 +0,0 @@
-from __future__ import with_statement
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Convert any ebook format to epub.
-'''
-
-import sys, os, re
-from contextlib import nested
-
-from calibre import extract, walk
-from calibre.ebooks import DRMError
-from calibre.ebooks.epub import config as common_config, process_encryption
-from calibre.ebooks.epub.from_html import convert as html2epub, find_html_index
-from calibre.ptempfile import TemporaryDirectory
-from calibre.ebooks.metadata import MetaInformation
-from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
-from calibre.utils.zipfile import ZipFile
-from calibre.customize.ui import run_plugins_on_preprocess
-
-def lit2opf(path, tdir, opts):
-    from calibre.ebooks.lit.reader import LitReader
-    print 'Exploding LIT file:', path
-    reader = LitReader(path)
-    reader.extract_content(tdir, False)
-    opf = None
-    for opf in walk(tdir):
-        if opf.lower().endswith('.opf'):
-            break
-    if not opf.endswith('.opf'):
-        opf = None
-    if opf is not None: # Check for url-quoted filenames
-        _opf = OPF(opf, os.path.dirname(opf))
-        replacements = []
-        for item in _opf.itermanifest():
-            href = item.get('href', '')
-            path = os.path.join(os.path.dirname(opf), *(href.split('/')))
-            if not os.path.exists(path) and os.path.exists(path.replace('&', '%26')):
-                npath = path
-                path = path.replace('&', '%26')
-                replacements.append((path, npath))
-        if replacements:
-            print 'Fixing quoted filenames...'
-            for path, npath in replacements:
-                if os.path.exists(path):
-                    os.rename(path, npath)
-            for f in walk(tdir):
-                with open(f, 'r+b') as f:
-                    raw = f.read()
-                    for path, npath in replacements:
-                        raw = raw.replace(os.path.basename(path), os.path.basename(npath))
-                        f.seek(0)
-                        f.truncate()
-                        f.write(raw)
-    return opf
-
-def mobi2opf(path, tdir, opts):
-    from calibre.ebooks.mobi.reader import MobiReader
-    print 'Exploding MOBI file:', path.encode('utf-8') if isinstance(path, unicode) else path
-    reader = MobiReader(path)
-    reader.extract_content(tdir)
-    files = list(walk(tdir))
-    opts.encoding = 'utf-8'
-    for f in files:
-        if f.lower().endswith('.opf'):
-            return f
-    html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}', re.IGNORECASE)
-    hf = [f for f in files if html_pat.match(os.path.splitext(f)[1]) is not None]
-    mi = MetaInformation(os.path.splitext(os.path.basename(path))[0], [_('Unknown')])
-    opf = OPFCreator(tdir, mi)
-    opf.create_manifest([(hf[0], None)])
-    opf.create_spine([hf[0]])
-    ans = os.path.join(tdir, 'metadata.opf')
-    opf.render(open(ans, 'wb'))
-    return ans
-
-def fb22opf(path, tdir, opts):
-    from calibre.ebooks.lrf.fb2.convert_from import to_html
-    print 'Converting FB2 to HTML...'
-    return to_html(path, tdir)
-
-def rtf2opf(path, tdir, opts):
-    from calibre.ebooks.lrf.rtf.convert_from import generate_html
-    generate_html(path, tdir)
-    return os.path.join(tdir, 'metadata.opf')
-
-def txt2opf(path, tdir, opts):
-    from calibre.ebooks.lrf.txt.convert_from import generate_html
-    generate_html(path, opts.encoding, tdir)
-    opts.encoding = 'utf-8'
-    return os.path.join(tdir, 'metadata.opf')
-
-def pdf2opf(path, tdir, opts):
-    from calibre.ebooks.lrf.pdf.convert_from import generate_html
-    generate_html(path, tdir)
-    opts.dont_split_on_page_breaks = True
-    return os.path.join(tdir, 'metadata.opf')
-
-def epub2opf(path, tdir, opts):
-    zf = ZipFile(path)
-    zf.extractall(tdir)
-    opts.chapter_mark = 'none'
-    encfile = os.path.join(tdir, 'META-INF', 'encryption.xml')
-    opf = None
-    for f in walk(tdir):
-        if f.lower().endswith('.opf'):
-            opf = f
-            break
-    if opf and os.path.exists(encfile):
-        if not process_encryption(encfile, opf):
-            raise DRMError(os.path.basename(path))
-
-    if opf is None:
-        raise ValueError('%s is not a valid EPUB file'%path)
-    return opf
-
-def odt2epub(path, tdir, opts):
-    from calibre.ebooks.odt.to_oeb import Extract
-    opts.encoding = 'utf-8'
-    return Extract()(path, tdir)
-
-MAP = {
-       'lit'  : lit2opf,
-       'mobi' : mobi2opf,
-       'prc'  : mobi2opf,
-       'azw'  : mobi2opf,
-       'fb2'  : fb22opf,
-       'rtf'  : rtf2opf,
-       'txt'  : txt2opf,
-       'pdf'  : pdf2opf,
-       'epub' : epub2opf,
-       'odt'  : odt2epub,
-       }
-SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
-                  'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub']
-
-def unarchive(path, tdir):
-    extract(path, tdir)
-    files = list(walk(tdir))
-
-    for ext in ['opf'] + list(MAP.keys()):
-        for f in files:
-            if f.lower().endswith('.'+ext):
-                if ext in ['txt', 'rtf'] and os.stat(f).st_size < 2048:
-                    continue
-                return f, ext
-    return find_html_index(files)
-
-def any2epub(opts, path, notification=None, create_epub=True,
-             oeb_cover=False, extract_to=None):
-    path = run_plugins_on_preprocess(path)
-    ext = os.path.splitext(path)[1]
-    if not ext:
-        raise ValueError('Unknown file type: '+path)
-    ext = ext.lower()[1:]
-
-    if opts.output is None:
-        opts.output = os.path.splitext(os.path.basename(path))[0]+'.epub'
-
-    with nested(TemporaryDirectory('_any2epub1'), TemporaryDirectory('_any2epub2')) as (tdir1, tdir2):
-        if ext in ['rar', 'zip', 'oebzip']:
-            path, ext = unarchive(path, tdir1)
-            print 'Found %s file in archive'%(ext.upper())
-
-        if ext in MAP.keys():
-            path = MAP[ext](path, tdir2, opts)
-            ext = 'opf'
-
-
-        if re.match(r'((x){0,1}htm(l){0,1})|opf', ext) is None:
-            raise ValueError('Conversion from %s is not supported'%ext.upper())
-
-        print 'Creating EPUB file...'
-        html2epub(path, opts, notification=notification,
-                  create_epub=create_epub, oeb_cover=oeb_cover,
-                  extract_to=extract_to)
-
-def config(defaults=None):
-    return common_config(defaults=defaults)
-
-
-def formats():
-    return ['html', 'rar', 'zip', 'oebzip']+list(MAP.keys())
-
-USAGE = _('''\
-%%prog [options] filename
-
-Convert any of a large number of ebook formats to a %s file. Supported formats are: %s
-''')
-
-def option_parser(usage=USAGE):
-    return config().option_parser(usage=usage%('EPUB', formats()))
-
-def main(args=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    if len(args) < 2:
-        parser.print_help()
-        print 'No input file specified.'
-        return 1
-    any2epub(opts, args[1])
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/epub/from_comic.py
+++ b/src/calibre/ebooks/epub/from_comic.py
@ -1,21 +0,0 @@
-from __future__ import with_statement
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'Convert a comic in CBR/CBZ format to epub'
-
-import sys
-from functools import partial
-from calibre.ebooks.lrf.comic.convert_from import do_convert, option_parser, config, main as _main
-
-convert = partial(do_convert, output_format='epub')
-main    = partial(_main, output_format='epub')
-
-if __name__ == '__main__':
-    sys.exit(main())
-
-if False:
-    option_parser
-    config
-    
--- a/src/calibre/ebooks/epub/from_feeds.py
+++ b/src/calibre/ebooks/epub/from_feeds.py
@ -1,71 +0,0 @@
-from __future__ import with_statement
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Convert periodical content into EPUB ebooks.
-'''
-import sys, glob, os
-from calibre.web.feeds.main import config as feeds2disk_config, USAGE, run_recipe
-from calibre.ebooks.epub.from_html import config as html2epub_config
-from calibre.ptempfile import TemporaryDirectory
-from calibre.ebooks.epub.from_html import convert as html2epub
-from calibre import strftime, sanitize_file_name
-
-def config(defaults=None):
-    c = feeds2disk_config(defaults=defaults)
-    c.remove('lrf')
-    c.remove('epub')
-    c.remove('output_dir')
-    c.update(html2epub_config(defaults=defaults))
-    c.remove('chapter_mark')
-    return c
-
-def option_parser():
-    c = config()
-    return c.option_parser(usage=USAGE)
-
-def convert(opts, recipe_arg, notification=None):
-    opts.lrf  = False
-    opts.epub = True
-    if opts.debug:
-        opts.verbose = 2
-    parser = option_parser()
-    with TemporaryDirectory('_feeds2epub') as tdir:
-        opts.output_dir = tdir
-        recipe = run_recipe(opts, recipe_arg, parser, notification=notification)
-        c = config()
-        recipe_opts = c.parse_string(recipe.html2epub_options)
-        c.smart_update(recipe_opts, opts)
-        opts = recipe_opts
-        opts.chapter_mark = 'none'
-        opts.dont_split_on_page_breaks = True
-        opf = glob.glob(os.path.join(tdir, '*.opf'))
-        if not opf:
-            raise Exception('Downloading of recipe: %s failed'%recipe_arg)
-        opf = opf[0]
-        
-        if opts.output is None:
-            fname = recipe.title + strftime(recipe.timefmt) + '.epub'
-            opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname))
-        
-        print 'Generating epub...'
-        opts.encoding = 'utf-8'
-        opts.remove_paragraph_spacing = True
-        html2epub(opf, opts, notification=notification)
-    
-
-def main(args=sys.argv, notification=None, handler=None):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    if len(args) != 2 and opts.feeds is None:
-        parser.print_help()
-        return 1
-    recipe_arg = args[1] if len(args) > 1 else None
-    convert(opts, recipe_arg, notification=notification)
-        
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@ -1,547 +0,0 @@
-from __future__ import with_statement
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Conversion of HTML/OPF files follows several stages:
-
-    * All links in the HTML files or in the OPF manifest are
-    followed to build up a list of HTML files to be converted.
-    This stage is implemented by
-    :function:`calibre.ebooks.html.traverse` and
-    :class:`calibre.ebooks.html.HTMLFile`.
-
-    * The HTML is pre-processed to make it more semantic.
-    All links in the HTML files to other resources like images,
-    stylesheets, etc. are relativized. The resources are copied
-    into the `resources` sub directory. This is accomplished by
-    :class:`calibre.ebooks.html.PreProcessor` and
-    :class:`calibre.ebooks.html.Parser`.
-
-    * The HTML is processed. Various operations are performed.
-    All style declarations are extracted and consolidated into
-    a single style sheet. Chapters are auto-detected and marked.
-    Various font related manipulations are performed. See
-    :class:`HTMLProcessor`.
-
-    * The processed HTML is saved and the
-    :module:`calibre.ebooks.epub.split` module is used to split up
-    large HTML files into smaller chunks.
-
-    * The EPUB container is created.
-'''
-
-import os, sys, cStringIO, logging, re, functools, shutil
-
-from lxml.etree import XPath
-from lxml import html, etree
-from PyQt4.Qt import QApplication, QPixmap, Qt
-
-from calibre.ebooks.html import Processor, merge_metadata, get_filelist,\
-    opf_traverse, create_metadata, rebase_toc, Link, parser
-from calibre.ebooks.epub import config as common_config, tostring
-from calibre.ptempfile import TemporaryDirectory
-from calibre.ebooks.metadata.toc import TOC
-from calibre.ebooks.metadata.opf2 import OPF
-from calibre.ebooks.epub import initialize_container, PROFILES
-from calibre.ebooks.epub.split import split
-from calibre.ebooks.epub.pages import add_page_map
-from calibre.ebooks.epub.fonts import Rationalizer
-from calibre.constants import preferred_encoding
-from calibre.customize.ui import run_plugins_on_postprocess
-from calibre import walk, CurrentDir, to_unicode, fit_image
-
-content = functools.partial(os.path.join, u'content')
-
-def remove_bad_link(element, attribute, link, pos):
-    if attribute is not None:
-        if element.tag in ['link']:
-            element.getparent().remove(element)
-        else:
-            element.set(attribute, '')
-            del element.attrib[attribute]
-
-def check_links(opf_path, pretty_print):
-    '''
-    Find and remove all invalid links in the HTML files
-    '''
-    logger = logging.getLogger('html2epub')
-    logger.info('\tChecking files for bad links...')
-    pathtoopf = os.path.abspath(opf_path)
-    with CurrentDir(os.path.dirname(pathtoopf)):
-        opf = OPF(open(pathtoopf, 'rb'), os.path.dirname(pathtoopf))
-        html_files = []
-        for item in opf.itermanifest():
-            if 'html' in item.get('media-type', '').lower():
-                f = item.get('href').split('/')[-1]
-                if isinstance(f, str):
-                    f = f.decode('utf-8')
-                html_files.append(os.path.abspath(content(f)))
-
-        for path in html_files:
-            if not os.access(path, os.R_OK):
-                continue
-            base = os.path.dirname(path)
-            root = html.fromstring(open(content(path), 'rb').read(), parser=parser)
-            for element, attribute, link, pos in list(root.iterlinks()):
-                link = to_unicode(link)
-                plink = Link(link, base)
-                bad = False
-                if plink.path is not None and not os.path.exists(plink.path):
-                    bad = True
-                if bad:
-                    remove_bad_link(element, attribute, link, pos)
-            open(content(path), 'wb').write(tostring(root, pretty_print))
-
-def find_html_index(files):
-    '''
-    Given a list of files, find the most likely root HTML file in the
-    list.
-    '''
-    html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
-    html_files = [f for f in files if html_pat.search(f) is not None]
-    if not html_files:
-        raise ValueError(_('Could not find an ebook inside the archive'))
-    html_files = [(f, os.stat(f).st_size) for f in html_files]
-    html_files.sort(cmp = lambda x, y: cmp(x[1], y[1]))
-    html_files = [f[0] for f in html_files]
-    for q in ('toc', 'index'):
-        for f in html_files:
-            if os.path.splitext(os.path.basename(f))[0].lower() == q:
-                return f, os.path.splitext(f)[1].lower()[1:]
-    return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
-
-def rescale_images(imgdir, screen_size, log):
-    pwidth, pheight = screen_size
-    if QApplication.instance() is None:
-        QApplication([])
-    for f in os.listdir(imgdir):
-        path = os.path.join(imgdir, f)
-        if os.path.splitext(f)[1] in ('.css', '.js'):
-            continue
-
-        p = QPixmap()
-        p.load(path)
-        if p.isNull():
-            continue
-        width, height = p.width(), p.height()
-        scaled, new_width, new_height = fit_image(width, height, pwidth,
-                pheight)
-        if scaled:
-            log.info('Rescaling image: '+f)
-            p.scaled(new_width, new_height, Qt.IgnoreAspectRatio,
-                    Qt.SmoothTransformation).save(path, 'JPEG')
-
-
-
-
-
-class HTMLProcessor(Processor, Rationalizer):
-
-    def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, stylesheets):
-        Processor.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles,
-                           name='html2epub')
-        if opts.verbose > 2:
-            self.debug_tree('parsed')
-        self.detect_chapters()
-
-        self.extract_css(stylesheets)
-        if self.opts.base_font_size2 > 0:
-            self.font_css = self.rationalize(self.external_stylesheets+[self.stylesheet],
-                                             self.root, self.opts)
-        if opts.verbose > 2:
-            self.debug_tree('nocss')
-
-        if hasattr(self.body, 'xpath'):
-            for script in list(self.body.xpath('descendant::script')):
-                script.getparent().remove(script)
-
-        self.fix_markup()
-
-    def convert_image(self, img):
-        rpath = img.get('src', '')
-        path = os.path.join(os.path.dirname(self.save_path()), *rpath.split('/'))
-        if os.path.exists(path) and os.path.isfile(path):
-            if QApplication.instance() is None:
-                app = QApplication([])
-                app
-            p = QPixmap()
-            p.load(path)
-            if not p.isNull():
-                p.save(path + '_calibre_converted.jpg')
-                os.remove(path)
-                for key, val in self.resource_map.items():
-                    if val == rpath:
-                        self.resource_map[key] = rpath+'_calibre_converted.jpg'
-        img.set('src', rpath+'_calibre_converted.jpg')
-
-    def fix_markup(self):
-        '''
-        Perform various markup transforms to get the output to render correctly
-        in the quirky ADE.
-        '''
-        # Replace <br> that are children of <body> as ADE doesn't handle them
-        if hasattr(self.body, 'xpath'):
-            for br in self.body.xpath('./br'):
-                if br.getparent() is None:
-                    continue
-                try:
-                    sibling = br.itersiblings().next()
-                except:
-                    sibling = None
-                br.tag = 'p'
-                br.text = u'\u00a0'
-                if (br.tail and br.tail.strip()) or sibling is None or \
-                   getattr(sibling, 'tag', '') != 'br':
-                    style = br.get('style', '').split(';')
-                    style = filter(None, map(lambda x: x.strip(), style))
-                    style.append('margin: 0pt; border:0pt; height:0pt')
-                    br.set('style', '; '.join(style))
-                else:
-                    sibling.getparent().remove(sibling)
-                    if sibling.tail:
-                        if not br.tail:
-                            br.tail = ''
-                        br.tail += sibling.tail
-
-
-        if self.opts.profile.remove_object_tags:
-            for tag in self.root.xpath('//embed'):
-                tag.getparent().remove(tag)
-            for tag in self.root.xpath('//object'):
-                if tag.get('type', '').lower().strip() in ('image/svg+xml',):
-                    continue
-                tag.getparent().remove(tag)
-
-
-        for tag in self.root.xpath('//title|//style'):
-            if not tag.text:
-                tag.getparent().remove(tag)
-        for tag in self.root.xpath('//script'):
-            if not tag.text and not tag.get('src', False):
-                tag.getparent().remove(tag)
-
-        for tag in self.root.xpath('//form'):
-            tag.getparent().remove(tag)
-
-        for tag in self.root.xpath('//center'):
-            tag.tag = 'div'
-            tag.set('style', 'text-align:center')
-
-        if self.opts.linearize_tables:
-            for tag in self.root.xpath('//table | //tr | //th | //td'):
-                tag.tag = 'div'
-
-        # ADE can't handle &amp; in an img url
-        for tag in self.root.xpath('//img[@src]'):
-            tag.set('src', tag.get('src', '').replace('&', ''))
-
-
-    def save(self):
-        for meta in list(self.root.xpath('//meta')):
-            meta.getparent().remove(meta)
-        # Strip all comments since Adobe DE is petrified of them
-        Processor.save(self, strip_comments=True)
-
-    def remove_first_image(self):
-        images = self.root.xpath('//img')
-        if images:
-            images[0].getparent().remove(images[0])
-            return True
-        return False
-
-
-
-
-def config(defaults=None):
-    return common_config(defaults=defaults)
-
-def option_parser():
-    c = config()
-    return c.option_parser(usage=_('''\
-%prog [options] file.html|opf
-
-Convert a HTML file to an EPUB ebook. Recursively follows links in the HTML file.
-If you specify an OPF file instead of an HTML file, the list of links is takes from
-the <spine> element of the OPF file.
-'''))
-
-def parse_content(filelist, opts, tdir):
-    os.makedirs(os.path.join(tdir, 'content', 'resources'))
-    resource_map, stylesheets = {}, {}
-    toc = TOC(base_path=tdir, type='root')
-    stylesheet_map = {}
-    first_image_removed = False
-    for htmlfile in filelist:
-        logging.getLogger('html2epub').debug('Processing %s...'%htmlfile)
-        hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'),
-                           resource_map, filelist, stylesheets)
-        if not first_image_removed and opts.remove_first_image:
-            first_image_removed = hp.remove_first_image()
-        hp.populate_toc(toc)
-        hp.save()
-        stylesheet_map[os.path.basename(hp.save_path())] = \
-            [s for s in hp.external_stylesheets + [hp.stylesheet, hp.font_css, hp.override_css] if s is not None]
-
-    logging.getLogger('html2epub').debug('Saving stylesheets...')
-    if opts.base_font_size2 > 0:
-        Rationalizer.remove_font_size_information(stylesheets.values())
-        for path, css in stylesheets.items():
-            raw = getattr(css, 'cssText', css)
-            if isinstance(raw, unicode):
-                raw = raw.encode('utf-8')
-            open(path, 'wb').write(raw)
-    if toc.count('chapter') > opts.toc_threshold:
-        toc.purge(['file', 'link', 'unknown'])
-    if toc.count('chapter') + toc.count('file') > opts.toc_threshold:
-        toc.purge(['link', 'unknown'])
-    toc.purge(['link'], max=opts.max_toc_links)
-
-    return resource_map, hp.htmlfile_map, toc, stylesheet_map
-
-TITLEPAGE = '''\
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
-    <head>
-        <title>Cover</title>
-        <style type="text/css" title="override_css">
-            @page {padding: 0pt; margin:0pt}
-            body { text-align: center; padding:0pt; margin: 0pt; }
-            div { margin: 0pt; padding: 0pt; }
-        </style>
-    </head>
-    <body>
-        <div>
-            <img src="%s" alt="cover" style="height: 100%%" />
-        </div>
-    </body>
-</html>
-'''
-
-def create_cover_image(src, dest, screen_size, rescale_cover=True):
-    try:
-        from PyQt4.Qt import QImage, Qt
-        if QApplication.instance() is None:
-            QApplication([])
-        im = QImage()
-        im.load(src)
-        if im.isNull():
-            raise ValueError('Invalid cover image')
-        if rescale_cover and screen_size is not None:
-            width, height = im.width(), im.height()
-            dw, dh = (screen_size[0]-width)/float(width), (screen_size[1]-height)/float(height)
-            delta = min(dw, dh)
-            if delta > 0:
-                nwidth = int(width + delta*(width))
-                nheight = int(height + delta*(height))
-                im = im.scaled(int(nwidth), int(nheight), Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
-        im.save(dest)
-    except:
-        import traceback
-        traceback.print_exc()
-        return False
-    return True
-
-def process_title_page(mi, filelist, htmlfilemap, opts, tdir):
-    old_title_page = None
-    f = lambda x : os.path.normcase(os.path.normpath(x))
-    if not isinstance(mi.cover, basestring):
-        mi.cover = None
-    if mi.cover:
-        if f(filelist[0].path) == f(mi.cover):
-            old_title_page = htmlfilemap[filelist[0].path]
-    #logger = logging.getLogger('html2epub')
-    metadata_cover = mi.cover
-    if metadata_cover and not os.path.exists(metadata_cover):
-        metadata_cover = None
-
-    cpath = '/'.join(('resources', '_cover_.jpg'))
-    cover_dest = os.path.join(tdir, 'content', *cpath.split('/'))
-    if metadata_cover is not None:
-        if not create_cover_image(metadata_cover, cover_dest,
-                                  opts.profile.screen_size):
-            metadata_cover = None
-    specified_cover = opts.cover
-    if specified_cover and not os.path.exists(specified_cover):
-        specified_cover = None
-    if specified_cover is not None:
-        if not create_cover_image(specified_cover, cover_dest,
-                                  opts.profile.screen_size):
-            specified_cover = None
-
-    cover = metadata_cover if specified_cover is None or (opts.prefer_metadata_cover and metadata_cover is not None) else specified_cover
-
-    if cover is not None:
-        titlepage = TITLEPAGE%cpath
-        tp = 'calibre_title_page.html' if old_title_page is None else old_title_page
-        tppath = os.path.join(tdir, 'content', tp)
-        with open(tppath, 'wb') as f:
-            f.write(titlepage)
-        return tp if old_title_page is None else None, True
-    elif os.path.exists(cover_dest):
-        os.remove(cover_dest)
-    return None, old_title_page is not None
-
-def find_oeb_cover(htmlfile):
-    if os.stat(htmlfile).st_size > 2048:
-        return None
-    match = re.search(r'(?i)<img[^<>]+src\s*=\s*[\'"](.+?)[\'"]', open(htmlfile, 'rb').read())
-    if match:
-        return match.group(1)
-
-def condense_ncx(ncx_path):
-    tree = etree.parse(ncx_path)
-    for tag in tree.getroot().iter(tag=etree.Element):
-        if tag.text:
-            tag.text = tag.text.strip()
-        if tag.tail:
-            tag.tail = tag.tail.strip()
-    compressed = etree.tostring(tree.getroot(), encoding='utf-8')
-    open(ncx_path, 'wb').write(compressed)
-
-def convert(htmlfile, opts, notification=None, create_epub=True,
-            oeb_cover=False, extract_to=None):
-    htmlfile = os.path.abspath(htmlfile)
-    if opts.output is None:
-        opts.output = os.path.splitext(os.path.basename(htmlfile))[0] + '.epub'
-    opts.profile = PROFILES[opts.profile]
-    opts.output = os.path.abspath(opts.output)
-    if opts.override_css is not None:
-        try:
-            opts.override_css = open(opts.override_css, 'rb').read().decode(preferred_encoding, 'replace')
-        except:
-            opts.override_css = opts.override_css.decode(preferred_encoding, 'replace')
-    if opts.from_opf:
-        opts.from_opf = os.path.abspath(opts.from_opf)
-    if opts.from_ncx:
-        opts.from_ncx = os.path.abspath(opts.from_ncx)
-    if htmlfile.lower().endswith('.opf'):
-        opf = OPF(htmlfile, os.path.dirname(os.path.abspath(htmlfile)))
-        filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
-        if not filelist:
-            # Bad OPF look for a HTML file instead
-            htmlfile = find_html_index(walk(os.path.dirname(htmlfile)))[0]
-            if htmlfile is None:
-                raise ValueError('Could not find suitable file to convert.')
-            filelist = get_filelist(htmlfile, opts)[1]
-        mi = merge_metadata(None, opf, opts)
-    else:
-        opf, filelist = get_filelist(htmlfile, opts)
-        mi = merge_metadata(htmlfile, opf, opts)
-    opts.chapter = XPath(opts.chapter,
-                    namespaces={'re':'http://exslt.org/regular-expressions'})
-    for x in (1, 2, 3):
-        attr = 'level%d_toc'%x
-        if getattr(opts, attr):
-            setattr(opts, attr, XPath(getattr(opts, attr),
-                      namespaces={'re':'http://exslt.org/regular-expressions'}))
-        else:
-            setattr(opts, attr, None)
-
-    with TemporaryDirectory(suffix='_html2epub', keep=opts.keep_intermediate) as tdir:
-        if opts.keep_intermediate:
-            print 'Intermediate files in', tdir
-        resource_map, htmlfile_map, generated_toc, stylesheet_map = \
-                                        parse_content(filelist, opts, tdir)
-        logger = logging.getLogger('html2epub')
-        resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()]
-
-
-        title_page, has_title_page = process_title_page(mi, filelist, htmlfile_map, opts, tdir)
-        spine = [htmlfile_map[f.path] for f in filelist]
-        if not oeb_cover and title_page is not None:
-            spine = [title_page] + spine
-        mi.cover = None
-        mi.cover_data = (None, None)
-
-
-        mi = create_metadata(tdir, mi, spine, resources)
-        buf = cStringIO.StringIO()
-        if mi.toc:
-            rebase_toc(mi.toc, htmlfile_map, tdir)
-        if opts.use_auto_toc or mi.toc is None or len(list(mi.toc.flat())) < 2:
-            mi.toc = generated_toc
-        if opts.from_ncx:
-            toc = TOC()
-            toc.read_ncx_toc(opts.from_ncx)
-            mi.toc = toc
-        for item in mi.manifest:
-            if getattr(item, 'mime_type', None) == 'text/html':
-                item.mime_type = 'application/xhtml+xml'
-        opf_path = os.path.join(tdir, 'metadata.opf')
-        with open(opf_path, 'wb') as f:
-            mi.render(f, buf, 'toc.ncx')
-        toc = buf.getvalue()
-        if toc:
-            with open(os.path.join(tdir, 'toc.ncx'), 'wb') as f:
-                f.write(toc)
-            if opts.show_ncx:
-                print toc
-        split(opf_path, opts, stylesheet_map)
-        if opts.page:
-            logger.info('\tBuilding page map...')
-            add_page_map(opf_path, opts)
-        check_links(opf_path, opts.pretty_print)
-
-        opf = OPF(opf_path, tdir)
-        opf.remove_guide()
-        oeb_cover_file = None
-        if oeb_cover and title_page is not None:
-            oeb_cover_file = find_oeb_cover(os.path.join(tdir, 'content', title_page))
-        if has_title_page or (oeb_cover and oeb_cover_file):
-            opf.create_guide_element()
-            if has_title_page and not oeb_cover:
-                opf.add_guide_item('cover', 'Cover', 'content/'+spine[0])
-            if oeb_cover and oeb_cover_file:
-                opf.add_guide_item('cover', 'Cover', 'content/'+oeb_cover_file)
-
-        cpath = os.path.join(tdir, 'content', 'resources', '_cover_.jpg')
-        if os.path.exists(cpath):
-            opf.add_path_to_manifest(cpath, 'image/jpeg')
-        with open(opf_path, 'wb') as f:
-            f.write(opf.render())
-        ncx_path = os.path.join(os.path.dirname(opf_path), 'toc.ncx')
-        if os.path.exists(ncx_path) and os.stat(ncx_path).st_size > opts.profile.flow_size:
-            logger.info('Condensing NCX from %d bytes...'%os.stat(ncx_path).st_size)
-            condense_ncx(ncx_path)
-            if os.stat(ncx_path).st_size > opts.profile.flow_size:
-                logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size)
-
-        if opts.profile.screen_size is not None:
-            rescale_images(os.path.join(tdir, 'content', 'resources'),
-                    opts.profile.screen_size, logger)
-
-        if create_epub:
-            epub = initialize_container(opts.output)
-            epub.add_dir(tdir)
-            epub.close()
-            run_plugins_on_postprocess(opts.output, 'epub')
-            logger.info(_('Output written to ')+opts.output)
-
-        if opts.show_opf:
-            print open(opf_path, 'rb').read()
-
-        if opts.extract_to is not None:
-            if os.path.exists(opts.extract_to):
-                shutil.rmtree(opts.extract_to)
-            shutil.copytree(tdir, opts.extract_to)
-
-        if extract_to is not None:
-            if os.path.exists(extract_to):
-                shutil.rmtree(extract_to)
-            shutil.copytree(tdir, extract_to)
-
-
-
-def main(args=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    if len(args) < 2:
-        parser.print_help()
-        print _('You must specify an input HTML file')
-        return 1
-    convert(args[1], opts)
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/epub/input.py
+++ b/src/calibre/ebooks/epub/input.py
@ -0,0 +1,127 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os, re, uuid
+from itertools import cycle
+
+from lxml import etree
+
+from calibre.customize.conversion import InputFormatPlugin
+
+class EPUBInput(InputFormatPlugin):
+    '''Input plugin that unpacks an EPUB and returns the path to its OPF.'''
+
+    name        = 'EPUB Input'
+    author      = 'Kovid Goyal'
+    description = 'Convert EPUB files (.epub) to HTML'
+    file_types  = set(['epub'])
+
+    @classmethod
+    def decrypt_font(cls, key, path):
+        '''XOR the first 1024 bytes of path with the repeating key, in place.'''
+        with open(path, 'rb') as f:
+            raw = f.read()
+        key = cycle(iter(key))
+        decrypt = ''.join([chr(ord(x)^key.next()) for x in raw[:1024]])
+        with open(path, 'wb') as f:
+            f.write(decrypt)
+            f.write(raw[1024:])
+
+    @classmethod
+    def process_encryption(cls, encfile, opf, log):
+        '''Decrypt the files listed in encfile; return False if that fails.'''
+        key = None  # stays None when the OPF contains no urn:uuid identifier
+        with open(opf, 'rb') as f:
+            m = re.search(r'(?i)(urn:uuid:[0-9a-f-]+)', f.read())
+        if m:
+            key = list(map(ord, uuid.UUID(m.group(1)).bytes))
+        try:
+            root = etree.parse(encfile)
+            for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
+                if em.get('Algorithm', '') != 'http://ns.adobe.com/pdf/enc#RC':
+                    return False  # any other algorithm is not handled here
+                cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
+                uri = cr.get('URI')
+                path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
+                if os.path.exists(path):
+                    cls.decrypt_font(key, path)
+            return True
+        except:  # best effort: any failure is printed and reported as False
+            import traceback
+            traceback.print_exc()
+        return False
+
+    @classmethod
+    def rationalize_cover(cls, opf):
+        '''If the guide has a cover and manifest[0] is spine[0]: drop spine[0], point
+        the cover at calibre_raster_cover.jpg and keep the old href as title page.'''
+        guide_cover, guide_elem = None, None
+        for guide_elem in opf.iterguide():
+            if guide_elem.get('type', '').lower() == 'cover':
+                guide_cover = guide_elem.get('href', '')
+                break
+        if not guide_cover:
+            return
+        spine = list(opf.iterspine())
+        if not spine:
+            return
+        manifest = list(opf.itermanifest())
+        if not manifest or manifest[0].get('id', False) != spine[0].get('idref', ''):
+            return
+        spine[0].getparent().remove(spine[0])
+        guide_elem.set('href', 'calibre_raster_cover.jpg')
+        for elem in list(opf.iterguide()):
+            if elem.get('type', '').lower() == 'titlepage':
+                elem.getparent().remove(elem)
+        from calibre.ebooks.oeb.base import OPF
+        t = etree.SubElement(guide_elem.getparent(), OPF('reference'))
+        t.set('type', 'titlepage')
+        t.set('href', guide_cover)
+        t.set('title', 'Title Page')
+        from calibre.ebooks import render_html
+        renderer = render_html(guide_cover)
+        if renderer is not None:
+            with open('calibre_raster_cover.jpg', 'wb') as f:
+                f.write(renderer.data)
+
+    def convert(self, stream, options, file_ext, log, accelerators):
+        '''Extract stream into the cwd, write content.opf and return its path.'''
+        from calibre.utils.zipfile import ZipFile
+        from calibre import walk
+        from calibre.ebooks import DRMError
+        from calibre.ebooks.metadata.opf2 import OPF
+        zf = ZipFile(stream)
+        zf.extractall(os.getcwd())
+        encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
+        opf = None
+        for f in walk(u'.'):
+            if f.lower().endswith('.opf'):
+                opf = os.path.abspath(f)
+                break
+        path = getattr(stream, 'name', 'stream')
+
+        if opf is None:
+            raise ValueError('%s is not a valid EPUB file'%path)
+
+        if os.path.exists(encfile) and not self.process_encryption(encfile, opf, log):
+            raise DRMError(os.path.basename(path))
+
+        opf = os.path.relpath(opf, os.getcwdu())
+        parts = os.path.split(opf)
+        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
+
+        if len(parts) > 1 and parts[0]:  # OPF is in a sub directory of the cwd
+            delta = '/'.join(parts[:-1])+'/'
+            for elem in opf.itermanifest():
+                elem.set('href', delta+elem.get('href'))
+            for elem in opf.iterguide():
+                elem.set('href', delta+elem.get('href'))
+
+        self.rationalize_cover(opf)
+
+        with open('content.opf', 'wb') as nopf:
+            nopf.write(opf.render())
+
+        return os.path.abspath('content.opf')
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@ -0,0 +1,294 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os, shutil, re
+from urllib import unquote
+
+from calibre.customize.conversion import OutputFormatPlugin
+from calibre.ptempfile import TemporaryDirectory
+from calibre.constants import __appname__, __version__
+from calibre import strftime, guess_type
+from calibre.customize.conversion import OptionRecommendation
+
+from lxml import etree
+
+
+class EPUBOutput(OutputFormatPlugin):
+    '''
+    Output plugin that writes an OEB book as an EPUB container.
+
+    Before the book is serialized its flows are split, a set of markup
+    workarounds for Adobe Digital Editions (ADE) is applied, images are
+    rescaled and a title page is inserted at the start of the spine.
+    '''
+
+    name = 'EPUB Output'
+    author = 'Kovid Goyal'
+    file_type = 'epub'
+
+    options = set([
+        OptionRecommendation(name='extract_to',
+            help=_('Extract the contents of the generated EPUB file to the '
+                'specified directory. The contents of the directory are first '
+                'deleted, so be careful.')),
+
+        OptionRecommendation(name='dont_split_on_page_breaks',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Turn off splitting at page breaks. Normally, input '
+                    'files are automatically split at every page break into '
+                    'two files. This gives an output ebook that can be '
+                    'parsed faster and with less resources. However, '
+                    'splitting is slow and if your source file contains a '
+                    'very large number of page breaks, you should turn off '
+                    'splitting on page breaks.'
+                )
+        ),
+
+        OptionRecommendation(name='flow_size', recommended_value=260,
+            help=_('Split all HTML files larger than this size (in KB). '
+                'This is necessary as most EPUB readers cannot handle large '
+                'file sizes. The default of %defaultKB is the size required '
+                'for Adobe Digital Editions.')
+        ),
+
+
+        ])
+
+
+    # Title page used when the book already has a cover image. The single
+    # %s is replaced with the (XML escaped) href of that image.
+    TITLEPAGE_COVER = '''\
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+    <head>
+        <title>Cover</title>
+        <style type="text/css" title="override_css">
+            @page {padding: 0pt; margin:0pt}
+            body { text-align: center; padding:0pt; margin: 0pt; }
+            div { margin: 0pt; padding: 0pt; }
+        </style>
+    </head>
+    <body>
+        <div>
+            <img src="%s" alt="cover" style="height: 100%%" />
+        </div>
+    </body>
+</html>
+'''
+
+    # Generic title page used when the book has no cover at all. Expects the
+    # keys title, img, date, author and app; all values must be XML escaped.
+    TITLEPAGE = '''\
+<html  xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+    <head>
+        <title>%(title)s</title>
+        <style type="text/css">
+            body {
+                background: white no-repeat fixed center center;
+                text-align: center;
+                vertical-align: center;
+                overflow: hidden;
+                font-size: 18px;
+            }
+            h1 { font-family: serif; }
+            h2, h4 { font-family: monospace; }
+        </style>
+    </head>
+    <body>
+        <h1>%(title)s</h1>
+        <br/><br/>
+        <div style="position:relative">
+            <div style="position: absolute; left: 0; top: 0; width:100%%; height:100%%; vertical-align:center">
+                <img src="%(img)s" alt="calibre" style="opacity:0.3"/>
+            </div>
+            <div style="position: absolute; left: 0; top: 0; width:100%%; height:100%%; vertical-align:center">
+                <h2>%(date)s</h2>
+                <br/><br/><br/><br/><br/>
+                <h3>%(author)s</h3>
+                <br/><br/><br/><br/><br/><br/><br/><br/><br/>
+                <h4>Produced by %(app)s</h4>
+            </div>
+        </div>
+    </body>
+</html>
+'''
+
+    def convert(self, oeb, output_path, input_plugin, opts, log):
+        '''
+        Write `oeb` to `output_path` as an EPUB file.
+
+        The book is split, patched for ADE, its images are rescaled and a
+        title page is inserted. It is then serialized into a temporary
+        directory by the ``oeb`` output plugin and that directory is packed
+        into the EPUB container. If ``opts.extract_to`` is set, that
+        directory is deleted, recreated and filled with the contents of the
+        generated EPUB.
+
+        :param oeb: The book to convert. It is modified in place.
+        :param output_path: Path of the EPUB file to create.
+        :param input_plugin: Passed through to the ``oeb`` output plugin.
+        :param opts: Conversion options.
+        :param log: Logger.
+        '''
+        self.log, self.opts, self.oeb = log, opts, oeb
+
+        from calibre.ebooks.oeb.transforms.split import Split
+        split = Split(not self.opts.dont_split_on_page_breaks,
+                max_flow_size=self.opts.flow_size*1024
+                )
+        split(self.oeb, self.opts)
+
+
+        self.workaround_ade_quirks()
+
+        from calibre.ebooks.oeb.transforms.rescale import RescaleImages
+        RescaleImages()(oeb, opts)
+        self.insert_cover()
+
+        with TemporaryDirectory('_epub_output') as tdir:
+            from calibre.customize.ui import plugin_for_output_format
+            oeb_output = plugin_for_output_format('oeb')
+            oeb_output.convert(oeb, tdir, input_plugin, opts, log)
+            opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
+            self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)\
+                    if x.endswith('.ncx')][0])
+
+            from calibre.ebooks.epub import initialize_container
+            epub = initialize_container(output_path, os.path.basename(opf))
+            # Make sure the container is closed even if packing or
+            # extraction fails
+            try:
+                epub.add_dir(tdir)
+                if opts.extract_to is not None:
+                    if os.path.exists(opts.extract_to):
+                        shutil.rmtree(opts.extract_to)
+                    os.mkdir(opts.extract_to)
+                    epub.extractall(path=opts.extract_to)
+                    self.log.info('EPUB extracted to', opts.extract_to)
+            finally:
+                epub.close()
+
+    def default_cover(self):
+        '''
+        Create a generic cover for books that dont have a cover
+
+        The calibre logo is added to the manifest as an image and a title page
+        built from `TITLEPAGE` is added as an XHTML item.
+
+        :return: The manifest item of the new title page, or None if the GUI
+            resources or PyQt4 cannot be imported.
+        '''
+        try:
+            from calibre.gui2 import images_rc # Needed for access to logo
+            from PyQt4.Qt import QApplication, QFile, QIODevice
+        except:
+            return None
+        from calibre.ebooks.metadata import authors_to_string
+        from xml.sax.saxutils import escape
+        images_rc # bare reference, the module is imported only for its side effect
+        m = self.oeb.metadata
+        title = unicode(m.title[0])
+        # The role belongs to each creator entry, not to the metadata object.
+        # NOTE(review): the element is looked up by its singular Dublin Core
+        # name, as is done for `title` above -- confirm against the metadata
+        # class.
+        a = [unicode(x) for x in m.creator if x.role == 'aut']
+        author = authors_to_string(a)
+        if QApplication.instance() is None: QApplication([])
+        f = QFile(':/library')
+        f.open(QIODevice.ReadOnly)
+        img_data = str(f.readAll())
+        id, href = self.oeb.manifest.generate('calibre-logo',
+                'calibre-logo.png')
+        self.oeb.manifest.add(id, href, 'image/png', data=img_data)
+        # Everything interpolated into the template must be escaped, otherwise
+        # a title or author containing &, < or > makes the markup unparseable
+        html = self.TITLEPAGE%dict(title=escape(title), author=escape(author),
+                date=escape(strftime('%d %b, %Y')),
+                app=escape(__appname__ +' '+__version__),
+                img=escape(href, {'"': '&quot;'}))
+        id, href = self.oeb.manifest.generate('calibre-titlepage',
+                'calibre-titlepage.xhtml')
+        return self.oeb.manifest.add(id, href, guess_type('t.xhtml')[0],
+                data=etree.fromstring(html))
+
+
+    def insert_cover(self):
+        '''
+        Make sure the book starts with a title page.
+
+        If the guide has no ``titlepage`` entry, one is generated: from
+        `TITLEPAGE_COVER` when the guide has a ``cover`` entry, otherwise via
+        `default_cover`. The resulting item is inserted at the start of the
+        spine and the ``cover`` (and, if present, ``titlepage``) guide
+        references are pointed at it. Nothing is inserted when no title page
+        item could be obtained.
+        '''
+        from calibre.ebooks.oeb.base import urldefrag
+        from xml.sax.saxutils import escape
+        g, m = self.oeb.guide, self.oeb.manifest
+        if 'titlepage' not in g:
+            if 'cover' in g:
+                # The unquoted href may contain &, < or ", so it has to be
+                # escaped before it is placed inside the src attribute
+                src = escape(unquote(g['cover'].href), {'"': '&quot;'})
+                tp = self.TITLEPAGE_COVER%src
+                id, href = m.generate('titlepage', 'titlepage.xhtml')
+                item = m.add(id, href, guess_type('t.xhtml')[0],
+                        data=etree.fromstring(tp))
+            else:
+                item = self.default_cover()
+        else:
+            item = self.oeb.manifest.hrefs[
+                    urldefrag(self.oeb.guide['titlepage'].href)[0]]
+        if item is not None:
+            self.oeb.spine.insert(0, item, True)
+            if 'cover' not in self.oeb.guide.refs:
+                self.oeb.guide.add('cover', 'Title Page', 'a')
+            self.oeb.guide.refs['cover'].href = item.href
+            if 'titlepage' in self.oeb.guide.refs:
+                self.oeb.guide.refs['titlepage'].href = item.href
+
+
+
+    def condense_ncx(self, ncx_path):
+        '''
+        Strip leading and trailing whitespace from the text and tail of every
+        element in the NCX file at `ncx_path` and rewrite the file in place.
+        Does nothing when the ``pretty_print`` option is set.
+        '''
+        if not self.opts.pretty_print:
+            tree = etree.parse(ncx_path)
+            for tag in tree.getroot().iter(tag=etree.Element):
+                if tag.text:
+                    tag.text = tag.text.strip()
+                if tag.tail:
+                    tag.tail = tag.tail.strip()
+            compressed = etree.tostring(tree.getroot(), encoding='utf-8')
+            with open(ncx_path, 'wb') as f:
+                f.write(compressed)
+
+
+
+    def workaround_ade_quirks(self):
+        '''
+        Perform various markup transforms to get the output to render correctly
+        in the quirky ADE.
+
+        Every spine item is modified in place and two link styling rules are
+        added to the ``stylesheet.css`` manifest item.
+        '''
+        from calibre.ebooks.oeb.base import XPNSMAP, XHTML
+        from lxml.etree import XPath as _XPath
+        from functools import partial
+        XPath = partial(_XPath, namespaces=XPNSMAP)
+
+        # Zero width space and soft hyphen, removed from all text below
+        special_chars = re.compile(u'[\u200b\u00ad]')
+        # The link rules only need to be added once per book, not once per
+        # spine item
+        link_rules_added = False
+
+        for x in self.oeb.spine:
+            root = x.data
+            bodies = XPath('//h:body')(root)
+            # Replace <br> that are children of <body> as ADE doesn't handle them
+            if bodies:
+                for br in XPath('./h:br')(bodies[0]):
+                    if br.getparent() is None:
+                        # Already removed as the sibling of an earlier <br>
+                        continue
+                    sibling = br.getnext()
+                    br.tag = XHTML('p')
+                    br.text = u'\u00a0'
+                    if (br.tail and br.tail.strip()) or sibling is None or \
+                    getattr(sibling, 'tag', '') != XHTML('br'):
+                        style = [s.strip() for s in
+                                br.get('style', '').split(';')]
+                        style = [s for s in style if s]
+                        style.append('margin: 0pt; border:0pt; height:0pt')
+                        br.set('style', '; '.join(style))
+                    else:
+                        # Two consecutive <br> collapse into one paragraph.
+                        # Keep the text that followed the second one.
+                        sibling.getparent().remove(sibling)
+                        if sibling.tail:
+                            if not br.tail:
+                                br.tail = ''
+                            br.tail += sibling.tail
+
+            for tag in XPath('//h:embed')(root):
+                tag.getparent().remove(tag)
+            for tag in XPath('//h:object')(root):
+                if tag.get('type', '').lower().strip() in ('image/svg+xml',):
+                    continue
+                tag.getparent().remove(tag)
+
+            for tag in XPath('//h:title|//h:style')(root):
+                if not tag.text:
+                    tag.getparent().remove(tag)
+            for tag in XPath('//h:script')(root):
+                if not tag.text and not tag.get('src', False):
+                    tag.getparent().remove(tag)
+
+            for tag in XPath('//h:form')(root):
+                tag.getparent().remove(tag)
+
+            for tag in XPath('//h:center')(root):
+                tag.tag = XHTML('div')
+                tag.set('style', 'text-align:center')
+
+            # ADE can't handle &amp; in an img url
+            for tag in XPath('//h:img[@src]')(root):
+                tag.set('src', tag.get('src', '').replace('&', ''))
+
+            if not link_rules_added:
+                stylesheet = self.oeb.manifest.hrefs['stylesheet.css']
+                stylesheet.data.add('a { color: inherit; text-decoration: inherit; '
+                        'cursor: default; }')
+                stylesheet.data.add('a[href] { color: blue; '
+                        'text-decoration: underline; cursor:pointer; }')
+                link_rules_added = True
+
+            for elem in root.iterdescendants():
+                if getattr(elem, 'text', False):
+                    elem.text = special_chars.sub('', elem.text)
+                    # Non-breaking hyphen -> plain hyphen
+                    elem.text = elem.text.replace(u'\u2011', '-')
+                if getattr(elem, 'tail', False):
+                    elem.tail = special_chars.sub('', elem.tail)
+                    elem.tail = elem.tail.replace(u'\u2011', '-')
+
+
+
--- a/src/calibre/ebooks/epub/pages.py
+++ b/src/calibre/ebooks/epub/pages.py
@ -11,7 +11,7 @@ __docformat__ = 'restructuredtext en'
 import os, re
 from itertools import count, chain
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS
-from calibre.ebooks.oeb.base import OEBBook, DirWriter
+from calibre.ebooks.oeb.base import OEBBook
 from lxml import etree, html
 from lxml.etree import XPath

--- a/src/calibre/ebooks/epub/split.py
+++ b/src/calibre/ebooks/epub/split.py
@ -1,509 +0,0 @@
-from __future__ import with_statement
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Split the flows in an epub file to conform to size limitations.
-'''
-
-import os, math, logging, functools, collections, re, copy, sys
-
-from lxml.etree import XPath as _XPath
-from lxml import etree, html
-from lxml.cssselect import CSSSelector
-
-from calibre.ebooks.metadata.opf2 import OPF
-from calibre.ebooks.epub import tostring, rules
-from calibre import CurrentDir, LoggingInterface
-
-XPath = functools.partial(_XPath, namespaces={'re':'http://exslt.org/regular-expressions'})
-content = functools.partial(os.path.join, 'content')
-
-SPLIT_ATTR       = 'cs'
-SPLIT_POINT_ATTR = 'csp'
-
-class SplitError(ValueError):
-
-    def __init__(self, path, root):
-        size = len(tostring(root))/1024.
-        ValueError.__init__(self, _('Could not find reasonable point at which to split: %s Sub-tree size: %d KB')%
-                            (os.path.basename(path), size))
-
-
-
-class Splitter(LoggingInterface):
-
-    def __init__(self, path, opts, stylesheet_map, opf):
-        LoggingInterface.__init__(self, logging.getLogger('htmlsplit'))
-        self.setup_cli_handler(opts.verbose)
-        self.path = path
-        self.always_remove = not opts.preserve_tag_structure or \
-                    os.stat(content(path)).st_size > 5*opts.profile.flow_size
-        self.base = (os.path.splitext(path)[0].replace('%', '%%') + '_split_%d.html')
-        self.opts = opts
-        self.orig_size = os.stat(content(path)).st_size
-        self.log_info('\tSplitting %s (%d KB)', path, self.orig_size/1024.)
-        root = html.fromstring(open(content(path)).read())
-
-        self.page_breaks, self.trees = [], []
-        self.split_size = 0
-
-        # Split on page breaks
-        self.splitting_on_page_breaks = True
-        if not opts.dont_split_on_page_breaks:
-            self.log_info('\tSplitting on page breaks...')
-            if self.path in stylesheet_map:
-                self.find_page_breaks(stylesheet_map[self.path], root)
-            self.split_on_page_breaks(root.getroottree())
-            trees = list(self.trees)
-        else:
-            self.trees = [root.getroottree()]
-            trees = list(self.trees)
-
-        # Split any remaining over-sized trees
-        self.splitting_on_page_breaks = False
-        if self.opts.profile.flow_size < sys.maxint:
-            lt_found = False
-            self.log_info('\tLooking for large trees...')
-            self.tree_map = {}
-            for i, tree in enumerate(list(trees)):
-                self.split_trees = []
-                size = len(tostring(tree.getroot()))
-                if size > self.opts.profile.flow_size:
-                    lt_found = True
-                    try:
-                        self.split_to_size(tree)
-                        self.tree_map[tree] = self.split_trees
-                    except (SplitError, RuntimeError): # Splitting fails
-                        if not self.always_remove:
-                            self.always_remove = True
-                            self.split_trees = []
-                            self.split_to_size(tree)
-                            self.tree_map[tree] = self.split_trees
-                        else:
-                            raise
-            t = []
-            for x in trees:
-                t.extend(self.tree_map.get(x, [x]))
-            trees = t
-            if not lt_found:
-                self.log_info('\tNo large trees found')
-
-        self.trees = trees
-        self.was_split = len(self.trees) > 1
-        if self.was_split:
-            self.commit()
-            self.log_info('\t\tSplit into %d parts.', len(self.trees))
-            if self.opts.verbose:
-                for f in self.files:
-                    self.log_info('\t\t\t%s - %d KB', f, os.stat(content(f)).st_size/1024.)
-            self.fix_opf(opf)
-
-        self.trees = None
-
-
-    def split_text(self, text, root, size):
-        self.log_debug('\t\t\tSplitting text of length: %d'%len(text))
-        rest = text.replace('\r', '')
-        parts = re.split('\n\n', rest)
-        self.log_debug('\t\t\t\tFound %d parts'%len(parts))
-        if max(map(len, parts)) > size:
-            raise SplitError('Cannot split as file contains a <pre> tag with a very large paragraph', root)
-        ans = []
-        buf = ''
-        for part in parts:
-            if len(buf) + len(part) < size:
-                buf += '\n\n'+part
-            else:
-                ans.append(buf)
-                buf = part
-        return ans
-
-
-    def split_to_size(self, tree):
-        self.log_debug('\t\tSplitting...')
-        root = tree.getroot()
-        # Split large <pre> tags
-        for pre in list(root.xpath('//pre')):
-            text = u''.join(pre.xpath('descendant::text()'))
-            pre.text = text
-            for child in list(pre.iterchildren()):
-                pre.remove(child)
-            if len(pre.text) > self.opts.profile.flow_size*0.5:
-                frags = self.split_text(pre.text, root, int(0.2*self.opts.profile.flow_size))
-                new_pres = []
-                for frag in frags:
-                    pre2 = copy.copy(pre)
-                    pre2.text = frag
-                    pre2.tail = u''
-                    new_pres.append(pre2)
-                new_pres[-1].tail = pre.tail
-                p = pre.getparent()
-                i = p.index(pre)
-                p[i:i+1] = new_pres
-
-        split_point, before = self.find_split_point(root)
-        if split_point is None or self.split_size > 6*self.orig_size:
-            if not self.always_remove:
-                self.log_warn(_('\t\tToo much markup. Re-splitting without '
-                                'structure preservation. This may cause '
-                                'incorrect rendering.'))
-            raise SplitError(self.path, root)
-
-        for t in self.do_split(tree, split_point, before):
-            r = t.getroot()
-            if self.is_page_empty(r):
-                continue
-            size = len(tostring(r))
-            if size <= self.opts.profile.flow_size:
-                self.split_trees.append(t)
-                #print tostring(t.getroot(), pretty_print=True)
-                self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)',
-                               len(self.split_trees), size/1024.)
-                self.split_size += size
-            else:
-                self.split_to_size(t)
-
-    def is_page_empty(self, root):
-        body = root.find('body')
-        if body is None:
-            return False
-        txt = re.sub(r'\s+', '', html.tostring(body, method='text', encoding=unicode))
-        if len(txt) > 4:
-            #if len(txt) < 100:
-            #    print 1111111, html.tostring(body, method='html', encoding=unicode)
-            return False
-        for img in root.xpath('//img'):
-            if img.get('style', '') != 'display:none':
-                return False
-        return True
-
-    def do_split(self, tree, split_point, before):
-        '''
-        Split ``tree`` into a *before* and *after* tree at ``split_point``,
-        preserving tag structure, but not duplicating any text.
-        All tags that have had their text and tail
-        removed have the attribute ``calibre_split`` set to 1.
-
-        :param before: If True tree is split before split_point, otherwise after split_point
-        :return: before_tree, after_tree
-        '''
-        path         = tree.getpath(split_point)
-        tree, tree2  = copy.deepcopy(tree), copy.deepcopy(tree)
-        root         = tree.getroot()
-        root2        = tree2.getroot()
-        body, body2  = root.body, root2.body
-        split_point  = root.xpath(path)[0]
-        split_point2 = root2.xpath(path)[0]
-
-        def nix_element(elem, top=True):
-            if self.always_remove:
-                parent = elem.getparent()
-                index = parent.index(elem)
-                if top:
-                    parent.remove(elem)
-                else:
-                    index = parent.index(elem)
-                    parent[index:index+1] = list(elem.iterchildren())
-
-            else:
-                elem.text = u''
-                elem.tail = u''
-                elem.set(SPLIT_ATTR, '1')
-                if elem.tag.lower() in ['ul', 'ol', 'dl', 'table', 'hr', 'img']:
-                    elem.set('style', 'display:none')
-
-        def fix_split_point(sp):
-            if not self.splitting_on_page_breaks:
-                sp.set('style', sp.get('style', '')+'page-break-before:avoid;page-break-after:avoid')
-
-        # Tree 1
-        hit_split_point = False
-        for elem in list(body.iterdescendants(etree.Element)):
-            if elem.get(SPLIT_ATTR, '0') == '1':
-                continue
-            if elem is split_point:
-                hit_split_point = True
-                if before:
-                    nix_element(elem)
-                fix_split_point(elem)
-                continue
-            if hit_split_point:
-                nix_element(elem)
-
-
-        # Tree 2
-        hit_split_point = False
-        for elem in list(body2.iterdescendants(etree.Element)):
-            if elem.get(SPLIT_ATTR, '0') == '1':
-                continue
-            if elem is split_point2:
-                hit_split_point = True
-                if not before:
-                    nix_element(elem, top=False)
-                fix_split_point(elem)
-                continue
-            if not hit_split_point:
-                nix_element(elem, top=False)
-
-        return tree, tree2
-
-
-    def split_on_page_breaks(self, orig_tree):
-        ordered_ids = []
-        for elem in orig_tree.xpath('//*[@id]'):
-            id = elem.get('id')
-            if id in self.page_break_ids:
-                ordered_ids.append(self.page_breaks[self.page_break_ids.index(id)])
-
-        self.trees = []
-        tree = orig_tree
-        for pattern, before in ordered_ids:
-            self.log_info('\t\tSplitting on page-break')
-            elem = pattern(tree)
-            if elem:
-                before, after = self.do_split(tree, elem[0], before)
-                self.trees.append(before)
-                tree = after
-        self.trees.append(tree)
-        self.trees = [t for t in self.trees if not self.is_page_empty(t.getroot())]
-
-
-
-    def find_page_breaks(self, stylesheets, root):
-        '''
-        Find all elements that have either page-break-before or page-break-after set.
-        Populates `self.page_breaks` with id based XPath selectors (for elements that don't
-        have ids, an id is created).
-        '''
-        page_break_selectors = set([])
-        for rule in rules(stylesheets):
-            before = getattr(rule.style.getPropertyCSSValue('page-break-before'), 'cssText', '').strip().lower()
-            after  = getattr(rule.style.getPropertyCSSValue('page-break-after'), 'cssText', '').strip().lower()
-            try:
-                if before and before != 'avoid':
-                    page_break_selectors.add((CSSSelector(rule.selectorText), True))
-            except:
-                pass
-            try:
-                if after and after != 'avoid':
-                    page_break_selectors.add((CSSSelector(rule.selectorText), False))
-            except:
-                pass
-
-        page_breaks = set([])
-        for selector, before in page_break_selectors:
-            for elem in selector(root):
-                elem.pb_before = before
-                page_breaks.add(elem)
-
-        for i, elem in enumerate(root.iter()):
-            elem.pb_order = i
-
-        page_breaks = list(page_breaks)
-        page_breaks.sort(cmp=lambda x,y : cmp(x.pb_order, y.pb_order))
-        self.page_break_ids = []
-        for i, x in enumerate(page_breaks):
-            x.set('id', x.get('id', 'calibre_pb_%d'%i))
-            id = x.get('id')
-            self.page_breaks.append((XPath('//*[@id="%s"]'%id), x.pb_before))
-            self.page_break_ids.append(id)
-
-
-    def find_split_point(self, root):
-        '''
-        Find the tag at which to split the tree rooted at `root`.
-        Search order is:
-            * Heading tags
-            * <div> tags
-            * <pre> tags
-            * <hr> tags
-            * <p> tags
-            * <br> tags
-            * <li> tags
-
-        We try to split in the "middle" of the file (as defined by tag counts.
-        '''
-        def pick_elem(elems):
-            if elems:
-                elems = [i for i in elems if i.get(SPLIT_POINT_ATTR, '0') != '1'\
-                          and i.get(SPLIT_ATTR, '0') != '1']
-                if elems:
-                    i = int(math.floor(len(elems)/2.))
-                    elems[i].set(SPLIT_POINT_ATTR, '1')
-                    return elems[i]
-
-        for path in (
-                     '//*[re:match(name(), "h[1-6]", "i")]',
-                     '/html/body/div',
-                     '//pre',
-                     '//hr',
-                     '//p',
-                     '//div',
-                     '//br',
-                     '//li',
-                     ):
-            elems = root.xpath(path,
-                    namespaces={'re':'http://exslt.org/regular-expressions'})
-            elem = pick_elem(elems)
-            if elem is not None:
-                try:
-                    XPath(elem.getroottree().getpath(elem))
-                except:
-                    continue
-                return elem, True
-
-        return None, True
-
-    def commit(self):
-        '''
-        Commit all changes caused by the split. This removes the previously
-        introduced ``calibre_split`` attribute and calculates an *anchor_map* for
-        all anchors in the original tree. Internal links are re-directed. The
-        original file is deleted and the split files are saved.
-        '''
-        self.anchor_map = collections.defaultdict(lambda :self.base%0)
-        self.files = []
-
-        for i, tree in enumerate(self.trees):
-            root = tree.getroot()
-            self.files.append(self.base%i)
-            for elem in root.xpath('//*[@id]'):
-                if elem.get(SPLIT_ATTR, '0') == '0':
-                    self.anchor_map[elem.get('id')] = self.files[-1]
-            for elem in root.xpath('//*[@%s or @%s]'%(SPLIT_ATTR, SPLIT_POINT_ATTR)):
-                elem.attrib.pop(SPLIT_ATTR, None)
-                elem.attrib.pop(SPLIT_POINT_ATTR, '0')
-
-        for current, tree in zip(self.files, self.trees):
-            for a in tree.getroot().xpath('//a[@href]'):
-                href = a.get('href').strip()
-                if href.startswith('#'):
-                    anchor = href[1:]
-                    file = self.anchor_map[anchor]
-                    if file != current:
-                        a.set('href', file+href)
-            open(content(current), 'wb').\
-                write(tostring(tree.getroot(), pretty_print=self.opts.pretty_print))
-
-        os.remove(content(self.path))
-
-
-    def fix_opf(self, opf):
-        '''
-        Fix references to the split file in the OPF.
-        '''
-        items = [item for item in opf.itermanifest() if item.get('href') == 'content/'+self.path]
-        new_items = [('content/'+f, None) for f in self.files]
-        id_map = {}
-        for item in items:
-            id_map[item.get('id')] = opf.replace_manifest_item(item, new_items)
-
-        for id in id_map.keys():
-            opf.replace_spine_items_by_idref(id, id_map[id])
-
-        for ref in opf.iterguide():
-            href = ref.get('href', '')
-            if href.startswith('content/'+self.path):
-                href = href.split('#')
-                frag = None
-                if len(href) > 1:
-                    frag = href[1]
-                if frag not in self.anchor_map:
-                    self.log_warning('\t\tUnable to re-map OPF link', href)
-                    continue
-                new_file = self.anchor_map[frag]
-                ref.set('href', 'content/'+new_file+('' if frag is None else ('#'+frag)))
-
-
-
-def fix_content_links(html_files, changes, opts):
-    split_files = [f.path for f in changes]
-    anchor_maps = [f.anchor_map for f in changes]
-    files = list(html_files)
-    for j, f in enumerate(split_files):
-        try:
-            i = files.index(f)
-            files[i:i+1] = changes[j].files
-        except ValueError:
-            continue
-
-    for htmlfile in files:
-        changed = False
-        root = html.fromstring(open(content(htmlfile), 'rb').read())
-        for a in root.xpath('//a[@href]'):
-            href = a.get('href')
-            if not href.startswith('#'):
-                href = href.split('#')
-                anchor = href[1] if len(href) > 1 else None
-                href = href[0]
-                if href in split_files:
-                    try:
-                        newf = anchor_maps[split_files.index(href)][anchor]
-                    except:
-                        print '\t\tUnable to remap HTML link:', href, anchor
-                        continue
-                    frag = ('#'+anchor) if anchor else ''
-                    a.set('href', newf+frag)
-                    changed = True
-
-        if changed:
-            open(content(htmlfile), 'wb').write(tostring(root, pretty_print=opts.pretty_print))
-
-def fix_ncx(path, changes):
-    split_files = [f.path for f in changes]
-    anchor_maps = [f.anchor_map for f in changes]
-    tree = etree.parse(path)
-    changed = False
-    for content in tree.getroot().xpath('//x:content[@src]',
-                    namespaces={'x':"http://www.daisy.org/z3986/2005/ncx/"}):
-        href = content.get('src')
-        if not href.startswith('#'):
-            href = href.split('#')
-            anchor = href[1] if len(href) > 1 else None
-            href = href[0].split('/')[-1]
-            if href in split_files:
-                try:
-                    newf = anchor_maps[split_files.index(href)][anchor]
-                except:
-                    print 'Unable to remap NCX link:', href, anchor
-                frag = ('#'+anchor) if anchor else ''
-                content.set('src', 'content/'+newf+frag)
-                changed = True
-    if changed:
-        open(path, 'wb').write(etree.tostring(tree.getroot(), encoding='UTF-8', xml_declaration=True))
-
-def find_html_files(opf):
-    '''
-    Find all HTML files referenced by `opf`.
-    '''
-    html_files = []
-    for item in opf.itermanifest():
-        if 'html' in item.get('media-type', '').lower():
-            f = item.get('href').split('/')[-1]
-            f2 = f.replace('&', '%26')
-            if not os.path.exists(content(f)) and os.path.exists(content(f2)):
-                f = f2
-                item.set('href', item.get('href').replace('&', '%26'))
-            if os.path.exists(content(f)):
-                html_files.append(f)
-    return html_files
-
-
-def split(pathtoopf, opts, stylesheet_map):
-    pathtoopf = os.path.abspath(pathtoopf)
-    opf = OPF(open(pathtoopf, 'rb'), os.path.dirname(pathtoopf))
-
-    with CurrentDir(os.path.dirname(pathtoopf)):
-        html_files = find_html_files(opf)
-        changes = [Splitter(f, opts, stylesheet_map, opf) for f in html_files]
-        changes = [c for c in changes if c.was_split]
-
-        fix_content_links(html_files, changes, opts)
-        for item in opf.itermanifest():
-            if item.get('media-type', '') == 'application/x-dtbncx+xml':
-                fix_ncx(item.get('href'), changes)
-                break
-
-        open(pathtoopf, 'wb').write(opf.render())
--- a/src/calibre/ebooks/lrf/fb2/init.py
+++ b/src/calibre/ebooks/lrf/fb2/init.py
--- a/src/calibre/ebooks/lrf/fb2/fb2.xsl
+++ b/src/calibre/ebooks/lrf/fb2/fb2.xsl
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -0,0 +1,154 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Transform OEB content into FB2 markup
+'''
+
+import os
+import re
+from base64 import b64encode
+
+from calibre import entity_to_unicode
+from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
+from calibre.ebooks.oeb.stylizer import Stylizer
+from calibre.ebooks.oeb.base import OEB_IMAGES
+from calibre.constants import __appname__, __version__
+
+from BeautifulSoup import BeautifulSoup
+
+# Maps XHTML element names to the FB2 element emitted for them.
+# FB2MLizer.dump_text falls back to 'p' for any tag that is not listed here.
+TAG_MAP = {
+    'b' : 'strong',
+    'i' : 'emphasis',
+    'p' : 'p',
+    'div' : 'p',
+}
+
+# (CSS property, {property value: FB2 element}) pairs. dump_text opens the
+# mapped FB2 element when an element's computed style has one of the values.
+STYLES = [
+    ('font-weight', {'bold'   : 'strong', 'bolder' : 'strong'}),
+    ('font-style', {'italic' : 'emphasis'}),
+]
+
+class FB2MLizer(object):
+    '''
+    Serialize the XHTML content of an OEB book as FictionBook 2 (FB2) markup.
+
+    The entry point is :meth:`extract_content`; the remaining methods are the
+    individual stages it drives.
+    '''
+
+    def __init__(self, ignore_tables=False):
+        # Stored for callers; no method of this class consults it.
+        self.ignore_tables = ignore_tables
+
+    def extract_content(self, oeb_book, opts):
+        '''
+        Convert `oeb_book` to FB2 markup.
+
+        :param oeb_book: The book to convert. Its logger, guide, manifest,
+                         spine and metadata are read.
+        :param opts:     Conversion options; only `output_profile` is read.
+        :return:         The value of :meth:`fb2mlize_spine`.
+        '''
+        oeb_book.logger.info('Converting XHTML to FB2 markup...')
+        self.oeb_book = oeb_book
+        self.opts = opts
+        return self.fb2mlize_spine()
+
+    def fb2mlize_spine(self):
+        '''
+        Build the complete FB2 document: header, title page (when it is not
+        already part of the spine), every spine item, embedded images and
+        footer. The result is run through BeautifulSoup's prettify().
+        '''
+        output = self.fb2_header()
+        if 'titlepage' in self.oeb_book.guide:
+            href = self.oeb_book.guide['titlepage'].href
+            item = self.oeb_book.manifest.hrefs[href]
+            # A title page that is in the spine is emitted by the loop below.
+            if item.spine_position is None:
+                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+                output += self.dump_text(item.data.find(XHTML('body')), stylizer)
+        for item in self.oeb_book.spine:
+            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+            output += self.dump_text(item.data.find(XHTML('body')), stylizer)
+        output += self.fb2_body_footer()
+        output += self.fb2mlize_images()
+        output += self.fb2_footer()
+        output = self.clean_text(output)
+        return BeautifulSoup(output.encode('utf-8')).prettify()
+
+    def fb2_header(self):
+        '''
+        Return the XML declaration, the <description> block and the opening
+        <body><section> tags.
+        '''
+        # NOTE(review): the title is interpolated without XML escaping.
+        return u'<?xml version="1.0" encoding="utf-8"?> ' \
+        '<FictionBook xmlns:xlink="http://www.w3.org/1999/xlink" ' \
+        'xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"> ' \
+        '<description><title-info><book-title>%s</book-title> ' \
+        '</title-info><document-info> ' \
+        '<program-used>%s - %s</program-used></document-info> ' \
+        '</description><body><section>' % (self.oeb_book.metadata.title[0].value, __appname__, __version__)
+
+    def fb2_body_footer(self):
+        '''Return the tags closing what :meth:`fb2_header` left open in <body>.'''
+        return u'</section></body>'
+
+    def fb2_footer(self):
+        '''Return the closing tag of the document element.'''
+        return u'</FictionBook>'
+
+    def fb2mlize_images(self):
+        '''
+        Return one base64 encoded <binary> element per manifest item whose
+        media type is in OEB_IMAGES. The id is the basename of the item's
+        href, which is what the <image> references from :meth:`dump_text`
+        point at.
+        '''
+        images = u''
+        for item in self.oeb_book.manifest:
+            if item.media_type in OEB_IMAGES:
+                data = b64encode(item.data)
+                images += '<binary id="%s" content-type="%s">%s</binary>' % (os.path.basename(item.href),  item.media_type, data)
+        return images
+
+    def clean_text(self, text):
+        '''
+        Replace every `&...;` sequence in `text` with the result of
+        entity_to_unicode and then drop any remaining ampersands.
+        '''
+        for entity in set(re.findall('&.+?;', text)):
+            # The entity body is arbitrary text, so it must be escaped before
+            # being used as a pattern; otherwise regex metacharacters in it
+            # raise re.error or make the search fail and return None.
+            mo = re.search('(%s)' % re.escape(entity[1:-1]), text)
+            text = text.replace(entity, entity_to_unicode(mo))
+
+        # NOTE(review): this deletes literal ampersands instead of escaping
+        # them as &amp; -- confirm that the data loss is intended.
+        text = text.replace('&', '')
+
+        return text
+
+    def dump_text(self, elem, stylizer, tag_stack=None):
+        '''
+        Return FB2 markup for `elem`, its descendants and its tail text.
+
+        :param elem:      An element of the parsed XHTML tree. Anything that
+                          is not an element in the XHTML namespace yields u''.
+        :param stylizer:  Stylizer used to look up the computed style.
+        :param tag_stack: FB2 tags currently open, innermost last. Only
+                          passed explicitly by the recursive calls.
+        '''
+        # Use a fresh list per top level call. A shared mutable default would
+        # carry stale open tags into later calls if an exception unwound the
+        # recursion before the matching pops ran.
+        if tag_stack is None:
+            tag_stack = []
+
+        if not isinstance(elem.tag, basestring) \
+           or namespace(elem.tag) != XHTML_NS:
+            return u''
+
+        fb2_text = u''
+        style = stylizer.style(elem)
+
+        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
+           or style['visibility'] == 'hidden':
+            return u''
+
+        tag = barename(elem.tag)
+        # Number of tags this call opened and therefore has to close.
+        tag_count = 0
+
+        if tag == 'img':
+            # An <img> without src has nothing to reference, so skip it
+            # rather than fail with a KeyError.
+            src = elem.attrib.get('src')
+            if src:
+                fb2_text += '<image xlink:href="#%s" />' % os.path.basename(src)
+
+        # Unknown tags become paragraphs; a tag that is already open is not
+        # opened a second time.
+        fb2_tag = TAG_MAP.get(tag, 'p')
+        if fb2_tag and fb2_tag not in tag_stack:
+            tag_count += 1
+            fb2_text += '<%s>' % fb2_tag
+            tag_stack.append(fb2_tag)
+
+        # Processes style information
+        for css_prop, value_map in STYLES:
+            style_tag = value_map.get(style[css_prop], None)
+            if style_tag:
+                tag_count += 1
+                fb2_text += '<%s>' % style_tag
+                tag_stack.append(style_tag)
+
+        text = getattr(elem, 'text', None)
+        if text is not None and text.strip() != '':
+            fb2_text += text
+
+        for item in elem:
+            fb2_text += self.dump_text(item, stylizer, tag_stack)
+
+        # Collect the tags opened above in opening order; close_tags emits
+        # them in reverse, i.e. innermost first.
+        close_tag_list = []
+        for i in range(0, tag_count):
+            close_tag_list.insert(0, tag_stack.pop())
+
+        fb2_text += self.close_tags(close_tag_list)
+
+        tail = getattr(elem, 'tail', None)
+        if tail is not None and tail.strip() != '':
+            # Tail text outside of any paragraph gets one of its own.
+            if 'p' not in tag_stack:
+                fb2_text += '<p>%s</p>' % tail
+            else:
+                fb2_text += tail
+
+        return fb2_text
+
+    def close_tags(self, tags):
+        '''
+        Return closing tags for `tags`, last entry first. `tags` is emptied
+        in the process.
+        '''
+        fb2_text = u''
+        while tags:
+            fb2_text += '</%s>' % tags.pop()
+
+        return fb2_text
+
--- a/src/calibre/ebooks/fb2/input.py
+++ b/src/calibre/ebooks/fb2/input.py
@ -0,0 +1,74 @@
+from __future__ import with_statement
+__license__   = 'GPL v3'
+__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
+"""
+Convert .fb2 files to XHTML plus an OPF file
+"""
+import os
+from base64 import b64decode
+from lxml import etree
+
+from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
+from calibre import guess_type
+
+# XML namespace of FictionBook 2.0 documents; bound to the 'f' prefix for the
+# XPath query in FB2Input.convert.
+FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
+
+class FB2Input(InputFormatPlugin):
+    '''
+    Input plugin that turns a FictionBook (FB2) file into `index.xhtml` plus
+    `metadata.opf`, both written to the current working directory.
+    '''
+
+    name        = 'FB2 Input'
+    author      = 'Anatoly Shipitsin'
+    description = 'Convert FB2 files to HTML'
+    file_types  = set(['fb2'])
+
+    recommendations = set([
+        ('level1_toc', '//h:h1', OptionRecommendation.MED),
+        ('level2_toc', '//h:h2', OptionRecommendation.MED),
+        ('level3_toc', '//h:h3', OptionRecommendation.MED),
+        ])
+
+    def convert(self, stream, options, file_ext, log,
+                accelerators):
+        '''
+        Convert the FB2 document in `stream`.
+
+        :param stream: Seekable file object containing the FB2 XML. It is
+                       read twice: once for the content, once for metadata.
+        :param log:    Logger; only `debug` is used.
+        :return:       Absolute path of the generated `metadata.opf`.
+
+        `options`, `file_ext` and `accelerators` are not used here.
+        '''
+        from calibre.resources import fb2_xsl
+        from calibre.ebooks.metadata.opf2 import OPFCreator
+        from calibre.ebooks.metadata.meta import get_metadata
+        from calibre.ebooks.oeb.base import XLINK_NS
+        NAMESPACES = {'f':FB2NS, 'l':XLINK_NS}
+
+        log.debug('Parsing XML...')
+        parser = etree.XMLParser(recover=True, no_network=True)
+        doc = etree.parse(stream, parser)
+        # Must run before the manifest is built from os.listdir() below.
+        self.extract_embedded_content(doc)
+        log.debug('Converting XML to HTML...')
+        styledoc = etree.fromstring(fb2_xsl)
+
+        transform = etree.XSLT(styledoc)
+        result = transform(doc)
+        # Close the file explicitly so the data is on disk before the
+        # directory is listed and the OPF refers to it.
+        with open('index.xhtml', 'wb') as f:
+            f.write(transform.tostring(result))
+        stream.seek(0)
+        mi = get_metadata(stream, 'fb2')
+        if not mi.title:
+            mi.title = _('Unknown')
+        if not mi.authors:
+            mi.authors = [_('Unknown')]
+        opf = OPFCreator(os.getcwdu(), mi)
+        # Everything in the working directory goes into the manifest.
+        entries = [(f, guess_type(f)[0]) for f in os.listdir('.')]
+        opf.create_manifest(entries)
+        opf.create_spine(['index.xhtml'])
+
+        for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
+            href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
+            if href is not None:
+                # Cover references are fragment ids of <binary> elements,
+                # which extract_embedded_content wrote out under that name.
+                if href.startswith('#'):
+                    href = href[1:]
+                opf.guide.set_cover(os.path.abspath(href))
+
+        with open('metadata.opf', 'wb') as f:
+            opf.render(f)
+        return os.path.join(os.getcwd(), 'metadata.opf')
+
+    def extract_embedded_content(self, doc):
+        '''
+        Write every base64 encoded <binary> child of the document element to
+        a file in the current directory, named after the element's id.
+        '''
+        # NOTE(review): the id comes from the input file and is used as a
+        # file name without sanitising -- confirm that path separators in it
+        # cannot escape the working directory.
+        for elem in doc.xpath('./*'):
+            if 'binary' in elem.tag and 'id' in elem.attrib:
+                fname = elem.attrib['id']
+                # elem.text is None for an empty <binary/> element.
+                data = b64decode((elem.text or '').strip())
+                with open(fname, 'wb') as f:
+                    f.write(data)
+
--- a/src/calibre/ebooks/fb2/output.py
+++ b/src/calibre/ebooks/fb2/output.py
@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.customize.conversion import OutputFormatPlugin
+from calibre.ebooks.fb2.fb2ml import FB2MLizer
+
+class FB2Output(OutputFormatPlugin):
+    '''
+    Output plugin that writes an OEB book as a FictionBook 2 (FB2) file.
+    '''
+
+    name = 'FB2 Output'
+    author = 'John Schember'
+    file_type = 'fb2'
+
+    def convert(self, oeb_book, output_path, input_plugin, opts, log):
+        '''
+        Serialize `oeb_book` with FB2MLizer and write the result.
+
+        :param output_path: Either a path, whose missing parent directory is
+                            created, or an object with `write`, `seek` and
+                            `truncate`, which is overwritten from the start
+                            and left open.
+        :param opts:        Conversion options; `linearize_tables` is read
+                            here and the object is passed on to FB2MLizer.
+
+        `input_plugin` and `log` are not used here.
+        '''
+        fb2mlizer = FB2MLizer(ignore_tables=opts.linearize_tables)
+        fb2_content = fb2mlizer.extract_content(oeb_book, opts)
+
+        # Only streams that are opened here are closed here.
+        close = not hasattr(output_path, 'write')
+        if close:
+            out_dir = os.path.dirname(output_path)
+            if out_dir != '' and not os.path.exists(out_dir):
+                os.makedirs(out_dir)
+            out_stream = open(output_path, 'wb')
+        else:
+            out_stream = output_path
+
+        try:
+            out_stream.seek(0)
+            out_stream.truncate()
+            out_stream.write(fb2_content)
+        finally:
+            # Do not leak the handle if seeking or writing fails.
+            if close:
+                out_stream.close()
+
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
--- a/src/calibre/ebooks/html/init.py
+++ b/src/calibre/ebooks/html/init.py
@ -0,0 +1,30 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import re
+
+from lxml.etree import tostring as _tostring
+
+def tostring(root, strip_comments=False, pretty_print=False):
+    '''
+    Serialize the processed XHTML tree `root` to a UTF-8 encoded string
+    that starts with an XML declaration.
+
+    Namespace attributes are set on `root` and on every svg element before
+    serializing, so the tree passed in is modified.
+
+    :param strip_comments: If True, remove all `<!-- ... -->` comments from
+                           the serialized output.
+    :param pretty_print:   Passed through to lxml's tostring.
+    '''
+    root.set('xmlns', 'http://www.w3.org/1999/xhtml')
+    root.set('{http://www.w3.org/1999/xhtml}xlink', 'http://www.w3.org/1999/xlink')
+    for node in root.iter():
+        tag = node.tag
+        # Only tags that offer rpartition (i.e. strings) are inspected.
+        if not hasattr(tag, 'rpartition'):
+            continue
+        local_name = tag.rpartition('}')[-1]
+        if local_name.lower() == 'svg':
+            node.set('xmlns', 'http://www.w3.org/2000/svg')
+
+    serialized = _tostring(root, encoding='utf-8', pretty_print=pretty_print)
+    if strip_comments:
+        comment_pat = re.compile(r'<!--.*?-->', re.DOTALL)
+        serialized = comment_pat.sub('', serialized)
+
+    return '<?xml version="1.0" encoding="utf-8" ?>\n' + serialized
+
+
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@ -0,0 +1,300 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+'''
+Input plugin for HTML or OPF ebooks.
+'''
+
+import os, re, sys
+from urlparse import urlparse, urlunparse
+from urllib import unquote
+
+from calibre.customize.conversion import InputFormatPlugin
+from calibre.ebooks.metadata.opf2 import OPFCreator
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.customize.conversion import OptionRecommendation
+from calibre import unicode_path
+
+class Link(object):
+    '''
+    A single link found in an HTML file, together with the local path it
+    resolves to (if any).
+    '''
+
+    @classmethod
+    def url_to_local_path(cls, url, base):
+        '''
+        Return the absolute local path for the parsed `url`. The scheme,
+        network location and fragment are dropped and the rest is unquoted;
+        relative paths are resolved against `base`.
+        '''
+        local = unquote(urlunparse(('', '', url.path, url.params, url.query, '')))
+        if not os.path.isabs(local):
+            local = os.path.abspath(os.path.join(base, local))
+        return local
+
+    def __init__(self, url, base):
+        '''
+        :param url:  The url this link points to. Must be an unquoted unicode string.
+        :param base: The base directory that relative URLs are with respect to.
+                     Must be a unicode string.
+        '''
+        assert isinstance(url, unicode) and isinstance(base, unicode)
+        parsed = urlparse(url)
+        local = parsed.scheme in ('', 'file')
+        self.url         = url
+        self.parsed_url  = parsed
+        self.is_local    = local
+        # Internal links have no path component, e.g. a bare fragment.
+        self.is_internal = local and not bool(parsed.path)
+        self.fragment    = unquote(parsed.fragment)
+        if local and not self.is_internal:
+            self.path = self.url_to_local_path(parsed, base)
+        else:
+            # Remote and internal links have no local path.
+            self.path = None
+
+    def __hash__(self):
+        # Links without a local path hash on their url instead.
+        return hash(self.url if self.path is None else self.path)
+
+    def __eq__(self, other):
+        # `other` may be another object with a `path` or a plain path.
+        other_path = getattr(other, 'path', other)
+        return self.path == other_path
+
+    def __str__(self):
+        return u'Link: %s --> %s'%(self.url, self.path)
+
+
+class IgnoreFile(Exception):
+    '''
+    Raised for a linked file that should be left out of the traversal.
+
+    `errno` is kept as given; `doesnt_exist` is True when it equals 2.
+    '''
+
+    def __init__(self, msg, errno):
+        Exception.__init__(self, msg)
+        self.errno = errno
+        # errno 2 is ENOENT (no such file or directory).
+        self.doesnt_exist = (errno == 2)
+
+class HTMLFile(object):
+    '''
+    Contains basic information about an HTML file. This
+    includes a list of links to other files as well as
+    the encoding of each file. Also tries to detect if the file is not a HTML
+    file in which case :member:`is_binary` is set to True.
+
+    The encoding of the file is available as :member:`encoding`. For binary
+    files it is whatever was passed to the constructor (possibly `None`),
+    as no detection is attempted for them.
+    '''
+
+    # A file counts as HTML if an opening <html tag occurs in its first 1024 bytes.
+    HTML_PAT  = re.compile(r'<\s*html', re.IGNORECASE)
+    TITLE_PAT = re.compile('<title>([^<>]+)</title>', re.IGNORECASE)
+    # Matches the href of an <a> tag, whether double quoted (url1), single
+    # quoted (url2) or unquoted (url3).
+    LINK_PAT  = re.compile(
+    r'<\s*a\s+.*?href\s*=\s*(?:(?:"(?P<url1>[^"]+)")|(?:\'(?P<url2>[^\']+)\')|(?P<url3>[^\s>]+))',
+    re.DOTALL|re.IGNORECASE)
+
+    def __init__(self, path_to_html_file, level, encoding, verbose, referrer=None):
+        '''
+        :param level: The level of this file. Should be 0 for the root file.
+        :param encoding: Use `encoding` to decode HTML.
+        :param verbose: Passed on to the encoding detection.
+        :param referrer: The :class:`HTMLFile` that first refers to this file.
+        :raises IOError: If the root file (level 0) cannot be read.
+        :raises IgnoreFile: If any other file cannot be read.
+        '''
+        self.path     = unicode_path(path_to_html_file, abs=True)
+        # Default title is the file name without extension; replaced below
+        # if the document has a <title>.
+        self.title    = os.path.splitext(os.path.basename(self.path))[0]
+        self.base     = os.path.dirname(self.path)
+        self.level    = level
+        self.referrer = referrer
+        self.links    = []
+
+        try:
+            with open(self.path, 'rb') as f:
+                src = f.read()
+        except IOError, err:
+            msg = 'Could not read from file: %s with error: %s'%(self.path, unicode(err))
+            if level == 0:
+                raise IOError(msg)
+            raise IgnoreFile(msg, err.errno)
+
+        self.is_binary = not bool(self.HTML_PAT.search(src[:1024]))
+        # Define the attribute unconditionally so that reading `encoding` on
+        # a binary file does not raise AttributeError.
+        self.encoding = encoding
+        if not self.is_binary:
+            if encoding is None:
+                encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]
+                self.encoding = encoding
+
+            src = src.decode(encoding, 'replace')
+            match = self.TITLE_PAT.search(src)
+            self.title = match.group(1) if match is not None else self.title
+            self.find_links(src)
+
+    def __eq__(self, other):
+        # `other` may be another object with a `path` or a plain path.
+        return self.path == getattr(other, 'path', other)
+
+    def __str__(self):
+        return u'HTMLFile:%d:%s:%s'%(self.level, 'b' if self.is_binary else 'a', self.path)
+
+    def __repr__(self):
+        return str(self)
+
+    def find_links(self, src):
+        '''
+        Append a :class:`Link` to `self.links` for every <a href> found in
+        the decoded source `src`, skipping links equal to one already there.
+        '''
+        for match in self.LINK_PAT.finditer(src):
+            # Exactly one of the three alternatives captured the url.
+            url = match.group('url1') or match.group('url2') or match.group('url3')
+            link = self.resolve(url)
+            if link not in self.links:
+                self.links.append(link)
+
+    def resolve(self, url):
+        '''Return a :class:`Link` for `url`, relative to this file's directory.'''
+        return Link(url, self.base)
+
+
+def depth_first(root, flat, visited=None):
+    '''
+    Yield `root` and then every file reachable from it through `links`, in
+    depth first order and without repeats.
+
+    :param root:    The file to start from. Always yielded.
+    :param flat:    List of all known files; links are resolved to entries
+                    of this list and links with no entry are skipped.
+    :param visited: Set of files already yielded. Only passed explicitly by
+                    the recursive calls.
+    '''
+    # A fresh set per top level call. A shared default would remember the
+    # files of earlier traversals, dropping them from later results and
+    # keeping them alive for the life of the process.
+    if visited is None:
+        visited = set()
+    yield root
+    visited.add(root)
+    for link in root.links:
+        if link.path is not None and link not in visited:
+            try:
+                index = flat.index(link)
+            except ValueError: # Can happen if max_levels is used
+                continue
+            hf = flat[index]
+            if hf not in visited:
+                yield hf
+                visited.add(hf)
+                # The nested generator shares `visited`; anything it yields
+                # that this level has already produced is filtered out.
+                for hf in depth_first(hf, flat, visited):
+                    if hf not in visited:
+                        yield hf
+                        visited.add(hf)
+
+def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None):
+    '''
+    Traverse all links reachable from the HTML file, one level at a time.
+
+    :param path_to_html_file: Path of the root HTML file.
+    :param max_levels: Maximum levels of recursion. Must be non-negative. 0
+                       implies that no links in the root HTML file are followed.
+    :param verbose:    Passed on to :class:`HTMLFile`. Rejected links whose
+                       file does not exist are only reported if it is > 1.
+    :param encoding:   Specify character encoding of HTML files. If `None` it is
+                       auto-detected.
+    :return:           A pair of lists (breadth_first, depth_first). Each list contains
+                       :class:`HTMLFile` objects.
+    :raises IOError:   From :class:`HTMLFile` if the root file cannot be read.
+    '''
+    assert max_levels >= 0
+    level = 0
+    # `flat` accumulates every accepted file in breadth first order.
+    flat =  [HTMLFile(path_to_html_file, level, encoding, verbose)]
+    next_level = list(flat)
+    while level < max_levels and len(next_level) > 0:
+        level += 1
+        # Files discovered at this level; they are scanned in the next pass.
+        nl = []
+        for hf in next_level:
+            rejects = []
+            for link in hf.links:
+                # Skip non-local links and files that were already loaded
+                # (HTMLFile.__eq__ compares against the plain path).
+                if link.path is None or link.path in flat:
+                    continue
+                try:
+                    nf = HTMLFile(link.path, level, encoding, verbose, referrer=hf)
+                    if nf.is_binary:
+                        raise IgnoreFile('%s is a binary file'%nf.path, -1)
+                    nl.append(nf)
+                    flat.append(nf)
+                except IgnoreFile, err:
+                    rejects.append(link)
+                    if not err.doesnt_exist or verbose > 1:
+                        print repr(err)
+            # Removed after the loop so hf.links is not modified while it is
+            # being iterated.
+            for link in rejects:
+                hf.links.remove(link)
+
+        next_level = list(nl)
+    # depth_first is recursive, so the limit is raised for the duration of
+    # the call and restored afterwards.
+    orec = sys.getrecursionlimit()
+    sys.setrecursionlimit(500000)
+    try:
+        return flat, list(depth_first(flat[0], flat))
+    finally:
+        sys.setrecursionlimit(orec)
+
+
+def get_filelist(htmlfile, dir, opts, log):
+    '''
+    Return the list of files reachable from `htmlfile`, as produced by
+    :func:`traverse`, in breadth first or depth first order depending on
+    `opts.breadth_first`.
+
+    `dir` is accepted but not used. `opts` must provide `max_levels`,
+    `verbose`, `input_encoding` and `breadth_first`.
+    '''
+    log.info('Building file list...')
+    breadth_order, depth_order = traverse(
+        htmlfile,
+        max_levels=int(opts.max_levels),
+        verbose=opts.verbose,
+        encoding=opts.input_encoding)
+    if opts.breadth_first:
+        filelist = breadth_order
+    else:
+        filelist = depth_order
+    if opts.verbose:
+        log.debug('\tFound files...')
+        for found in filelist:
+            log.debug('\t\t', found)
+    return filelist
+
+
+class HTMLInput(InputFormatPlugin):
+    '''
+    Input plugin for HTML files (following their links) and OPF files.
+    '''
+
+    name        = 'HTML Input'
+    author      = 'Kovid Goyal'
+    description = 'Convert HTML and OPF files to an OEB'
+    file_types  = set(['opf', 'html', 'htm', 'xhtml', 'xhtm'])
+
+    options = set([
+        OptionRecommendation(name='breadth_first',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Traverse links in HTML files breadth first. Normally, '
+                    'they are traversed depth first.'
+                   )
+        ),
+
+        OptionRecommendation(name='max_levels',
+            recommended_value=5, level=OptionRecommendation.LOW,
+            help=_('Maximum levels of recursion when following links in '
+                   'HTML files. Must be non-negative. 0 implies that no '
+                   'links in the root HTML file are followed. Default is '
+                   '%default.'
+                   )
+        ),
+
+        OptionRecommendation(name='dont_package',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Normally this input plugin re-arranges all the input '
+                'files into a standard folder hierarchy. Only use this option '
+                'if you know what you are doing as it can result in various '
+                'nasty side effects in the rest of of the conversion pipeline.'
+                )
+        ),
+    ])
+
+    def convert(self, stream, opts, file_ext, log,
+                accelerators):
+        '''
+        Convert the HTML or OPF file behind `stream`.
+
+        For HTML input a `metadata.opf` listing all linked files is first
+        written to the current working directory.
+
+        :param stream:   File object for the input; its `name` is used as
+                         the path of the input file.
+        :param opts:     Conversion options.
+        :param file_ext: Extension of the input; 'opf' means the stream is
+                         itself the OPF to use.
+        :return:         The path of the OPF file if `opts.dont_package` is
+                         set, otherwise the packaged OEB book.
+
+        `accelerators` is not used here.
+        '''
+        from calibre.ebooks.metadata.html import get_metadata_
+
+        basedir = os.getcwd()
+        self.opts = opts
+
+        # NOTE(review): stream.name is used unconditionally further down even
+        # though only this assignment is guarded -- confirm that callers
+        # always pass a named file object.
+        if hasattr(stream, 'name'):
+            basedir = os.path.dirname(stream.name)
+        if file_ext == 'opf':
+            opfpath = stream.name
+        else:
+            filelist = get_filelist(stream.name, basedir, opts, log)
+            mi = get_metadata_(stream.read(), opts.input_encoding)
+            mi = OPFCreator(os.getcwdu(), mi)
+            mi.guide = None
+            entries = [(f.path, 'application/xhtml+xml') for f in filelist]
+            mi.create_manifest(entries)
+            mi.create_spine([f.path for f in filelist])
+
+            # Close the file explicitly so the OPF is fully written before
+            # it is read back below.
+            with open('metadata.opf', 'wb') as opf_file:
+                mi.render(opf_file, encoding=opts.input_encoding)
+            opfpath = os.path.abspath('metadata.opf')
+
+        if opts.dont_package:
+            return opfpath
+
+        from calibre.ebooks.conversion.plumber import create_oebbook
+        oeb = create_oebbook(log, opfpath, opts, self,
+                encoding=opts.input_encoding)
+
+        from calibre.ebooks.oeb.transforms.package import Package
+        Package(os.getcwdu())(oeb, opts)
+
+        return oeb
+
+
--- a/src/calibre/ebooks/htmlsymbols.py
+++ b/src/calibre/ebooks/htmlsymbols.py
@ -0,0 +1,312 @@
+# -*- coding: utf-8 -*-
+'''
+Mapping of non-ASCII symbols and their corresponding HTML entity number and name
+'''
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+
+# http://www.w3schools.com/tags/ref_symbols.asp
+HTML_SYMBOLS = {
+                # Math Symbols
+                u'∀' : ['&#8704;', '&forall;'], # for all
+                u'∂' : ['&#8706;', '&part;'], # part
+                u'∃' : ['&#8707;', '&exists;'], # exists
+                u'∅' : ['&#8709;', '&empty;'], # empty
+                u'∇' : ['&#8711;', '&nabla;'], # nabla
+                u'∈' : ['&#8712;', '&isin;'], # isin
+                u'∉' : ['&#8713;', '&notin;'], # notin
+                u'∋' : ['&#8715;', '&ni;'], # ni
+                u'∏' : ['&#8719;', '&prod;'], # prod
+                u'∑' : ['&#8721;', '&sum;'], # sum
+                u'−' : ['&#8722;', '&minus;'], # minus
+                u'∗' : ['&#8727;', '&lowast;'], # lowast
+                u'√' : ['&#8730;', '&radic;'], # square root
+                u'∝' : ['&#8733;', '&prop;'], # proportional to
+                u'∞' : ['&#8734;', '&infin;'], # infinity
+                u'∠' : ['&#8736;', '&ang;'], # angle
+                u'∧' : ['&#8743;', '&and;'], # and
+                u'∨' : ['&#8744;', '&or;'], # or
+                u'∩' : ['&#8745;', '&cap;'], # cap
+                u'∪' : ['&#8746;', '&cup;'], # cup
+                u'∫' : ['&#8747;', '&int;'], # integral
+                u'∴' : ['&#8756;', '&there4;'], # therefore
+                u'∼' : ['&#8764;', '&sim;'], # similar to
+                u'≅' : ['&#8773;', '&cong;'], # approximately equal
+                u'≈' : ['&#8776;', '&asymp;'], # almost equal
+                u'≠' : ['&#8800;', '&ne;'], # not equal
+                u'≡' : ['&#8801;', '&equiv;'], # equivalent
+                u'≤' : ['&#8804;', '&le;'], # less or equal
+                u'≥' : ['&#8805;', '&ge;'], # greater or equal
+                u'⊂' : ['&#8834;', '&sub;'], # subset of
+                u'⊃' : ['&#8835;', '&sup;'], # superset of
+                u'⊄' : ['&#8836;', '&nsub;'], # not subset of
+                u'⊆' : ['&#8838;', '&sube;'], # subset or equal
+                u'⊇' : ['&#8839;', '&supe;'], # superset or equal
+                u'⊕' : ['&#8853;', '&oplus;'], # circled plus
+                u'⊗' : ['&#8855;', '&otimes;'], # circled times
+                u'⊥' : ['&#8869;', '&perp;'], # perpendicular
+                u'⋅' : ['&#8901;', '&sdot;'], # dot operator
+                # Greek Letters
+                u'Α' : ['&#913;', '&Alpha;'], # Alpha
+                u'Β' : ['&#914;', '&Beta;'], # Beta
+                u'Γ' : ['&#915;', '&Gamma;'], # Gamma
+                u'Δ' : ['&#916;', '&Delta;'], # Delta
+                u'Ε' : ['&#917;', '&Epsilon;'], # Epsilon
+                u'Ζ' : ['&#918;', '&Zeta;'], # Zeta
+                u'Η' : ['&#919;', '&Eta;'], # Eta
+                u'Θ' : ['&#920;', '&Theta;'], # Theta
+                u'Ι' : ['&#921;', '&Iota;'], # Iota
+                u'Κ' : ['&#922;', '&Kappa;'], # Kappa
+                u'Λ' : ['&#923;', '&Lambda;'], # Lambda
+                u'Μ' : ['&#924;', '&Mu;'], # Mu
+                u'Ν' : ['&#925;', '&Nu;'], # Nu
+                u'Ξ' : ['&#926;', '&Xi;'], # Xi
+                u'Ο' : ['&#927;', '&Omicron;'], # Omicron
+                u'Π' : ['&#928;', '&Pi;'], # Pi
+                u'Ρ' : ['&#929;', '&Rho;'], # Rho
+                u'Σ' : ['&#931;', '&Sigma;'], # Sigma
+                u'Τ' : ['&#932;', '&Tau;'], # Tau
+                u'Υ' : ['&#933;', '&Upsilon;'], # Upsilon
+                u'Φ' : ['&#934;', '&Phi;'], # Phi
+                u'Χ' : ['&#935;', '&Chi;'], # Chi
+                u'Ψ' : ['&#936;', '&Psi;'], # Psi
+                u'ω' : ['&#969;', '&omega;'], # omega
+                u'ϑ' : ['&#977;', '&thetasym;'], # theta symbol
+                u'ϒ' : ['&#978;', '&upsih;'], # upsilon symbol
+                u'ϖ' : ['&#982;', '&piv;'], # pi symbol
+                # Other
+                u'Œ' : ['&#338;', '&OElig;'], # capital ligature OE
+                u'œ' : ['&#339;', '&oelig;'], # small ligature oe
+                u'Š' : ['&#352;', '&Scaron;'], # capital S with caron
+                u'š' : ['&#353;', '&scaron;'], # small S with caron
+                u'Ÿ' : ['&#376;', '&Yuml;'], # capital Y with diaeres
+                u'ƒ' : ['&#402;', '&fnof;'], # f with hook
+                u'ˆ' : ['&#710;', '&circ;'], # modifier letter circumflex accent
+                u'˜' : ['&#732;', '&tilde;'], # small tilde
+                u'–' : ['&#8211;', '&ndash;'], # en dash
+                u'—' : ['&#8212;', '&mdash;'], # em dash
+                u'‘' : ['&#8216;', '&lsquo;'], # left single quotation mark
+                u'’' : ['&#8217;', '&rsquo;'], # right single quotation mark
+                u'‚' : ['&#8218;', '&sbquo;'], # single low-9 quotation mark
+                u'“' : ['&#8220;', '&ldquo;'], # left double quotation mark
+                u'”' : ['&#8221;', '&rdquo;'], # right double quotation mark
+                u'„' : ['&#8222;', '&bdquo;'], # double low-9 quotation mark
+                u'†' : ['&#8224;', '&dagger;'], # dagger
+                u'‡' : ['&#8225;', '&Dagger;'], # double dagger
+                u'•' : ['&#8226;', '&bull;'], # bullet
+                u'…' : ['&#8230;', '&hellip;'], # horizontal ellipsis
+                u'‰' : ['&#8240;', '&permil;'], # per mille 
+                u'′' : ['&#8242;', '&prime;'], # minutes
+                u'″' : ['&#8243;', '&Prime;'], # seconds
+                u'‹' : ['&#8249;', '&lsaquo;'], # single left angle quotation
+                u'›' : ['&#8250;', '&rsaquo;'], # single right angle quotation
+                u'‾' : ['&#8254;', '&oline;'], # overline
+                u'€' : ['&#8364;', '&euro;'], # euro
+                u'™' : ['&#8482;', '&trade;'], # trademark
+                u'←' : ['&#8592;', '&larr;'], # left arrow
+                u'↑' : ['&#8593;', '&uarr;'], # up arrow
+                u'→' : ['&#8594;', '&rarr;'], # right arrow
+                u'↓' : ['&#8595;', '&darr;'], # down arrow
+                u'↔' : ['&#8596;', '&harr;'], # left right arrow
+                u'↵' : ['&#8629;', '&crarr;'], # carriage return arrow
+                u'⌈' : ['&#8968;', '&lceil;'], # left ceiling
+                u'⌉' : ['&#8969;', '&rceil;'], # right ceiling
+                u'⌊' : ['&#8970;', '&lfloor;'], # left floor
+                u'⌋' : ['&#8971;', '&rfloor;'], # right floor
+                u'◊' : ['&#9674;', '&loz;'], # lozenge
+                u'♠' : ['&#9824;', '&spades;'], # spade
+                u'♣' : ['&#9827;', '&clubs;'], # club
+                u'♥' : ['&#9829;', '&hearts;'], # heart
+                u'♦' : ['&#9830;', '&diams;'], # diamond
+                # Extra http://www.ascii.cl/htmlcodes.htm
+                u' ' : ['&#32;'], # space
+                u'!' : ['&#33;'], # exclamation point
+                u'#' : ['&#35;'], # number sign
+                u'$' : ['&#36;'], # dollar sign
+                u'%' : ['&#37;'], # percent sign
+                u'\'' : ['&#39;'], # single quote
+                u'(' : ['&#40;'], # opening parenthesis
+                u')' : ['&#41;'], # closing parenthesis
+                u'*' : ['&#42;'], # asterisk
+                u'+' : ['&#43;'], # plus sign
+                u',' : ['&#44;'], # comma
+                u'-' : ['&#45;'], # minus sign - hyphen
+                u'.' : ['&#46;'], # period
+                u'/' : ['&#47;'], # slash
+                u'0' : ['&#48;'], # zero
+                u'1' : ['&#49;'], # one
+                u'2' : ['&#50;'], # two
+                u'3' : ['&#51;'], # three
+                u'4' : ['&#52;'], # four
+                u'5' : ['&#53;'], # five
+                u'6' : ['&#54;'], # six
+                u'7' : ['&#55;'], # seven
+                u'8' : ['&#56;'], # eight
+                u'9' : ['&#57;'], # nine
+                u':' : ['&#58;'], # colon
+                u';' : ['&#59;'], # semicolon
+                u'=' : ['&#61;'], # equal sign
+                u'?' : ['&#63;'], # question mark
+                u'@' : ['&#64;'], # at symbol
+                u'A' : ['&#65;'], # 
+                u'B' : ['&#66;'], # 
+                u'C' : ['&#67;'], # 
+                u'D' : ['&#68;'], # 
+                u'E' : ['&#69;'], # 
+                u'F' : ['&#70;'], # 
+                u'G' : ['&#71;'], # 
+                u'H' : ['&#72;'], # 
+                u'I' : ['&#73;'], # 
+                u'J' : ['&#74;'], # 
+                u'K' : ['&#75;'], # 
+                u'L' : ['&#76;'], # 
+                u'M' : ['&#77;'], # 
+                u'N' : ['&#78;'], # 
+                u'O' : ['&#79;'], # 
+                u'P' : ['&#80;'], # 
+                u'Q' : ['&#81;'], # 
+                u'R' : ['&#82;'], # 
+                u'S' : ['&#83;'], # 
+                u'T' : ['&#84;'], # 
+                u'U' : ['&#85;'], # 
+                u'V' : ['&#86;'], # 
+                u'W' : ['&#87;'], # 
+                u'X' : ['&#88;'], # 
+                u'Y' : ['&#89;'], # 
+                u'Z' : ['&#90;'], # 
+                u'[' : ['&#91;'], # opening bracket
+                u'\\' : ['&#92;'], # backslash
+                u']' : ['&#93;'], # closing bracket
+                u'^' : ['&#94;'], # caret - circumflex
+                u'_' : ['&#95;'], # underscore
+                u'`' : ['&#96;'], # grave accent
+                u'a' : ['&#97;'], # 
+                u'b' : ['&#98;'], # 
+                u'c' : ['&#99;'], # 
+                u'd' : ['&#100;'], # 
+                u'e' : ['&#101;'], # 
+                u'f' : ['&#102;'], # 
+                u'g' : ['&#103;'], # 
+                u'h' : ['&#104;'], # 
+                u'i' : ['&#105;'], # 
+                u'j' : ['&#106;'], # 
+                u'k' : ['&#107;'], # 
+                u'l' : ['&#108;'], # 
+                u'm' : ['&#109;'], # 
+                u'n' : ['&#110;'], # 
+                u'o' : ['&#111;'], # 
+                u'p' : ['&#112;'], # 
+                u'q' : ['&#113;'], # 
+                u'r' : ['&#114;'], # 
+                u's' : ['&#115;'], # 
+                u't' : ['&#116;'], # 
+                u'u' : ['&#117;'], # 
+                u'v' : ['&#118;'], # 
+                u'w' : ['&#119;'], # 
+                u'x' : ['&#120;'], # 
+                u'y' : ['&#121;'], # 
+                u'z' : ['&#122;'], # 
+                u'{' : ['&#123;'], # opening brace
+                u'|' : ['&#124;'], # vertical bar
+                u'}' : ['&#125;'], # closing brace
+                u'~' : ['&#126;'], # equivalency sign - tilde
+                u'<' : ['&#60;', '&lt;'], # less than sign
+                u'>' : ['&#62;', '&gt;'], # greater than sign
+                u'¡' : ['&#161;', '&iexcl;'], # inverted exclamation mark
+                u'¢' : ['&#162;', '&cent;'], # cent sign
+                u'£' : ['&#163;', '&pound;'], # pound sign
+                u'¤' : ['&#164;', '&curren;'], # currency sign
+                u'¥' : ['&#165;', '&yen;'], # yen sign
+                u'¦' : ['&#166;', '&brvbar;'], # broken vertical bar
+                u'§' : ['&#167;', '&sect;'], # section sign
+                u'¨' : ['&#168;', '&uml;'], # spacing diaeresis - umlaut
+                u'©' : ['&#169;', '&copy;'], # copyright sign
+                u'ª' : ['&#170;', '&ordf;'], # feminine ordinal indicator
+                u'«' : ['&#171;', '&laquo;'], # left double angle quotes
+                u'¬' : ['&#172;', '&not;'], # not sign
+                u'®' : ['&#174;', '&reg;'], # registered trade mark sign
+                u'¯' : ['&#175;', '&macr;'], # spacing macron - overline
+                u'°' : ['&#176;', '&deg;'], # degree sign
+                u'±' : ['&#177;', '&plusmn;'], # plus-or-minus sign
+                u'²' : ['&#178;', '&sup2;'], # superscript two - squared
+                u'³' : ['&#179;', '&sup3;'], # superscript three - cubed
+                u'´' : ['&#180;', '&acute;'], # acute accent - spacing acute
+                u'µ' : ['&#181;', '&micro;'], # micro sign
+                u'¶' : ['&#182;', '&para;'], # pilcrow sign - paragraph sign
+                u'·' : ['&#183;', '&middot;'], # middle dot - Georgian comma
+                u'¸' : ['&#184;', '&cedil;'], # spacing cedilla
+                u'¹' : ['&#185;', '&sup1;'], # superscript one
+                u'º' : ['&#186;', '&ordm;'], # masculine ordinal indicator
+                u'»' : ['&#187;', '&raquo;'], # right double angle quotes
+                u'¼' : ['&#188;', '&frac14;'], # fraction one quarter
+                u'½' : ['&#189;', '&frac12;'], # fraction one half
+                u'¾' : ['&#190;', '&frac34;'], # fraction three quarters
+                u'¿' : ['&#191;', '&iquest;'], # inverted question mark
+                u'À' : ['&#192;', '&Agrave;'], # latin capital letter A with grave
+                u'Á' : ['&#193;', '&Aacute;'], # latin capital letter A with acute
+                u'Â' : ['&#194;', '&Acirc;'], # latin capital letter A with circumflex
+                u'Ã' : ['&#195;', '&Atilde;'], # latin capital letter A with tilde
+                u'Ä' : ['&#196;', '&Auml;'], # latin capital letter A with diaeresis
+                u'Å' : ['&#197;', '&Aring;'], # latin capital letter A with ring above
+                u'Æ' : ['&#198;', '&AElig;'], # latin capital letter AE
+                u'Ç' : ['&#199;', '&Ccedil;'], # latin capital letter C with cedilla
+                u'È' : ['&#200;', '&Egrave;'], # latin capital letter E with grave
+                u'É' : ['&#201;', '&Eacute;'], # latin capital letter E with acute
+                u'Ê' : ['&#202;', '&Ecirc;'], # latin capital letter E with circumflex
+                u'Ë' : ['&#203;', '&Euml;'], # latin capital letter E with diaeresis
+                u'Ì' : ['&#204;', '&Igrave;'], # latin capital letter I with grave
+                u'Í' : ['&#205;', '&Iacute;'], # latin capital letter I with acute
+                u'Î' : ['&#206;', '&Icirc;'], # latin capital letter I with circumflex
+                u'Ï' : ['&#207;', '&Iuml;'], # latin capital letter I with diaeresis
+                u'Ð' : ['&#208;', '&ETH;'], # latin capital letter ETH
+                u'Ñ' : ['&#209;', '&Ntilde;'], # latin capital letter N with tilde
+                u'Ò' : ['&#210;', '&Ograve;'], # latin capital letter O with grave
+                u'Ó' : ['&#211;', '&Oacute;'], # latin capital letter O with acute
+                u'Ô' : ['&#212;', '&Ocirc;'], # latin capital letter O with circumflex
+                u'Õ' : ['&#213;', '&Otilde;'], # latin capital letter O with tilde
+                u'Ö' : ['&#214;', '&Ouml;'], # latin capital letter O with diaeresis
+                u'×' : ['&#215;', '&times;'], # multiplication sign
+                u'Ø' : ['&#216;', '&Oslash;'], # latin capital letter O with slash
+                u'Ù' : ['&#217;', '&Ugrave;'], # latin capital letter U with grave
+                u'Ú' : ['&#218;', '&Uacute;'], # latin capital letter U with acute
+                u'Û' : ['&#219;', '&Ucirc;'], # latin capital letter U with circumflex
+                u'Ü' : ['&#220;', '&Uuml;'], # latin capital letter U with diaeresis
+                u'Ý' : ['&#221;', '&Yacute;'], # latin capital letter Y with acute
+                u'Þ' : ['&#222;', '&THORN;'], # latin capital letter THORN
+                u'ß' : ['&#223;', '&szlig;'], # latin small letter sharp s - ess-zed
+                u'à' : ['&#224;', '&agrave;'], # latin small letter a with grave
+                u'á' : ['&#225;', '&aacute;'], # latin small letter a with acute
+                u'â' : ['&#226;', '&acirc;'], # latin small letter a with circumflex
+                u'ã' : ['&#227;', '&atilde;'], # latin small letter a with tilde
+                u'ä' : ['&#228;', '&auml;'], # latin small letter a with diaeresis
+                u'å' : ['&#229;', '&aring;'], # latin small letter a with ring above
+                u'æ' : ['&#230;', '&aelig;'], # latin small letter ae
+                u'ç' : ['&#231;', '&ccedil;'], # latin small letter c with cedilla
+                u'è' : ['&#232;', '&egrave;'], # latin small letter e with grave
+                u'é' : ['&#233;', '&eacute;'], # latin small letter e with acute
+                u'ê' : ['&#234;', '&ecirc;'], # latin small letter e with circumflex
+                u'ë' : ['&#235;', '&euml;'], # latin small letter e with diaeresis
+                u'ì' : ['&#236;', '&igrave;'], # latin small letter i with grave
+                u'í' : ['&#237;', '&iacute;'], # latin small letter i with acute
+                u'î' : ['&#238;', '&icirc;'], # latin small letter i with circumflex
+                u'ï' : ['&#239;', '&iuml;'], # latin small letter i with diaeresis
+                u'ð' : ['&#240;', '&eth;'], # latin small letter eth
+                u'ñ' : ['&#241;', '&ntilde;'], # latin small letter n with tilde
+                u'ò' : ['&#242;', '&ograve;'], # latin small letter o with grave
+                u'ó' : ['&#243;', '&oacute;'], # latin small letter o with acute
+                u'ô' : ['&#244;', '&ocirc;'], # latin small letter o with circumflex
+                u'õ' : ['&#245;', '&otilde;'], # latin small letter o with tilde
+                u'ö' : ['&#246;', '&ouml;'], # latin small letter o with diaeresis
+                u'÷' : ['&#247;', '&divide;'], # division sign
+                u'ø' : ['&#248;', '&oslash;'], # latin small letter o with slash
+                u'ù' : ['&#249;', '&ugrave;'], # latin small letter u with grave
+                u'ú' : ['&#250;', '&uacute;'], # latin small letter u with acute
+                u'û' : ['&#251;', '&ucirc;'], # latin small letter u with circumflex
+                u'ü' : ['&#252;', '&uuml;'], # latin small letter u with diaeresis
+                u'ý' : ['&#253;', '&yacute;'], # latin small letter y with acute
+                u'þ' : ['&#254;', '&thorn;'], # latin small letter thorn
+                u'ÿ' : ['&#255;', '&yuml;'], # latin small letter y with diaeresis
+                # More
+                u' ' : ['&#160;'],
+               }
+
--- a/src/calibre/ebooks/lit/input.py
+++ b/src/calibre/ebooks/lit/input.py
@ -0,0 +1,24 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.customize.conversion import InputFormatPlugin
+
+class LITInput(InputFormatPlugin):
+    # Input plugin declaring the 'lit' file type; convert() hands the work to create_oebbook with LitReader.
+    name        = 'LIT Input'
+    author      = 'Marshall T. Vandegrift'
+    description = 'Convert LIT files to HTML'
+    file_types  = set(['lit'])
+
+    def convert(self, stream, options, file_ext, log,
+                accelerators):  # file_ext and accelerators are not referenced in this body
+        from calibre.ebooks.lit.reader import LitReader  # imported when convert() runs, not at module import
+        from calibre.ebooks.conversion.plumber import create_oebbook
+        return create_oebbook(log, stream, options, self, reader=LitReader)  # result is returned as-is
+
+
--- a/src/calibre/ebooks/lit/output.py
+++ b/src/calibre/ebooks/lit/output.py
@ -0,0 +1,45 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+from calibre.customize.conversion import OutputFormatPlugin, \
+        OptionRecommendation
+
+class LITOutput(OutputFormatPlugin):
+    # Output plugin with file_type 'lit'; convert() splits the book, applies three transforms, then runs LitWriter.
+    name = 'LIT Output'
+    author = 'Marshall T. Vandegrift'
+    file_type = 'lit'
+
+    recommendations = set([
+        ('dont_split_on_page_breaks', False, OptionRecommendation.HIGH),  # presumably (option, value, level) -- confirm against OptionRecommendation
+        ])
+
+    def convert(self, oeb, output_path, input_plugin, opts, log):  # input_plugin is not referenced in this body
+        self.log, self.opts, self.oeb = log, opts, oeb  # kept on the instance; self.log is not read again in this method
+        from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
+        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
+        from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
+        from calibre.ebooks.lit.writer import LitWriter
+        from calibre.ebooks.oeb.transforms.split import Split
+        split = Split(not self.opts.dont_split_on_page_breaks,  # first argument is the negated option value
+                max_flow_size=0
+                )
+        split(self.oeb, self.opts)  # self.oeb / self.opts are the same objects as oeb / opts used below
+
+        # After splitting, the steps run in this order: HTMLTOCAdder, CaseMangler, SVGRasterizer, LitWriter.
+        tocadder = HTMLTOCAdder()
+        tocadder(oeb, opts)
+        mangler = CaseMangler()
+        mangler(oeb, opts)
+        rasterizer = SVGRasterizer()
+        rasterizer(oeb, opts)
+        lit = LitWriter()
+        lit(oeb, output_path)  # the writer is called with output_path rather than opts
+
+
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@ -7,21 +7,24 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
    'and Marshall T. Vandegrift <llasram@gmail.com>'

-import sys, struct, cStringIO, os
+import struct, os
 import functools
 import re
 from urlparse import urldefrag
+from cStringIO import StringIO
 from urllib import unquote as urlunquote
-from lxml import etree
 from calibre.ebooks.lit import LitError
 from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
 import calibre.ebooks.lit.mssha1 as mssha1
 from calibre.ebooks.oeb.base import urlnormalize
+from calibre.ebooks.oeb.reader import OEBReader
 from calibre.ebooks import DRMError
 from calibre import plugins
 lzx, lxzerror = plugins['lzx']
 msdes, msdeserror = plugins['msdes']

+__all__ = ["LitReader"]
+
 XML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
 """
 OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
@ -109,6 +112,9 @@ def consume_sized_utf8_string(bytes, zpad=False):
        pos += 1
    return u''.join(result), bytes[pos:]

+def encode(string):  # coerce to unicode, then encode to ASCII; non-ASCII characters become XML numeric character references
+    return unicode(string).encode('ascii', 'xmlcharrefreplace')
+
 class UnBinary(object):
    AMPERSAND_RE = re.compile(
        r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)')
@ -120,14 +126,14 @@ class UnBinary(object):
    def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS):
        self.manifest = manifest
        self.tag_map, self.attr_map, self.tag_to_attr_map = map
+        self.is_html = map is HTML_MAP  # identity check against the module-level HTML_MAP
        self.tag_atoms, self.attr_atoms = atoms
-        self.opf = map is OPF_MAP
-        self.bin = bin
        self.dir = os.path.dirname(path)
-        self.buf = cStringIO.StringIO()
-        self.binary_to_text()
-        self.raw = self.buf.getvalue().lstrip().decode('utf-8')
+        buf = StringIO()  # local output buffer; neither bin nor buf is stored on the instance any more
+        self.binary_to_text(bin, buf)  # input and output are passed explicitly
+        self.raw = buf.getvalue().lstrip()  # kept as a byte string; decoding now happens in __unicode__
        self.escape_reserved()
+        self._tree = None  # initialised to None here; not assigned again anywhere in the visible hunks

    def escape_reserved(self):
        raw = self.raw
@ -154,18 +160,20 @@ class UnBinary(object):
        return '/'.join(relpath)

    def __unicode__(self):
+        return self.raw.decode('utf-8')  # self.raw is a byte string; decode it to unicode on demand
+
+    def __str__(self):  # byte-string form: returns self.raw without decoding
        return self.raw

-    def binary_to_text(self, base=0, depth=0):
+    def binary_to_text(self, bin, buf, index=0, depth=0):
        tag_name = current_map = None
        dynamic_tag = errors = 0
        in_censorship = is_goingdown = False
        state = 'text'
-        index = base
        flags = 0

-        while index < len(self.bin):
-            c, index = read_utf8_char(self.bin, index)
+        while index < len(bin):
+            c, index = read_utf8_char(bin, index)
            oc = ord(c)

            if state == 'text':
@ -178,7 +186,7 @@ class UnBinary(object):
                    c = '>>'
                elif c == '<':
                    c = '<<'
-                self.buf.write(c.encode('ascii', 'xmlcharrefreplace'))
+                buf.write(encode(c))

            elif state == 'get flags':
                if oc == 0:
@ -191,7 +199,7 @@ class UnBinary(object):
                state = 'text' if oc == 0 else 'get attr'
                if flags & FLAG_OPENING:
                    tag = oc
-                    self.buf.write('<')
+                    buf.write('<')
                    if not (flags & FLAG_CLOSING):
                        is_goingdown = True
                    if tag == 0x8000:
@ -199,7 +207,8 @@ class UnBinary(object):
                        continue
                    if flags & FLAG_ATOM:
                        if not self.tag_atoms or tag not in self.tag_atoms:
-                            raise LitError("atom tag %d not in atom tag list" % tag)
+                            raise LitError(
+                                "atom tag %d not in atom tag list" % tag)
                        tag_name = self.tag_atoms[tag]
                        current_map = self.attr_atoms
                    elif tag < len(self.tag_map):
@ -211,7 +220,7 @@ class UnBinary(object):
                        tag_name = '?'+unichr(tag)+'?'
                        current_map = self.tag_to_attr_map[tag]
                        print 'WARNING: tag %s unknown' % unichr(tag)
-                    self.buf.write(unicode(tag_name).encode('utf-8'))
+                    buf.write(encode(tag_name))
                elif flags & FLAG_CLOSING:
                    if depth == 0:
                        raise LitError('Extra closing tag')
@ -223,15 +232,14 @@ class UnBinary(object):
                    if not is_goingdown:
                        tag_name = None
                        dynamic_tag = 0
-                        self.buf.write(' />')
+                        buf.write(' />')
                    else:
-                        self.buf.write('>')
-                        index = self.binary_to_text(base=index, depth=depth+1)
+                        buf.write('>')
+                        index = self.binary_to_text(bin, buf, index, depth+1)
                        is_goingdown = False
                        if not tag_name:
                            raise LitError('Tag ends before it begins.')
-                        self.buf.write(u''.join(
-                                ('</', tag_name, '>')).encode('utf-8'))
+                        buf.write(encode(u''.join(('</', tag_name, '>'))))
                        dynamic_tag = 0
                        tag_name = None
                    state = 'text'
@ -251,7 +259,7 @@ class UnBinary(object):
                        in_censorship = True
                        state = 'get value length'
                        continue
-                    self.buf.write(' ' + unicode(attr).encode('utf-8') + '=')
+                    buf.write(' ' + encode(attr) + '=')
                    if attr in ['href', 'src']:
                        state = 'get href length'
                    else:
@ -259,24 +267,24 @@ class UnBinary(object):

            elif state == 'get value length':
                if not in_censorship:
-                    self.buf.write('"')
+                    buf.write('"')
                count = oc - 1
                if count == 0:
                    if not in_censorship:
-                        self.buf.write('"')
+                        buf.write('"')
                    in_censorship = False
                    state = 'get attr'
                    continue
                state = 'get value'
                if oc == 0xffff:
                    continue
-                if count < 0 or count > (len(self.bin) - index):
+                if count < 0 or count > (len(bin) - index):
                    raise LitError('Invalid character count %d' % count)

            elif state == 'get value':
                if count == 0xfffe:
                    if not in_censorship:
-                        self.buf.write('%s"' % (oc - 1))
+                        buf.write('%s"' % (oc - 1))
                    in_censorship = False
                    state = 'get attr'
                elif count > 0:
@ -285,17 +293,17 @@ class UnBinary(object):
                            c = '&quot;'
                        elif c == '<':
                            c = '&lt;'
-                        self.buf.write(c.encode('ascii', 'xmlcharrefreplace'))
+                        buf.write(c.encode('ascii', 'xmlcharrefreplace'))
                    count -= 1
                if count == 0:
                    if not in_censorship:
-                        self.buf.write('"')
+                        buf.write('"')
                    in_censorship = False
                    state = 'get attr'

            elif state == 'get custom length':
                count = oc - 1
-                if count <= 0 or count > len(self.bin)-index:
+                if count <= 0 or count > len(bin)-index:
                    raise LitError('Invalid character count %d' % count)
                dynamic_tag += 1
                state = 'get custom'
@ -305,26 +313,26 @@ class UnBinary(object):
                tag_name += c
                count -= 1
                if count == 0:
-                    self.buf.write(unicode(tag_name).encode('utf-8'))
+                    buf.write(encode(tag_name))
                    state = 'get attr'

            elif state == 'get attr length':
                count = oc - 1
-                if count <= 0 or count > (len(self.bin) - index):
+                if count <= 0 or count > (len(bin) - index):
                    raise LitError('Invalid character count %d' % count)
-                self.buf.write(' ')
+                buf.write(' ')
                state = 'get custom attr'

            elif state == 'get custom attr':
-                self.buf.write(unicode(c).encode('utf-8'))
+                buf.write(encode(c))
                count -= 1
                if count == 0:
-                    self.buf.write('=')
+                    buf.write('=')
                    state = 'get value length'

            elif state == 'get href length':
                count = oc - 1
-                if count <= 0 or count > (len(self.bin) - index):
+                if count <= 0 or count > (len(bin) - index):
                    raise LitError('Invalid character count %d' % count)
                href = ''
                state = 'get href'
@ -338,10 +346,11 @@ class UnBinary(object):
                    if frag:
                        path = '#'.join((path, frag))
                    path = urlnormalize(path)
-                    self.buf.write((u'"%s"' % path).encode('utf-8'))
+                    buf.write(encode(u'"%s"' % path))
                    state = 'get attr'
        return index

+
 class DirectoryEntry(object):
    def __init__(self, name, section, offset, size):
        self.name = name
@ -356,6 +365,7 @@ class DirectoryEntry(object):
    def __str__(self):
        return repr(self)

+
 class ManifestItem(object):
    def __init__(self, original, internal, mime_type, offset, root, state):
        self.original = original
@ -383,65 +393,87 @@ class ManifestItem(object):
            % (self.internal, self.path, self.mime_type, self.offset,
               self.root, self.state)

+
 def preserve(function):
    def wrapper(self, *args, **kwargs):
-        opos = self._stream.tell()
+        opos = self.stream.tell()  # remember the stream position before calling the wrapped function
        try:
            return function(self, *args, **kwargs)
        finally:
-            self._stream.seek(opos)
+            self.stream.seek(opos)  # restored in finally, so also when the wrapped function raises
    functools.update_wrapper(wrapper, function)
    return wrapper

-class LitReader(object):
+class LitFile(object):
    PIECE_SIZE = 16
-    XML_PARSER = etree.XMLParser(
-        recover=True, resolve_entities=False)
+
+    def __init__(self, filename_or_stream):  # accepts a path, or any object that has a 'read' attribute
+        if hasattr(filename_or_stream, 'read'):
+            self.stream = filename_or_stream
+        else:
+            self.stream = open(filename_or_stream, 'rb')  # NOTE(review): no matching close() is visible in this hunk
+        try:
+            self.opf_path = os.path.splitext(
+                os.path.basename(self.stream.name))[0] + '.opf'  # stream's base name with its extension swapped for .opf
+        except AttributeError:  # raised when the stream object has no .name
+            self.opf_path = 'content.opf'
+        if self.magic != 'ITOLITLS':
+            raise LitError('Not a valid LIT file')
+        if self.version != 1:
+            raise LitError('Unknown LIT version %d' % (self.version,))
+        self.read_secondary_header()
+        self.read_header_pieces()  # calls read_directory, which creates self.entries
+        self.read_section_names()  # looks names up in self.entries, so it must follow read_header_pieces
+        self.read_manifest()  # seeds self.paths with the self.opf_path set above
+        self.read_drm()
+
+    def warn(self, msg):  # emits the message with a print statement, prefixed "WARNING: "; no logger is involved
+        print "WARNING: %s" % (msg,)

    def magic():
        @preserve
        def fget(self):
-            self._stream.seek(0)
-            return self._stream.read(8)
+            self.stream.seek(0)
+            return self.stream.read(8)  # 8 raw bytes at offset 0; __init__ compares them to 'ITOLITLS'
        return property(fget=fget)
    magic = magic()

    def version():
        def fget(self):
-            self._stream.seek(8)
-            return u32(self._stream.read(4))
+            self.stream.seek(8)  # NOTE(review): no @preserve here, unlike the sibling header properties
+            return u32(self.stream.read(4))  # after a full 4-byte read the stream is left at offset 12
        return property(fget=fget)
    version = version()

    def hdr_len():
        @preserve
        def fget(self):
-            self._stream.seek(12)
-            return int32(self._stream.read(4))
+            self.stream.seek(12)
+            return int32(self.stream.read(4))  # 4 bytes at offset 12, decoded by int32
        return property(fget=fget)
    hdr_len = hdr_len()

    def num_pieces():
        @preserve
        def fget(self):
-            self._stream.seek(16)
-            return int32(self._stream.read(4))
+            self.stream.seek(16)
+            return int32(self.stream.read(4))  # 4 bytes at offset 16, decoded by int32
        return property(fget=fget)
    num_pieces = num_pieces()

    def sec_hdr_len():
        @preserve
        def fget(self):
-            self._stream.seek(20)
-            return int32(self._stream.read(4))
+            self.stream.seek(20)
+            return int32(self.stream.read(4))  # 4 bytes at offset 20, decoded by int32
        return property(fget=fget)
    sec_hdr_len = sec_hdr_len()

    def guid():
        @preserve
        def fget(self):
-            self._stream.seek(24)
-            return self._stream.read(16)
+            self.stream.seek(24)
+            return self.stream.read(16)  # 16 raw bytes at offset 24, returned undecoded
        return property(fget=fget)
    guid = guid()

@ -451,44 +483,27 @@ class LitReader(object):
            size = self.hdr_len \
                + (self.num_pieces * self.PIECE_SIZE) \
                + self.sec_hdr_len
-            self._stream.seek(0)
-            return self._stream.read(size)
+            self.stream.seek(0)
+            return self.stream.read(size)
        return property(fget=fget)
    header = header()

-    def __init__(self, filename_or_stream):
-        if hasattr(filename_or_stream, 'read'):
-            self._stream = filename_or_stream
-        else:
-            self._stream = open(filename_or_stream, 'rb')
-        if self.magic != 'ITOLITLS':
-            raise LitError('Not a valid LIT file')
-        if self.version != 1:
-            raise LitError('Unknown LIT version %d' % (self.version,))
-        self.entries = {}
-        self._read_secondary_header()
-        self._read_header_pieces()
-        self._read_section_names()
-        self._read_manifest()
-        self._read_meta()
-        self._read_drm()
-
    @preserve
    def __len__(self):
-        self._stream.seek(0, 2)
-        return self._stream.tell()
+        self.stream.seek(0, 2)  # whence=2: position at the end of the stream
+        return self.stream.tell()  # the end offset is the length; @preserve then restores the old position

    @preserve
-    def _read_raw(self, offset, size):
-        self._stream.seek(offset)
-        return self._stream.read(size)
+    def read_raw(self, offset, size):  # read `size` bytes at absolute `offset`; @preserve restores the stream position
+        self.stream.seek(offset)
+        return self.stream.read(size)

-    def _read_content(self, offset, size):
-        return self._read_raw(self.content_offset + offset, size)
+    def read_content(self, offset, size):  # like read_raw, with `offset` taken relative to self.content_offset
+        return self.read_raw(self.content_offset + offset, size)

-    def _read_secondary_header(self):
+    def read_secondary_header(self):
        offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
-        bytes = self._read_raw(offset, self.sec_hdr_len)
+        bytes = self.read_raw(offset, self.sec_hdr_len)
        offset = int32(bytes[4:])
        while offset < len(bytes):
            blocktype = bytes[offset:offset+4]
@ -516,21 +531,21 @@ class LitReader(object):
        if not hasattr(self, 'content_offset'):
            raise LitError('Could not figure out the content offset')

-    def _read_header_pieces(self):
+    def read_header_pieces(self):
        src = self.header[self.hdr_len:]
        for i in xrange(self.num_pieces):
            piece = src[i * self.PIECE_SIZE:(i + 1) * self.PIECE_SIZE]
            if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
                raise LitError('Piece %s has 64bit value' % repr(piece))
            offset, size = u32(piece), int32(piece[8:])
-            piece = self._read_raw(offset, size)
+            piece = self.read_raw(offset, size)
            if i == 0:
                continue # Dont need this piece
            elif i == 1:
                if u32(piece[8:])  != self.entry_chunklen or \
                   u32(piece[12:]) != self.entry_unknown:
                    raise LitError('Secondary header does not match piece')
-                self._read_directory(piece)
+                self.read_directory(piece)
            elif i == 2:
                if u32(piece[8:])  != self.count_chunklen or \
                   u32(piece[12:]) != self.count_unknown:
@ -541,12 +556,13 @@ class LitReader(object):
            elif i == 4:
                self.piece4_guid = piece

-    def _read_directory(self, piece):
+    def read_directory(self, piece):
        if not piece.startswith('IFCM'):
            raise LitError('Header piece #1 is not main directory.')
        chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28])
        if (32 + (num_chunks * chunk_size)) != len(piece):
-            raise LitError('IFCM HEADER has incorrect length')
+            raise LitError('IFCM header has incorrect length')
+        self.entries = {}
        for i in xrange(num_chunks):
            offset = 32 + (i * chunk_size)
            chunk = piece[offset:offset + chunk_size]
@ -580,17 +596,17 @@ class LitReader(object):
                entry = DirectoryEntry(name, section, offset, size)
                self.entries[name] = entry

-    def _read_section_names(self):
+    def read_section_names(self):
        if '::DataSpace/NameList' not in self.entries:
            raise LitError('Lit file does not have a valid NameList')
        raw = self.get_file('::DataSpace/NameList')
        if len(raw) < 4:
            raise LitError('Invalid Namelist section')
        pos = 4
-        self.num_sections = u16(raw[2:pos])
-        self.section_names = [""]*self.num_sections
-        self.section_data = [None]*self.num_sections
-        for section in xrange(self.num_sections):
+        num_sections = u16(raw[2:pos])
+        self.section_names = [""] * num_sections
+        self.section_data = [None] * num_sections
+        for section in xrange(num_sections):
            size = u16(raw[pos:pos+2])
            pos += 2
            size = size*2 + 2
@ -600,11 +616,12 @@ class LitReader(object):
                raw[pos:pos+size].decode('utf-16-le').rstrip('\000')
            pos += size

-    def _read_manifest(self):
+    def read_manifest(self):
        if '/manifest' not in self.entries:
            raise LitError('Lit file does not have a valid manifest')
        raw = self.get_file('/manifest')
        self.manifest = {}
+        self.paths = {self.opf_path: None}
        while raw:
            slen, raw = ord(raw[0]), raw[1:]
            if slen == 0: break
@ -645,28 +662,9 @@ class LitReader(object):
        for item in mlist:
            if item.path[0] == '/':
                item.path = os.path.basename(item.path)
+            self.paths[item.path] = item

-    def _pretty_print(self, xml):
-        f = cStringIO.StringIO(xml.encode('utf-8'))
-        doc = etree.parse(f, parser=self.XML_PARSER)
-        pretty = etree.tostring(doc, encoding='ascii', pretty_print=True)
-        return XML_DECL + unicode(pretty)
-                
-    def _read_meta(self):
-        path = 'content.opf'
-        raw = self.get_file('/meta')
-        xml = OPF_DECL
-        try:
-            xml += unicode(UnBinary(raw, path, self.manifest, OPF_MAP))
-        except LitError:
-            if 'PENGUIN group' not in raw: raise
-            print "WARNING: attempting PENGUIN malformed OPF fix"
-            raw = raw.replace(
-                'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
-            xml += unicode(UnBinary(raw, path, self.manifest, OPF_MAP))
-        self.meta = xml
-
-    def _read_drm(self):
+    def read_drm(self):
        self.drmlevel = 0
        if '/DRMStorage/Licenses/EUL' in self.entries:
            self.drmlevel = 5
@ -677,7 +675,7 @@ class LitReader(object):
        else:
            return
        if self.drmlevel < 5:
-            msdes.deskey(self._calculate_deskey(), msdes.DE1)
+            msdes.deskey(self.calculate_deskey(), msdes.DE1)
            bookkey = msdes.des(self.get_file('/DRMStorage/DRMSealed'))
            if bookkey[0] != '\000':
                raise LitError('Unable to decrypt title key!')
@ -685,7 +683,7 @@ class LitReader(object):
        else:
            raise DRMError("Cannot access DRM-protected book")

-    def _calculate_deskey(self):
+    def calculate_deskey(self):
        hashfiles = ['/meta', '/DRMStorage/DRMSource']
        if self.drmlevel == 3:
            hashfiles.append('/DRMStorage/DRMBookplate')
@ -709,18 +707,18 @@ class LitReader(object):
    def get_file(self, name):
        entry = self.entries[name]
        if entry.section == 0:
-            return self._read_content(entry.offset, entry.size)
+            return self.read_content(entry.offset, entry.size)  # section 0 is read straight from the stream; other sections are sliced from get_section()
        section = self.get_section(entry.section)
        return section[entry.offset:entry.offset+entry.size]

    def get_section(self, section):
        data = self.section_data[section]
        if not data:
-            data = self._get_section(section)
+            data = self.get_section_uncached(section)  # computed when the cached slot is empty/falsy, then stored in self.section_data
            self.section_data[section] = data
        return data

-    def _get_section(self, section):
+    def get_section_uncached(self, section):
        name = self.section_names[section]
        path = '::DataSpace/Storage/' + name
        transform = self.get_file(path + '/Transform/List')
@ -732,29 +730,29 @@ class LitReader(object):
                raise LitError("ControlData is too short")
            guid = msguid(transform)
            if guid == DESENCRYPT_GUID:
-                content = self._decrypt(content)
+                content = self.decrypt(content)
                control = control[csize:]
            elif guid == LZXCOMPRESS_GUID:
                reset_table = self.get_file(
                    '/'.join(('::DataSpace/Storage', name, 'Transform',
                              LZXCOMPRESS_GUID, 'InstanceData/ResetTable')))
-                content = self._decompress(content, control, reset_table)
+                content = self.decompress(content, control, reset_table)
                control = control[csize:]
            else:
                raise LitError("Unrecognized transform: %s." % repr(guid))
            transform = transform[16:]
        return content

-    def _decrypt(self, content):
+    def decrypt(self, content):
        length = len(content)
        extra = length & 0x7
        if extra > 0:
-            self._warn("content length not a multiple of block size")
+            self.warn("content length not a multiple of block size")
            content += "\0" * (8 - extra)
        msdes.deskey(self.bookkey, msdes.DE1)
        return msdes.des(content)

-    def _decompress(self, content, control, reset_table):
+    def decompress(self, content, control, reset_table):
        if len(control) < 32 or control[CONTROL_TAG:CONTROL_TAG+4] != "LZXC":
            raise LitError("Invalid ControlData tag value")
        if len(reset_table) < (RESET_INTERVAL + 8):
@ -795,7 +793,7 @@ class LitReader(object):
                        result.append(
                            lzx.decompress(content[base:size], window_bytes))
                    except lzx.LZXError:
-                        self._warn("LZX decompression error; skipping chunk")
+                        self.warn("LZX decompression error; skipping chunk")
                    bytes_remaining -= window_bytes
                    base = size
            accum += int32(reset_table[RESET_INTERVAL:])
@ -805,7 +803,7 @@ class LitReader(object):
            try:
                result.append(lzx.decompress(content[base:], bytes_remaining))
            except lzx.LZXError:
-                self._warn("LZX decompression error; skipping chunk")
+                self.warn("LZX decompression error; skipping chunk")
            bytes_remaining = 0
        if bytes_remaining > 0:
            raise LitError("Failed to completely decompress section")
@ -842,75 +840,56 @@ class LitReader(object):
            self._warn("damaged or invalid atoms attributes table")
        return (tags, attrs)

-    def get_entry_content(self, entry, pretty_print=False):
-        if 'spine' in entry.state:
-            name = '/'.join(('/data', entry.internal, 'content'))
-            path = entry.path
-            raw = self.get_file(name)
-            decl, map = (OPF_DECL, OPF_MAP) \
-                if name == '/meta' else (HTML_DECL, HTML_MAP)
-            atoms = self.get_atoms(entry)
-            content = decl + unicode(UnBinary(raw, path, self.manifest, map, atoms))
-            if pretty_print:
-                content = self._pretty_print(content)
-            content = content.encode('utf-8')
+
+class LitContainer(object):
+    """Simple Container-interface, read-only accessor for LIT files."""
+
+    def __init__(self, filename_or_stream):
+        self._litfile = LitFile(filename_or_stream)
+
+    def namelist(self):
+        return self._litfile.paths.keys()
+
+    def exists(self, name):
+        return urlunquote(name) in self._litfile.paths
+
+    def read(self, name):
+        entry = self._litfile.paths[urlunquote(name)] if name else None
+        if entry is None:
+            content = OPF_DECL + self._read_meta()
+        elif 'spine' in entry.state:
+            internal = '/'.join(('/data', entry.internal, 'content'))
+            raw = self._litfile.get_file(internal)
+            manifest = self._litfile.manifest
+            atoms = self._litfile.get_atoms(entry)
+            unbin = UnBinary(raw, name, manifest, HTML_MAP, atoms)
+            content = HTML_DECL + str(unbin)
        else:
-            name = '/'.join(('/data', entry.internal))
-            content = self.get_file(name)
+            internal = '/'.join(('/data', entry.internal))
+            content = self._litfile.get_file(internal)
        return content

-    def extract_content(self, output_dir=os.getcwdu(), pretty_print=False):
-        output_dir = os.path.abspath(output_dir)
+    def _read_meta(self):
+        path = 'content.opf'
+        raw = self._litfile.get_file('/meta')
        try:
-            opf_path = os.path.splitext(
-                os.path.basename(self._stream.name))[0] + '.opf'
-        except AttributeError:
-            opf_path = 'content.opf'
-        opf_path = os.path.join(output_dir, opf_path)
-        self._ensure_dir(opf_path)
-        with open(opf_path, 'wb') as f:
-            xml = self.meta
-            if pretty_print:
-                xml = self._pretty_print(xml)
-            f.write(xml.encode('utf-8'))
-        for entry in self.manifest.values():
-            path = os.path.join(output_dir, entry.path)
-            self._ensure_dir(path)
-            with open(path, 'wb') as f:
-                f.write(self.get_entry_content(entry, pretty_print))
+            unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
+        except LitError:
+            if 'PENGUIN group' not in raw: raise
+            print "WARNING: attempting PENGUIN malformed OPF fix"
+            raw = raw.replace(
+                'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
+            unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
+        return str(unbin)

-    def _ensure_dir(self, path):
-        dir = os.path.dirname(path)
-        if not os.path.isdir(dir):
-            os.makedirs(dir)
+    def get_metadata(self):
+        return self._read_meta()

-    def _warn(self, msg):
-        print "WARNING: %s" % (msg,)

-def option_parser():
-    from calibre.utils.config import OptionParser
-    parser = OptionParser(usage=_('%prog [options] LITFILE'))
-    parser.add_option(
-        '-o', '--output-dir', default='.', 
-        help=_('Output directory. Defaults to current directory.'))
-    parser.add_option(
-        '-p', '--pretty-print', default=False, action='store_true',
-        help=_('Legibly format extracted markup. May modify meaningful whitespace.'))
-    parser.add_option(
-        '--verbose', default=False, action='store_true',
-        help=_('Useful for debugging.'))
-    return parser
+class LitReader(OEBReader):
+    Container = LitContainer
+    DEFAULT_PROFILE = 'MSReader'

-def main(args=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        return 1
-    lr = LitReader(args[1])
-    lr.extract_content(opts.output_dir, opts.pretty_print)
-    print _('OEB ebook created in'), opts.output_dir
-    return 0

 try:
    import psyco
@ -918,6 +897,3 @@ try:
    psyco.bind(UnBinary.binary_to_text)
 except ImportError:
    pass
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/lit/writer.py
+++ b/src/calibre/ebooks/lit/writer.py
@ -6,8 +6,6 @@ from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'

-import sys
-import os
 from cStringIO import StringIO
 from struct import pack
 from itertools import izip, count, chain
@ -17,7 +15,6 @@ import re
 import copy
 import uuid
 import functools
-import logging
 from urlparse import urldefrag
 from urllib import unquote as urlunquote
 from lxml import etree
@ -25,22 +22,14 @@ from calibre.ebooks.lit.reader import DirectoryEntry
 import calibre.ebooks.lit.maps as maps
 from calibre.ebooks.oeb.base import OEB_DOCS, XHTML_MIME, OEB_STYLES, \
    CSS_MIME, OPF_MIME, XML_NS, XML
-from calibre.ebooks.oeb.base import namespace, barename, prefixname, \
-    urlnormalize, xpath
-from calibre.ebooks.oeb.base import Logger, OEBBook
-from calibre.ebooks.oeb.profile import Context
+from calibre.ebooks.oeb.base import prefixname, \
+    urlnormalize
 from calibre.ebooks.oeb.stylizer import Stylizer
-from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
-from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
-from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
-from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
-from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
 from calibre.ebooks.lit.lzx import Compressor
 import calibre
 from calibre import plugins
 msdes, msdeserror = plugins['msdes']
 import calibre.ebooks.lit.mssha1 as mssha1
-from calibre.customize.ui import run_plugins_on_postprocess

 __all__ = ['LitWriter']

@ -277,7 +266,7 @@ class ReBinary(object):

    def build_ahc(self):
        if len(self.anchors) > 6:
-            self.logger.log_warn("More than six anchors in file %r. " \
+            self.logger.warn("More than six anchors in file %r. " \
                "Some links may not work properly." % self.item.href)
        data = StringIO()
        data.write(unichr(len(self.anchors)).encode('utf-8'))
@ -308,18 +297,18 @@ class LitWriter(object):

    def _litize_oeb(self):
        oeb = self._oeb
-        oeb.metadata.add('calibre-oeb2lit-version', calibre.__version__)
+        oeb.metadata.add('calibre-version', calibre.__version__)
        cover = None
        if oeb.metadata.cover:
            id = str(oeb.metadata.cover[0])
-            cover = oeb.manifest[id]
+            cover = oeb.manifest.ids[id]
            for type, title in ALL_MS_COVER_TYPES:
                if type not in oeb.guide:
                    oeb.guide.add(type, title, cover.href)
        else:
            self._logger.warn('No suitable cover image found.')

-    def dump(self, oeb, path):
+    def __call__(self, oeb, path):
        if hasattr(path, 'write'):
            return self._dump_stream(oeb, path)
        with open(path, 'w+b') as stream:
@ -468,7 +457,7 @@ class LitWriter(object):
        self._add_folder('/data')
        for item in self._oeb.manifest.values():
            if item.media_type not in LIT_MIMES:
-                self._logger.log_warn("File %r of unknown media-type %r " \
+                self._logger.warn("File %r of unknown media-type %r " \
                    "excluded from output." % (item.href, item.media_type))
                continue
            name = '/data/' + item.id
@ -485,6 +474,8 @@ class LitWriter(object):
                secnum = 1
            elif isinstance(data, unicode):
                data = data.encode('utf-8')
+            elif hasattr(data, 'cssText'):
+                data = str(data)
            self._add_file(name, data, secnum)
            item.size = len(data)

@ -720,53 +711,3 @@ class LitWriter(object):
        return dcounts, dchunks, ichunk


-def option_parser():
-    from calibre.utils.config import OptionParser
-    parser = OptionParser(usage=_('%prog [options] OPFFILE'))
-    parser.add_option(
-        '-o', '--output', default=None, 
-        help=_('Output file. Default is derived from input filename.'))
-    parser.add_option(
-        '-v', '--verbose', default=0, action='count',
-        help=_('Useful for debugging.'))
-    return parser
-
-def oeb2lit(opts, inpath):
-    logger = Logger(logging.getLogger('oeb2lit'))
-    logger.setup_cli_handler(opts.verbose)
-    outpath = opts.output
-    if outpath is None:
-        outpath = os.path.basename(inpath)
-        outpath = os.path.splitext(outpath)[0] + '.lit'
-    outpath = os.path.abspath(outpath)
-    context = Context('Browser', 'MSReader')
-    oeb = OEBBook(inpath, logger=logger)
-    tocadder = HTMLTOCAdder()
-    tocadder.transform(oeb, context)
-    mangler = CaseMangler()
-    mangler.transform(oeb, context)
-    fbase = context.dest.fbase
-    flattener = CSSFlattener(fbase=fbase, unfloat=True, untable=True)
-    flattener.transform(oeb, context)
-    rasterizer = SVGRasterizer()
-    rasterizer.transform(oeb, context)
-    trimmer = ManifestTrimmer()
-    trimmer.transform(oeb, context)
-    lit = LitWriter()
-    lit.dump(oeb, outpath)
-    run_plugins_on_postprocess(outpath, 'lit')
-    logger.info(_('Output written to ') + outpath)
-    
-
-def main(argv=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(argv[1:])
-    if len(args) != 1:
-        parser.print_help()
-        return 1
-    inpath = args[0]
-    oeb2lit(opts, inpath)
-    return 0
-    
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/lrf/init.py
+++ b/src/calibre/ebooks/lrf/init.py
@ -4,40 +4,16 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 This package contains logic to read and write LRF files.
 The LRF file format is documented at U{http://www.sven.de/librie/Librie/LrfFormat}.
 """
-import sys, os
-from optparse import OptionValueError
-from htmlentitydefs import name2codepoint
 from uuid import uuid4

 from calibre.ebooks.lrf.pylrs.pylrs import Book as _Book
-from calibre.ebooks.lrf.pylrs.pylrs import TextBlock, Header, PutObj, \
-                                             Paragraph, TextStyle, BlockStyle
+from calibre.ebooks.lrf.pylrs.pylrs import TextBlock, Header, \
+                                             TextStyle, BlockStyle
 from calibre.ebooks.lrf.fonts import FONT_FILE_MAP
 from calibre.ebooks import ConversionError
-from calibre import __appname__, __version__, __author__, iswindows
-from calibre.utils.config import OptionParser

 __docformat__ = "epytext"

-preferred_source_formats = [
-                            'LIT',
-                            'MOBI',
-                            'EPUB',
-                            'ODT',
-                            'HTML',
-                            'HTM',
-                            'XHTM',
-                            'XHTML',
-                            'PRC',
-                            'AZW',
-                            'FB2',
-                            'RTF',
-                            'PDF',
-                            'TXT',
-                            'ZIP',
-                            'RAR'
-                            ]
-
 class LRFParseError(Exception):
    pass

@ -58,172 +34,6 @@ class PRS500_PROFILE(object):

    name = 'prs500'

-profile_map = {
-               PRS500_PROFILE.name : PRS500_PROFILE,
-               }
-    
-def profile_from_string(option, opt_str, value, parser):
-    try:
-        profile = profile_map[value]
-        setattr(parser.values, option.dest, profile)
-    except KeyError:
-        raise OptionValueError('Profile: '+value+' is not implemented. Implemented profiles: %s'%(profile_map.keys()))
-    
-def option_parser(usage, gui_mode=False):
-    parser = OptionParser(usage=usage, gui_mode=gui_mode)
-    metadata = parser.add_option_group('METADATA OPTIONS')
-    metadata.add_option("-t", "--title", action="store", type="string", default=None,\
-                    dest="title", help=_("Set the title. Default: filename."))
-    metadata.add_option("-a", "--author", action="store", type="string", \
-                    dest="author", help=_("Set the author(s). Multiple authors should be set as a comma separated list. Default: %default"), 
-                    default=_('Unknown'))
-    metadata.add_option("--comment", action="store", type="string", \
-                    dest="freetext", help=_("Set the comment."), default=_('Unknown'))
-    metadata.add_option("--category", action="store", type="string", \
-                    dest="category", help=_("Set the category"), default=_('Unknown'))    
-    metadata.add_option('--title-sort', action='store', default='', dest='title_sort',
-                      help=_('Sort key for the title'))
-    metadata.add_option('--author-sort', action='store', default='', dest='author_sort',
-                      help=_('Sort key for the author'))
-    metadata.add_option('--publisher', action='store', default=_('Unknown'), dest='publisher',
-                      help=_('Publisher'))
-    metadata.add_option('--cover', action='store', dest='cover', default=None, \
-                        help=_('Path to file containing image to be used as cover'))
-    metadata.add_option('--use-metadata-cover', action='store_true', default=False, 
-                        help=_('If there is a cover graphic detected in the source file, use that instead of the specified cover.'))
-     
-    parser.add_option('-o', '--output', action='store', default=None, \
-                      help=_('Output file name. Default is derived from input filename'))
-    parser.add_option('--ignore-tables', action='store_true', default=False, dest='ignore_tables',
-                      help=_('Render HTML tables as blocks of text instead of actual tables. This is neccessary if the HTML contains very large or complex tables.'))
-    laf = parser.add_option_group('LOOK AND FEEL')
-    laf.add_option('--base-font-size', action='store', type='float', default=10.,
-                   help=_('''Specify the base font size in pts. All fonts are rescaled accordingly. This option obsoletes the --font-delta option and takes precedence over it. To use --font-delta, set this to 0. Default: %defaultpt'''))
-    laf.add_option('--enable-autorotation', action='store_true', default=False, 
-                   help=_('Enable autorotation of images that are wider than the screen width.'), 
-                   dest='autorotation')
-    laf.add_option('--wordspace', dest='wordspace', default=2.5, type='float',
-                   help=_('Set the space between words in pts. Default is %default'))
-    laf.add_option('--blank-after-para', action='store_true', default=False,
-                   dest='blank_after_para', help=_('Separate paragraphs by blank lines.'))
-    laf.add_option('--header', action='store_true', default=False, dest='header',
-                      help=_('Add a header to all the pages with title and author.'))
-    laf.add_option('--headerformat', default="%t by %a", dest='headerformat', type='string',
-                        help=_('Set the format of the header. %a is replaced by the author and %t by the title. Default is %default'))
-    laf.add_option('--header-separation', default=0, type='int', 
-                   help=_('Add extra spacing below the header. Default is %default px.'))
-    laf.add_option('--override-css', default=None, dest='_override_css', type='string',
-                   help=_('Override the CSS. Can be either a path to a CSS stylesheet or a string. If it is a string it is interpreted as CSS.'))
-    laf.add_option('--use-spine', default=False, dest='use_spine', action='store_true',
-                   help=_('Use the <spine> element from the OPF file to determine the order in which the HTML files are appended to the LRF. The .opf file must be in the same directory as the base HTML file.'))
-    laf.add_option('--minimum-indent', default=0, type='float', 
-                   help=_('Minimum paragraph indent (the indent of the first line of a paragraph) in pts. Default: %default'))
-    laf.add_option('--font-delta', action='store', type='float', default=0., \
-                  help=_("""Increase the font size by 2 * FONT_DELTA pts and """
-                  '''the line spacing by FONT_DELTA pts. FONT_DELTA can be a fraction.'''
-                  """If FONT_DELTA is negative, the font size is decreased."""),
-                  dest='font_delta')
-    laf.add_option('--ignore-colors', action='store_true', default=False, dest='ignore_colors',
-                      help=_('Render all content as black on white instead of the colors specified by the HTML or CSS.'))
-    
-    page = parser.add_option_group('PAGE OPTIONS')
-    profiles = profile_map.keys()
-    page.add_option('-p', '--profile', default=PRS500_PROFILE, dest='profile', type='choice',
-                      choices=profiles, action='callback', callback=profile_from_string,
-                      help=_('''Profile of the target device for which this LRF is '''
-                      '''being generated. The profile determines things like the '''
-                      '''resolution and screen size of the target device. '''
-                      '''Default: %s Supported profiles: ''')%(PRS500_PROFILE.name,)+\
-                      ', '.join(profiles))
-    page.add_option('--left-margin', default=20, dest='left_margin', type='int',
-                    help=_('''Left margin of page. Default is %default px.'''))
-    page.add_option('--right-margin', default=20, dest='right_margin', type='int',
-                    help=_('''Right margin of page. Default is %default px.'''))
-    page.add_option('--top-margin', default=10, dest='top_margin', type='int',
-                    help=_('''Top margin of page. Default is %default px.'''))
-    page.add_option('--bottom-margin', default=0, dest='bottom_margin', type='int',
-                    help=_('''Bottom margin of page. Default is %default px.'''))
-    page.add_option('--render-tables-as-images', default=False, action='store_true',
-                   help=_('Render tables in the HTML as images (useful if the document has large or complex tables)'))
-    page.add_option('--text-size-multiplier-for-rendered-tables', type='float', default=1.0,
-                   help=_('Multiply the size of text in rendered tables by this factor. Default is %default'))
-    
-    link = parser.add_option_group('LINK PROCESSING OPTIONS')
-    link.add_option('--link-levels', action='store', type='int', default=sys.maxint, \
-                      dest='link_levels',
-                      help=_(r'''The maximum number of levels to recursively process '''
-                              '''links. A value of 0 means thats links are not followed. '''
-                              '''A negative value means that <a> tags are ignored.'''))
-    link.add_option('--link-exclude', dest='link_exclude', default='@',
-                      help=_('''A regular expression. <a> tags whose href '''
-                      '''matches will be ignored. Defaults to %default'''))
-    link.add_option('--no-links-in-toc', action='store_true', default=False,
-                      dest='no_links_in_toc',
-                      help=_('''Don't add links to the table of contents.'''))
-    chapter = parser.add_option_group('CHAPTER OPTIONS')
-    chapter.add_option('--disable-chapter-detection', action='store_true', 
-                      default=False, dest='disable_chapter_detection', 
-                      help=_('''Prevent the automatic detection chapters.'''))
-    chapter.add_option('--chapter-regex', dest='chapter_regex', 
-                      default='chapter|book|appendix',
-                      help=_('''The regular expression used to detect chapter titles.'''
-                      ''' It is searched for in heading tags (h1-h6). Defaults to %default'''))
-    chapter.add_option('--chapter-attr', default='$,,$', 
-                       help=_('Detect a chapter beginning at an element having the specified attribute. The format for this option is tagname regexp,attribute name,attribute value regexp. For example to match all heading tags that have the attribute class="chapter" you would use "h\d,class,chapter". You can set the attribute to "none" to match only on tag names. So for example, to match all h2 tags, you would use "h2,none,". Default is %default'''))
-    chapter.add_option('--page-break-before-tag', dest='page_break', default='h[12]',
-                      help=_('''If html2lrf does not find any page breaks in the '''
-                      '''html file and cannot detect chapter headings, it will '''
-                      '''automatically insert page-breaks before the tags whose '''
-                      '''names match this regular expression. Defaults to %default. '''
-                      '''You can disable it by setting the regexp to "$". '''
-                      '''The purpose of this option is to try to ensure that '''
-                      '''there are no really long pages as this degrades the page '''
-                      '''turn performance of the LRF. Thus this option is ignored '''
-                      '''if the current page has only a few elements.'''))
-    chapter.add_option('--force-page-break-before-tag', dest='force_page_break',
-                       default='$', help=_('Force a page break before tags whose names match this regular expression.'))
-    chapter.add_option('--force-page-break-before-attr', dest='force_page_break_attr',
-                       default='$,,$', help=_('Force a page break before an element having the specified attribute. The format for this option is tagname regexp,attribute name,attribute value regexp. For example to match all heading tags that have the attribute class="chapter" you would use "h\d,class,chapter". Default is %default'''))
-    chapter.add_option('--add-chapters-to-toc', action='store_true', 
-                      default=False, dest='add_chapters_to_toc', 
-                      help=_('''Add detected chapters to the table of contents.'''))
-    prepro = parser.add_option_group('PREPROCESSING OPTIONS')
-    prepro.add_option('--baen', action='store_true', default=False, dest='baen',
-                      help=_('''Preprocess Baen HTML files to improve generated LRF.'''))
-    prepro.add_option('--pdftohtml', action='store_true', default=False, dest='pdftohtml',
-                      help=_('''You must add this option if processing files generated by pdftohtml, otherwise conversion will fail.'''))
-    prepro.add_option('--book-designer', action='store_true', default=False, dest='book_designer',
-                      help=_('''Use this option on html0 files from Book Designer.'''))
-    
-    fonts = parser.add_option_group('FONT FAMILIES', 
-    _('''Specify trutype font families for serif, sans-serif and monospace fonts. '''
-    '''These fonts will be embedded in the LRF file. Note that custom fonts lead to '''
-    '''slower page turns. '''
-    '''For example: '''
-    '''--serif-family "Times New Roman"
-    '''))
-    fonts.add_option('--serif-family',  
-                     default=None, dest='serif_family', type='string',
-                     help=_('The serif family of fonts to embed'))
-    fonts.add_option('--sans-family',   
-                     default=None, dest='sans_family', type='string',
-                     help=_('The sans-serif family of fonts to embed'))
-    fonts.add_option('--mono-family',   
-                     default=None, dest='mono_family', type='string',
-                     help=_('The monospace family of fonts to embed'))
-    
-    debug = parser.add_option_group('DEBUG OPTIONS')
-    debug.add_option('--verbose', dest='verbose', action='store_true', default=False,
-                      help=_('''Be verbose while processing'''))
-    debug.add_option('--lrs', action='store_true', dest='lrs', \
-                      help=_('Convert to LRS'), default=False)
-    parser.add_option('--minimize-memory-usage', action='store_true', default=False,
-                      help=_('Minimize memory usage at the cost of longer processing times. Use this option if you are on a memory constrained machine.'))
-    parser.add_option('--encoding', default=None, 
-                      help=_('Specify the character encoding of the source file. If the output LRF file contains strange characters, try changing this option. A common encoding for files from windows computers is cp-1252. Another common choice is utf-8. The default is to try and guess the encoding.'))
-    
-    return parser
-
 def find_custom_fonts(options, logger):
    from calibre.utils.fontconfig import files_for_family
    fonts = {'serif' : None, 'sans' : None, 'mono' : None}
@ -299,4 +109,3 @@ def Book(options, logger, font_delta=0, header=None,
            raise ConversionError, 'Could not find the normal version of the ' + family + ' font'
    return book, fonts

-from calibre import entity_to_unicode
--- a/src/calibre/ebooks/lrf/any/init.py
+++ b/src/calibre/ebooks/lrf/any/init.py
@ -1,2 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
--- a/src/calibre/ebooks/lrf/any/convert_from.py
+++ b/src/calibre/ebooks/lrf/any/convert_from.py
@ -1,199 +0,0 @@
-from __future__ import with_statement
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-'''Convert any ebook file into a LRF file.'''
-
-import sys, os, logging, shutil, tempfile, re
-
-from calibre.ebooks import UnknownFormatError
-from calibre.ebooks.lrf import option_parser as _option_parser
-from calibre import __appname__, setup_cli_handlers, extract
-from calibre.ptempfile import TemporaryDirectory
-from calibre.ebooks.lrf.lit.convert_from  import process_file as lit2lrf
-from calibre.ebooks.lrf.pdf.convert_from  import process_file as pdf2lrf
-from calibre.ebooks.lrf.rtf.convert_from  import process_file as rtf2lrf
-from calibre.ebooks.lrf.txt.convert_from  import process_file as txt2lrf
-from calibre.ebooks.lrf.html.convert_from import process_file as html2lrf
-from calibre.ebooks.lrf.epub.convert_from import process_file as epub2lrf
-from calibre.ebooks.lrf.mobi.convert_from import process_file as mobi2lrf
-from calibre.ebooks.lrf.fb2.convert_from  import process_file as fb22lrf
-
-from calibre.customize.ui import run_plugins_on_postprocess, run_plugins_on_preprocess
-
-def largest_file(files):
-    maxsize, file = 0, None
-    for f in files:
-        size = os.stat(f).st_size
-        if size > maxsize:
-            maxsize = size
-            file = f
-    return file
-
-def find_htmlfile(dir):
-    ext_pat = re.compile(r'\.(x){0,1}htm(l){0,1}', re.IGNORECASE)
-    toc_pat = re.compile(r'toc', re.IGNORECASE)
-    index_pat = re.compile(r'index', re.IGNORECASE)
-    toc_files, index_files, files = [], [], []
-    
-    for root, dirs, _files in os.walk(dir):
-        for f in _files:
-            f = os.path.abspath(os.path.join(root, f))
-            ext = os.path.splitext(f)[1]
-            if ext and ext_pat.match(ext):
-                toc_files.append(f) if toc_pat.search(f) else \
-                index_files.append(f) if index_pat.search(f) else \
-                files.append(f)
-    a = toc_files if toc_files else index_files if index_files else files
-    if a:
-        return largest_file(a)
-
-def number_of_unhidden_files(base, listing):
-    ans = 0
-    for i in listing:
-        i = os.path.join(base, i)
-        if os.path.isdir(i) or os.path.basename(i).startswith('.'):
-            continue
-        ans += 1
-    return ans
-
-def unhidden_directories(base, listing):
-    ans = []
-    for i in listing:
-        if os.path.isdir(os.path.join(base, i)) and not i.startswith('__') and \
-           not i.startswith('.'):
-            ans.append(i)
-    return ans
-
-def traverse_subdirs(tdir):
-    temp = os.listdir(tdir)
-    if number_of_unhidden_files(tdir, temp) == 0:
-        try:
-            cdir = os.path.join(tdir, unhidden_directories(tdir, temp)[0])
-            return traverse_subdirs(cdir)
-        except IndexError:
-            pass
-    return tdir
-
-def handle_archive(path):
-    tdir = tempfile.mkdtemp(prefix=__appname__+'_'+'archive_')
-    extract(path, tdir)
-    files = []
-    cdir = traverse_subdirs(tdir)
-    file = None
-    exts = ['lit', 'rtf', 'fb2','pdf', 'txt', 'epub', 'mobi', 'prc']
-    candidates = map(lambda x:os.path.join(cdir, x), os.listdir(cdir))
-    for ext in exts:
-        for f in candidates:
-            if f.lower().endswith('.'+ext):
-                files.append(f)
-    file = largest_file(files)
-    if not file:
-        file = find_htmlfile(cdir)
-    if isinstance(file, str):
-        file = file.decode(sys.getfilesystemencoding())
-    return tdir, file 
-
-def odt2lrf(path, options, logger):
-    from calibre.ebooks.odt.to_oeb import Extract
-    from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
-    
-    if logger is None:
-        level = logging.DEBUG if options.verbose else logging.INFO
-        logger = logging.getLogger('odt2lrf')
-        setup_cli_handlers(logger, level)
-        
-    with TemporaryDirectory('_odt2lrf') as tdir:
-        opf = Extract()(path, tdir)
-        options.use_spine = True
-        options.encoding = 'utf-8'
-        html_process_file(opf.replace('metadata.opf', 'index.html'), options, logger)
-
-def process_file(path, options, logger=None):
-    path = os.path.abspath(os.path.expanduser(path))
-    path = run_plugins_on_preprocess(path)
-    tdir = None
-    if logger is None:
-        level = logging.DEBUG if options.verbose else logging.INFO
-        logger = logging.getLogger('any2lrf')
-        setup_cli_handlers(logger, level)
-    if not os.access(path, os.R_OK):
-        logger.critical('Cannot read from %s', path)
-        return 1
-    ext = os.path.splitext(path)[1]
-    if not ext or ext == '.':
-        logger.critical('Unknown file type: %s', path)
-        return 1
-    ext = ext[1:].lower()
-    cwd = os.getcwd()
-    if not options.output:
-        fmt = '.lrs' if options.lrs else '.lrf'
-        options.output = os.path.splitext(os.path.basename(path))[0] + fmt
-    options.output = os.path.abspath(os.path.expanduser(options.output))
-    if ext in ['zip', 'rar', 'oebzip']:
-        newpath = None
-        try:
-            tdir, newpath = handle_archive(path)
-        except:
-            logger.exception(' ')
-        if not newpath:
-            raise UnknownFormatError('Could not find ebook in archive')
-        path = newpath
-        logger.info('Found ebook in archive: %s', repr(path))
-    try:
-        ext = os.path.splitext(path)[1][1:].lower()
-        convertor = None
-        if 'htm' in ext:
-            convertor = html2lrf
-        elif 'lit' == ext:
-            convertor = lit2lrf
-        elif 'pdf' == ext:
-            convertor = pdf2lrf
-        elif 'rtf' == ext:
-            convertor = rtf2lrf
-        elif 'txt' == ext:
-            convertor = txt2lrf
-        elif 'epub' == ext:
-            convertor = epub2lrf
-        elif ext in ['mobi', 'prc', 'azw']:
-            convertor = mobi2lrf
-        elif ext == 'fb2':
-            convertor = fb22lrf
-        elif ext == 'odt':
-            convertor = odt2lrf
-        if not convertor:
-            raise UnknownFormatError(_('Converting from %s to LRF is not supported.')%ext)
-        convertor(path, options, logger)
-        
-    finally:
-        os.chdir(cwd)
-        if tdir and os.path.exists(tdir):
-            shutil.rmtree(tdir)
-    return 0
-    
-
-def option_parser(gui_mode=False):
-    return _option_parser(usage=_('''\
-any2lrf [options] myfile
-
-Convert any ebook format into LRF. Supported formats are:
-LIT, RTF, TXT, HTML, EPUB, MOBI, PRC and PDF. any2lrf will also process a RAR or
-ZIP archive, looking for an ebook inside the archive.
-    '''), gui_mode=gui_mode)
-
-
-def main(args=sys.argv, logger=None, gui_mode=False):
-    parser = option_parser(gui_mode) 
-    options, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        print
-        print _('No file to convert specified.')
-        return 1
-    
-    src = args[1]
-    if not isinstance(src, unicode):
-        src = src.decode(sys.getfilesystemencoding())
-    return process_file(src, options, logger)
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/lrf/comic/convert_from.py
+++ b/src/calibre/ebooks/lrf/comic/convert_from.py
@ -1,562 +0,0 @@
-from __future__ import with_statement
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Based on ideas from comiclrf created by FangornUK.
-'''
-
-import os, sys, shutil, traceback, textwrap, fnmatch
-from uuid import uuid4
-
-
-
-
-from calibre import extract, terminal_controller, __appname__, __version__
-from calibre.utils.config import Config, StringConfig
-from calibre.ptempfile import PersistentTemporaryDirectory
-from calibre.parallel import Server, ParallelJob
-from calibre.utils.terminfo import ProgressBar
-from calibre.ebooks.lrf.pylrs.pylrs import Book, BookSetting, ImageStream, ImageBlock
-from calibre.ebooks.metadata import MetaInformation
-from calibre.ebooks.metadata.opf import OPFCreator
-from calibre.ebooks.epub.from_html import config as html2epub_config, convert as html2epub
-from calibre.customize.ui import run_plugins_on_preprocess
-try:
-    from calibre.utils.PythonMagickWand import \
-            NewMagickWand, NewPixelWand, \
-            MagickSetImageBorderColor, \
-            MagickReadImage, MagickRotateImage, \
-            MagickTrimImage, PixelSetColor,\
-            MagickNormalizeImage, MagickGetImageWidth, \
-            MagickGetImageHeight, \
-            MagickResizeImage, MagickSetImageType, \
-            GrayscaleType, CatromFilter,  MagickSetImagePage, \
-            MagickBorderImage, MagickSharpenImage, MagickDespeckleImage, \
-            MagickQuantizeImage, RGBColorspace, \
-            MagickWriteImage, DestroyPixelWand, \
-            DestroyMagickWand, CloneMagickWand, \
-            MagickThumbnailImage, MagickCropImage, ImageMagick
-    _imagemagick_loaded = True
-except:
-    _imagemagick_loaded = False
-
-PROFILES = {
-            # Name : (width, height) in pixels
-            'prs500':(584, 754),
-            # The SONY's LRF renderer (on the PRS500) only uses the first 800x600 block of the image
-            'prs500-landscape': (784, 1012)
-            }
-
-def extract_comic(path_to_comic_file):
-    '''
-    Un-archive the comic file.
-    '''
-    tdir = PersistentTemporaryDirectory(suffix='_comic_extract')
-    extract(path_to_comic_file, tdir)
-    return tdir
-
-def find_pages(dir, sort_on_mtime=False, verbose=False):
-    '''
-    Find valid comic pages in a previously un-archived comic.
-
-    :param dir: Directory in which extracted comic lives
-    :param sort_on_mtime: If True sort pages based on their last modified time.
-                          Otherwise, sort alphabetically.
-    '''
-    extensions = ['jpeg', 'jpg', 'gif', 'png']
-    pages = []
-    for datum in os.walk(dir):
-        for name in datum[-1]:
-            path = os.path.join(datum[0], name)
-            if '__MACOSX' in path: continue
-            for ext in extensions:
-                if path.lower().endswith('.'+ext):
-                    pages.append(path)
-                    break
-    if sort_on_mtime:
-        comparator = lambda x, y : cmp(os.stat(x).st_mtime, os.stat(y).st_mtime)
-    else:
-        comparator = lambda x, y : cmp(os.path.basename(x), os.path.basename(y))
-
-    pages.sort(cmp=comparator)
-    if verbose:
-        print 'Found comic pages...'
-        print '\t'+'\n\t'.join([os.path.basename(p) for p in pages])
-    return pages
-
-class PageProcessor(list):
-    '''
-    Contains the actual image rendering logic. See :method:`render` and
-    :method:`process_pages`.
-    '''
-
-    def __init__(self, path_to_page, dest, opts, num):
-        list.__init__(self)
-        self.path_to_page = path_to_page
-        self.opts         = opts
-        self.num          = num
-        self.dest         = dest
-        self.rotate       = False
-        self.render()
-
-
-    def render(self):
-        img = NewMagickWand()
-        if img < 0:
-            raise RuntimeError('Cannot create wand.')
-        if not MagickReadImage(img, self.path_to_page):
-            raise IOError('Failed to read image from: %'%self.path_to_page)
-        width  = MagickGetImageWidth(img)
-        height = MagickGetImageHeight(img)
-        if self.num == 0: # First image so create a thumbnail from it
-            thumb = CloneMagickWand(img)
-            if thumb < 0:
-                raise RuntimeError('Cannot create wand.')
-            MagickThumbnailImage(thumb, 60, 80)
-            MagickWriteImage(thumb, os.path.join(self.dest, 'thumbnail.png'))
-            DestroyMagickWand(thumb)
-        self.pages = [img]
-        if width > height:
-            if self.opts.landscape:
-                self.rotate = True
-            else:
-                split1, split2 = map(CloneMagickWand, (img, img))
-                DestroyMagickWand(img)
-                if split1 < 0 or split2 < 0:
-                    raise RuntimeError('Cannot create wand.')
-                MagickCropImage(split1, (width/2)-1, height, 0, 0)
-                MagickCropImage(split2, (width/2)-1, height, width/2, 0 )
-                self.pages = [split2, split1] if self.opts.right2left else [split1, split2]
-        self.process_pages()
-
-    def process_pages(self):
-        for i, wand in enumerate(self.pages):
-            pw = NewPixelWand()
-            try:
-                if pw < 0:
-                    raise RuntimeError('Cannot create wand.')
-                PixelSetColor(pw, 'white')
-
-                MagickSetImageBorderColor(wand, pw)
-                if self.rotate:
-                    MagickRotateImage(wand, pw, -90)
-
-                # 25 percent fuzzy trim?
-                if not self.opts.disable_trim:
-                    MagickTrimImage(wand, 25*65535/100)
-                MagickSetImagePage(wand, 0,0,0,0)   #Clear page after trim, like a "+repage"
-                # Do the Photoshop "Auto Levels" equivalent
-                if not self.opts.dont_normalize:
-                    MagickNormalizeImage(wand)
-                sizex = MagickGetImageWidth(wand)
-                sizey = MagickGetImageHeight(wand)
-
-                SCRWIDTH, SCRHEIGHT = PROFILES[self.opts.profile]
-
-                if self.opts.keep_aspect_ratio:
-                    # Preserve the aspect ratio by adding border
-                    aspect = float(sizex) / float(sizey)
-                    if aspect <= (float(SCRWIDTH) / float(SCRHEIGHT)):
-                        newsizey = SCRHEIGHT
-                        newsizex = int(newsizey * aspect)
-                        deltax = (SCRWIDTH - newsizex) / 2
-                        deltay = 0
-                    else:
-                        newsizex = SCRWIDTH
-                        newsizey = int(newsizex / aspect)
-                        deltax = 0
-                        deltay = (SCRHEIGHT - newsizey) / 2
-                    MagickResizeImage(wand, newsizex, newsizey, CatromFilter, 1.0)
-                    MagickSetImageBorderColor(wand, pw)
-                    MagickBorderImage(wand, pw, deltax, deltay)
-                elif self.opts.wide:
-                    # Keep aspect and Use device height as scaled image width so landscape mode is clean
-                    aspect = float(sizex) / float(sizey)
-                    screen_aspect = float(SCRWIDTH) / float(SCRHEIGHT)
-                    # Get dimensions of the landscape mode screen
-                    # Add 25px back to height for the battery bar.
-                    wscreenx = SCRHEIGHT + 25
-                    wscreeny = int(wscreenx / screen_aspect)
-                    if aspect <= screen_aspect:
-                        newsizey = wscreeny
-                        newsizex = int(newsizey * aspect)
-                        deltax = (wscreenx - newsizex) / 2
-                        deltay = 0
-                    else:
-                        newsizex = wscreenx
-                        newsizey = int(newsizex / aspect)
-                        deltax = 0
-                        deltay = (wscreeny - newsizey) / 2
-                    MagickResizeImage(wand, newsizex, newsizey, CatromFilter, 1.0)
-                    MagickSetImageBorderColor(wand, pw)
-                    MagickBorderImage(wand, pw, deltax, deltay)
-                else:
-                    MagickResizeImage(wand, SCRWIDTH, SCRHEIGHT, CatromFilter, 1.0)
-
-                if not self.opts.dont_sharpen:
-                    MagickSharpenImage(wand, 0.0, 1.0)
-
-                MagickSetImageType(wand, GrayscaleType)
-
-                if self.opts.despeckle:
-                    MagickDespeckleImage(wand)
-
-                MagickQuantizeImage(wand, self.opts.colors, RGBColorspace, 0, 1, 0)
-                dest = '%d_%d.png'%(self.num, i)
-                dest = os.path.join(self.dest, dest)
-                MagickWriteImage(wand, dest+'8')
-                os.rename(dest+'8', dest)
-                self.append(dest)
-            finally:
-                if pw > 0:
-                    DestroyPixelWand(pw)
-                DestroyMagickWand(wand)
-
-def render_pages(tasks, dest, opts, notification=None):
-    '''
-    Entry point for the job server.
-    '''
-    failures, pages = [], []
-    with ImageMagick():
-        for num, path in tasks:
-            try:
-                pages.extend(PageProcessor(path, dest, opts, num))
-                msg = _('Rendered %s')%path
-            except:
-                failures.append(path)
-                msg = _('Failed %s')%path
-                if opts.verbose:
-                    msg += '\n' + traceback.format_exc()
-            if notification is not None:
-                notification(0.5, msg)
-
-    return pages, failures
-
-
-class JobManager(object):
-    '''
-    Simple job manager responsible for keeping track of overall progress.
-    '''
-
-    def __init__(self, total, update):
-        self.total  = total
-        self.update = update
-        self.done   = 0
-        self.add_job        = lambda j: j
-        self.output         = lambda j: j
-        self.start_work     = lambda j: j
-        self.job_done       = lambda j: j
-
-    def status_update(self, job):
-        self.done += 1
-        #msg = msg%os.path.basename(job.args[0])
-        self.update(float(self.done)/self.total, job.msg)
-
-def process_pages(pages, opts, update):
-    '''
-    Render all identified comic pages.
-    '''
-    if not _imagemagick_loaded:
-        raise RuntimeError('Failed to load ImageMagick')
-
-    tdir = PersistentTemporaryDirectory('_comic2lrf_pp')
-    job_manager = JobManager(len(pages), update)
-    server = Server()
-    jobs = []
-    tasks = server.split(pages)
-    for task in tasks:
-        jobs.append(ParallelJob('render_pages', lambda s:s, job_manager=job_manager,
-                                args=[task, tdir, opts]))
-        server.add_job(jobs[-1])
-    server.wait()
-    server.killall()
-    server.close()
-    ans, failures = [], []
-
-    for job in jobs:
-        if job.result is None:
-            raise Exception(_('Failed to process comic: %s\n\n%s')%(job.exception, job.traceback))
-        pages, failures_ = job.result
-        ans += pages
-        failures += failures_
-    return ans, failures, tdir
-
-def config(defaults=None,output_format='lrf'):
-    desc = _('Options to control the conversion of comics (CBR, CBZ) files into ebooks')
-    if defaults is None:
-        c = Config('comic', desc)
-    else:
-        c = StringConfig(defaults, desc)
-    c.add_opt('title', ['-t', '--title'],
-              help=_('Title for generated ebook. Default is to use the filename.'))
-    c.add_opt('author', ['-a', '--author'],
-              help=_('Set the author in the metadata of the generated ebook. Default is %default'),
-              default=_('Unknown'))
-    c.add_opt('output', ['-o', '--output'],
-              help=_('Path to output file. By default a file is created in the current directory.'))
-    c.add_opt('colors', ['-c', '--colors'], type='int', default=64,
-              help=_('Number of colors for grayscale image conversion. Default: %default'))
-    c.add_opt('dont_normalize', ['-n', '--disable-normalize'], default=False,
-              help=_('Disable normalize (improve contrast) color range for pictures. Default: False'))
-    c.add_opt('keep_aspect_ratio', ['-r', '--keep-aspect-ratio'], default=False,
-              help=_('Maintain picture aspect ratio. Default is to fill the screen.'))
-    c.add_opt('dont_sharpen', ['-s', '--disable-sharpen'], default=False,
-              help=_('Disable sharpening.'))
-    c.add_opt('disable_trim', ['--disable-trim'], default=False,
-              help=_('Disable trimming of comic pages. For some comics, '
-                     'trimming might remove content as well as borders.'))
-    c.add_opt('landscape', ['-l', '--landscape'], default=False,
-              help=_("Don't split landscape images into two portrait images"))
-    c.add_opt('wide', ['-w', '--wide-aspect'], default=False,
-              help=_("Keep aspect ratio and scale image using screen height as image width for viewing in landscape mode."))
-    c.add_opt('right2left', ['--right2left'], default=False, action='store_true',
-              help=_('Used for right-to-left publications like manga. Causes landscape pages to be split into portrait pages from right to left.'))
-    c.add_opt('despeckle', ['-d', '--despeckle'], default=False,
-              help=_('Enable Despeckle. Reduces speckle noise. May greatly increase processing time.'))
-    c.add_opt('no_sort', ['--no-sort'], default=False,
-              help=_("Don't sort the files found in the comic alphabetically by name. Instead use the order they were added to the comic."))
-    c.add_opt('profile', ['-p', '--profile'], default='prs500', choices=PROFILES.keys(),
-              help=_('Choose a profile for the device you are generating this file for. The default is the SONY PRS-500 with a screen size of 584x754 pixels. This is suitable for any reader with the same screen size. Choices are %s')%PROFILES.keys())
-    c.add_opt('verbose', ['-v', '--verbose'], default=0, action='count',
-              help=_('Be verbose, useful for debugging. Can be specified multiple times for greater verbosity.'))
-    c.add_opt('no_progress_bar', ['--no-progress-bar'], default=False,
-                      help=_("Don't show progress bar."))
-    if output_format == 'pdf':
-        c.add_opt('no_process',['--no_process'], default=False,
-    		      help=_("Apply no processing to the image"))
-    return c
-
-def option_parser(output_format='lrf'):
-    c = config(output_format=output_format)
-    return c.option_parser(usage=_('''\
-%prog [options] comic.cb[z|r]
-
-Convert a comic in a CBZ or CBR file to an ebook.
-'''))
-
-def create_epub(pages, profile, opts, thumbnail=None):
-    wrappers = []
-    WRAPPER = textwrap.dedent('''\
-    <html>
-        <head>
-            <title>Page #%d</title>
-            <style type="text/css">@page {margin:0pt; padding: 0pt;}</style>
-        </head>
-        <body style="margin: 0pt; padding: 0pt">
-            <div style="text-align:center">
-                <img src="%s" alt="comic page #%d" />
-            </div>
-        </body>
-    </html>
-    ''')
-    dir = os.path.dirname(pages[0])
-    for i, page in enumerate(pages):
-        wrapper = WRAPPER%(i+1, os.path.basename(page), i+1)
-        page = os.path.join(dir, 'page_%d.html'%(i+1))
-        open(page, 'wb').write(wrapper)
-        wrappers.append(page)
-
-    mi  = MetaInformation(opts.title, [opts.author])
-    opf = OPFCreator(dir, mi)
-    opf.create_manifest([(w, None) for w in wrappers])
-    opf.create_spine(wrappers)
-    metadata = os.path.join(dir, 'metadata.opf')
-    opf.render(open(metadata, 'wb'))
-    opts2 = html2epub_config('margin_left=0\nmargin_right=0\nmargin_top=0\nmargin_bottom=0').parse()
-    opts2.output = opts.output
-    html2epub(metadata, opts2)
-
-def create_lrf(pages, profile, opts, thumbnail=None):
-    width, height = PROFILES[profile]
-    ps = {}
-    ps['topmargin']      = 0
-    ps['evensidemargin'] = 0
-    ps['oddsidemargin']  = 0
-    ps['textwidth']      = width
-    ps['textheight']     = height
-    book = Book(title=opts.title, author=opts.author,
-            bookid=uuid4().hex,
-            publisher='%s %s'%(__appname__, __version__), thumbnail=thumbnail,
-            category='Comic', pagestyledefault=ps,
-            booksetting=BookSetting(screenwidth=width, screenheight=height))
-    for page in pages:
-        imageStream = ImageStream(page)
-        _page = book.create_page()
-        _page.append(ImageBlock(refstream=imageStream,
-                    blockwidth=width, blockheight=height, xsize=width,
-                    ysize=height, x1=width, y1=height))
-        book.append(_page)
-
-    book.renderLrf(open(opts.output, 'wb'))
-    print _('Output written to'), opts.output
-
-
-def create_pdf(pages, profile, opts, thumbnail=None,toc=None):
-    width, height = PROFILES[profile]
-
-    from reportlab.pdfgen import canvas
-
-    cur_page=0
-    heading = []
-    if toc != None:
-        if len(toc) == 1:
-            toc = None
-        else:
-            toc_index = 0
-            base_cur = 0
-            rem = 0
-            breaker = False
-            while True:
-                letter=toc[0][0][base_cur]
-                for i in range(len(toc)):
-                    if letter != toc[i][0][base_cur]:
-                        breaker = True
-                if breaker:
-                    break
-                if letter == os.sep:
-                    rem=base_cur
-                base_cur += 1
-            toc.append(("Not seen",-1))
-
-
-    pdf = canvas.Canvas(filename=opts.output, pagesize=(width,height+15))
-    pdf.setAuthor(opts.author)
-    pdf.setTitle(opts.title)
-
-
-    for page in pages:
-        if opts.keep_aspect_ratio:
-            img = NewMagickWand()
-            if img < 0:
-                raise RuntimeError('Cannot create wand.')
-            if not MagickReadImage(img, page):
-                raise IOError('Failed to read image from: %'%page)
-            sizex  = MagickGetImageWidth(img)
-            sizey = MagickGetImageHeight(img)
-            if opts.keep_aspect_ratio:
-                # Preserve the aspect ratio by adding border
-                aspect = float(sizex) / float(sizey)
-                if aspect <= (float(width) / float(height)):
-                    newsizey = height
-                    newsizex = int(newsizey * aspect)
-                    deltax = (width - newsizex) / 2
-                    deltay = 0
-                else:
-                    newsizex = width
-                    newsizey = int(newsizex / aspect)
-                    deltax = 0
-                    deltay = (height - newsizey) / 2
-            pdf.drawImage(page, x=deltax,y=deltay,width=newsizex, height=newsizey)
-        else:
-            pdf.drawImage(page, x=0,y=0,width=width, height=height)
-        if toc != None:
-            if toc[toc_index][1] == cur_page:
-                tmp=toc[toc_index][0]
-                toc_current=tmp[rem:len(tmp)-4]
-                index=0
-                while True:
-                    key = 'page%d-%d' % (cur_page, index)
-                    pdf.bookmarkPage(key)
-                    (head,dummy,list)=toc_current.partition(os.sep)
-                    try:
-                        if heading[index] != head:
-                            heading[index] = head
-                            pdf.addOutlineEntry(title=head,key=key,level=index)
-                    except:
-                        heading.append(head)
-                        pdf.addOutlineEntry(title=head,key=key,level=index)
-                    index += 1
-                    toc_current=list
-                    if dummy == "":
-                        break
-                toc_index += 1
-            cur_page += 1
-        pdf.showPage()
-    # Write the document to disk
-    pdf.save()
-
-
-def do_convert(path_to_file, opts, notification=lambda m, p: p, output_format='lrf'):
-    path_to_file = run_plugins_on_preprocess(path_to_file)
-    source = path_to_file
-    to_delete = []
-    toc = []
-    list = []
-    pages = []
-
-
-    if not opts.title:
-        opts.title = os.path.splitext(os.path.basename(source))[0]
-    if not opts.output:
-        opts.output = os.path.abspath(os.path.splitext(os.path.basename(source))[0]+'.'+output_format)
-    if os.path.isdir(source):
-        for path in all_files( source , '*.cbr|*.cbz' ):
-            list.append( path )
-    else:
-            list= [ os.path.abspath(source) ]
-
-    for source in list:
-        tdir  = extract_comic(source)
-        new_pages = find_pages(tdir, sort_on_mtime=opts.no_sort, verbose=opts.verbose)
-        thumbnail = None
-        if not new_pages:
-            raise ValueError('Could not find any pages in the comic: %s'%source)
-        if not getattr(opts, 'no_process', False):
-            new_pages, failures, tdir2 = process_pages(new_pages, opts, notification)
-            if not new_pages:
-                raise ValueError('Could not find any valid pages in the comic: %s'%source)
-            if failures:
-                print 'Could not process the following pages (run with --verbose to see why):'
-                for f in failures:
-                    print '\t', f
-            thumbnail = os.path.join(tdir2, 'thumbnail.png')
-            if not os.access(thumbnail, os.R_OK):
-                thumbnail = None
-        toc.append((source,len(pages)))
-        pages.extend(new_pages)
-        to_delete.append(tdir)
-
-
-    if output_format == 'lrf':
-        create_lrf(pages, opts.profile, opts, thumbnail=thumbnail)
-    if output_format == 'epub':
-        create_epub(pages, opts.profile, opts, thumbnail=thumbnail)
-    if output_format == 'pdf':
-        create_pdf(pages, opts.profile, opts, thumbnail=thumbnail,toc=toc)
-    for tdir in to_delete:
-        shutil.rmtree(tdir)
-
-
-def all_files(root, patterns='*'):
-    # Expand patterns from semicolon-separated string to list
-    patterns = patterns.split('|')
-    for path, subdirs, files in os.walk(root):
-        files.sort( )
-        for name in files:
-            for pattern in patterns:
-                if fnmatch.fnmatch(name, pattern):
-                    yield os.path.join(path, name)
-                    break
-
-
-def main(args=sys.argv, notification=None, output_format='lrf'):
-    parser = option_parser(output_format=output_format)
-    opts, args = parser.parse_args(args)
-    if len(args) < 2:
-        parser.print_help()
-        print '\nYou must specify a file to convert'
-        return 1
-
-    if not callable(notification):
-        pb = ProgressBar(terminal_controller, _('Rendering comic pages...'),
-                         no_progress_bar=opts.no_progress_bar or getattr(opts, 'no_process', False))
-        notification = pb.update
-
-    source = os.path.abspath(args[1])
-    do_convert(source, opts, notification, output_format=output_format)
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/lrf/epub/init.py
+++ b/src/calibre/ebooks/lrf/epub/init.py
@ -1,3 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-
--- a/src/calibre/ebooks/lrf/epub/convert_from.py
+++ b/src/calibre/ebooks/lrf/epub/convert_from.py
@ -1,75 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-
-import os, sys, shutil, logging
-from calibre.ebooks.lrf import option_parser as lrf_option_parser
-from calibre.ebooks import ConversionError, DRMError
-from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
-from calibre.ebooks.metadata.opf import OPF
-from calibre.ebooks.metadata.epub import OCFDirReader
-from calibre.utils.zipfile import ZipFile
-from calibre import setup_cli_handlers
-from calibre.ptempfile import PersistentTemporaryDirectory
-
-
-def option_parser():
-    return lrf_option_parser(
-_('''Usage: %prog [options] mybook.epub
-        
-        
-%prog converts mybook.epub to mybook.lrf''')
-        )
-
-def generate_html(pathtoepub, logger):
-    if not os.access(pathtoepub, os.R_OK):
-        raise ConversionError('Cannot read from ' + pathtoepub)
-    tdir = PersistentTemporaryDirectory('_epub2lrf')
-    #os.rmdir(tdir)
-    try:
-        ZipFile(pathtoepub).extractall(tdir)
-    except:
-        raise ConversionError, '.epub extraction failed'
-    if os.path.exists(os.path.join(tdir, 'META-INF', 'encryption.xml')):
-            raise DRMError(os.path.basename(pathtoepub))
-    
-    return tdir
-
-def process_file(path, options, logger=None):
-    if logger is None:
-        level = logging.DEBUG if options.verbose else logging.INFO
-        logger = logging.getLogger('epub2lrf')
-        setup_cli_handlers(logger, level)
-    epub = os.path.abspath(os.path.expanduser(path))
-    tdir = generate_html(epub, logger)
-    try:
-        ocf = OCFDirReader(tdir)
-        htmlfile = ocf.opf.spine[0].path
-        options.opf = os.path.join(tdir, ocf.container[OPF.MIMETYPE])
-        if not options.output:
-            ext = '.lrs' if options.lrs else '.lrf'
-            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
-        options.output = os.path.abspath(os.path.expanduser(options.output))
-        options.use_spine = True
-        
-        html_process_file(htmlfile, options, logger=logger)
-    finally:
-        try:
-            shutil.rmtree(tdir)
-        except:
-            logger.warning('Failed to delete temporary directory '+tdir)
-
-
-def main(args=sys.argv, logger=None):
-    parser = option_parser()
-    options, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        print
-        print 'No epub file specified'
-        return 1
-    process_file(args[1], options, logger)
-    return 0        
-        
-            
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/lrf/fb2/convert_from.py
+++ b/src/calibre/ebooks/lrf/fb2/convert_from.py
@ -1,125 +0,0 @@
-from __future__ import with_statement
-__license__   = 'GPL v3'
-__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
-"""
-Convert .fb2 files to .lrf
-"""
-import os, sys, shutil, logging
-from base64 import b64decode
-from lxml import etree
-    
-from calibre.ebooks.lrf import option_parser as lrf_option_parser
-from calibre.ebooks.metadata.meta import get_metadata
-from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
-from calibre import setup_cli_handlers
-from calibre.resources import fb2_xsl
-from calibre.ptempfile import PersistentTemporaryDirectory
-from calibre.ebooks.metadata.opf import OPFCreator
-from calibre.ebooks.metadata import MetaInformation
-
-
-def option_parser():
-    parser = lrf_option_parser(
-_('''%prog [options] mybook.fb2
-
-
-%prog converts mybook.fb2 to mybook.lrf'''))
-    parser.add_option('--debug-html-generation', action='store_true', default=False,
-                      dest='debug_html_generation', help=_('Print generated HTML to stdout and quit.'))
-    parser.add_option('--keep-intermediate-files', action='store_true', default=False,
-                      help=_('Keep generated HTML files after completing conversion to LRF.'))
-    return parser
-    
-def extract_embedded_content(doc):
-    for elem in doc.xpath('./*'):
-        if 'binary' in elem.tag and elem.attrib.has_key('id'):
-            fname = elem.attrib['id']
-            data = b64decode(elem.text.strip())
-            open(fname, 'wb').write(data)
-
-def to_html(fb2file, tdir):
-    fb2file = os.path.abspath(fb2file)
-    cwd = os.getcwd()
-    try:
-        os.chdir(tdir)
-        print 'Parsing XML...'
-        parser = etree.XMLParser(recover=True, no_network=True)
-        doc = etree.parse(fb2file, parser)
-        extract_embedded_content(doc)
-        print 'Converting XML to HTML...'
-        styledoc = etree.fromstring(fb2_xsl)
-    
-        transform = etree.XSLT(styledoc)
-        result = transform(doc)
-        open('index.html', 'wb').write(transform.tostring(result))
-        try:
-            mi = get_metadata(open(fb2file, 'rb'), 'fb2')
-        except:
-            mi = MetaInformation(None, None)
-        if not mi.title:
-            mi.title = os.path.splitext(os.path.basename(fb2file))[0]
-        if not mi.authors:
-            mi.authors = [_('Unknown')]
-        opf = OPFCreator(tdir, mi)
-        opf.create_manifest([('index.html', None)])
-        opf.create_spine(['index.html'])
-        opf.render(open('metadata.opf', 'wb'))
-        return os.path.join(tdir, 'metadata.opf')
-    finally:
-        os.chdir(cwd)
-
-    
-def generate_html(fb2file, encoding, logger):
-    tdir = PersistentTemporaryDirectory('_fb22lrf')
-    to_html(fb2file, tdir)
-    return os.path.join(tdir, 'index.html')
-    
-def process_file(path, options, logger=None):
-    if logger is None:
-        level = logging.DEBUG if options.verbose else logging.INFO
-        logger = logging.getLogger('fb22lrf')
-        setup_cli_handlers(logger, level)
-    fb2 = os.path.abspath(os.path.expanduser(path))
-    f = open(fb2, 'rb')
-    mi = get_metadata(f, 'fb2')
-    f.close()
-    htmlfile = generate_html(fb2, options.encoding, logger)
-    tdir = os.path.dirname(htmlfile)
-    cwd = os.getcwdu()
-    try:
-        if not options.output:
-            ext = '.lrs' if options.lrs else '.lrf'
-            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
-        options.output = os.path.abspath(os.path.expanduser(options.output))
-        if not mi.title:
-            mi.title = os.path.splitext(os.path.basename(fb2))[0]
-        if (not options.title or options.title == _('Unknown')):
-            options.title = mi.title
-        if (not options.author or options.author == _('Unknown')) and mi.authors:
-            options.author = mi.authors.pop()
-        if (not options.category or options.category == _('Unknown')) and mi.category:
-            options.category = mi.category
-        if (not options.freetext or options.freetext == _('Unknown')) and mi.comments:
-            options.freetext = mi.comments
-        os.chdir(tdir)
-        html_process_file(htmlfile, options, logger)
-    finally:
-        os.chdir(cwd)
-        if getattr(options, 'keep_intermediate_files', False):
-            logger.debug('Intermediate files in '+ tdir)
-        else:
-            shutil.rmtree(tdir)
-
-def main(args=sys.argv, logger=None):
-    parser = option_parser()    
-    options, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        print
-        print 'No fb2 file specified'
-        return 1
-    process_file(args[1], options, logger)
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/lrf/feeds/init.py
+++ b/src/calibre/ebooks/lrf/feeds/init.py
@ -1,4 +0,0 @@
-#!/usr/bin/env  python
-
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
--- a/src/calibre/ebooks/lrf/feeds/convert_from.py
+++ b/src/calibre/ebooks/lrf/feeds/convert_from.py
@ -1,59 +0,0 @@
-from __future__ import with_statement
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-'''
-Convert web feeds to LRF files.
-'''
-from calibre.ebooks.lrf import option_parser as lrf_option_parser
-from calibre.ebooks.lrf.html.convert_from import process_file
-from calibre.web.feeds.main import option_parser as feeds_option_parser
-from calibre.web.feeds.main import run_recipe
-from calibre.ptempfile import TemporaryDirectory
-from calibre import sanitize_file_name, strftime
-
-import sys, os
-
-def option_parser():
-    parser = feeds_option_parser()
-    parser.remove_option('--output-dir')
-    parser.remove_option('--lrf')
-    parser.subsume('FEEDS2DISK OPTIONS', _('Options to control the behavior of feeds2disk'))
-    lrf_parser = lrf_option_parser('')
-    lrf_parser.subsume('HTML2LRF OPTIONS', _('Options to control the behavior of html2lrf'))
-    parser.merge(lrf_parser)
-    return parser
-
-def main(args=sys.argv, notification=None, handler=None):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    opts.lrf = True
-    
-    if len(args) != 2 and opts.feeds is None:
-        parser.print_help()
-        return 1
-    
-    recipe_arg = args[1] if len(args) > 1 else None
-    
-    with TemporaryDirectory('_feeds2lrf') as tdir:
-        opts.output_dir = tdir
-        
-        recipe = run_recipe(opts, recipe_arg, parser, notification=notification, handler=handler)
-        
-        htmlfile = os.path.join(tdir, 'index.html')
-        if not os.access(htmlfile, os.R_OK):
-            raise RuntimeError(_('Fetching of recipe failed: ')+recipe_arg)
-        
-        lparser = lrf_option_parser('')
-        ropts = lparser.parse_args(['html2lrf']+recipe.html2lrf_options)[0]
-        parser.merge_options(ropts, opts)
-        
-        if not opts.output:
-            ext = '.lrs' if opts.lrs else '.lrf'
-            fname = recipe.title + strftime(recipe.timefmt)+ext
-            opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname))
-        print 'Generating LRF...'
-        process_file(htmlfile, opts)
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@ -6,7 +6,7 @@ Code to convert HTML ebooks into LRF ebooks.
 I am indebted to esperanc for the initial CSS->Xylog Style conversion code
 and to Falstaff for pylrs.
 """
-import os, re, sys, copy, glob, logging, tempfile
+import os, re, sys, copy, glob, tempfile
 from collections import deque
 from urllib import unquote
 from urlparse import urlparse
@ -16,6 +16,7 @@ from calibre.customize.ui import run_plugins_on_postprocess

 try:
    from PIL import Image as PILImage
+    PILImage
 except ImportError:
    import Image as PILImage

@ -26,14 +27,13 @@ from calibre.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, \
                Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas, DropCaps, \
                LrsError, Sup, Sub, EmpLine
 from calibre.ebooks.lrf.pylrs.pylrs import Span
-from calibre.ebooks.lrf import Book, entity_to_unicode
-from calibre.ebooks.lrf import option_parser as lrf_option_parser
+from calibre.ebooks.lrf import Book
 from calibre.ebooks import ConversionError
 from calibre.ebooks.lrf.html.table import Table
-from calibre import filename_to_utf8,  setup_cli_handlers, __appname__, \
-                    fit_image, LoggingInterface, preferred_encoding
+from calibre import filename_to_utf8, __appname__, \
+                    fit_image, preferred_encoding, entity_to_unicode
 from calibre.ptempfile import PersistentTemporaryFile
-from calibre.devices.interface import Device
+from calibre.devices.interface import DevicePlugin as Device
 from calibre.ebooks.lrf.html.color_map import lrs_color
 from calibre.ebooks.chardet import xml_to_unicode

@ -78,7 +78,7 @@ def tag_regex(tagname):
    return dict(open=r'(?:<\s*%(t)s\s+[^<>]*?>|<\s*%(t)s\s*>)'%dict(t=tagname), \
                close=r'</\s*%(t)s\s*>'%dict(t=tagname))

-class HTMLConverter(object, LoggingInterface):
+class HTMLConverter(object):
    SELECTOR_PAT   = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
    PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE)
    IGNORED_TAGS   = (Comment, Declaration, ProcessingInstruction)
@ -213,7 +213,7 @@ class HTMLConverter(object, LoggingInterface):
        '''
        # Defaults for various formatting tags
        object.__setattr__(self, 'options', options)
-        LoggingInterface.__init__(self, logger)
+        self.log = logger
        self.fonts = fonts #: dict specifying font families to use
        # Memory
        self.scaled_images    = {}    #: Temporary files with scaled version of images
@ -288,9 +288,9 @@ class HTMLConverter(object, LoggingInterface):
                        if link['path'] == path:
                            self.links.remove(link)
                            break
-                    self.log_warn('Could not process '+path)
+                    self.log.warn('Could not process '+path)
                    if self.verbose:
-                        self.log_exception(' ')
+                        self.log.exception(' ')
            self.links = self.process_links()
            self.link_level += 1
            paths = [link['path'] for link in self.links]
@ -302,7 +302,7 @@ class HTMLConverter(object, LoggingInterface):
            self.book.addTocEntry(text, tb)

        if self.base_font_size > 0:
-            self.log_info('\tRationalizing font sizes...')
+            self.log.info('\tRationalizing font sizes...')
            self.book.rationalize_font_sizes(self.base_font_size)

    def is_baen(self, soup):
@ -318,9 +318,9 @@ class HTMLConverter(object, LoggingInterface):

        if not self.book_designer and self.is_book_designer(raw):
            self.book_designer = True
-            self.log_info(_('\tBook Designer file detected.'))
+            self.log.info(_('\tBook Designer file detected.'))

-        self.log_info(_('\tParsing HTML...'))
+        self.log.info(_('\tParsing HTML...'))

        if self.baen:
            nmassage.extend(HTMLConverter.BAEN)
@ -343,7 +343,7 @@ class HTMLConverter(object, LoggingInterface):
                raise
        if not self.baen and self.is_baen(soup):
            self.baen = True
-            self.log_info(_('\tBaen file detected. Re-parsing...'))
+            self.log.info(_('\tBaen file detected. Re-parsing...'))
            return self.preprocess(raw)
        if self.book_designer:
            t = soup.find(id='BookTitle')
@ -359,7 +359,7 @@ class HTMLConverter(object, LoggingInterface):
            try:
                dump = open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb')
                dump.write(unicode(soup).encode('utf-8'))
-                self.log_info(_('Written preprocessed HTML to ')+dump.name)
+                self.log.info(_('Written preprocessed HTML to ')+dump.name)
                dump.close()
            except:
                pass
@ -377,7 +377,7 @@ class HTMLConverter(object, LoggingInterface):

        upath = path.encode(sys.getfilesystemencoding()) if isinstance(path, unicode) else path
        self.file_name = os.path.basename(upath.decode(sys.getfilesystemencoding()))
-        self.log_info(_('Processing %s'), repr(upath) if self.verbose else repr(self.file_name))
+        self.log.info(_('Processing %s')%( repr(upath) if self.verbose else repr(self.file_name)))

        if not os.path.exists(upath):
            upath = upath.replace('&', '%26') #convertlit replaces & with %26 in file names
@ -391,7 +391,7 @@ class HTMLConverter(object, LoggingInterface):
            raw = xml_to_unicode(raw, self.verbose)[0]
        f.close()
        soup = self.preprocess(raw)
-        self.log_info(_('\tConverting to BBeB...'))
+        self.log.info(_('\tConverting to BBeB...'))
        self.current_style = {}
        self.page_break_found = False
        if not isinstance(path, unicode):
@ -542,7 +542,7 @@ class HTMLConverter(object, LoggingInterface):
                try:
                    index = self.book.pages().index(opage)
                except ValueError:
-                    self.log_warning(_('%s is an empty file')%self.file_name)
+                    self.log.warning(_('%s is an empty file')%self.file_name)
                    tb = self.book.create_text_block()
                    self.current_page.append(tb)
                    return tb
@ -606,7 +606,7 @@ class HTMLConverter(object, LoggingInterface):
               hasattr(target.parent, 'objId'):
                self.book.addTocEntry(ascii_text, tb)
            else:
-                self.log_debug(_("Cannot add link %s to TOC"), ascii_text)
+                self.log.debug(_("Cannot add link %s to TOC")%ascii_text)


        def get_target_block(fragment, targets):
@ -937,7 +937,7 @@ class HTMLConverter(object, LoggingInterface):
        try:
            im = PILImage.open(path)
        except IOError, err:
-            self.log_warning('Unable to process image: %s\n%s', original_path, err)
+            self.log.warning('Unable to process image: %s\n%s'%( original_path, err))
            return
        encoding = detect_encoding(im)

@ -955,7 +955,7 @@ class HTMLConverter(object, LoggingInterface):
                self.scaled_images[path] = pt
                return pt.name
            except (IOError, SystemError), err: # PIL chokes on interlaced PNG images as well a some GIF images
-                self.log_warning(_('Unable to process image %s. Error: %s')%(path, err))
+                self.log.warning(_('Unable to process image %s. Error: %s')%(path, err))

        if width == None or height == None:
            width, height = im.size
@ -1000,7 +1000,7 @@ class HTMLConverter(object, LoggingInterface):
                self.rotated_images[path] = pt
                width, height = im.size
            except IOError: # PIL chokes on interlaced PNG files and since auto-rotation is not critical we ignore the error
-                self.log_debug(_('Unable to process interlaced PNG %s'), original_path)                 
+                self.log.debug(_('Unable to process interlaced PNG %s')% original_path)
            finally:
                pt.close()

@ -1015,7 +1015,8 @@ class HTMLConverter(object, LoggingInterface):
            try:
                self.images[path] = ImageStream(path, encoding=encoding)
            except LrsError, err:
-                self.log_warning(_('Could not process image: %s\n%s'), original_path, err)
+                self.log.warning(_('Could not process image: %s\n%s')%(
+                    original_path, err))
                return

        im = Image(self.images[path], x0=0, y0=0, x1=width, y1=height,\
@ -1077,7 +1078,7 @@ class HTMLConverter(object, LoggingInterface):

            if number_of_paragraphs > 2:
                self.end_page()
-                self.log_debug('Forcing page break at %s', tagname)
+                self.log.debug('Forcing page break at %s'%tagname)
        return end_page

    def block_properties(self, tag_css):
@ -1467,7 +1468,7 @@ class HTMLConverter(object, LoggingInterface):
            (self.chapter_attr[1].lower() == 'none' or \
             (tag.has_key(self.chapter_attr[1]) and \
              self.chapter_attr[2].match(tag[self.chapter_attr[1]])))):
-                self.log_debug('Detected chapter %s', tagname)
+                self.log.debug('Detected chapter %s'%tagname)
                self.end_page()
                self.page_break_found = True

@ -1507,7 +1508,7 @@ class HTMLConverter(object, LoggingInterface):
                                    self.targets[self.target_prefix+tag[key]] = self.current_block
                                    self.current_block.must_append = True
                        else:
-                            self.log_debug('Could not follow link to '+tag['href'])
+                            self.log.debug('Could not follow link to '+tag['href'])
                            self.process_children(tag, tag_css, tag_pseudo_css)
                elif tag.has_key('name') or tag.has_key('id'):
                    self.process_anchor(tag, tag_css, tag_pseudo_css)
@ -1529,9 +1530,9 @@ class HTMLConverter(object, LoggingInterface):
                        self.process_image(path, tag_css, width, height,
                                           dropcaps=dropcaps, rescale=True)
                    elif not urlparse(tag['src'])[0]:
-                        self.log_warn('Could not find image: '+tag['src'])
+                        self.log.warn('Could not find image: '+tag['src'])
                else:
-                    self.log_debug("Failed to process: %s", str(tag))
+                    self.log.debug("Failed to process: %s"%str(tag))
            elif tagname in ['style', 'link']:
                ncss, npcss = {}, {}
                if tagname == 'style':
@ -1552,7 +1553,7 @@ class HTMLConverter(object, LoggingInterface):
                            self.page_break_found = True
                        ncss, npcss = self.parse_css(src)
                    except IOError:
-                        self.log_warn('Could not read stylesheet: '+tag['href'])
+                        self.log.warn('Could not read stylesheet: '+tag['href'])
                if ncss:
                    update_css(ncss, self.css)
                    self.css.update(self.override_css)
@ -1687,7 +1688,7 @@ class HTMLConverter(object, LoggingInterface):

                if not self.disable_chapter_detection and tagname.startswith('h'):
                    if self.chapter_regex.search(src):
-                        self.log_debug('Detected chapter %s', src)
+                        self.log.debug('Detected chapter %s'%src)
                        self.end_page()
                        self.page_break_found = True

@ -1769,9 +1770,9 @@ class HTMLConverter(object, LoggingInterface):
                    try:
                        self.process_table(tag, tag_css)
                    except Exception, err:
-                        self.log_warning(_('An error occurred while processing a table: %s. Ignoring table markup.'), repr(err))
-                        self.log_debug('', exc_info=True)
-                        self.log_debug(_('Bad table:\n%s'), unicode(tag)[:300])
+                        self.log.warning(_('An error occurred while processing a table: %s. Ignoring table markup.')%repr(err))
+                        self.log.exception('')
+                        self.log.debug(_('Bad table:\n%s')%unicode(tag)[:300])
                        self.in_table = False
                        self.process_children(tag, tag_css, tag_pseudo_css)
                    finally:
@ -1821,14 +1822,7 @@ class HTMLConverter(object, LoggingInterface):
        for _file in self.scaled_images.values() + self.rotated_images.values():
            _file.__del__()

-def process_file(path, options, logger=None):
-    if re.match('http://|https://', path):
-        raise ConversionError, _('You have to save the website %s as an html file first and then run html2lrf on it.')%(path,)
-    if logger is None:
-        level = logging.DEBUG if options.verbose else logging.INFO
-        logger = logging.getLogger('html2lrf')
-        setup_cli_handlers(logger, level)
-    
+def process_file(path, options, logger):
    if not isinstance(path, unicode):
        path = path.decode(sys.getfilesystemencoding())
    path = os.path.abspath(path)
@ -1939,7 +1933,6 @@ def process_file(path, options, logger=None):
    oname = os.path.abspath(os.path.expanduser(oname))
    conv.writeto(oname, lrs=options.lrs)
    run_plugins_on_postprocess(oname, 'lrf')
-    conv.log_info('Output written to %s', oname)
    conv.cleanup()
    return oname

@ -1998,38 +1991,3 @@ def try_opf(path, options, logger):



-def option_parser():
-    return lrf_option_parser(
-_('''Usage: %prog [options] mybook.html
-
-
-%prog converts mybook.html to mybook.lrf. 
-%prog follows all links in mybook.html that point 
-to local files recursively. Thus, you can use it to 
-convert a whole tree of HTML files.'''))
-
-def main(args=sys.argv):
-    try:
-        parser = option_parser()
-        options, args = parser.parse_args(args)    
-        if options.output:
-            options.output = os.path.abspath(os.path.expanduser(options.output))
-        if len(args) != 2:
-            parser.print_help()
-            return 1
-        src = args[1]
-        if options.verbose:
-            import warnings
-            warnings.defaultaction = 'error'
-    except Exception, err:
-        print >> sys.stderr, err
-        return 1
-    if not isinstance(src, unicode):
-        src = src.decode(sys.getfilesystemencoding())
-    process_file(src, options)
-    return 0
-
-
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/lrf/lit/init.py
+++ b/src/calibre/ebooks/lrf/lit/init.py
@ -1,3 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-
--- a/src/calibre/ebooks/lrf/lit/convert_from.py
+++ b/src/calibre/ebooks/lrf/lit/convert_from.py
@ -1,90 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-
-import os, sys, shutil, glob, logging
-from tempfile import mkdtemp
-from calibre.ebooks.lrf import option_parser as lrf_option_parser
-from calibre.ebooks.lit.reader import LitReader
-from calibre.ebooks import ConversionError
-from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
-from calibre.ebooks.metadata.opf import OPFReader
-from calibre import __appname__, setup_cli_handlers
-
-def option_parser():
-    parser = lrf_option_parser(
-_('''Usage: %prog [options] mybook.lit
-
-
-%prog converts mybook.lit to mybook.lrf''')
-        )
-    return parser
-
-def generate_html(pathtolit, logger):
-    if not os.access(pathtolit, os.R_OK):
-        raise ConversionError, 'Cannot read from ' + pathtolit
-    tdir = mkdtemp(prefix=__appname__+'_'+'lit2oeb_')
-    lr = LitReader(pathtolit)
-    print 'Extracting LIT file to', tdir
-    lr.extract_content(tdir)
-    return tdir
-
-def process_file(path, options, logger=None):
-    if logger is None:
-        level = logging.DEBUG if options.verbose else logging.INFO
-        logger = logging.getLogger('lit2lrf')
-        setup_cli_handlers(logger, level)
-    lit = os.path.abspath(os.path.expanduser(path))
-    tdir = generate_html(lit, logger)
-    try:
-        opf = glob.glob(os.path.join(tdir, '*.opf'))
-        if opf:
-            path = opf[0]
-            opf = OPFReader(path)
-            htmlfile = opf.spine[0].path.replace('&', '%26') #convertlit replaces & with %26
-            options.opf = path
-        else:    
-            l = glob.glob(os.path.join(tdir, '*toc*.htm*'))
-            if not l:
-                l = glob.glob(os.path.join(tdir, '*top*.htm*'))
-            if not l:
-                l = glob.glob(os.path.join(tdir, '*contents*.htm*'))
-            if not l:
-                l = glob.glob(os.path.join(tdir, '*.htm*'))
-                if not l:
-                    l = glob.glob(os.path.join(tdir, '*.txt*')) # Some lit file apparently have .txt files in them
-                    if not l:
-                        raise ConversionError('Conversion of lit to html failed. Cannot find html file.')
-                maxsize, htmlfile = 0, None
-                for c in l:
-                    sz = os.path.getsize(c)
-                    if sz > maxsize:
-                        maxsize, htmlfile = sz, c
-            else:
-                htmlfile = l[0]
-        if not options.output:
-            ext = '.lrs' if options.lrs else '.lrf'
-            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
-        options.output = os.path.abspath(os.path.expanduser(options.output))
-        options.use_spine = True
-        html_process_file(htmlfile, options, logger=logger)
-    finally:
-        try:
-            shutil.rmtree(tdir)
-        except:
-            logger.warning('Failed to delete temporary directory '+tdir)
-
-
-def main(args=sys.argv, logger=None):
-    parser = option_parser()
-    options, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        print
-        print 'No lit file specified'
-        return 1
-    process_file(args[1], options, logger)
-    return 0        
-        
-            
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/lrf/meta.py
+++ b/src/calibre/ebooks/lrf/meta.py
@ -229,6 +229,9 @@ def get_metadata(stream):
    mi.author = lrf.author.strip()
    mi.comments = lrf.free_text.strip()
    mi.category = lrf.category.strip()+', '+lrf.classification.strip()
+    tags = [x.strip() for x in mi.category.split(',') if x.strip()]
+    if tags:
+        mi.tags = tags
    mi.publisher = lrf.publisher.strip()
    mi.cover_data = lrf.get_cover()
    try:
@ -624,7 +627,9 @@ def set_metadata(stream, mi):
        lrf.title = mi.title
    if mi.authors:
        lrf.author = ', '.join(mi.authors)
-    if mi.category:
+    if mi.tags:
+        lrf.category = mi.tags[0]
+    if getattr(mi, 'category', False):
        lrf.category = mi.category
    if mi.comments:    
        lrf.free_text = mi.comments
--- a/src/calibre/ebooks/lrf/mobi/convert_from.py
+++ b/src/calibre/ebooks/lrf/mobi/convert_from.py
@ -1,63 +0,0 @@
-#!/usr/bin/env  python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-''''''
-
-import sys, tempfile, os, logging, shutil
-
-from calibre import setup_cli_handlers, __appname__
-from calibre.ebooks.mobi.reader import MobiReader
-from calibre.ebooks.lrf import option_parser as lrf_option_parser
-from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
-
-def generate_html(mobifile, tdir):
-    mr = MobiReader(mobifile)
-    mr.extract_content(tdir)
-    return mr.htmlfile
-
-def process_file(path, options, logger=None):
-    if logger is None:
-        level = logging.DEBUG if options.verbose else logging.INFO
-        logger = logging.getLogger('lit2lrf')
-        setup_cli_handlers(logger, level)
-    mobi = os.path.abspath(os.path.expanduser(path))
-    tdir = tempfile.mkdtemp('mobi2lrf', __appname__)
-    try:
-        htmlfile = generate_html(mobi, tdir)
-        if not options.output:
-            ext = '.lrs' if options.lrs else '.lrf'
-            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
-        options.output = os.path.abspath(os.path.expanduser(options.output))
-        options.use_spine = True
-        html_process_file(htmlfile, options, logger=logger)
-    finally:
-        try:
-            shutil.rmtree(tdir)
-        except:
-            logger.warning('Failed to delete temporary directory '+tdir)
-
-def option_parser():
-    return lrf_option_parser(
-_('''Usage: %prog [options] mybook.mobi|prc
-
-
-%prog converts mybook.mobi to mybook.lrf''')
-        )
-
-
-def main(args=sys.argv, logger=None):
-    parser = option_parser()
-    options, args = parser.parse_args(args)
-    if len(args) != 2:            
-        parser.print_help()
-        print
-        print 'No mobi file specified'
-        return 1
-    process_file(args[1], options, logger)
-
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/lrf/objects.py
+++ b/src/calibre/ebooks/lrf/objects.py
@ -2,7 +2,8 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 import struct, array, zlib, cStringIO, collections, re

-from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE, entity_to_unicode
+from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE
+from calibre import entity_to_unicode
 from calibre.ebooks.lrf.tags import Tag

 ruby_tags = {
--- a/src/calibre/ebooks/lrf/output.py
+++ b/src/calibre/ebooks/lrf/output.py
@ -0,0 +1,170 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import sys, os
+
+from calibre.customize.conversion import OutputFormatPlugin
+from calibre.customize.conversion import OptionRecommendation
+
+class LRFOptions(object):
+    '''
+    Plain attribute container built from the conversion pipeline options
+    and the metadata of the OEB book.
+
+    Most attributes are fixed constants; only the values read from `opts`
+    and `oeb.metadata` vary from one conversion to the next.
+    '''
+
+    def __init__(self, output, opts, oeb):
+        '''
+        :param output: Output path, stored unchanged as ``self.output``
+        :param opts: Conversion options; every attribute read below must
+                     be present on it
+        :param oeb: Book object, only ``oeb.metadata`` is consulted
+        '''
+        def first_as_text(items):
+            # Best effort: any failure (missing, empty or unconvertible
+            # value) results in an empty string
+            try:
+                return unicode(items[0])
+            except:
+                return ''
+
+        metadata = oeb.metadata
+        unknown = _('Unknown')
+
+        self.title = None
+        self.author = unknown
+        self.publisher = unknown
+        self.freetext = first_as_text(metadata.description)
+        self.category = first_as_text(metadata.tags)
+        self.title_sort = ''
+        self.author_sort = ''
+        self.cover = None
+        self.use_metadata_cover = True
+        self.output = output
+        self.ignore_tables = opts.linearize_tables
+        self.base_font_size = 0
+        self.blank_after_para = opts.insert_blank_line
+        self.use_spine = True
+        self.font_delta = 0
+        self.ignore_colors = False
+
+        from calibre.ebooks.lrf import PRS500_PROFILE
+        self.profile = PRS500_PROFILE
+
+        self.link_levels = sys.maxint
+        self.link_exclude = '@'
+        self.no_links_in_toc = True
+        self.disable_chapter_detection = True
+        # NOTE(review): the two values below look like deliberately
+        # unmatchable placeholders -- confirm against the HTML converter
+        self.chapter_regex = 'dsadcdswcdec'
+        self.chapter_attr = '$,,$'
+        self.override_css = ''
+        self._override_css = ''
+        self.page_break = 'h[12]'
+        self.force_page_break = '$'
+        self.force_page_break_attr = '$'
+        self.add_chapters_to_toc = False
+        self.baen = False
+        self.pdftohtml = False
+        self.book_designer = False
+        self.verbose = opts.verbose
+        self.encoding = 'utf-8'
+        self.lrs = False
+        self.minimize_memory_usage = False
+        self.autorotation = opts.enable_autorotation
+
+        # Lengths taken from opts are multiplied by profile.dpi/72
+        # (presumably converting pts to device pixels -- TODO confirm)
+        scale = self.profile.dpi/72.
+        self.header_separation = scale * opts.header_separation
+
+        for side in ('top', 'bottom', 'left', 'right'):
+            margin = getattr(opts, 'margin_' + side)
+            setattr(self, side + '_margin', scale * margin)
+
+        # These options are copied over under the same attribute name
+        passthrough = (
+            'wordspace', 'header', 'header_format',
+            'minimum_indent', 'serif_family',
+            'render_tables_as_images', 'sans_family', 'mono_family',
+            'text_size_multiplier_for_rendered_tables',
+        )
+        for name in passthrough:
+            setattr(self, name, getattr(opts, name))
+
+class LRFOutput(OutputFormatPlugin):
+    '''
+    Output format plugin that writes LRF files.
+
+    Image collections are rendered directly, one image per page. All other
+    input is first written out with the OEB output plugin and the resulting
+    OPF is then run through the HTML to LRF converter.
+    '''
+
+    name = 'LRF Output'
+    author = 'Kovid Goyal'
+    file_type = 'lrf'
+
+    options = set([
+        OptionRecommendation(name='enable_autorotation', recommended_value=False,
+            help=_('Enable autorotation of images that are wider than the screen width.')
+        ),
+        OptionRecommendation(name='wordspace',
+            recommended_value=2.5, level=OptionRecommendation.LOW,
+            help=_('Set the space between words in pts. Default is %default')
+        ),
+        OptionRecommendation(name='header', recommended_value=False,
+            help=_('Add a header to all the pages with title and author.')
+        ),
+        OptionRecommendation(name='header_format', recommended_value="%t by %a",
+            help=_('Set the format of the header. %a is replaced by the author '
+            'and %t by the title. Default is %default')
+        ),
+        OptionRecommendation(name='header_separation', recommended_value=0,
+            help=_('Add extra spacing below the header. Default is %default pt.')
+        ),
+        OptionRecommendation(name='minimum_indent', recommended_value=0,
+            help=_('Minimum paragraph indent (the indent of the first line '
+            'of a paragraph) in pts. Default: %default')
+        ),
+        OptionRecommendation(name='render_tables_as_images',
+            recommended_value=False,
+            help=_('Render tables in the HTML as images (useful if the '
+                'document has large or complex tables)')
+        ),
+        OptionRecommendation(name='text_size_multiplier_for_rendered_tables',
+            recommended_value=1.0,
+            help=_('Multiply the size of text in rendered tables by this '
+            'factor. Default is %default')
+        ),
+        OptionRecommendation(name='serif_family', recommended_value=None,
+            help=_('The serif family of fonts to embed')
+        ),
+        OptionRecommendation(name='sans_family', recommended_value=None,
+            help=_('The sans-serif family of fonts to embed')
+        ),
+        OptionRecommendation(name='mono_family', recommended_value=None,
+            help=_('The monospace family of fonts to embed')
+        ),
+
+    ])
+
+    recommendations = set([
+        ('dont_justify', True, OptionRecommendation.HIGH),
+        ])
+
+    def convert_images(self, pages, opts, wide):
+        '''
+        Create an LRF book containing one full screen image block per entry
+        of `pages` and write it to ``opts.output``.
+
+        :param pages: Iterable of values accepted by ``ImageStream``
+                      (presumably image file paths -- TODO confirm)
+        :param opts: Object providing ``title``, ``author`` and ``output``
+        :param wide: If true a 784x1012 page is used, otherwise 584x754
+        '''
+        from calibre.ebooks.lrf.pylrs.pylrs import Book, BookSetting, ImageStream, ImageBlock
+        from uuid import uuid4
+        from calibre.constants import __appname__, __version__
+
+        width, height = (784, 1012) if wide else (584, 754)
+
+        ps = {}
+        ps['topmargin']      = 0
+        ps['evensidemargin'] = 0
+        ps['oddsidemargin']  = 0
+        ps['textwidth']      = width
+        ps['textheight']     = height
+        book = Book(title=opts.title, author=opts.author,
+                bookid=uuid4().hex,
+                publisher='%s %s'%(__appname__, __version__),
+                category=_('Comic'), pagestyledefault=ps,
+                booksetting=BookSetting(screenwidth=width, screenheight=height))
+        for page in pages:
+            imageStream = ImageStream(page)
+            _page = book.create_page()
+            _page.append(ImageBlock(refstream=imageStream,
+                        blockwidth=width, blockheight=height, xsize=width,
+                        ysize=height, x1=width, y1=height))
+            book.append(_page)
+
+        # Close the output file deterministically (also when rendering
+        # raises) instead of leaving it to the garbage collector
+        with open(opts.output, 'wb') as lrf_file:
+            book.renderLrf(lrf_file)
+
+
+    def convert(self, oeb, output_path, input_plugin, opts, log):
+        '''
+        Write `oeb` to `output_path` as an LRF file.
+
+        :param oeb: The book to convert
+        :param output_path: Path of the LRF file to create
+        :param input_plugin: Plugin that produced `oeb`; when its
+                             ``is_image_collection`` is true, the images
+                             from ``get_images()`` are rendered directly
+        :param opts: Conversion options (an optional ``wide`` attribute
+                     selects the larger page size for image collections)
+        :param log: Logger passed on to the converters
+        '''
+        self.log, self.opts, self.oeb = log, opts, oeb
+
+        lrf_opts = LRFOptions(output_path, opts, oeb)
+
+        if input_plugin.is_image_collection:
+            self.convert_images(input_plugin.get_images(), lrf_opts,
+                    getattr(opts, 'wide', False))
+            return
+
+        from calibre.ptempfile import TemporaryDirectory
+        with TemporaryDirectory('_lrf_output') as tdir:
+            # Serialize the book as OEB into a scratch directory, then feed
+            # the first OPF found there to the HTML to LRF converter
+            from calibre.customize.ui import plugin_for_output_format
+            oeb_output = plugin_for_output_format('oeb')
+            oeb_output.convert(oeb, tdir, input_plugin, opts, log)
+            opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
+            from calibre.ebooks.lrf.html.convert_from import process_file
+            process_file(os.path.join(tdir, opf), lrf_opts, self.log)
+
--- a/src/calibre/ebooks/lrf/pdf/init.py
+++ b/src/calibre/ebooks/lrf/pdf/init.py
@ -1,2 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
--- a/src/calibre/ebooks/lrf/pdf/convert_from.py
+++ b/src/calibre/ebooks/lrf/pdf/convert_from.py
@ -1,131 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-''''''
-
-import sys, os, subprocess, logging
-import errno
-from functools import partial
-from calibre import isosx, setup_cli_handlers, filename_to_utf8, iswindows, islinux
-from calibre.ebooks import ConversionError, DRMError
-from calibre.ptempfile import PersistentTemporaryDirectory
-from calibre.ebooks.lrf import option_parser as lrf_option_parser
-from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
-from calibre.ebooks.metadata import MetaInformation
-from calibre.ebooks.metadata.opf import OPFCreator
-from calibre.ebooks.metadata.pdf import get_metadata
-
-PDFTOHTML = 'pdftohtml'
-popen = subprocess.Popen
-if isosx and hasattr(sys, 'frameworks_dir'):
-    PDFTOHTML = os.path.join(getattr(sys, 'frameworks_dir'), PDFTOHTML)
-if iswindows and hasattr(sys, 'frozen'):
-    PDFTOHTML = os.path.join(os.path.dirname(sys.executable), 'pdftohtml.exe')
-    popen = partial(subprocess.Popen, creationflags=0x08) # CREATE_NO_WINDOW=0x08 so that no ugly console is popped up
-if islinux and getattr(sys, 'frozen_path', False):
-    PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml')
-
-def generate_html(pathtopdf, tdir):
-    '''
-    Convert the pdf into html.
-    @return: Path to a temporary file containing the HTML.
-    '''
-    if isinstance(pathtopdf, unicode):
-        pathtopdf = pathtopdf.encode(sys.getfilesystemencoding())
-    if not os.access(pathtopdf, os.R_OK):
-        raise ConversionError, 'Cannot read from ' + pathtopdf
-    index = os.path.join(tdir, 'index.html')
-    # This is neccessary as pdftohtml doesn't always (linux) respect absolute paths
-    pathtopdf = os.path.abspath(pathtopdf)
-    cmd = (PDFTOHTML, '-enc', 'UTF-8',  '-noframes',  '-p',  '-nomerge',
-            '-nodrm', pathtopdf, os.path.basename(index))
-    cwd = os.getcwd()
-
-    try:
-        os.chdir(tdir)
-        try:
-            p = popen(cmd, stderr=subprocess.PIPE)
-        except OSError, err:
-            if err.errno == 2:
-                raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'), True)
-            else:
-                raise
-
-        '''
-        print p.stdout.read()
-        '''
-        while True:
-            try:
-                ret = p.wait()
-                break
-            except OSError, e:
-                if e.errno == errno.EINTR:
-                    continue
-                else:
-                    raise
-
-        if ret != 0:
-            err = p.stderr.read()
-            raise ConversionError, err
-        if not os.path.exists(index) or os.stat(index).st_size < 100:
-            raise DRMError()
-
-        raw = open(index, 'rb').read()
-        open(index, 'wb').write('<!-- created by calibre\'s pdftohtml -->\n'+raw)
-        if not '<br' in raw[:4000]:
-            raise ConversionError(os.path.basename(pathtopdf) + _(' is an image based PDF. Only conversion of text based PDFs is supported.'), True)
-        try:
-            mi = get_metadata(open(pathtopdf, 'rb'))
-        except:
-            mi = MetaInformation(None, None)
-        if not mi.title:
-            mi.title = os.path.splitext(os.path.basename(pathtopdf))[0]
-        if not mi.authors:
-            mi.authors = [_('Unknown')]
-        opf = OPFCreator(tdir, mi)
-        opf.create_manifest([('index.html', None)])
-        opf.create_spine(['index.html'])
-        opf.render(open('metadata.opf', 'wb'))
-    finally:
-        os.chdir(cwd)
-    return index
-
-def option_parser():
-    return lrf_option_parser(
-_('''%prog [options] mybook.pdf
-
-
-%prog converts mybook.pdf to mybook.lrf''')
-        )
-
-def process_file(path, options, logger=None):
-    if logger is None:
-        level = logging.DEBUG if options.verbose else logging.INFO
-        logger = logging.getLogger('pdf2lrf')
-        setup_cli_handlers(logger, level)
-    pdf = os.path.abspath(os.path.expanduser(path))
-    tdir = PersistentTemporaryDirectory('_pdf2lrf')
-    htmlfile = generate_html(pdf, tdir)
-    if not options.output:
-        ext = '.lrs' if options.lrs else '.lrf'
-        options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
-    else:
-        options.output = os.path.abspath(options.output)
-    options.pdftohtml = True
-    if not options.title:
-        options.title = filename_to_utf8(os.path.splitext(os.path.basename(options.output))[0])
-    html_process_file(htmlfile, options, logger)
-
-
-def main(args=sys.argv, logger=None):
-    parser = option_parser()
-    options, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        print
-        print 'No pdf file specified'
-        return 1
-    process_file(args[1], options, logger)
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/lrf/pdf/reflow.py
+++ b/src/calibre/ebooks/lrf/pdf/reflow.py
@ -1,426 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-'''
-Convert PDF to a reflowable format using pdftoxml.exe as the PDF parsing backend.
-'''
-
-import sys, os, re, tempfile, subprocess, atexit, shutil, logging, xml.parsers.expat
-from xml.etree.ElementTree import parse
-
-from calibre import isosx, setup_cli_handlers, __appname__
-from calibre.utils.config import OptionParser
-from calibre.ebooks import ConversionError
-
-PDFTOXML = 'pdftoxml.exe'
-if isosx and hasattr(sys, 'frameworks_dir'):
-    PDFTOXML = os.path.join(getattr(sys, 'frameworks_dir'), PDFTOXML)
-
-class StyleContainer(object):
-    
-    def set_style(self, iterator):
-        styles = set([])
-        for tok in iterator:
-            if hasattr(tok, 'style'):
-                styles.add(tok.style)
-        counts = [0*i for i in range(len(styles))]
-        for i in range(len(styles)):
-            counts[i] = sum([1 for j in self if j.style == styles[i]])
-        max = max(counts)
-        for i in range(len(counts)):
-            if counts[i] == max:
-                break
-        self.style = styles[i]
-        for obj in iterator:
-            if obj.style == self.style:
-                obj.style = None
-
-
-class Page(object):
-    
-    def __init__(self, attrs):
-        for a in ('number', 'width', 'height'):
-            setattr(self, a, float(attrs[a]))
-        self.id     = attrs['id']
-        self.current_line = None
-        self.lines = []
-        
-    def end_line(self):
-        if self.current_line is not None:
-            self.current_line.finalize()
-            self.lines.append(self.current_line)
-            self.current_line = None
-            
-    def finalize(self):
-        self.identify_groups()
-        self.look_for_page_break()
-    
-    def identify_groups(self):
-        groups = []
-        in_group = False
-        for i in range(len(self.lines)):
-            if not in_group:
-                groups.append(i)
-                in_group = True
-            else:
-                pl = self.lines[i-1]
-                cl = self.lines[i]
-                if cl.left != pl.left and cl.width != pl.width:
-                    groups.append(i)
-        self.groups = []
-        for i in range(len(groups)):
-            start = groups[i]
-            if i +1 == len(groups):
-                stop = len(self.lines)
-            else:
-                stop = groups[i+i]
-            self.groups.append(self.lines[start:stop])
-        
-        if len(self.groups) > 1:
-            self.group[0].test_header(self.width, self.height)
-            self.groups[-1].test_footer(self.width, self.height)
-            
-    def look_for_page_break(self):
-        max = 0
-        for g in self.groups:
-            if not g.is_footer and g.bottom > max:
-                max = g.bottom
-        self.page_break_after = max < 0.8*self.height
-        
-
-class Group(StyleContainer):
-    
-    def __init__(self, lines):
-        self.lines = lines
-        self.set_style(self.lines)
-        self.width = max([i.width for i in self.lines])
-        self.bottom = max([i.bottom for i in self.lines])
-        tot, ltot = 0, 0
-        for i in range(1, len(self.lines)):
-            bot = self.lines[i-1].bottom
-            top = self.lines[i].top
-            tot += abs(top - bot)
-            ltot += self.lines[i].left
-        self.average_line_spacing = tot/float(len(self.lines)-1)
-        ltot += self.lines[0].left
-        self.average_left_margin = ltot/float(len(self.lines))
-        self.left_margin = min([i.left for i in self.lines])
-        
-        self.detect_paragraphs()
-        
-        
-        
-    def detect_paragraphs(self):
-        if not self.lines:
-            return
-        indent_buffer = 5
-        self.lines[0].is_para_start = self.lines[0].left > self.average_left_margin+indent_buffer 
-        for i in range(1, len(self.lines)):
-            pl, l = self.lines[i-1:i+1]
-            c1 = pl.bottom - l.top > self.average_line_spacing
-            c2 = l.left > self.average_left_margin+indent_buffer
-            c3 = pl.width < 0.8 * self.width
-            l.is_para_start = c1 or c2 or c3
-            
-    def test_header(self, page_width, page_height):
-        self.is_header = len(self.lines) == 1 and self.lines[0].width < 0.5*page_width 
-        
-    def test_footer(self, page_width, page_height):
-        self.is_footer = len(self.lines) == 1 and self.lines[0].width < 0.5*page_width
-
-class Text(object):
-    
-    def __init__(self, attrs):
-        for a in ('x', 'y', 'width', 'height'):
-            setattr(self, a, float(attrs[a]))
-        self.id = attrs['id']
-        self.objects = []
-        
-    def add_token(self, tok):
-        if not self.objects:
-            self.objects.append(tok)
-        else:
-            ptok = self.objects[-1]
-            if tok == ptok:
-                ptok.text += ' ' + tok.text
-            else:
-                self.objects.append(tok)
-    
-    def add(self, object):
-        if isinstance(object, Token):
-            self.add_token(object)
-        else:
-            print 'WARNING: Unhandled object', object.__class__.__name__
-            
-    def to_xhtml(self):
-        res = []
-        for obj in self.objects:
-            if isinstance(obj, Token):
-                res.append(obj.to_xhtml())
-        return ' '.join(res)
-                
-
-class Line(list, StyleContainer):
-    
-    def calculate_geometry(self):
-        self.left   = self[0].x
-        self.width  = self[-1].x + self[-1].width - self.left
-        self.top    = min(o.y for o in self)
-        self.bottom = max(o.height+o.y for o in self)
-        
-    def finalize(self):
-        self.calculate_geometry()
-        self.set_style(self)
-        
-    def to_xhtml(self, group_id):
-        ans = '<span class="%s" '%group_id
-        if self.style is not None:
-            ans += 'style="%s"'%self.style.to_css(inline=True)
-        ans += '>%s</span>'
-        res = []
-        for object in self:
-            if isinstance(object, Text):
-                res.append(object.to_xhtml())
-                
-        return ans%(' '.join(res))
-                
-        
-class TextStyle(object):
-    
-    def __init__(self, tok):
-        self.bold   = tok.bold
-        self.italic = tok.italic
-        self.font_name = tok.font_name
-        self.font_size = tok.font_size
-        self.color     = tok.font_color
-        
-    def __eq__(self, other):
-        if isinstance(other, self.__class__):
-            for a in ('font_size', 'bold', 'italic', 'font_name', 'color'):
-                if getattr(self, a) != getattr(other, a):
-                    return False
-            return True
-        return False
-    
-    def to_css(self, inline=False):
-        fw  = 'bold' if self.bold else 'normal'
-        fs  = 'italic' if self.italic else 'normal'
-        fsz = '%dpt'%self.font_size
-        props = ['font-weight: %s;'%fw, 'font-style: %s;'%fs, 'font-size: %s;'%fsz,
-                 'color: rgb(%d, %d, %d);'%self.color]
-        joiner = ' '
-        if not inline:
-            joiner = '\n'
-            props = ['{'] + props + ['}']
-        return joiner.join(props) 
-
-class Token(object):
-    
-    def __init__(self, attrs):
-        for a in ('x', 'y', 'width', 'height', 'rotation', 'angle', 'font-size'):
-            setattr(self, a.replace('-', '_'), float(attrs[a]))
-        for a in ('bold', 'italic'):
-            setattr(self, a, attrs[a]=='yes')
-        self.font_name = attrs['font-name']
-        fc = re.compile(r'#([a-f0-9]{2})([a-f0-9]{2})([a-f0-9]{2})', re.IGNORECASE)
-        fc = fc.match(attrs['font-color'])
-        self.font_color = (int(fc.group(1), 16), int(fc.group(2), 16), int(fc.group(3), 16))
-        self.id = attrs['id']
-        self.text = u''
-        self.style = TextStyle(self)
-        
-    def handle_char_data(self, data):
-        self.text += data
-        
-    def __eq__(self, other):
-        if isinstance(other, self.__class__):
-            for a in ('rotation', 'angle', 'font_size', 'bold', 'italic', 'font_name', 'font_color'):
-                if getattr(self, a) != getattr(other, a):
-                    return False
-            return True
-        return False
-    
-    def to_xhtml(self):
-        if self.style is not None:
-            ans = u'<span style="%s">%s</span>'%(self.style.to_css(inline=True), self.text)
-        else:
-            ans = self.text
-        return ans
-
-class PDFDocument(object):
-    
-    SKIPPED_TAGS = ('DOCUMENT', 'METADATA', 'PDFFILENAME', 'PROCESS', 'VERSION',
-                    'COMMENT', 'CREATIONDATE')
-    
-    def __init__(self, filename):
-        parser = xml.parsers.expat.ParserCreate('UTF-8')
-        parser.buffer_text          = True
-        parser.returns_unicode      = True
-        parser.StartElementHandler  = self.start_element
-        parser.EndElementHandler    = self.end_element
-        
-        self.pages = []
-        self.current_page = None
-        self.current_token = None
-        
-        src = open(filename, 'rb').read()
-        self.parser = parser
-        parser.Parse(src)
-        
-        
-    def start_element(self, name, attrs):
-        if name == 'TOKEN':
-            self.current_token = Token(attrs)
-            self.parser.CharacterDataHandler = self.current_token.handle_char_data
-        elif name == 'TEXT':
-            text = Text(attrs)
-            if self.current_page.current_line is None:
-                self.current_page.current_line = Line()
-                self.current_page.current_line.append(text)
-            else:
-                y, height = self.current_page.current_line[0].y, self.current_page.current_line[0].height
-                if y == text.y or y+height == text.y + text.height:
-                    self.current_page.current_line.append(text)
-                else:
-                    self.current_page.end_line()
-                    self.current_page.current_line = Line()
-                    self.current_page.current_line.append(text)
-        elif name == 'PAGE':
-            self.current_page = Page(attrs)
-        elif name.lower() == 'xi:include':
-            print 'WARNING: Skipping vector image'
-        elif name in self.SKIPPED_TAGS:
-            pass
-        else:
-            print 'WARNING: Unhandled element', name
-        
-    def end_element(self, name):
-        if name == 'TOKEN':
-            if self.current_token.angle == 0 and self.current_token.rotation == 0:
-                self.current_page.current_line[-1].add(self.current_token)
-            self.current_token = None
-            self.parser.CharacterDataHandler = None
-        elif name == 'PAGE':
-            self.current_page.finalize()
-            self.pages.append(self.current_page)
-            self.current_page = None
-    
-    
-    def to_xhtml(self):
-        header = u'''\
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
-    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml"
-     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-     xsi:schemaLocation="http://www.w3.org/MarkUp/SCHEMA/xhtml11.xsd" >
-<head>
-<style type="text/css">
-%(style)s
-</style>
-</head>
-<body>
-%(body)s
-</body>
-</html>
-'''
-        res = []
-        para = []
-        styles = []
-        for page in self.pages:
-            res.append(u'<a name="%s" />'%page.id)
-            for group in page.groups:
-                if group.is_header or group.is_footer:
-                    continue
-                if group.style is not None:
-                    styles.append(u'.%s %s\n'%(group.id, group.style.to_css()))
-                for line in group.lines:
-                    if line.is_para_start:
-                        indent = group.left_margin - line.left
-                        if para:
-                            res.append(u'<p style="text-indent: %dpt">%s</p>'%(indent, ''.join(para)))
-                            para = []
-                    para.append(line.to_xhtml(group.id))
-            if page.page_break_after:
-                res.append(u'<br style="page-break-after:always" />')
-                if para:
-                    res.append(u'<p>%s</p>'%(''.join(para)))
-                    para = []
-                    
-        return (header%dict(style='\n'.join(styles), body='\n'.join(res))).encode('utf-8')
-
-class PDFConverter(object):
-
-    @classmethod
-    def generate_xml(cls, pathtopdf, logger):
-        pathtopdf = os.path.abspath(pathtopdf)
-        tdir = tempfile.mkdtemp('pdf2xml', __appname__)
-        atexit.register(shutil.rmtree, tdir)
-        xmlfile = os.path.basename(pathtopdf)+'.xml'
-        os.chdir(tdir)
-        cmd = PDFTOXML + ' -outline "%s" "%s"'%(pathtopdf, xmlfile)
-        p = subprocess.Popen(cmd, shell=True, stderr=subprocess.STDOUT, 
-                             stdout=subprocess.PIPE)
-        log = p.stdout.read()
-        ret = p.wait()
-        if ret != 0:
-            raise ConversionError, log
-        xmlfile = os.path.join(tdir, xmlfile)
-        if os.stat(xmlfile).st_size < 20:
-            raise ConversionError(os.path.basename(pathtopdf) + ' does not allow copying of text.')
-        return xmlfile
-
-    
-    def __init__(self, pathtopdf, logger, opts):
-        self.cwd    = os.getcwdu()
-        self.logger = logger
-        self.opts   = opts
-        try:
-            self.logger.info('Converting PDF to XML')
-            self.xmlfile   = self.generate_xml(pathtopdf, self.logger)
-            self.tdir      = os.path.dirname(self.xmlfile)
-            self.data_dir  = self.xmlfile + '_data'
-            outline_file = self.xmlfile.rpartition('.')[0]+'_outline.xml'
-            self.logger.info('Parsing XML')
-            self.document = PDFDocument(self.xmlfile)
-            self.outline  = parse(outline_file)
-        finally:
-            os.chdir(self.cwd)
-            
-    def convert(self, output_dir):
-        doc = self.document.to_xhtml()
-        open(os.path.join(output_dir, 'document.html'), 'wb').write(doc)
-        
-            
-            
-def option_parser():
-    parser = OptionParser(usage=\
-'''
-%prog [options] myfile.pdf
-
-Convert a PDF file to a HTML file.
-''')
-    parser.add_option('-o', '--output-dir', default='.', 
-                      help=_('Path to output directory in which to create the HTML file. Defaults to current directory.'))
-    parser.add_option('--verbose', default=False, action='store_true',
-                      help=_('Be more verbose.'))
-    return parser    
-
-def main(args=sys.argv, logger=None):
-    parser = option_parser()
-    options, args = parser.parse_args()
-    if logger is None:
-        level = logging.DEBUG if options.verbose else logging.INFO
-        logger = logging.getLogger('pdf2html')
-        setup_cli_handlers(logger, level)
-    if len(args) != 1:
-        parser.print_help()
-        print _('You must specify a single PDF file.')
-        return 1
-    options.output_dir = os.path.abspath(options.output_dir)
-    converter = PDFConverter(os.path.abspath(args[0]), logger, options)
-    converter.convert(options.output_dir)
-    
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/lrf/rtf/convert_from.py
+++ b/src/calibre/ebooks/lrf/rtf/convert_from.py
@ -1,190 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-import os, sys, shutil, logging, glob
-
-from lxml import etree
-
-from calibre.ebooks.lrf import option_parser as lrf_option_parser
-from calibre.ebooks.metadata.meta import get_metadata
-from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
-from calibre import setup_cli_handlers
-from calibre.libwand import convert, WandException
-from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
-from calibre.ebooks.lrf.rtf.xsl import xhtml
-from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
-from calibre.ptempfile import PersistentTemporaryDirectory
-from calibre.ebooks.metadata import MetaInformation
-from calibre.ebooks.metadata.opf import OPFCreator 
-
-def option_parser():
-    parser = lrf_option_parser(
-_('''%prog [options] mybook.rtf
-
-
-%prog converts mybook.rtf to mybook.lrf''')
-        )
-    parser.add_option('--keep-intermediate-files', action='store_true', default=False)
-    return parser
-
-def convert_images(html, logger):
-    wmfs = glob.glob('*.wmf') + glob.glob('*.WMF')
-    for wmf in wmfs:
-        target = os.path.join(os.path.dirname(wmf), os.path.splitext(os.path.basename(wmf))[0]+'.jpg')
-        try:
-            convert(wmf, target)
-            html = html.replace(os.path.basename(wmf), os.path.basename(target))
-        except WandException, err:
-            logger.warning(u'Unable to convert image %s with error: %s'%(wmf, unicode(err)))
-            continue
-    return html
-
-def process_file(path, options, logger=None):
-    if logger is None:
-        level = logging.DEBUG if options.verbose else logging.INFO
-        logger = logging.getLogger('rtf2lrf')
-        setup_cli_handlers(logger, level)
-    rtf = os.path.abspath(os.path.expanduser(path))
-    f = open(rtf, 'rb')
-    mi = get_metadata(f, 'rtf')
-    f.close()
-    tdir = PersistentTemporaryDirectory('_rtf2lrf')
-    html = generate_html(rtf, tdir)
-    cwd = os.getcwdu()
-    try:
-        if not options.output:
-            ext = '.lrs' if options.lrs else '.lrf'
-            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
-        options.output = os.path.abspath(os.path.expanduser(options.output))
-        if not mi.title:
-            mi.title = os.path.splitext(os.path.basename(rtf))[0]
-        if (not options.title or options.title == 'Unknown'):
-            options.title = mi.title
-        if (not options.author or options.author == 'Unknown') and mi.author:
-            options.author = mi.author
-        if (not options.category or options.category == 'Unknown') and mi.category:
-            options.category = mi.category
-        if (not options.freetext or options.freetext == 'Unknown') and mi.comments:
-            options.freetext = mi.comments
-        os.chdir(tdir)
-        html_process_file(html, options, logger)
-    finally:
-        os.chdir(cwd)
-        if hasattr(options, 'keep_intermediate_files') and options.keep_intermediate_files:
-            logger.debug('Intermediate files in '+ tdir)
-        else:
-            shutil.rmtree(tdir)
-
-def main(args=sys.argv, logger=None):
-    parser = option_parser()
-    options, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        print
-        print 'No rtf file specified'
-        return 1
-    process_file(args[1], options, logger)
-    return 0
-    
-
-def generate_xml(rtfpath, tdir):
-    from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
-    ofile = os.path.join(tdir, 'index.xml')
-    cwd = os.getcwdu()
-    os.chdir(tdir)
-    rtfpath = os.path.abspath(rtfpath)
-    try:
-        parser = ParseRtf(
-            in_file    = rtfpath,
-            out_file   = ofile,
-            # Convert symbol fonts to unicode equivelents. Default
-            # is 1
-            convert_symbol = 1,
-    
-            # Convert Zapf fonts to unicode equivelents. Default
-            # is 1.
-            convert_zapf = 1,
-    
-            # Convert Wingding fonts to unicode equivelents.
-            # Default is 1.
-            convert_wingdings = 1,
-    
-            # Convert RTF caps to real caps.
-            # Default is 1.
-            convert_caps = 1,
-    
-            # Indent resulting XML.
-            # Default is 0 (no indent).
-            indent = 1,
-    
-            # Form lists from RTF. Default is 1.
-            form_lists = 1,
-    
-            # Convert headings to sections. Default is 0.
-            headings_to_sections = 1,
-    
-            # Group paragraphs with the same style name. Default is 1.
-            group_styles = 1,
-    
-            # Group borders. Default is 1.
-            group_borders = 1,
-    
-            # Write or do not write paragraphs. Default is 0.
-            empty_paragraphs = 0,
-        )
-        parser.parse_rtf()
-    finally:
-        os.chdir(cwd)
-    return ofile
-
-
-def generate_html(rtfpath, tdir):
-    print 'Converting RTF to XML...'
-    rtfpath = os.path.abspath(rtfpath)
-    try:
-        xml = generate_xml(rtfpath, tdir)
-    except RtfInvalidCodeException:
-        raise Exception(_('This RTF file has a feature calibre does not support. Convert it to HTML and then convert it.'))
-    tdir = os.path.dirname(xml)
-    cwd = os.getcwdu()
-    os.chdir(tdir)
-    try:
-        print 'Parsing XML...'
-        parser = etree.XMLParser(recover=True, no_network=True)
-        try:
-            doc = etree.parse(xml, parser)
-        except:
-            raise
-            print 'Parsing failed. Trying to clean up XML...'
-            soup = BeautifulStoneSoup(open(xml, 'rb').read())
-            doc = etree.fromstring(str(soup))
-        print 'Converting XML to HTML...'
-        styledoc = etree.fromstring(xhtml)
-        
-        transform = etree.XSLT(styledoc)
-        result = transform(doc)
-        tdir = os.path.dirname(xml)
-        html = os.path.join(tdir, 'index.html')
-        f = open(html, 'wb')
-        res = transform.tostring(result)
-        res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
-        f.write(res)
-        f.close()
-        try:
-            mi = get_metadata(open(rtfpath, 'rb'), 'rtf')
-        except:
-            mi = MetaInformation(None, None)
-        if not mi.title:
-            mi.title = os.path.splitext(os.path.basename(rtfpath))[0]
-        if not mi.authors:
-            mi.authors = [_('Unknown')]
-        opf = OPFCreator(tdir, mi)
-        opf.create_manifest([('index.html', None)])
-        opf.create_spine(['index.html'])
-        opf.render(open('metadata.opf', 'wb'))
-    finally:
-        os.chdir(cwd)
-    return html
-            
-if __name__ == '__main__':
-    sys.exit(main())    
-        
--- a/src/calibre/ebooks/lrf/tags.py
+++ b/src/calibre/ebooks/lrf/tags.py
@ -207,32 +207,32 @@ class Tag(object):
        s += " at %08X, contents: %s" % (self.offset, repr(self.contents))
        return s
    
-    @apply
-    def byte():
+    @dynamic_property
+    def byte(self):
        def fget(self):
            if len(self.contents) != 1:
                raise LRFParseError("Bad parameter for tag ID: %04X" % self.id)
            return struct.unpack("<B", self.contents)[0]
        return property(fget=fget)
    
-    @apply
-    def word():
+    @dynamic_property
+    def word(self):
        def fget(self):
            if len(self.contents) != 2:
                raise LRFParseError("Bad parameter for tag ID: %04X" % self.id)
            return struct.unpack("<H", self.contents)[0]
        return property(fget=fget)
    
-    @apply
-    def sword():
+    @dynamic_property
+    def sword(self):
        def fget(self):
            if len(self.contents) != 2:
                raise LRFParseError("Bad parameter for tag ID: %04X" % self.id)
            return struct.unpack("<h", self.contents)[0]
        return property(fget=fget)
    
-    @apply
-    def dword():
+    @dynamic_property
+    def dword(self):
        def fget(self):
            if len(self.contents) != 4:
                raise LRFParseError("Bad parameter for tag ID: %04X" % self.id)
--- a/src/calibre/ebooks/lrf/txt/init.py
+++ b/src/calibre/ebooks/lrf/txt/init.py
@ -1,2 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
--- a/src/calibre/ebooks/lrf/txt/convert_from.py
+++ b/src/calibre/ebooks/lrf/txt/convert_from.py
@ -1,112 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-"""
-Convert .txt files to .lrf
-"""
-import os, sys, codecs, logging, re, shutil
-
-from calibre.ptempfile import PersistentTemporaryDirectory
-from calibre.ebooks.lrf import option_parser as lrf_option_parser
-from calibre.ebooks import ConversionError
-from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
-from calibre.ebooks.markdown import markdown
-from calibre import setup_cli_handlers
-from calibre.ebooks.metadata import MetaInformation
-from calibre.ebooks.metadata.opf import OPFCreator
-
-def option_parser():
-    parser = lrf_option_parser(
-_('''%prog [options] mybook.txt
-
-
-%prog converts mybook.txt to mybook.lrf'''))
-    parser.add_option('--debug-html-generation', action='store_true', default=False,
-                      dest='debug_html_generation', help=_('Print generated HTML to stdout and quit.'))
-    return parser
-    
-def fix_image_includes(sdir, tdir, match):
-    path = match.group(1).split('/')
-    src = os.path.join(sdir, *path)
-    dest = os.path.join(tdir, *path)
-    p = os.path.dirname(dest)
-    if not os.path.exists(p):
-        os.makedirs(p)
-    if not os.path.exists(dest):
-        shutil.copyfile(src, dest)
-    
-
-def generate_html(txtfile, encoding, tdir):
-    '''
-    Convert txtfile to html and return a PersistentTemporaryFile object pointing
-    to the file with the HTML.
-    '''
-    txtfile = os.path.abspath(txtfile)
-    enc = encoding
-    if not encoding:
-        encodings = ['cp1252', 'latin-1', 'utf8', 'iso-8859-1', 'koi8_r', 'koi8_u']
-        txt, enc = None, None
-        for encoding in encodings:
-            try:
-                txt = codecs.open(txtfile, 'rb', encoding).read()
-            except UnicodeDecodeError:
-                continue
-            enc = encoding
-            break
-        if txt == None:
-            raise ConversionError, 'Could not detect encoding of %s'%(txtfile,)
-    else:
-        txt = codecs.open(txtfile, 'rb', enc).read()
-    
-    print 'Converting text to HTML...'
-    md = markdown.Markdown(
-                       extensions=['footnotes', 'tables', 'toc'],
-                       safe_mode=False,
-                       )
-    html = '<html><body>'+md.convert(txt)+'</body></html>'
-    for match in re.finditer(r'<img\s+[^>]*src="([^"]+)"', html):
-        fix_image_includes(os.path.dirname(txtfile), tdir, match)
-    p = os.path.join(tdir, 'index.html')
-    open(p, 'wb').write(html.encode('utf-8'))
-    mi = MetaInformation(os.path.splitext(os.path.basename(txtfile))[0], [_('Unknown')])
-    opf = OPFCreator(tdir, mi)
-    opf.create_manifest([(os.path.join(tdir, 'index.html'), None)])
-    opf.create_spine([os.path.join(tdir, 'index.html')])
-    opf.render(open(os.path.join(tdir, 'metadata.opf'), 'wb'))
-    return p
-        
-def process_file(path, options, logger=None):
-    if logger is None:
-        level = logging.DEBUG if options.verbose else logging.INFO
-        logger = logging.getLogger('txt2lrf')
-        setup_cli_handlers(logger, level)
-    txt = os.path.abspath(os.path.expanduser(path))
-    if not hasattr(options, 'debug_html_generation'):
-        options.debug_html_generation = False
-    tdir = PersistentTemporaryDirectory('_txt2lrf')
-    htmlfile = generate_html(txt, options.encoding, tdir)
-    options.encoding = 'utf-8'
-    if not options.debug_html_generation:
-        options.force_page_break = 'h2'
-        if not options.output:
-            ext = '.lrs' if options.lrs else '.lrf'
-            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
-        options.output = os.path.abspath(os.path.expanduser(options.output))
-        if not options.title:
-            options.title = os.path.splitext(os.path.basename(path))[0]
-        html_process_file(htmlfile, options, logger)
-    else:
-        print open(htmlfile, 'rb').read()        
-
-def main(args=sys.argv, logger=None):
-    parser = option_parser()    
-    options, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        print
-        print 'No txt file specified'
-        return 1
-    process_file(args[1], options, logger)
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/lrf/txt/demo/demo.txt
+++ b/src/calibre/ebooks/lrf/txt/demo/demo.txt
@ -1,89 +0,0 @@
-Demonstration of `txt2lrf`
-==========================
-
-`txt2lrf` provides a convenient way to create LRF files with good formatting. 
-`txt2lrf` recognizes a simple markup language called *markdown*.
-
-The idea is to provide a lightweight markup that can be used to create
-TXT files that can be read by themselves or automatically converted to LRF.
-[{@name=toc}]()
-
-<br /><br />
-
-///Table of Contents///
-
-
-Text formatting
---------------
-**Bold** and *italic* text is easily specified.
-
-> Blockquotes are also very simple to specify.
-> This is a basic blockquote paragraph. I absolutely
-> love block quotes don't you?
-
-    This is a preformatted code block. No formatting rules are applied to text in this block and it is rendered in a monospaced font.
-
-  
-For details on the text formatting syntax visit 
-
-    http://daringfireball.net/projects/markdown/syntax
-___
-[Table of Contents](#toc)
-
-Lists
-----
-Both ordered and unordered lists are supported.
-  
-
-### Unordered lists
-
-+ What a
-+ *nice*
-+ list
-
-  
-
-### Ordered lists
-
-1. One
-2. Two
-3. Three
-
-**Note:** Nested lists are not supported
-
-___
-[Table of Contents](#toc)
-
-Tables
------
-
-Simple tables are easily generated
-
-|         |* Col 1  *|* Col 2 *|
-|* Row 1 *| (1, 1)   | (1, 2)   |
-|* Row 2 *| (2, 1)   | (2, 2)   |
-
-**Note:** Nested tables are not supported
-
-___
-[Table of Contents](#toc)
-
-Images
------
-
-`txt2lrf` also has support for inline images like 
-![this one](small.jpg) this one. 
-
-___
-[Table of Contents](#toc)
-
-Automatic TOC Creation
----------------------
-
-By inserting `///Table of Contents///` into the text at some point 
-a table of contents is automatically generated with links that point 
-to all headings underlined with `-------`.
-
-___
-[Table of Contents](#toc)
-
--- a/src/calibre/ebooks/lrf/txt/demo/small.jpg
+++ b/src/calibre/ebooks/lrf/txt/demo/small.jpg
--- a/src/calibre/ebooks/lrf/web/init.py
+++ b/src/calibre/ebooks/lrf/web/init.py
@ -1,6 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-
-
-builtin_profiles   = []
-available_profiles = [i.__module__.rpartition('.')[2] for i in builtin_profiles]
--- a/src/calibre/ebooks/lrf/web/convert_from.py
+++ b/src/calibre/ebooks/lrf/web/convert_from.py
@ -1,183 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-'''Convert websites into LRF files.'''
-
-import sys, tempfile, shutil, os, logging, imp, inspect, re
-from urlparse import urlsplit
-
-from calibre import __appname__, setup_cli_handlers, CommandLineError, strftime
-from calibre.ebooks.lrf import option_parser as lrf_option_parser
-from calibre.ebooks.lrf.html.convert_from import process_file
-
-from calibre.web.fetch.simple import create_fetcher
-
-from calibre.ebooks.lrf.web.profiles import DefaultProfile, FullContentProfile, create_class
-from calibre.ebooks.lrf.web import builtin_profiles, available_profiles
- 
-
-def option_parser():
-    parser = lrf_option_parser(usage='''%prog [options] website_profile\n\n'''
-                          '''%prog downloads a site from the web and converts it '''
-                          '''into a LRF file for use with the SONY Reader. '''
-                          '''website_profile is one of '''+str(available_profiles)+\
-                          ''' If you specify a website_profile of default or do not specify '''
-                          '''it, you must specify the --url option.'''
-                          )
-    
-    parser.add_option('-u', '--url', dest='url', default=None,  
-                      help='The URL to download. You only need to specify this if you are not specifying a website_profile.')
-    parser.add_option('--user-profile', default=None,
-                      help='Path to a python file containing a user created profile. For help visit http://%s.kovidgoyal.net/wiki/UserProfiles'%__appname__)
-    parser.add_option('--username', dest='username', default=None, 
-                      help='Specify the username to be used while downloading. Only used if the profile supports it.')
-    parser.add_option('--password', dest='password', default=None,
-                      help='Specify the password to be used while downloading. Only used if the profile supports it.')
-    parser.add_option('--timeout', help='Timeout in seconds to wait for a response from the server. Default: %d s'%DefaultProfile.timeout,
-                      default=None, type='int', dest='timeout')
-    parser.add_option('-r', '--max-recursions', help='Maximum number of levels to recurse i.e. depth of links to follow. Default %d'%DefaultProfile.timeout,
-                      default=None, type='int', dest='max_recursions')
-    parser.add_option('-n', '--max-files', default=None, type='int', dest='max_files',
-                      help='The maximum number of files to download. This only applies to files from <a href> tags. Default is %d'%DefaultProfile.timeout)
-    parser.add_option('--delay', default=None, dest='delay', type='int',
-                      help='Minimum interval in seconds between consecutive fetches. Default is %d s'%DefaultProfile.timeout)
-    parser.add_option('--dont-download-stylesheets', action='store_true', default=None,
-                      help='Do not download CSS stylesheets.', dest='no_stylesheets')
-    parser.add_option('--match-regexp', dest='match_regexps', default=[], action='append',
-                      help='Only links that match this regular expression will be followed. This option can be specified multiple times, in which case as long as a link matches any one regexp, it will be followed. By default all links are followed.')
-    parser.add_option('--filter-regexp', default=[], action='append', dest='filter_regexps',
-                      help='Any link that matches this regular expression will be ignored. This option can be specified multiple times, in which case as long as any regexp matches a link, it will be ignored.By default, no links are ignored. If both --filter-regexp and --match-regexp are specified, then --filter-regexp is applied first.')
-    parser.add_option('--keep-downloaded-files', default=False, action='store_true',
-                      help='''Do not delete the downloaded files after creating the LRF''')
-    return parser
-    
-def fetch_website(options, logger):
-    tdir = tempfile.mkdtemp(prefix=__appname__+'_', suffix='_web2lrf')
-    options.dir = tdir
-    fetcher = create_fetcher(options, logger)
-    fetcher.preprocess_regexps = options.preprocess_regexps
-    return fetcher.start_fetch(options.url), tdir
-    
-def create_lrf(htmlfile, options, logger):
-    if not options.author or options.author.lower() == 'unknown':
-        options.author = __appname__
-    options.header = True
-    if options.output:
-        options.output = os.path.abspath(os.path.expanduser(options.output))
-    else:
-        options.output = os.path.abspath(os.path.expanduser(options.title + ('.lrs' if options.lrs else '.lrf')))
-        
-    process_file(htmlfile, options, logger)
-
-def process_profile(args, options, logger=None):
-    tdir = None
-    try:
-        if logger is None:
-            level = logging.DEBUG if options.verbose else logging.INFO
-            logger = logging.getLogger('web2lrf')
-            setup_cli_handlers(logger, level)
-        index = -1
-        
-        if len(args) == 2 and re.search(r'class\s+\S+\(\S+\)\s*\:', args[1]):
-            profile = create_class(args[1])
-        else:        
-            if options.user_profile is not None:
-                path = os.path.abspath(options.user_profile)
-                name = os.path.splitext(os.path.basename(path))[0]
-                res = imp.find_module(name, [os.path.dirname(path)])
-                module =  imp.load_module(name, *res)
-                classes = inspect.getmembers(module, 
-                    lambda x : inspect.isclass(x) and issubclass(x, DefaultProfile)\
-                               and x is not DefaultProfile and x is not FullContentProfile)
-                if not classes:
-                    raise CommandLineError('Invalid user profile '+path)
-                builtin_profiles.append(classes[0][1])
-                available_profiles.append(name)
-                if len(args) < 2:
-                    args.append(name)
-                args[1] = name
-            index = -1
-            if len(args) == 2:
-                try:
-                    if isinstance(args[1], basestring):
-                        if args[1] != 'default':
-                            index = available_profiles.index(args[1])
-                except ValueError:
-                    raise CommandLineError('Unknown profile: %s\nValid profiles: %s'%(args[1], available_profiles))
-            else:
-                raise CommandLineError('Only one profile at a time is allowed.')
-            profile = DefaultProfile if index == -1 else builtin_profiles[index]
-        
-        
-        
-        profile = profile(logger, options.verbose, options.username, options.password)
-        if profile.browser is not None:
-            options.browser = profile.browser
-        
-        for opt in ('url', 'timeout', 'max_recursions', 'max_files', 'delay', 'no_stylesheets'):
-            val = getattr(options, opt)
-            if val is None:
-                setattr(options, opt, getattr(profile, opt))
-        
-        if not options.url:
-            options.url = profile.url            
-        
-        if not options.url:
-            raise CommandLineError('You must specify the --url option or a profile from one of: %s'%(available_profiles,))
-        
-        if not options.title:
-            title = profile.title
-            if not title:
-                title = urlsplit(options.url).netloc
-            options.title = title + strftime(profile.timefmt)
-        
-        options.match_regexps += profile.match_regexps
-        options.preprocess_regexps = profile.preprocess_regexps
-        options.filter_regexps += profile.filter_regexps
-        
-        options.encoding = profile.encoding if options.encoding is None else options.encoding 
-        
-        if len(args) == 2 and args[1] != 'default':
-            options.anchor_ids = False
-        
-        htmlfile, tdir = fetch_website(options, logger)
-        options.encoding = 'utf-8'
-        cwd = os.getcwd()
-        if not options.output:
-            title = options.title.encode(sys.getfilesystemencoding()) if isinstance(options.title, unicode) else options.title
-            options.output = os.path.join(cwd, options.title+('.lrs' if options.lrs else '.lrf'))
-        if not os.path.isabs(options.output):
-            options.output = os.path.join(cwd, options.output)
-        
-        option_parser().parse_args(profile.html2lrf_options, options)
-            
-        try:
-            os.chdir(os.path.dirname(htmlfile))
-            create_lrf(os.path.basename(htmlfile), options, logger)
-        finally:
-            os.chdir(cwd)
-    finally:
-        try:
-            profile.cleanup()
-        except:
-            pass
-        if tdir and os.path.isdir(tdir):
-            if options.keep_downloaded_files:
-                print 'Downloaded files in ', tdir
-            else:
-                shutil.rmtree(tdir)
-    
-
-def main(args=sys.argv, logger=None):
-    parser = option_parser()
-    options, args = parser.parse_args(args)
-    if len(args) > 2 or (len(args) == 1 and not options.user_profile):
-        parser.print_help()
-        return 1
-    try:
-        process_profile(args, options, logger=logger)
-    except CommandLineError, err:
-        print >>sys.stderr, err         
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/lrf/web/profiles/init.py
+++ b/src/calibre/ebooks/lrf/web/profiles/init.py
@ -1,572 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-'''
-Contains the Base Profiles that can be used to easily create profiles to download
-particular websites.  
-'''
-
-import tempfile, time, calendar, re, operator, atexit, shutil, os
-from htmlentitydefs import name2codepoint
-from email.utils import formatdate
-
-from calibre import __appname__, iswindows, browser, strftime
-from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, CData, Tag
-
-
-class DefaultProfile(object):
-    
-    #: The title to use for the LRF file
-    #: @type: string    
-    title                 = 'Default Profile'    
-    
-    #: Maximum number of articles to download from each feed
-    #: @type: integer
-    max_articles_per_feed = 10     
-    
-    #: If True process the <description> element of the feed as HTML
-    #: @type: boolean
-    html_description      = True  
-    
-    #: How many days old should the oldest article downloaded from the feeds be
-    #: @type: integer
-    oldest_article        = 7
-    
-    #: Recommend frequency at which to download this profile. In days.
-    recommended_frequency = 7
-    
-    #: Number of levels of links to follow
-    #: @type: integer
-    max_recursions        = 1
-    
-    #: Maximum number of files to download
-    #: @type: integer
-    max_files             = 3000
-    
-    #: Delay between consecutive downloads in seconds
-    #: @type: integer
-    delay                 = 0
-    
-    #: Timeout for fetching files from server in seconds
-    #: @type: integer
-    timeout               = 10
-    
-    #: The format string for the date shown on the first page
-    #: @type: string
-    timefmt               = ' [%a %d %b %Y]'
-    
-    #: The order of elements to search for a URL when parsing the RSS feed. You 
-    #: can replace these elements by completely arbitrary elements to customize
-    #: feed processing. 
-    #: @type: list of strings
-    url_search_order      = ['guid', 'link']
-    
-    #: The format string used to parse the publication date in the RSS feed. 
-    #: If set to None some default heuristics are used, these may fail, 
-    #: in which case set this to the correct string or re-implement 
-    #: L{DefaultProfile.strptime} in your subclass.
-    #: @type: string or None 
-    pubdate_fmt           = None  
-    
-    #: If True will look for a publication date for each article. 
-    #: If False assumes the publication date is the current time.
-    #: @type: boolean
-    use_pubdate           = True, 
-    
-    #: Max number of characters in the short description.
-    #: Used by L{FullContentProfile}
-    #: @type: integer
-    summary_length        = 500
-    
-    #: If True stylesheets are not downloaded and processed
-    #: Convenient flag to disable loading of stylesheets for websites
-    #: that have overly complex stylesheets unsuitable for conversion
-    #: to ebooks formats
-    #: @type: boolean
-    no_stylesheets        = False
-    
-    #: If False articles with the same title in the same feed 
-    #: are not downloaded multiple times
-    #: @type: boolean 
-    allow_duplicates      = False 
-    
-    #: If True the GUI will ask the user for a username and password 
-    #: to use while downloading
-    #: @type: boolean
-    needs_subscription    = False
-    
-    #: Specify an override encoding for sites that have an incorrect
-    #: charset specification. THe most common being specifying latin1 and
-    #: using cp1252 
-    encoding = None
-    
-    #: List of regular expressions that determines which links to follow
-    #: If empty, it is ignored.
-    #: Only one of L{match_regexps} or L{filter_regexps} should be defined
-    #: @type: list of strings
-    match_regexps         = []
-    
-    #: List of regular expressions that determines which links to ignore
-    #: If empty it is ignored
-    #: Only one of L{match_regexps} or L{filter_regexps} should be defined
-    #: @type: list of strings
-    filter_regexps        = []    
-    
-    #: List of options to pass to html2lrf, to customize conversion
-    #: to LRF
-    #: @type: list of strings
-    html2lrf_options   = []
-        
-    #: List of regexp substitution rules to run on the downloaded HTML. Each element of the 
-    #: list should be a two element tuple. The first element of the tuple should
-    #: be a compiled regular expression and the second a callable that takes
-    #: a single match object and returns a string to replace the match.
-    #: @type: list of tuples
-    preprocess_regexps = []
-    
-    # See the built-in profiles for examples of these settings.
-    
-    #: The URL of the website
-    #: @type: string
-    url                   = ''
-    
-    feeds = []
-    CDATA_PAT = re.compile(r'<\!\[CDATA\[(.*?)\]\]>', re.DOTALL)
-
-    def get_feeds(self):
-        '''
-        Return a list of RSS feeds to fetch for this profile. Each element of the list
-        must be a 2-element tuple of the form (title, url).
-        '''
-        if not self.feeds:
-            raise NotImplementedError
-        return self.feeds
-    
-    @classmethod
-    def print_version(cls, url):
-        '''
-        Take a URL pointing to an article and returns the URL pointing to the
-        print version of the article.
-        '''
-        return url
-    
-    @classmethod
-    def get_browser(cls):
-        '''
-        Return a browser instance used to fetch documents from the web.
-        
-        If your profile requires that you login first, override this method
-        in your subclass. See for example the nytimes profile.
-        '''
-        return browser()
-    
-    
-    
-    
-    def __init__(self, logger, verbose=False, username=None, password=None, lrf=True):
-        self.logger = logger
-        self.username = username
-        self.password = password
-        self.verbose  = verbose
-        self.lrf = lrf
-        self.temp_dir = tempfile.mkdtemp(prefix=__appname__+'_')
-        self.browser = self.get_browser()
-        try:
-            self.url = 'file:'+ ('' if iswindows else '//') + self.build_index()
-        except NotImplementedError:
-            self.url = None
-        atexit.register(cleanup, self.temp_dir)
-    
-    def build_index(self):
-        '''Build an RSS based index.html'''
-        articles = self.parse_feeds()
-        encoding = 'utf-8' if self.encoding is None else self.encoding 
-        def build_sub_index(title, items):
-            ilist = ''
-            li = u'<li><a href="%(url)s">%(title)s</a> <span style="font-size: x-small">[%(date)s]</span><br/>\n'+\
-                u'<div style="font-size:small; font-family:sans">%(description)s<br /></div></li>\n'
-            for item in items:
-                if not item.has_key('date'):
-                    item['date'] = time.strftime('%a, %d %b', time.localtime())
-                ilist += li%item
-            return u'''\
-            <html>
-            <body>
-            <h2>%(title)s</h2>
-            <ul>
-            %(items)s
-            </ul>
-            </body>
-            </html>
-            '''%dict(title=title, items=ilist.rstrip())        
-        
-        cnum = 0
-        clist = ''
-        categories = articles.keys()
-        categories.sort()
-        for category in categories:
-            cnum  += 1
-            cfile = os.path.join(self.temp_dir, 'category'+str(cnum)+'.html')
-            prefix = 'file:' if iswindows else ''
-            clist += u'<li><a href="%s">%s</a></li>\n'%(prefix+cfile, category)
-            src = build_sub_index(category, articles[category])
-            open(cfile, 'wb').write(src.encode(encoding))
-                        
-        title = self.title
-        if not isinstance(title, unicode):
-            title = unicode(title, 'utf-8', 'replace')
-        src = u'''\
-        <html>
-        <body>
-        <h1>%(title)s</h1>
-        <div style='text-align: right; font-weight: bold'>%(date)s</div>
-        <ul>
-        %(categories)s
-        </ul>
-        </body>
-        </html>
-        '''%dict(date=strftime('%a, %d %B, %Y'), 
-                 categories=clist, title=title)
-        index = os.path.join(self.temp_dir, 'index.html')
-        open(index, 'wb').write(src.encode(encoding))
-        
-        return index
-
-    
-    @classmethod
-    def tag_to_string(cls, tag, use_alt=True):
-        '''
-        Convenience method to take a BeautifulSoup Tag and extract the text from it
-        recursively, including any CDATA sections and alt tag attributes.
-        @param use_alt: If True try to use the alt attribute for tags that don't have any textual content
-        @type use_alt: boolean
-        @return: A unicode (possibly empty) object
-        @rtype: unicode string
-        '''
-        if not tag:
-            return ''
-        if isinstance(tag, basestring):
-            return tag
-        strings = []
-        for item in tag.contents:
-            if isinstance(item, (NavigableString, CData)):
-                strings.append(item.string)
-            elif isinstance(item, Tag):
-                res = cls.tag_to_string(item)
-                if res:
-                    strings.append(res)
-                elif use_alt and item.has_key('alt'):
-                    strings.append(item['alt'])
-        return u''.join(strings) 
-    
-    def get_article_url(self, item):
-        '''
-        Return the article URL given an item Tag from a feed, or None if no valid URL is found
-        @type item: BeatifulSoup.Tag
-        @param item: A BeautifulSoup Tag instance corresponding to the <item> tag from a feed.
-        @rtype: string or None
-        '''
-        url = None
-        for element in self.url_search_order:
-            url = item.find(element.lower())
-            if url:
-                break
-        return url
-        
-    
-    def parse_feeds(self, require_url=True):
-        '''
-        Create list of articles from a list of feeds.
-        @param require_url: If True skip articles that don't have a link to a HTML page with the full article contents.
-        @type require_url: boolean
-        @rtype: dictionary
-        @return: A dictionary whose keys are feed titles and whose values are each
-        a list of dictionaries. Each list contains dictionaries of the form::
-            {
-            'title'       : article title,
-            'url'         : URL of print version,
-            'date'        : The publication date of the article as a string,
-            'description' : A summary of the article
-            'content'     : The full article (can be an empty string). This is used by FullContentProfile
-            }
-        '''
-        added_articles = {}
-        feeds = self.get_feeds()
-        articles = {}
-        for title, url in feeds:
-            try:
-                src = self.browser.open(url).read()
-            except Exception, err:
-                self.logger.error('Could not fetch feed: %s\nError: %s'%(url, err))
-                if self.verbose:
-                    self.logger.exception(' ')
-                continue
-            
-            articles[title] = []
-            added_articles[title] = []
-            soup = BeautifulStoneSoup(src)
-            for item in soup.findAll('item'):
-                try:
-                    atitle = item.find('title')
-                    if not atitle:
-                        continue
-                    
-                    atitle = self.tag_to_string(atitle)
-                    if self.use_pubdate:
-                        pubdate = item.find('pubdate')
-                        if not pubdate:
-                            pubdate = item.find('dc:date')
-                        if not pubdate or not pubdate.string:
-                            pubdate = formatdate()
-                        pubdate = self.tag_to_string(pubdate)
-                        pubdate = pubdate.replace('+0000', 'GMT')
-                    
-                    
-                    url = self.get_article_url(item)
-                    url = self.tag_to_string(url)
-                    if require_url and not url:
-                        self.logger.debug('Skipping article %s as it does not have a link url'%atitle)
-                        continue
-                    purl = url
-                    try:
-                        purl = self.print_version(url)
-                    except Exception, err:
-                        self.logger.debug('Skipping %s as could not find URL for print version. Error:\n%s'%(url, err))
-                        continue
-                    
-                    content = item.find('content:encoded')
-                    if not content:
-                        content = item.find('description')
-                    if content:
-                        content = self.process_html_description(content, strip_links=False)
-                    else:
-                        content = ''
-                        
-                    d = { 
-                        'title'    : atitle,                 
-                        'url'      : purl,
-                        'timestamp': self.strptime(pubdate) if self.use_pubdate else time.time(),
-                        'date'     : pubdate if self.use_pubdate else formatdate(),
-                        'content'  : content,
-                        }
-                    delta = time.time() - d['timestamp']
-                    if not self.allow_duplicates:
-                        if d['title'] in added_articles[title]:
-                            continue
-                        added_articles[title].append(d['title'])
-                    if delta > self.oldest_article*3600*24:
-                        continue
-                    
-                except Exception, err:
-                    if self.verbose:
-                        self.logger.exception('Error parsing article:\n%s'%(item,))
-                    continue
-                try:
-                    desc = ''
-                    for c in item.findAll('description'):
-                        desc = self.tag_to_string(c)
-                        if desc:
-                            break
-                    d['description'] = self.process_html_description(desc) if  self.html_description else desc.string                    
-                except:
-                    d['description'] = ''
-                articles[title].append(d)
-            articles[title].sort(key=operator.itemgetter('timestamp'), reverse=True)
-            articles[title] = articles[title][:self.max_articles_per_feed+1]
-            #for item in articles[title]:
-            #    item.pop('timestamp')
-            if not articles[title]:
-                articles.pop(title)
-        return articles
-
-    
-    def cleanup(self):
-        '''
-        Called after LRF file has been generated. Use it to do any cleanup like 
-        logging out of subscription sites, etc.
-        '''
-        pass
-    
-    @classmethod
-    def process_html_description(cls, tag, strip_links=True):
-        '''
-        Process a <description> tag that contains HTML markup, either 
-        entity encoded or escaped in a CDATA section. 
-        @return: HTML
-        @rtype: string
-        '''
-        src = '\n'.join(tag.contents) if hasattr(tag, 'contents') else tag
-        match = cls.CDATA_PAT.match(src.lstrip())
-        if match:
-            src = match.group(1)
-        else:
-            replaced_entities = [ 'amp', 'lt', 'gt' , 'ldquo', 'rdquo', 'lsquo', 'rsquo' ]
-            for e in replaced_entities:
-                ent = '&'+e+';'
-                src = src.replace(ent, unichr(name2codepoint[e]))
-        if strip_links:
-            src = re.compile(r'<a.*?>(.*?)</a>', re.IGNORECASE|re.DOTALL).sub(r'\1', src)
-        
-        return src 
-
-    
-    DAY_MAP        = dict(Sun=0, Mon=1, Tue=2, Wed=3, Thu=4, Fri=5, Sat=6)
-    FULL_DAY_MAP   = dict(Sunday=0, Monday=1, Tueday=2, Wednesday=3, Thursday=4, Friday=5, Saturday=6) 
-    MONTH_MAP      = dict(Jan=1, Feb=2, Mar=3, Apr=4, May=5, Jun=6, Jul=7, Aug=8, Sep=9, Oct=10, Nov=11, Dec=12)
-    FULL_MONTH_MAP = dict(January=1, February=2, March=3, April=4, May=5, June=6, 
-                      July=7, August=8, September=9, October=10, 
-                      November=11, December=12)
-        
-    @classmethod
-    def strptime(cls, src):
-        ''' 
-        Take a string and return the date that string represents, in UTC as
-        an epoch (i.e. number of seconds since Jan 1, 1970). This function uses
-        a bunch of heuristics and is a prime candidate for being overridden in a 
-        subclass.
-        @param src: Timestamp as a string
-        @type src: string
-        @return: time ans a epoch
-        @rtype: number 
-        '''        
-        delta = 0
-        zone = re.search(r'\s*(\+\d\d\:{0,1}\d\d)', src)
-        if zone:
-            delta = zone.group(1)
-            hrs, mins = int(delta[1:3]), int(delta[-2:].rstrip())
-            delta = 60*(hrs*60 + mins) * (-1 if delta.startswith('-') else 1)
-            src = src.replace(zone.group(), '')
-        if cls.pubdate_fmt is None:
-            src = src.strip().split()
-            try:
-                src[0] = str(cls.DAY_MAP[src[0][:-1]])+','
-            except KeyError:
-                src[0] = str(cls.FULL_DAY_MAP[src[0][:-1]])+','
-            try:
-                src[2] = str(cls.MONTH_MAP[src[2]])
-            except KeyError:
-                src[2] = str(cls.FULL_MONTH_MAP[src[2]])
-            fmt = '%w, %d %m %Y %H:%M:%S'
-            src = src[:5] # Discard extra information
-            try:
-                time_t = time.strptime(' '.join(src), fmt)
-            except ValueError:
-                time_t = time.strptime(' '.join(src), fmt.replace('%Y', '%y'))
-            return calendar.timegm(time_t)-delta
-        else:
-            return calendar.timegm(time.strptime(src, cls.pubdate_fmt))
-    
-    def command_line_options(self):
-        args = []
-        args.append('--max-recursions='+str(self.max_recursions))
-        args.append('--delay='+str(self.delay))
-        args.append('--max-files='+str(self.max_files))
-        for i in self.match_regexps:
-            args.append('--match-regexp="'+i+'"')
-        for i in self.filter_regexps:
-            args.append('--filter-regexp="'+i+'"')
-        return args
-        
-    
-class FullContentProfile(DefaultProfile):
-    '''
-    This profile is designed for feeds that embed the full article content in the RSS file.
-    '''
-    
-    max_recursions = 0
-    article_counter = 0
-    
-    
-    def build_index(self):
-        '''Build an RSS based index.html. '''
-        articles = self.parse_feeds(require_url=False)
-        
-        def build_sub_index(title, items):
-            ilist = ''
-            li = u'<li><a href="%(url)s">%(title)s</a> <span style="font-size: x-small">[%(date)s]</span><br/>\n'+\
-                u'<div style="font-size:small; font-family:sans">%(description)s<br /></div></li>\n'
-            for item in items:
-                content = item['content']
-                if not content:
-                    self.logger.debug('Skipping article as it has no content:%s'%item['title'])
-                    continue
-                item['description'] = cutoff(item['description'], self.summary_length)+'&hellip;'
-                self.article_counter = self.article_counter + 1
-                url = os.path.join(self.temp_dir, 'article%d.html'%self.article_counter)
-                item['url'] = url
-                open(url, 'wb').write((u'''\
-                    <html>
-                    <body>
-                    <h2>%s</h2>
-                    <div>
-                    %s
-                    </div>
-                    </body>
-                    </html>'''%(item['title'], content)).encode('utf-8')
-                    )
-                ilist += li%item
-            return u'''\
-            <html>
-            <body>
-            <h2>%(title)s</h2>
-            <ul>
-            %(items)s
-            </ul>
-            </body>
-            </html>
-            '''%dict(title=title, items=ilist.rstrip())        
-        
-        cnum = 0
-        clist = ''
-        categories = articles.keys()
-        categories.sort()
-        for category in categories:
-            cnum  += 1
-            cfile = os.path.join(self.temp_dir, 'category'+str(cnum)+'.html')
-            prefix = 'file:' if iswindows else ''
-            clist += u'<li><a href="%s">%s</a></li>\n'%(prefix+cfile, category)
-            src = build_sub_index(category, articles[category])
-            open(cfile, 'wb').write(src.encode('utf-8'))        
-        
-        src = '''\
-        <html>
-        <body>
-        <h1>%(title)s</h1>
-        <div style='text-align: right; font-weight: bold'>%(date)s</div>
-        <ul>
-        %(categories)s
-        </ul>
-        </body>
-        </html>
-        '''%dict(date=time.strftime('%a, %d %B, %Y', time.localtime()), 
-                 categories=clist, title=self.title)
-        index = os.path.join(self.temp_dir, 'index.html')
-        open(index, 'wb').write(src.encode('utf-8'))
-        return index
-
-def cutoff(src, pos, fuzz=50):
-    si = src.find(';', pos)
-    if si > 0 and si-pos > fuzz:
-        si = -1
-    gi = src.find('>', pos)
-    if gi > 0 and gi-pos > fuzz:
-        gi = -1
-    npos = max(si, gi)
-    if npos < 0:
-        npos = pos
-    return src[:npos+1]
-
-def create_class(src):
-    environment = {'FullContentProfile':FullContentProfile, 'DefaultProfile':DefaultProfile}
-    exec src in environment
-    for item in environment.values():
-        if hasattr(item, 'build_index'):
-            if item.__name__ not in ['DefaultProfile', 'FullContentProfile']:
-                return item
-   
-def cleanup(tdir):
-    try:
-        if os.path.isdir(tdir):
-            shutil.rmtree(tdir)
-    except:
-        pass
-    
--- a/src/calibre/ebooks/lrf/web/profiles/ap.py
+++ b/src/calibre/ebooks/lrf/web/profiles/ap.py
@ -1,38 +0,0 @@
-import re
-from calibre.ebooks.lrf.web.profiles import DefaultProfile
-
-
-class AssociatedPress(DefaultProfile):
-
-    title = 'Associated Press'
-    max_recursions = 2
-    max_articles_per_feed = 15
-    html2lrf_options = ['--force-page-break-before-tag="chapter"']
-
-    
-    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in 
-[
-        (r'<HEAD>.*?</HEAD>' , lambda match : '<HEAD></HEAD>'),
-        (r'<body class="apple-rss-no-unread-mode" onLoad="setup(null)">.*?<!-- start Entries -->', lambda match : '<body>'),
-        (r'<!-- end apple-rss-content-area -->.*?</body>', lambda match : '</body>'),
-        (r'<script.*?>.*?</script>', lambda match : ''),
-        (r'<body.*?>.*?<span class="headline">', lambda match : '<body><span class="headline"><chapter>'),
-        (r'<tr><td><div class="body">.*?<p class="ap-story-p">', lambda match : '<p class="ap-story-p">'),
-        (r'<p class="ap-story-p">', lambda match : '<p>'),
-        (r'Learn more about our <a href="http://apdigitalnews.com/privacy.html">Privacy Policy</a>.*?</body>', lambda match : '</body>'),
-    ]
-    ]   
-     
-
-  
-    def get_feeds(self):
-        return [ ('AP Headlines', 'http://hosted.ap.org/lineups/TOPHEADS-rss_2.0.xml?SITE=ORAST&SECTION=HOME'),
-                  ('AP US News', 'http://hosted.ap.org/lineups/USHEADS-rss_2.0.xml?SITE=CAVIC&SECTION=HOME'),
-                   ('AP World News', 'http://hosted.ap.org/lineups/WORLDHEADS-rss_2.0.xml?SITE=SCAND&SECTION=HOME'),
-                   ('AP Political News', 'http://hosted.ap.org/lineups/POLITICSHEADS-rss_2.0.xml?SITE=ORMED&SECTION=HOME'),
-                   ('AP Washington State News', 'http://hosted.ap.org/lineups/WASHINGTONHEADS-rss_2.0.xml?SITE=NYPLA&SECTION=HOME'),
-                   ('AP Technology News', 'http://hosted.ap.org/lineups/TECHHEADS-rss_2.0.xml?SITE=CTNHR&SECTION=HOME'),
-                   ('AP Health News', 'http://hosted.ap.org/lineups/HEALTHHEADS-rss_2.0.xml?SITE=FLDAY&SECTION=HOME'),
-                   ('AP Science News', 'http://hosted.ap.org/lineups/SCIENCEHEADS-rss_2.0.xml?SITE=OHCIN&SECTION=HOME'),
-                   ('AP Strange News', 'http://hosted.ap.org/lineups/STRANGEHEADS-rss_2.0.xml?SITE=WCNC&SECTION=HOME'),
-        ]
--- a/src/calibre/ebooks/lrf/web/profiles/atlantic.py
+++ b/src/calibre/ebooks/lrf/web/profiles/atlantic.py
@ -1,47 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-import re
-from calibre.ebooks.lrf.web.profiles import DefaultProfile
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
-
-class Atlantic(DefaultProfile):
-    
-    title = 'The Atlantic'
-    max_recursions = 2
-    INDEX = 'http://www.theatlantic.com/doc/current'
-    
-    preprocess_regexps = [
-                          (re.compile(r'<body.*?<div id="storytop"', re.DOTALL|re.IGNORECASE), 
-                           lambda m: '<body><div id="storytop"')
-                          ]
-    
-    def parse_feeds(self):
-        articles = []
-        
-        src = self.browser.open(self.INDEX).read()
-        soup = BeautifulSoup(src)
-        
-        issue = soup.find('span', attrs={'class':'issue'})
-        if issue:
-            self.timefmt = ' [%s]'%self.tag_to_string(issue).rpartition('|')[-1].strip().replace('/', '-')
-        
-        for item in soup.findAll('div', attrs={'class':'item'}):
-            a = item.find('a')
-            if a and a.has_key('href'):
-                url = a['href']
-                url = 'http://www.theatlantic.com/'+url.replace('/doc', 'doc/print')
-                title = self.tag_to_string(a)
-                byline = item.find(attrs={'class':'byline'})
-                date = self.tag_to_string(byline) if byline else ''
-                description = ''
-                articles.append({
-                                 'title':title,
-                                 'date':date,
-                                 'url':url,
-                                 'description':description
-                                })
-                
-        
-        return {'Current Issue' : articles }
-        
-        
--- a/src/calibre/ebooks/lrf/web/profiles/automatic.py
+++ b/src/calibre/ebooks/lrf/web/profiles/automatic.py
@ -1,75 +0,0 @@
-#!/usr/bin/env  python
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-import os
-
-from calibre.ebooks.lrf.web.profiles import DefaultProfile
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
-from calibre import iswindows
-from calibre.ebooks.chardet import xml_to_unicode
-
-class AutomaticRSSProfile(DefaultProfile):
-    '''
-    Make downloading of RSS feeds completely automatic. Only input 
-    required is the URL of the feed.
-    '''
-    
-    max_recursions = 2
-    
-    def __init__(self, *args, **kwargs):
-        self.cindex = 1
-        DefaultProfile.__init__(*args, **kwargs)
-    
-    def fetch_content(self, index):
-        raw = open(index, 'rb').read()
-        if self.encoding:
-            raw = raw.decode(self.encoding)
-            enc = self.encoding
-        else:
-            raw, enc = xml_to_unicode(raw)
-        isoup = BeautifulSoup(raw)
-        for a in isoup.findAll('a', href=True):
-            src = a['href']
-            if src.startswith('file:'):
-                src = src[5:]
-            if os.access(src, os.R_OK):
-                self.fetch_content(src)
-                continue
-            try:
-                src = self.browser.open(src).read()
-            except:
-                continue
-            soup  = BeautifulSoup(src)
-            header, content = [], []
-            head = soup.find('head')
-            if head is not None:
-                for style in head('style'):
-                    header.append(unicode(style))
-            body = soup.find('body')
-            if body is None:
-                continue
-            for tag in body(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
-                in_table = False
-                c = tag.parent
-                while c is not None:
-                    if c.name == 'table':
-                        in_table = True
-                        break
-                    c = c.parent
-                if in_table:
-                    continue
-                content.append(unicode(tag))
-                
-            cfile = 'content%d.html'%self.cindex
-            self.cindex += 1
-            cfile = os.path.join(os.path.dirname(index), cfile)
-            html = '<html>\n<head>%s</head>\n<body>%s</body></html>'%('\n'.join(header), '\n'.join(content))
-            
-            open(cfile, 'wb').write(html.encode(enc))
-            a['href'] = ('file:' if iswindows else '') + cfile
-        open(index, 'wb').write(unicode(isoup).encode(enc)) 
-    
-    def build_index(self):
-        index = DefaultProfile.build_index(self)
-        self.fetch_content(index)
-    
--- a/src/calibre/ebooks/lrf/web/profiles/barrons.py
+++ b/src/calibre/ebooks/lrf/web/profiles/barrons.py
@ -1,90 +0,0 @@
-##
-##    web2lrf profile to download articles from Barrons.com 
-##    can download subscriber-only content if username and  
-##    password are supplied.
-##
-''' 
-''' 
- 
-import re 
- 
-from calibre.ebooks.lrf.web.profiles import DefaultProfile  
-         
-class Barrons(DefaultProfile): 
-    
-        title = 'Barron\'s' 
-        max_recursions = 3
-        max_articles_per_feed = 50
-        needs_subscription    = True
-        timefmt  = ' [%a, %b %d, %Y]' 
-        html_description = True 
-        no_stylesheets = False
-        match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*']
-        html2lrf_options = [('--ignore-tables'),('--base-font-size=10')]
-        ##delay = 1
-        
-        ## Don't grab articles more than 7 days old 
-        oldest_article = 7 
-
-
-        preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in  
-                [ 
-                ## Remove anything before the body of the article. 
-                (r'<body.*?<!-- article start', lambda match: '<body><!-- article start'), 
- 
-                ## Remove any insets from the body of the article. 
-                (r'<div id="inset".*?</div>.?</div>.?<p', lambda match : '<p'), 
-
-                ## Remove any reprint info from the body of the article. 
-                (r'<hr size.*?<p', lambda match : '<p'), 
-
-                ## Remove anything after the end of the article. 
-                (r'<!-- article end.*?</body>', lambda match : '</body>'), 
-                ] 
-        ] 
- 
-        def get_browser(self): 
-            br = DefaultProfile.get_browser() 
-            if self.username is not None and self.password is not None: 
-                br.open('http://commerce.barrons.com/auth/login') 
-                br.select_form(name='login_form') 
-                br['user']   = self.username 
-                br['password'] = self.password 
-                br.submit() 
-            return br 
- 
-## Use the print version of a page when available. 
- 
-        def print_version(self, url): 
-                return url.replace('/article/', '/article_print/') 
- 
-## Comment out the feeds you don't want retrieved. 
-## Because these feeds are sorted alphabetically when converted to LRF, you may want to number them to put them in the order you desire 
- 
-        def get_feeds(self): 
-                return  [ 
-                ('This Week\'s Magazine', 'http://online.barrons.com/xml/rss/3_7510.xml'), 
-                ('Online Exclusives', 'http://online.barrons.com/xml/rss/3_7515.xml'), 
-                ('Companies', 'http://online.barrons.com/xml/rss/3_7516.xml'), 
-                ('Markets', 'http://online.barrons.com/xml/rss/3_7517.xml'), 
-                ('Technology', 'http://online.barrons.com/xml/rss/3_7518.xml'), 
-                ('Funds/Q&A', 'http://online.barrons.com/xml/rss/3_7519.xml'), 
-                ]
-
-        ## Logout of website
-        ## NOT CURRENTLY WORKING
-        # def cleanup(self):
-            # try:
-                # self.browser.set_debug_responses(True)
-                # import sys, logging
-                # logger = logging.getLogger("mechanize")
-                # logger.addHandler(logging.StreamHandler(sys.stdout))
-                # logger.setLevel(logging.INFO)
-
-                # res = self.browser.open('http://online.barrons.com/logout')
-            # except:
-                # import traceback
-                # traceback.print_exc()
-
-
-
--- a/src/calibre/ebooks/lrf/web/profiles/bbc.py
+++ b/src/calibre/ebooks/lrf/web/profiles/bbc.py
@ -1,45 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-'''
-Fetch the BBC.
-'''
-import re
-
-from calibre.ebooks.lrf.web.profiles import DefaultProfile
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
-
-class BBC(DefaultProfile):
-    
-    title = 'The BBC'
-    max_recursions = 2
-    timefmt  = ' [%a, %d %b, %Y]'
-    no_stylesheets = True
-    
-    preprocess_regexps = \
-        [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in 
-              [
-               # Remove footer from individual stories
-               (r'<div class=.footer.>.*?Published', 
-                lambda match : '<p></p><div class="footer">Published'),
-               # Add some style info in place of disabled stylesheet
-               (r'<link.*?type=.text/css.*?>', lambda match :
-                '''<style type="text/css">
-                    .headline {font-size: x-large;}
-                    .fact { padding-top: 10pt  }
-                    </style>'''),
-               ]
-                  ]
-    
-        
-    def print_version(self, url):
-        return url.replace('http://', 'http://newsvote.bbc.co.uk/mpapps/pagetools/print/')
-    
-    def get_feeds(self):
-        src = self.browser.open('http://news.bbc.co.uk/1/hi/help/3223484.stm').read()
-        soup = BeautifulSoup(src[src.index('<html'):])
-        feeds = []
-        ul =  soup.find('ul', attrs={'class':'rss'})
-        for link in ul.findAll('a'):
-            feeds.append((link.string, link['href']))
-        return feeds
-
--- a/src/calibre/ebooks/lrf/web/profiles/chr_mon.py
+++ b/src/calibre/ebooks/lrf/web/profiles/chr_mon.py
@ -1,46 +0,0 @@
-
-import re, time
-from calibre.ebooks.lrf.web.profiles import DefaultProfile
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
-
-class ChristianScienceMonitor(DefaultProfile):
-
-    title = 'Christian Science Monitor'
-    max_recursions = 2
-    max_articles_per_feed = 20
-    no_stylesheets = True
-    
-  
-
-    
-    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in 
-        [
-        (r'<body.*?<div id="story"', lambda match : '<body><div id="story"'),
-        (r'<div class="pubdate">.*?</div>', lambda m: ''),
-        (r'Full HTML version of this story which may include photos, graphics, and related links.*</body>',
-              lambda match : '</body>'),
-        ]]
-     
-
-    def parse_feeds(self):
-        soup = BeautifulSoup(self.browser.open('http://www.csmonitor.com/textedition'))
-        articles = {}
-        feed = []
-        for tag in soup.findAll(['h2', 'p']):
-            if tag.name == 'h2':
-                title = self.tag_to_string(tag)
-                feed = [] 
-                articles[title] = feed
-            elif tag.has_key('class') and tag['class'] == 'story':
-                a = tag.find('a')
-                if a is not None and a.has_key('href'):
-                    feed.append({
-                         'title': self.tag_to_string(a),
-                         'url'  : 'http://www.csmonitor.com'+a['href'],
-                         'date' : time.strftime('%d %b'),
-                         'content' : '',
-                         })
-                    a.extract()
-                    feed[-1]['description'] = self.tag_to_string(tag).strip()
-        return articles
-      
--- a/src/calibre/ebooks/lrf/web/profiles/cnn.py
+++ b/src/calibre/ebooks/lrf/web/profiles/cnn.py
@ -1,51 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-'''
-Profile to download CNN
-'''
-import re
-from calibre.ebooks.lrf.web.profiles import DefaultProfile
-
-class CNN(DefaultProfile):
-    
-    title = 'CNN'
-    max_recursions = 2
-    timefmt  = ' [%d %b %Y]'
-    html_description = True
-    no_stylesheets = True
-    oldest_article        = 15
-
-    preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in [
-        (r'<head>.*?<title', lambda match : '<head><title'),
-        (r'</title>.*?</head>', lambda match : '</title></head>'),
-        (r'<body.*?<\!\-\-Article.*?>', lambda match : ''),
-        (r'<\!\-\-Article End\-\->.*?</body>', lambda match : '</body>'),
-        (r'(</h\d>)<ul>.*?</ul>', lambda match : match.group(1)), # drop story highlights
-        (r'<h2>(.*?)</h2><h1>(.*?)</h1>', lambda match : '<h1>' + match.group(1) + '</h1><h2>' + match.group(2) + '</h2>'), # sports uses h2 for main title and h1 for subtitle (???) switch these around
-        (r'<span class="cnnEmbeddedMosLnk">.*?</span>', lambda match : ''), # drop 'watch more' links
-        (r'(<div class="cnnstorybody">).*?(<p)', lambda match : match.group(1) + match.group(2)), # drop sports photos
-        (r'</?table.*?>|</?tr.*?>|</?td.*?>', lambda match : ''), # drop table formatting
-        (r'<div class="cnnendofstorycontent".*?>.*?</div>', lambda match : ''), # drop extra business links
-        (r'<a href="#TOP">.*?</a>', lambda match : '') # drop business 'to top' link
-        ] ]
-
-    def print_version(self, url):
-        return 'http://www.printthis.clickability.com/pt/printThis?clickMap=printThis&fb=Y&url=' + url
-    
-    def get_feeds(self):
-        return [
-             ('Top News', 'http://rss.cnn.com/rss/cnn_topstories.rss'),
-             ('World', 'http://rss.cnn.com/rss/cnn_world.rss'),
-             ('U.S.', 'http://rss.cnn.com/rss/cnn_us.rss'),
-             ('Sports', 'http://rss.cnn.com/rss/si_topstories.rss'),
-             ('Business', 'http://rss.cnn.com/rss/money_latest.rss'),
-             ('Politics', 'http://rss.cnn.com/rss/cnn_allpolitics.rss'),
-             ('Law', 'http://rss.cnn.com/rss/cnn_law.rss'),
-             ('Technology', 'http://rss.cnn.com/rss/cnn_tech.rss'),
-             ('Science & Space', 'http://rss.cnn.com/rss/cnn_space.rss'),
-             ('Health', 'http://rss.cnn.com/rss/cnn_health.rss'),
-             ('Entertainment', 'http://rss.cnn.com/rss/cnn_showbiz.rss'),
-             ('Education', 'http://rss.cnn.com/rss/cnn_education.rss'),
-             ('Offbeat', 'http://rss.cnn.com/rss/cnn_offbeat.rss'),
-             ('Most Popular', 'http://rss.cnn.com/rss/cnn_mostpopular.rss')
-             ]
--- a/Show More
+++ b/Show More