Sync to pluginize

2025-07-09 03:04:10 -04:00 · 2009-03-07 13:58:12 -05:00 · 2009-03-07 13:58:12 -05:00 · 413da29dec
commit 413da29dec
parent e12384139a eebecb0001
60 changed files with 2646 additions and 1895 deletions
--- a/.pydevproject
+++ b/.pydevproject
@ -2,9 +2,9 @@
 <?eclipse-pydev version="1.0"?>

 <pydev_project>
-<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.5</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property>
 <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
-<path>/calibre/src</path>
+<path>/calibre-pluginize/src</path>
 </pydev_pathproperty>
 <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
 </pydev_project>
--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -2,7 +2,9 @@
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-import sys, os, re, logging, time, subprocess, atexit, mimetypes
+import sys, os, re, logging, time, subprocess, atexit, mimetypes, \
+       __builtin__
+__builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
 from htmlentitydefs import name2codepoint
 from math import floor
 from logging import Formatter
@ -73,26 +75,26 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
    return one.replace('..', '_')


+def prints(*args, **kwargs):
+    '''
+    Print unicode arguments safely by encoding them to preferred_encoding
+    Has the same signature as the print function from Python 3.
+
+    :param file: Object with a ``write`` method, defaults to ``sys.stdout``
+    :param sep:  String written between arguments, defaults to a space
+    :param end:  String written after the last argument, defaults to a newline
+    '''
+    file = kwargs.get('file', sys.stdout)
+    sep  = kwargs.get('sep', ' ')
+    end  = kwargs.get('end', '\n')
+    last = len(args) - 1
+    for i, arg in enumerate(args):
+        if not isinstance(arg, basestring):
+            # Like print(), accept arbitrary objects instead of failing in
+            # file.write() on anything that is not already a string.
+            try:
+                arg = str(arg)
+            except UnicodeError:
+                arg = unicode(arg)
+        if isinstance(arg, unicode):
+            # 'replace' stops characters that preferred_encoding cannot
+            # represent from raising UnicodeEncodeError.
+            arg = arg.encode(preferred_encoding, 'replace')
+        file.write(arg)
+        if i != last:
+            file.write(sep)
+    file.write(end)
+
 class CommandLineError(Exception):
    pass

-class ColoredFormatter(Formatter):

-    def format(self, record):
-        ln = record.__dict__['levelname']
-        col = ''
-        if ln == 'CRITICAL':
-            col = terminal_controller.YELLOW
-        elif ln == 'ERROR':
-            col = terminal_controller.RED
-        elif ln in ['WARN', 'WARNING']:
-            col = terminal_controller.BLUE
-        elif ln == 'INFO':
-            col = terminal_controller.GREEN
-        elif ln == 'DEBUG':
-            col = terminal_controller.CYAN
-        record.__dict__['levelname'] = col + record.__dict__['levelname'] + terminal_controller.NORMAL
-        return Formatter.format(self, record)


 def setup_cli_handlers(logger, level):
@ -316,66 +318,23 @@ def english_sort(x, y):
    '''
    return cmp(_spat.sub('', x), _spat.sub('', y))

-class LoggingInterface:
+class ColoredFormatter(Formatter):

-    def __init__(self, logger):
-        self.__logger = self.logger = logger
-        
-    def setup_cli_handler(self, verbosity):
-        for handler in self.__logger.handlers:
-            if isinstance(handler, logging.StreamHandler):
-                return
-        if os.environ.get('CALIBRE_WORKER', None) is not None and self.__logger.handlers:
-            return
-        stream    = sys.stdout
-        formatter = logging.Formatter()
-        level     = logging.INFO
-        if verbosity > 0:
-            formatter = ColoredFormatter('[%(levelname)s] %(message)s') if verbosity > 1 else \
-                        ColoredFormatter('%(levelname)s: %(message)s')
-            level     = logging.DEBUG
-            if verbosity > 1:
-                stream = sys.stderr
-        
-        handler = logging.StreamHandler(stream)
-        handler.setFormatter(formatter)
-        handler.setLevel(level)
-        self.__logger.addHandler(handler)
-        self.__logger.setLevel(level)
-
-
-    def ___log(self, func, msg, args, kwargs):
-        args = [msg] + list(args)
-        for i in range(len(args)):
-            if not isinstance(args[i], basestring):
-                continue
-            if sys.version_info[:2] > (2, 5):
-                if not isinstance(args[i], unicode):
-                    args[i] = args[i].decode(preferred_encoding, 'replace')
-            elif isinstance(args[i], unicode):
-                args[i] = args[i].encode(preferred_encoding, 'replace')
-        func(*args, **kwargs)
-
-    def log_debug(self, msg, *args, **kwargs):
-        self.___log(self.__logger.debug, msg, args, kwargs)
-
-    def log_info(self, msg, *args, **kwargs):
-        self.___log(self.__logger.info, msg, args, kwargs)
-
-    def log_warning(self, msg, *args, **kwargs):
-        self.___log(self.__logger.warning, msg, args, kwargs)
-
-    def log_warn(self, msg, *args, **kwargs):
-        self.___log(self.__logger.warning, msg, args, kwargs)
-
-    def log_error(self, msg, *args, **kwargs):
-        self.___log(self.__logger.error, msg, args, kwargs)
-
-    def log_critical(self, msg, *args, **kwargs):
-        self.___log(self.__logger.critical, msg, args, kwargs)
-
-    def log_exception(self, msg, *args):
-        self.___log(self.__logger.exception, msg, args, {})
+    def format(self, record):
+        ln = record.__dict__['levelname']
+        col = ''
+        if ln == 'CRITICAL':
+            col = terminal_controller.YELLOW
+        elif ln == 'ERROR':
+            col = terminal_controller.RED
+        elif ln in ['WARN', 'WARNING']:
+            col = terminal_controller.BLUE
+        elif ln == 'INFO':
+            col = terminal_controller.GREEN
+        elif ln == 'DEBUG':
+            col = terminal_controller.CYAN
+        record.__dict__['levelname'] = col + record.__dict__['levelname'] + terminal_controller.NORMAL
+        return Formatter.format(self, record)

 def walk(dir):
    ''' A nice interface to os.walk '''
--- a/src/calibre/customize/init.py
+++ b/src/calibre/customize/init.py
@ -220,4 +220,6 @@ class MetadataWriterPlugin(Plugin):

        '''
        pass
-    
+
+
+   
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -242,8 +242,13 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
        set_metadata(stream, mi)


-plugins = [HTML2ZIP]
+from calibre.ebooks.epub.input import EPUBInput
+from calibre.ebooks.mobi.input import MOBIInput
+from calibre.customize.profiles import input_profiles
+
+plugins = [HTML2ZIP, EPUBInput, MOBIInput]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataWriter')]
+plugins += input_profiles
--- a/src/calibre/customize/conversion.py
+++ b/src/calibre/customize/conversion.py
@ -0,0 +1,189 @@
+from __future__ import with_statement
+'''
+Defines the plugin system for conversions.
+'''
+import re, os, shutil
+
+from lxml import html
+
+from calibre import CurrentDir
+from calibre.customize import Plugin
+
+class ConversionOption(object):
+
+    '''
+    Class representing conversion options
+
+    :param name:         Python identifier naming the option. Required.
+    :param help:         Non-empty help text. Required.
+    :param long_switch:  Long switch for the option. When ``None`` it is
+                         derived from ``name`` by prefixing ``--`` and
+                         replacing underscores with hyphens.
+    :param short_switch: Optional short switch for the option.
+    :param choices:      Optional collection of permitted values.
+    '''
+
+    def __init__(self, name=None, help=None, long_switch=None,
+                 short_switch=None, choices=None):
+        self.name = name
+        self.help = help
+        self.long_switch = long_switch
+        self.short_switch = short_switch
+        self.choices = choices
+
+        # Only derive the switch from a usable name. An unusable name is
+        # reported as a ValueError by validate_parameters() just below rather
+        # than surfacing here as an AttributeError on None.
+        if self.long_switch is None and isinstance(self.name, basestring):
+            self.long_switch = '--'+self.name.replace('_', '-')
+
+        self.validate_parameters()
+
+    def validate_parameters(self):
+        '''
+        Validate the parameters passed to :method:`__init__`.
+
+        :raises ValueError: if ``name`` is not a valid Python identifier or
+                            if ``help`` is empty.
+        '''
+        if not isinstance(self.name, basestring):
+            raise ValueError(repr(self.name) +
+                             ' is not a valid Python identifier')
+        # \Z forces the whole name to match; re.match only anchors at the
+        # start and would otherwise accept names such as 'a-b'.
+        if re.match(r'[a-zA-Z_][a-zA-Z0-9_]*\Z', self.name) is None:
+            raise ValueError(self.name + ' is not a valid Python identifier')
+        if not self.help:
+            raise ValueError('You must set the help text')
+        
+        
+class OptionRecommendation(object):
+    '''
+    An option recommendation. That is, an option as well as its recommended
+    value and the level of the recommendation.
+    '''
+    LOW  = 1
+    MED  = 2
+    HIGH = 3
+
+    def __init__(self, recommended_value=None, level=LOW, **kwargs):
+        '''
+        :param recommended_value: The recommended value. Must be ``None``, a
+                                  string or a number.
+        :param level:  Strength of the recommendation, one of :attr:`LOW`,
+                       :attr:`MED` or :attr:`HIGH`.
+        :param option: Optional keyword argument, a ready made
+                       :class:`ConversionOption`. When absent, the remaining
+                       keyword arguments are used to construct one.
+        :raises ValueError: see :method:`validate_parameters`.
+        '''
+        # This parameter used to be misspelt 'recommeded_value'. Keep
+        # accepting the old spelling as a keyword so no caller breaks.
+        if 'recommeded_value' in kwargs:
+            recommended_value = kwargs.pop('recommeded_value')
+        self.level = level
+        self.recommended_value = recommended_value
+        self.option = kwargs.pop('option', None)
+        if self.option is None:
+            self.option = ConversionOption(**kwargs)
+
+        self.validate_parameters()
+
+    def validate_parameters(self):
+        '''
+        Check the recommended value against the option.
+
+        :raises ValueError: if the option restricts its values to a set of
+                            choices that does not contain the recommended
+                            value, or if the recommended value is neither
+                            ``None`` nor a string or a number.
+        '''
+        if self.option.choices and self.recommended_value not in \
+                                                    self.option.choices:
+            raise ValueError('Recommended value not in choices')
+        if not (isinstance(self.recommended_value, (int, float, str, unicode))
+                or self.recommended_value is None):
+            raise ValueError(unicode(self.recommended_value) +
+                             ' is not a string or a number')
+         
+
+class InputFormatPlugin(Plugin):
+    '''
+    InputFormatPlugins are responsible for converting a document into 
+    HTML+OPF+CSS+etc.
+    The results of the conversion *must* be encoded in UTF-8.
+    The main action happens in :method:`convert`.
+    '''
+    
+    type = _('Conversion Input')
+    can_be_disabled = False
+    supported_platforms = ['windows', 'osx', 'linux']
+    
+    #: Set of file types for which this plugin should be run
+    #: For example: ``set(['azw', 'mobi', 'prc'])``
+    file_types     = set([])
+    
+    #: Options shared by all Input format plugins. Do not override
+    #: in sub-classes. Use :member:`options` instead. Every option must be an
+    #: instance of :class:`OptionRecommendation`. 
+    common_options = set([
+        OptionRecommendation(name='debug_input',
+            recommended_value=None, level=OptionRecommendation.LOW,
+            help=_('Save the output from the input plugin to the specified '
+                   'directory. Useful if you are unsure at which stage '
+                   'of the conversion process a bug is occurring. '
+                   'WARNING: This completely deletes the contents of '
+                   'the specified directory.')
+        ),
+        
+        OptionRecommendation(name='input_encoding',
+            recommended_value=None, level=OptionRecommendation.LOW,
+            help=_('Specify the character encoding of the input document. If '
+                   'set this option will override any encoding declared by the '
+                   'document itself. Particularly useful for documents that '
+                   'do not declare an encoding or that have erroneous '
+                   'encoding declarations.')
+        ),
+        
+    ])
+    
+    #: Options to customize the behavior of this plugin. Every option must be an
+    #: instance of :class:`OptionRecommendation`.  
+    options = set([])
+    
+    def convert(self, stream, options, file_ext, parse_cache, log):
+        '''
+        This method must be implemented in sub-classes. It must return
+        the path to the created OPF file. All output should be contained in 
+        the current directory. If this plugin creates files outside the current
+        directory they must be deleted/marked for deletion before this method 
+        returns.
+        
+        :param stream:   A file like object that contains the input file.
+        
+        :param options:  Options to customize the conversion process. 
+                         Guaranteed to have attributes corresponding
+                         to all the options declared by this plugin. In 
+                         addition, it will have a verbose attribute that
+                         takes integral values from zero upwards. Higher numbers
+                         mean be more verbose. Another useful attribute is 
+                         ``input_profile`` that is an instance of 
+                         :class:`calibre.customize.profiles.InputProfile`.
+                         
+        :param file_ext: The extension (without the .) of the input file. It
+                         is guaranteed to be one of the `file_types` supported
+                         by this plugin.
+        
+        :param parse_cache:    A dictionary that maps absolute file paths to
+                               parsed representations of their contents. For
+                               HTML the representation is an lxml element of 
+                               the root of the tree. For CSS it is a cssutils
+                               stylesheet. If this plugin parses any of the
+                               output files, it should add them to the cache
+                               so that later stages of the conversion won't
+                               have to re-parse them. If a parsed representation
+                               is in the cache, there is no need to actually 
+                               write the file to disk.
+        
+        :param log: A :class:`calibre.utils.logging.Log` object. All output 
+                    should use this object.
+        '''
+        raise NotImplementedError
+    
+    def __call__(self, stream, options, file_ext, parse_cache, log, output_dir):
+        '''
+        Run :method:`convert` with ``output_dir`` as the current directory
+        and return whatever it returns.
+
+        ``output_dir`` is emptied before the conversion. Relative keys left
+        in ``parse_cache`` by the conversion are made absolute, with a
+        warning. If ``options.debug_input`` is not ``None``, the parsed
+        representations in ``parse_cache`` are written to disk and that
+        directory is then replaced by a copy of ``output_dir``.
+        '''
+        # Pin the directory down before the working directory changes, so it
+        # can be referred to safely once the CurrentDir block has ended.
+        output_dir = os.path.abspath(output_dir)
+        log('InputFormatPlugin: %s running'%self.name, end=' ')
+        if hasattr(stream, 'name'):
+            log('on', stream.name)
+
+        with CurrentDir(output_dir):
+            for x in os.listdir('.'):
+                shutil.rmtree(x) if os.path.isdir(x) else os.remove(x)
+
+            ret = self.convert(stream, options, file_ext, parse_cache, log)
+            for key in list(parse_cache.keys()):
+                if os.path.abspath(key) != key:
+                    log.warn(('InputFormatPlugin: %s returned a '
+                             'relative path: %s')%(self.name, key)
+                             )
+                    parse_cache[os.path.abspath(key)] = parse_cache.pop(key)
+
+        if options.debug_input is not None:
+            options.debug_input = os.path.abspath(options.debug_input)
+            # shutil.copytree() requires a destination that does not exist
+            if os.path.exists(options.debug_input):
+                shutil.rmtree(options.debug_input)
+            for path, obj in parse_cache.items():
+                if hasattr(obj, 'cssText'):
+                    raw = obj.cssText
+                else:
+                    raw = html.tostring(obj, encoding='utf-8', method='xml',
+                         include_meta_content_type=True, pretty_print=True)
+                if isinstance(raw, unicode):
+                    raw = raw.encode('utf-8')
+                # Cached entries need not exist on disk yet, so their
+                # directory may be missing as well.
+                parent = os.path.dirname(path)
+                if parent and not os.path.exists(parent):
+                    os.makedirs(parent)
+                with open(path, 'wb') as dest:
+                    dest.write(raw)
+            # Copy the conversion output explicitly: outside the CurrentDir
+            # block '.' can no longer be relied on to be output_dir.
+            shutil.copytree(output_dir, options.debug_input)
+
+        return ret
+ 
--- a/src/calibre/customize/profiles.py
+++ b/src/calibre/customize/profiles.py
@ -0,0 +1,27 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.customize import Plugin
+
+# The default input profile. It is a plugin that cannot be disabled and is
+# declared as supported on windows, osx and linux.
+class InputProfile(Plugin):
+    
+    author = 'Kovid Goyal'
+    supported_platforms = set(['windows', 'osx', 'linux'])
+    can_be_disabled = False
+    type = _('Input profile')
+
+# TODO: Add some real information to this profile. All other profiles must
+#       inherit from this profile and override as needed.
+
+    name        = 'Default Input Profile'
+    short_name  = 'default' # Used in the CLI, so don't use spaces etc. in it
+    description = _('This profile tries to provide sane defaults and is useful '
+                    'if you know nothing about the input document.')
+                  
+input_profiles = [InputProfile]
+    
+
+
+    
--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@ -6,13 +6,14 @@ import os, shutil, traceback, functools, sys

 from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \
                              MetadataWriterPlugin
+from calibre.customize.conversion import InputFormatPlugin
+from calibre.customize.profiles import InputProfile
 from calibre.customize.builtins import plugins as builtin_plugins
 from calibre.constants import __version__, iswindows, isosx
 from calibre.ebooks.metadata import MetaInformation
 from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
                                 plugin_dir, OptionParser

-
 version = tuple([int(x) for x in __version__.split('.')])

 platform = 'linux'
@ -70,7 +71,10 @@ _on_import           = {}
 _on_preprocess       = {}
 _on_postprocess      = {}

-
+def input_profiles():
+    ''' Yield every initialized plugin that is an :class:`InputProfile` '''
+    for candidate in _initialized_plugins:
+        if not isinstance(candidate, InputProfile):
+            continue
+        yield candidate

 def reread_filetype_plugins():
    global _on_import
@ -114,7 +118,19 @@ def reread_metadata_plugins():
                    _metadata_writers[ft] = []
                _metadata_writers[ft].append(plugin) 
                
-    
+def metadata_readers():
+    ''' Return the set of all plugins found in the values of _metadata_readers '''
+    found = set()
+    for group in _metadata_readers.values():
+        found.update(group)
+    return found
+
+def metadata_writers():
+    ''' Return the set of all plugins found in the values of _metadata_writers '''
+    found = set()
+    for group in _metadata_writers.values():
+        found.update(group)
+    return found
                
 def get_file_type_metadata(stream, ftype):
    mi = MetaInformation(None, None)
@ -222,6 +238,17 @@ def find_plugin(name):
        if plugin.name == name:
            return plugin

+def input_format_plugins():
+    ''' Yield every initialized plugin that is an :class:`InputFormatPlugin` '''
+    for candidate in _initialized_plugins:
+        if not isinstance(candidate, InputFormatPlugin):
+            continue
+        yield candidate
+        
+def plugin_for_input_format(fmt):
+    '''
+    Return the first input format plugin whose ``file_types`` contains
+    ``fmt``, or ``None`` if there is no such plugin.
+    '''
+    for candidate in input_format_plugins():
+        if fmt not in candidate.file_types:
+            continue
+        return candidate
+    return None
+    
+
 def disable_plugin(plugin_or_name):
    x = getattr(plugin_or_name, 'name', plugin_or_name)
    plugin = find_plugin(x)
--- a/src/calibre/devices/cybookg3/driver.py
+++ b/src/calibre/devices/cybookg3/driver.py
@ -60,7 +60,7 @@ class CYBOOKG3(USBMS):

        if on_card and size > self.free_space()[2] - 1024*1024:
            raise FreeSpaceError(_("There is insufficient free space on the storage card"))
-        if not on_card and size > self.free_space()[0] - 2*1024*1024: 
+        if not on_card and size > self.free_space()[0] - 2*1024*1024:
            raise FreeSpaceError(_("There is insufficient free space in main memory"))

        paths = []
@ -91,7 +91,7 @@ class CYBOOKG3(USBMS):
            if not os.path.exists(newpath):
                os.makedirs(newpath)
            
-            filepath = os.path.join(newpath, names.next())                
+            filepath = os.path.join(newpath, names.next())
            paths.append(filepath)
            
            if hasattr(infile, 'read'):
@ -100,7 +100,7 @@ class CYBOOKG3(USBMS):
                dest = open(filepath, 'wb')
                shutil.copyfileobj(infile, dest, 10*1024*1024)

-                dest.flush()                
+                dest.flush()
                dest.close()
            else:
                shutil.copy2(infile, filepath)
--- a/src/calibre/devices/libusb.py
+++ b/src/calibre/devices/libusb.py
@ -116,8 +116,8 @@ class Device(Structure):
            raise Error("Cannot open device")
        return handle.contents    
    
-    @apply
-    def configurations():
+    @dynamic_property
+    def configurations(self):
        doc = """ List of device configurations. See L{ConfigDescriptor} """
        def fget(self):
            ans = []
@ -127,8 +127,8 @@ class Device(Structure):
        return property(doc=doc, fget=fget)

 class Bus(Structure):
-    @apply
-    def device_list():
+    @dynamic_property
+    def device_list(self):
        doc = \
        """ 
        Flat list of devices on this bus. 
@ -360,4 +360,4 @@ def get_devices():
        for dev in devices:
            device = (dev.device_descriptor.idVendor, dev.device_descriptor.idProduct, dev.device_descriptor.bcdDevice)
            ans.append(device)
-    return ans
+    return ans
--- a/src/calibre/devices/prs500/books.py
+++ b/src/calibre/devices/prs500/books.py
@ -55,8 +55,8 @@ class Book(object):
    size         = book_metadata_field("size", formatter=int)
    # When setting this attribute you must use an epoch
    datetime     = book_metadata_field("date", formatter=strptime, setter=strftime)
-    @apply
-    def title_sorter():
+    @dynamic_property
+    def title_sorter(self):
        doc = '''String to sort the title. If absent, title is returned'''
        def fget(self):
            src = self.elem.getAttribute('titleSorter').strip()
@ -67,8 +67,8 @@ class Book(object):
            self.elem.setAttribute('titleSorter', sortable_title(unicode(val)))
        return property(doc=doc, fget=fget, fset=fset)
    
-    @apply
-    def thumbnail():
+    @dynamic_property
+    def thumbnail(self):
        doc = \
        """ 
        The thumbnail. Should be a height 68 image. 
@ -88,15 +88,15 @@ class Book(object):
                return decode(rc)
        return property(fget=fget, doc=doc)
    
-    @apply
-    def path():
+    @dynamic_property
+    def path(self):
        doc = """ Absolute path to book on device. Setting not supported. """
        def fget(self):  
            return self.root + self.rpath
        return property(fget=fget, doc=doc)
    
-    @apply
-    def db_id():
+    @dynamic_property
+    def db_id(self):
        doc = '''The database id in the application database that this file corresponds to'''
        def fget(self):
            match = re.search(r'_(\d+)$', self.rpath.rpartition('.')[0])
@ -378,4 +378,4 @@ class BookList(_BookList):
    
    def write(self, stream):
        """ Write XML representation of DOM tree to C{stream} """
-        stream.write(self.document.toxml('utf-8'))
+        stream.write(self.document.toxml('utf-8'))
--- a/src/calibre/devices/prs500/cli/main.py
+++ b/src/calibre/devices/prs500/cli/main.py
@ -39,8 +39,8 @@ class FileFormatter(object):
        self.name        = file.name
        self.path        = file.path
    
-    @apply
-    def mode_string():
+    @dynamic_property
+    def mode_string(self):
        doc=""" The mode string for this file. There are only two modes read-only and read-write """
        def fget(self):
            mode, x = "-", "-"      
@ -50,8 +50,8 @@ class FileFormatter(object):
            return mode
        return property(doc=doc, fget=fget)
    
-    @apply
-    def isdir_name():
+    @dynamic_property
+    def isdir_name(self):
        doc='''Return self.name + '/' if self is a directory'''
        def fget(self):
            name = self.name
@ -61,8 +61,8 @@ class FileFormatter(object):
        return property(doc=doc, fget=fget)
            
    
-    @apply
-    def name_in_color():
+    @dynamic_property
+    def name_in_color(self):
        doc=""" The name in ANSI text. Directories are blue, ebooks are green """
        def fget(self):
            cname = self.name
@ -75,22 +75,22 @@ class FileFormatter(object):
            return cname
        return property(doc=doc, fget=fget)
    
-    @apply
-    def human_readable_size():
+    @dynamic_property
+    def human_readable_size(self):
        doc=""" File size in human readable form """
        def fget(self):
            return human_readable(self.size)
        return property(doc=doc, fget=fget)
    
-    @apply
-    def modification_time():
+    @dynamic_property
+    def modification_time(self):
        doc=""" Last modified time in the Linux ls -l format """
        def fget(self):
            return time.strftime("%Y-%m-%d %H:%M", time.localtime(self.wtime))
        return property(doc=doc, fget=fget)
    
-    @apply
-    def creation_time():
+    @dynamic_property
+    def creation_time(self):
        doc=""" Last modified time in the Linux ls -l format """
        def fget(self):
            return time.strftime("%Y-%m-%d %H:%M", time.localtime(self.ctime))
@ -334,4 +334,4 @@ def main():
    return 0

 if __name__ == '__main__':
-    main()
+    main()
--- a/src/calibre/devices/prs500/prstypes.py
+++ b/src/calibre/devices/prs500/prstypes.py
@ -284,8 +284,8 @@ class Command(TransferBuffer):
    # Length of the data part of this packet
    length = field(start=12, fmt=DWORD) 
    
-    @apply
-    def data():
+    @dynamic_property
+    def data(self):
        doc = \
        """ 
        The data part of this command. Returned/set as/by a TransferBuffer. 
@ -447,8 +447,8 @@ class LongCommand(Command):
        self.length  = 16
        self.command = command
    
-    @apply
-    def command():
+    @dynamic_property
+    def command(self):
        doc = \
        """ 
        Usually carries extra information needed for the command
@ -568,8 +568,8 @@ class FileOpen(PathCommand):
        PathCommand.__init__(self, path, FileOpen.NUMBER, path_len_at_byte=20)
        self.mode = mode
    
-    @apply
-    def mode():
+    @dynamic_property
+    def mode(self):
        doc = \
                    """ 
                    The file open mode. Is either L{FileOpen.READ} 
@ -651,8 +651,8 @@ class Response(Command):
            raise PacketError("Response packets must have their number set to " \
            + hex(0x00001000))
    
-    @apply
-    def data():
+    @dynamic_property
+    def data(self):
        doc = \
                  """ 
                  The last 3 DWORDs (12 bytes) of data in this 
@ -681,43 +681,43 @@ class ListResponse(Response):
    PATH_NOT_FOUND = 0xffffffd7 #: Queried path is not found 
    PERMISSION_DENIED = 0xffffffd6 #: Permission denied
    
-    @apply
-    def is_file():
+    @dynamic_property
+    def is_file(self):
        doc = """ True iff queried path is a file """
        def fget(self):      
            return self.code == ListResponse.IS_FILE
        return property(doc=doc, fget=fget)
    
-    @apply
-    def is_invalid():
+    @dynamic_property
+    def is_invalid(self):
        doc = """ True iff queried path is invalid """
        def fget(self):    
            return self.code == ListResponse.IS_INVALID
        return property(doc=doc, fget=fget)
    
-    @apply
-    def path_not_found():
+    @dynamic_property
+    def path_not_found(self):
        doc = """ True iff queried path is not found """
        def fget(self):    
            return self.code == ListResponse.PATH_NOT_FOUND
        return property(doc=doc, fget=fget)
    
-    @apply
-    def permission_denied():
+    @dynamic_property
+    def permission_denied(self):
        doc = """ True iff permission is denied for path operations """
        def fget(self):    
            return self.code == ListResponse.PERMISSION_DENIED
        return property(doc=doc, fget=fget)
    
-    @apply
-    def is_unmounted():
+    @dynamic_property
+    def is_unmounted(self):
        doc = """ True iff queried path is unmounted (i.e. removed storage card) """
        def fget(self):
            return self.code == ListResponse.IS_UNMOUNTED
        return property(doc=doc, fget=fget)
    
-    @apply
-    def is_eol():
+    @dynamic_property
+    def is_eol(self):
        doc = """ True iff there are no more items in the list """
        def fget(self):
            return self.code == ListResponse.IS_EOL
@ -759,8 +759,8 @@ class FileProperties(Answer):
    # 0 = default permissions, 4 = read only
    permissions = field(start=36, fmt=DWORD)  
    
-    @apply
-    def is_dir():
+    @dynamic_property
+    def is_dir(self):
        doc = """True if path points to a directory, False if it points to a file."""    
        
        def fget(self):
@ -776,8 +776,8 @@ class FileProperties(Answer):
        return property(doc=doc, fget=fget, fset=fset)
    
    
-    @apply
-    def is_readonly():
+    @dynamic_property
+    def is_readonly(self):
        doc = """ Whether this file is readonly."""
        
        def fget(self):
@ -801,8 +801,8 @@ class IdAnswer(Answer):
    
    """ Defines the structure of packets that contain identifiers for queries. """
    
-    @apply
-    def id():
+    @dynamic_property
+    def id(self):
        doc = \
        """ 
        The identifier. C{unsigned int} stored in 4 bytes 
@ -841,8 +841,8 @@ class ListAnswer(Answer):
    name_length = field(start=20, fmt=DWORD)
    name        = stringfield(name_length, start=24)
    
-    @apply
-    def is_dir():
+    @dynamic_property
+    def is_dir(self):
        doc = \
        """ 
        True if list item points to a directory, False if it points to a file.
@ -859,4 +859,3 @@ class ListAnswer(Answer):
        
        return property(doc=doc, fget=fget, fset=fset)

-
--- a/src/calibre/devices/prs505/books.py
+++ b/src/calibre/devices/prs505/books.py
@ -64,8 +64,8 @@ class Book(object):
    # When setting this attribute you must use an epoch
    datetime     = book_metadata_field("date", formatter=strptime, setter=strftime)
    
-    @apply
-    def title_sorter():
+    @dynamic_property
+    def title_sorter(self):
        doc = '''String to sort the title. If absent, title is returned'''
        def fget(self):
            src = self.elem.getAttribute('titleSorter').strip()
@ -76,8 +76,8 @@ class Book(object):
            self.elem.setAttribute('titleSorter', sortable_title(unicode(val)))
        return property(doc=doc, fget=fget, fset=fset)
    
-    @apply
-    def thumbnail():
+    @dynamic_property
+    def thumbnail(self):
        doc = \
        """ 
        The thumbnail. Should be a height 68 image. 
@ -99,15 +99,15 @@ class Book(object):
                return decode(rc)
        return property(fget=fget, doc=doc)
    
-    @apply
-    def path():
+    @dynamic_property
+    def path(self):
        doc = """ Absolute path to book on device. Setting not supported. """
        def fget(self):  
            return self.mountpath + self.rpath
        return property(fget=fget, doc=doc)
    
-    @apply
-    def db_id():
+    @dynamic_property
+    def db_id(self):
        doc = '''The database id in the application database that this file corresponds to'''
        def fget(self):
            match = re.search(r'_(\d+)$', self.rpath.rpartition('.')[0])
@ -415,4 +415,4 @@ def fix_ids(main, card):
    regen_ids(main)
    regen_ids(card)
        
-    main.set_next_id(str(main.max_id()+1))
+    main.set_next_id(str(main.max_id()+1))
--- a/src/calibre/devices/usbms/books.py
+++ b/src/calibre/devices/usbms/books.py
@ -21,15 +21,15 @@ class Book(object):
    def __eq__(self, other):
        return self.path == other.path
        
-    @apply
-    def title_sorter():
+    @dynamic_property
+    def title_sorter(self):
        doc = '''String to sort the title. If absent, title is returned'''
        def fget(self):
            return re.sub('^\s*A\s+|^\s*The\s+|^\s*An\s+', '', self.title).rstrip()
        return property(doc=doc, fget=fget)
    
-    @apply
-    def thumbnail():
+    @dynamic_property
+    def thumbnail(self):
        return None
        
    def __str__(self):
@ -44,4 +44,3 @@ class BookList(_BookList):
    def set_tags(self, book, tags):
        pass

-
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@ -37,7 +37,7 @@ class USBMS(Device):
    SUPPORTS_SUB_DIRS = False

    def __init__(self, key='-1', log_packets=False, report_progress=None):
-        Device.__init__(self, key=key, log_packets=log_packets, 
+        Device.__init__(self, key=key, log_packets=log_packets,
                        report_progress=report_progress)

    def get_device_information(self, end_session=True):
@ -103,7 +103,7 @@ class USBMS(Device):

                if 'tags' in mdata.keys():
                    for tag in mdata['tags']:
-                        if tag.startswith('News'):
+                        if tag.startswith(_('News')):
                            newpath = os.path.join(newpath, 'news')
                            newpath = os.path.join(newpath, mdata.get('title', ''))
                            newpath = os.path.join(newpath, mdata.get('timestamp', ''))
--- a/src/calibre/ebooks/conversion/init.py
+++ b/src/calibre/ebooks/conversion/init.py
@ -0,0 +1,4 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -0,0 +1,30 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+from calibre.customize.conversion import OptionRecommendation 
+from calibre.customize.ui import input_profiles
+
+# Option recommendations declared by this module. Each entry is an
+# OptionRecommendation with a name, a recommended value, a recommendation
+# level and translated help text.
+pipeline_options = [
+
+# 'verbose': recommended value 0, short switch 'v'.
+OptionRecommendation(name='verbose', 
+            recommended_value=0, level=OptionRecommendation.LOW,
+            short_switch='v', 
+            help=_('Level of verbosity. Specify multiple times for greater '
+                   'verbosity.')
+        ),
+
+
+# 'input_profile': the allowed choices are the short_name of every profile
+# returned by input_profiles(); the list is built when this module is
+# executed, not when the option is used.
+OptionRecommendation(name='input_profile',
+            recommended_value='default', level=OptionRecommendation.LOW,
+            choices=[x.short_name for x in input_profiles()],
+            help=_('Specify the input profile. The input profile gives the '
+                   'conversion system information on how to interpret '
+                   'various information in the input document. For '
+                   'example resolution dependent lengths (i.e. lengths in '
+                   'pixels).')
+        ),
+
+]
--- a/src/calibre/ebooks/epub/init.py
+++ b/src/calibre/ebooks/epub/init.py
@ -40,38 +40,6 @@ def rules(stylesheets):
                if r.type == r.STYLE_RULE:
                    yield r

-def decrypt_font(key, path):
-    raw = open(path, 'rb').read()
-    crypt = raw[:1024]
-    key = cycle(iter(key))
-    decrypt = ''.join([chr(ord(x)^key.next()) for x in crypt])
-    with open(path, 'wb') as f:
-        f.write(decrypt)
-        f.write(raw[1024:])
-
-def process_encryption(encfile, opf):
-    key = None
-    m = re.search(r'(?i)(urn:uuid:[0-9a-f-]+)', open(opf, 'rb').read())
-    if m:
-        key = m.group(1)
-        key = list(map(ord, uuid.UUID(key).bytes))
-    try:
-        root = etree.parse(encfile)
-        for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
-            algorithm = em.get('Algorithm', '')
-            if algorithm != 'http://ns.adobe.com/pdf/enc#RC':
-                return False
-            cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
-            uri = cr.get('URI')
-            path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
-            if os.path.exists(path):
-                decrypt_font(key, path)
-        return True
-    except:
-        import traceback
-        traceback.print_exc()
-    return False
-
 def initialize_container(path_to_container, opf_name='metadata.opf'):
    '''
    Create an empty EPUB document, with a default skeleton.
--- a/src/calibre/ebooks/epub/input.py
+++ b/src/calibre/ebooks/epub/input.py
@ -0,0 +1,76 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os, re, uuid
+from itertools import cycle
+
+from lxml import etree
+
+from calibre.customize.conversion import InputFormatPlugin
+
+class EPUBInput(InputFormatPlugin):
+    '''
+    Input plugin for EPUB files. `convert` unpacks the container into the
+    current working directory, de-obfuscates fonts listed in
+    META-INF/encryption.xml when possible and returns the path to the OPF.
+    '''
+
+    name        = 'EPUB Input'
+    author      = 'Kovid Goyal'
+    description = 'Convert EPUB files (.epub) to HTML'
+    file_types  = set(['epub'])
+
+    @classmethod
+    def decrypt_font(cls, key, path):
+        '''
+        XOR the first 1024 bytes of the file at `path` with the repeating
+        sequence of integers `key` and rewrite the file in place. Bytes
+        after the first 1024 are written back unchanged.
+        '''
+        with open(path, 'rb') as f:
+            raw = f.read()
+        crypt = raw[:1024]
+        key = cycle(iter(key))
+        decrypt = ''.join([chr(ord(x)^key.next()) for x in crypt])
+        with open(path, 'wb') as f:
+            f.write(decrypt)
+            f.write(raw[1024:])
+
+    @classmethod
+    def process_encryption(cls, encfile, opf, log):
+        '''
+        Handle the entries of the encryption manifest `encfile`.
+
+        The key is taken from the first ``urn:uuid:...`` match found in the
+        file `opf`. Every EncryptionMethod element must use the algorithm
+        ``http://ns.adobe.com/pdf/enc#RC``; the file named by its
+        CipherReference is then passed to `decrypt_font` if it exists.
+
+        Returns True if all entries were handled, False if an unsupported
+        algorithm was found or an error occurred (the traceback is printed).
+        `log` is accepted but currently unused.
+        '''
+        key = None
+        with open(opf, 'rb') as f:
+            m = re.search(r'(?i)(urn:uuid:[0-9a-f-]+)', f.read())
+        if m:
+            key = m.group(1)
+            key = list(map(ord, uuid.UUID(key).bytes))
+        try:
+            root = etree.parse(encfile)
+            for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
+                algorithm = em.get('Algorithm', '')
+                if algorithm != 'http://ns.adobe.com/pdf/enc#RC':
+                    return False
+                cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
+                uri = cr.get('URI')
+                # URIs are resolved relative to the parent of encfile's directory
+                path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
+                if os.path.exists(path):
+                    cls.decrypt_font(key, path)
+            return True
+        except Exception:
+            # Best effort: report the failure and let the caller treat the
+            # book as DRMed instead of aborting with an unrelated error.
+            import traceback
+            traceback.print_exc()
+        return False
+
+    # The method was originally defined under this misspelled name while
+    # convert() called process_encryption; keep the old spelling as an alias.
+    process_ecryption = process_encryption
+
+    def convert(self, stream, options, file_ext, parse_cache, log):
+        '''
+        Extract the EPUB in `stream` into the current working directory and
+        return the path of the first file ending in .opf found by walking it.
+
+        Raises ValueError if no OPF file is found and DRMError if
+        META-INF/encryption.xml exists but `process_encryption` fails.
+        '''
+        from calibre.utils.zipfile import ZipFile
+        from calibre import walk
+        from calibre.ebooks import DRMError
+        zf = ZipFile(stream)
+        zf.extractall(os.getcwd())
+        encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
+        opf = None
+        for f in walk('.'):
+            if f.lower().endswith('.opf'):
+                opf = f
+                break
+        path = getattr(stream, 'name', 'stream')
+
+        if opf is None:
+            raise ValueError('%s is not a valid EPUB file'%path)
+
+        if os.path.exists(encfile):
+            if not self.process_encryption(encfile, opf, log):
+                raise DRMError(os.path.basename(path))
+
+        return opf
+        
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@ -31,8 +31,8 @@ from cssutils import CSSParser

 class HTMLElement(HtmlElement):
    
-    @apply
-    def specified_font_size():
+    @dynamic_property
+    def specified_font_size(self):
        
        def fget(self):
            ans = self.get('specified_font_size', '')
@ -47,8 +47,8 @@ class HTMLElement(HtmlElement):
                     
        return property(fget=fget, fset=fset)
    
-    @apply
-    def computed_font_size():
+    @dynamic_property
+    def computed_font_size(self):
        def fget(self):
            ans = self.get('computed_font_size', '')
            if ans == '':
@ -1183,4 +1183,4 @@ output  = %s
    

 if __name__ == '__main__':
-    sys.exit(main())
+    sys.exit(main())
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@ -7,21 +7,25 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
    'and Marshall T. Vandegrift <llasram@gmail.com>'

-import sys, struct, cStringIO, os
+import sys, struct, os
 import functools
 import re
 from urlparse import urldefrag
+from cStringIO import StringIO
 from urllib import unquote as urlunquote
 from lxml import etree
 from calibre.ebooks.lit import LitError
 from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
 import calibre.ebooks.lit.mssha1 as mssha1
 from calibre.ebooks.oeb.base import urlnormalize
+from calibre.ebooks.oeb.reader import OEBReader
 from calibre.ebooks import DRMError
 from calibre import plugins
 lzx, lxzerror = plugins['lzx']
 msdes, msdeserror = plugins['msdes']

+__all__ = ["LitReader"]
+
 XML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
 """
 OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
@ -109,6 +113,9 @@ def consume_sized_utf8_string(bytes, zpad=False):
        pos += 1
    return u''.join(result), bytes[pos:]

+def encode(string):
+    '''
+    Coerce `string` to unicode and return it encoded as ASCII, with any
+    non-ASCII character replaced by an XML character reference.
+    '''
+    text = unicode(string)
+    return text.encode('ascii', 'xmlcharrefreplace')
+
 class UnBinary(object):
    AMPERSAND_RE = re.compile(
        r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)')
@ -120,14 +127,16 @@ class UnBinary(object):
    def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS):
        self.manifest = manifest
        self.tag_map, self.attr_map, self.tag_to_attr_map = map
+        self.is_html = map is HTML_MAP
        self.tag_atoms, self.attr_atoms = atoms
        self.opf = map is OPF_MAP
        self.bin = bin
        self.dir = os.path.dirname(path)
-        self.buf = cStringIO.StringIO()
-        self.binary_to_text()
-        self.raw = self.buf.getvalue().lstrip().decode('utf-8')
+        buf = StringIO()
+        self.binary_to_text(bin, buf)
+        self.raw = buf.getvalue().lstrip()
        self.escape_reserved()
+        self._tree = None

    def escape_reserved(self):
        raw = self.raw
@ -154,18 +163,20 @@ class UnBinary(object):
        return '/'.join(relpath)
    
    def __unicode__(self):
+        return self.raw.decode('utf-8')
+
+    def __str__(self):
        return self.raw
-    
-    def binary_to_text(self, base=0, depth=0):
+
+    def binary_to_text(self, bin, buf, index=0, depth=0):
        tag_name = current_map = None
        dynamic_tag = errors = 0
        in_censorship = is_goingdown = False
        state = 'text'
-        index = base
        flags = 0
        
-        while index < len(self.bin):
-            c, index = read_utf8_char(self.bin, index)
+        while index < len(bin):
+            c, index = read_utf8_char(bin, index)
            oc = ord(c)
            
            if state == 'text':
@ -178,7 +189,7 @@ class UnBinary(object):
                    c = '>>'
                elif c == '<':
                    c = '<<'
-                self.buf.write(c.encode('ascii', 'xmlcharrefreplace'))
+                buf.write(encode(c))
            
            elif state == 'get flags':
                if oc == 0:
@ -191,7 +202,7 @@ class UnBinary(object):
                state = 'text' if oc == 0 else 'get attr'
                if flags & FLAG_OPENING:
                    tag = oc
-                    self.buf.write('<')
+                    buf.write('<')
                    if not (flags & FLAG_CLOSING):
                        is_goingdown = True
                    if tag == 0x8000:
@ -211,7 +222,7 @@ class UnBinary(object):
                        tag_name = '?'+unichr(tag)+'?'
                        current_map = self.tag_to_attr_map[tag]
                        print 'WARNING: tag %s unknown' % unichr(tag)
-                    self.buf.write(unicode(tag_name).encode('utf-8'))
+                    buf.write(encode(tag_name))
                elif flags & FLAG_CLOSING:
                    if depth == 0:
                        raise LitError('Extra closing tag')
@ -223,15 +234,14 @@ class UnBinary(object):
                    if not is_goingdown:
                        tag_name = None
                        dynamic_tag = 0
-                        self.buf.write(' />')
+                        buf.write(' />')
                    else:
-                        self.buf.write('>')
-                        index = self.binary_to_text(base=index, depth=depth+1)
+                        buf.write('>')
+                        index = self.binary_to_text(bin, buf, index, depth+1)
                        is_goingdown = False
                        if not tag_name:
                            raise LitError('Tag ends before it begins.')
-                        self.buf.write(u''.join(
-                                ('</', tag_name, '>')).encode('utf-8'))
+                        buf.write(encode(u''.join(('</', tag_name, '>'))))
                        dynamic_tag = 0
                        tag_name = None
                    state = 'text'
@ -251,7 +261,7 @@ class UnBinary(object):
                        in_censorship = True
                        state = 'get value length'
                        continue
-                    self.buf.write(' ' + unicode(attr).encode('utf-8') + '=')
+                    buf.write(' ' + encode(attr) + '=')
                    if attr in ['href', 'src']:
                        state = 'get href length'
                    else:
@ -259,24 +269,24 @@ class UnBinary(object):
            
            elif state == 'get value length':
                if not in_censorship:
-                    self.buf.write('"')
+                    buf.write('"')
                count = oc - 1
                if count == 0:
                    if not in_censorship:
-                        self.buf.write('"')
+                        buf.write('"')
                    in_censorship = False
                    state = 'get attr'
                    continue
                state = 'get value'
                if oc == 0xffff:
                    continue
-                if count < 0 or count > (len(self.bin) - index):
+                if count < 0 or count > (len(bin) - index):
                    raise LitError('Invalid character count %d' % count)
            
            elif state == 'get value':
                if count == 0xfffe:
                    if not in_censorship:
-                        self.buf.write('%s"' % (oc - 1))
+                        buf.write('%s"' % (oc - 1))
                    in_censorship = False
                    state = 'get attr'
                elif count > 0:
@ -289,13 +299,13 @@ class UnBinary(object):
                    count -= 1
                if count == 0:
                    if not in_censorship:
-                        self.buf.write('"')
+                        buf.write('"')
                    in_censorship = False
                    state = 'get attr'
            
            elif state == 'get custom length':
                count = oc - 1
-                if count <= 0 or count > len(self.bin)-index:
+                if count <= 0 or count > len(bin)-index:
                    raise LitError('Invalid character count %d' % count)
                dynamic_tag += 1
                state = 'get custom'
@ -305,26 +315,26 @@ class UnBinary(object):
                tag_name += c
                count -= 1
                if count == 0:
-                    self.buf.write(unicode(tag_name).encode('utf-8'))
+                    buf.write(encode(tag_name))
                    state = 'get attr'
            
            elif state == 'get attr length':
                count = oc - 1
-                if count <= 0 or count > (len(self.bin) - index):
+                if count <= 0 or count > (len(bin) - index):
                    raise LitError('Invalid character count %d' % count)
-                self.buf.write(' ')
+                buf.write(' ')
                state = 'get custom attr'
            
            elif state == 'get custom attr':
-                self.buf.write(unicode(c).encode('utf-8'))
+                buf.write(encode(c))
                count -= 1
                if count == 0:
-                    self.buf.write('=')
+                    buf.write('=')
                    state = 'get value length'

            elif state == 'get href length':
                count = oc - 1
-                if count <= 0 or count > (len(self.bin) - index):
+                if count <= 0 or count > (len(bin) - index):
                    raise LitError('Invalid character count %d' % count)
                href = ''
                state = 'get href'
@ -338,10 +348,11 @@ class UnBinary(object):
                    if frag:
                        path = '#'.join((path, frag))
                    path = urlnormalize(path)
-                    self.buf.write((u'"%s"' % path).encode('utf-8'))
+                    buf.write(encode(u'"%s"' % path))
                    state = 'get attr'
        return index
    
+
 class DirectoryEntry(object):
    def __init__(self, name, section, offset, size):
        self.name = name
@ -356,6 +367,7 @@ class DirectoryEntry(object):
    def __str__(self):
        return repr(self)

+
 class ManifestItem(object):
    def __init__(self, original, internal, mime_type, offset, root, state):
        self.original = original
@ -383,65 +395,87 @@ class ManifestItem(object):
            % (self.internal, self.path, self.mime_type, self.offset,
               self.root, self.state)

+
 def preserve(function):
    def wrapper(self, *args, **kwargs):
-        opos = self._stream.tell()
+        opos = self.stream.tell()
        try:
            return function(self, *args, **kwargs)
        finally:
-            self._stream.seek(opos)
+            self.stream.seek(opos)
    functools.update_wrapper(wrapper, function)
    return wrapper
    
-class LitReader(object):
+class LitFile(object):
    PIECE_SIZE = 16
-    XML_PARSER = etree.XMLParser(
-        recover=True, resolve_entities=False)
+
+    def __init__(self, filename_or_stream):
+        '''
+        Bind this object to a LIT file and read its structures.
+
+        `filename_or_stream` is used directly as the stream if it has a
+        `read` attribute; otherwise it is treated as a path and opened in
+        binary mode.
+
+        Raises LitError if the magic bytes are not 'ITOLITLS' or the
+        version is not 1.
+        '''
+        if hasattr(filename_or_stream, 'read'):
+            self.stream = filename_or_stream
+        else:
+            self.stream = open(filename_or_stream, 'rb')
+        # Name the OPF after the stream's file name; streams without a
+        # `name` attribute fall back to 'content.opf'.
+        try:
+            self.opf_path = os.path.splitext(
+                os.path.basename(self.stream.name))[0] + '.opf'
+        except AttributeError:
+            self.opf_path = 'content.opf'
+        # `magic` and `version` are properties that read from self.stream,
+        # so the stream must already be set at this point.
+        if self.magic != 'ITOLITLS':
+            raise LitError('Not a valid LIT file')
+        if self.version != 1:
+            raise LitError('Unknown LIT version %d' % (self.version,))
+        # The read_* steps run in this fixed order.
+        self.read_secondary_header()
+        self.read_header_pieces()
+        self.read_section_names()
+        self.read_manifest()
+        self.read_drm()
+
+    def warn(self, msg):
+        '''Print `msg` to standard output, prefixed with "WARNING: ".'''
+        print "WARNING: %s" % (msg,)

    def magic():
        @preserve
        def fget(self):
-            self._stream.seek(0)
-            return self._stream.read(8)
+            self.stream.seek(0)
+            return self.stream.read(8)
        return property(fget=fget)
    magic = magic()
    
    def version():
        def fget(self):
-            self._stream.seek(8)
-            return u32(self._stream.read(4))
+            self.stream.seek(8)
+            return u32(self.stream.read(4))
        return property(fget=fget)
    version = version()
    
    def hdr_len():
        @preserve
        def fget(self):
-            self._stream.seek(12)
-            return int32(self._stream.read(4))
+            self.stream.seek(12)
+            return int32(self.stream.read(4))
        return property(fget=fget)
    hdr_len = hdr_len()
    
    def num_pieces():
        @preserve
        def fget(self):
-            self._stream.seek(16)
-            return int32(self._stream.read(4))
+            self.stream.seek(16)
+            return int32(self.stream.read(4))
        return property(fget=fget)
    num_pieces = num_pieces()
    
    def sec_hdr_len():
        @preserve
        def fget(self):
-            self._stream.seek(20)
-            return int32(self._stream.read(4))
+            self.stream.seek(20)
+            return int32(self.stream.read(4))
        return property(fget=fget)
    sec_hdr_len = sec_hdr_len()
    
    def guid():
        @preserve
        def fget(self):
-            self._stream.seek(24)
-            return self._stream.read(16)
+            self.stream.seek(24)
+            return self.stream.read(16)
        return property(fget=fget)
    guid = guid()
    
@ -451,44 +485,27 @@ class LitReader(object):
            size = self.hdr_len \
                + (self.num_pieces * self.PIECE_SIZE) \
                + self.sec_hdr_len
-            self._stream.seek(0)
-            return self._stream.read(size)
+            self.stream.seek(0)
+            return self.stream.read(size)
        return property(fget=fget)
    header = header()
    
-    def __init__(self, filename_or_stream):
-        if hasattr(filename_or_stream, 'read'):
-            self._stream = filename_or_stream
-        else:
-            self._stream = open(filename_or_stream, 'rb')
-        if self.magic != 'ITOLITLS':
-            raise LitError('Not a valid LIT file')
-        if self.version != 1:
-            raise LitError('Unknown LIT version %d' % (self.version,))
-        self.entries = {}
-        self._read_secondary_header()
-        self._read_header_pieces()
-        self._read_section_names()
-        self._read_manifest()
-        self._read_meta()
-        self._read_drm()
-
    @preserve
    def __len__(self):
-        self._stream.seek(0, 2)
-        return self._stream.tell()
+        self.stream.seek(0, 2)
+        return self.stream.tell()

    @preserve
-    def _read_raw(self, offset, size):
-        self._stream.seek(offset)
-        return self._stream.read(size)
+    def read_raw(self, offset, size):
+        self.stream.seek(offset)
+        return self.stream.read(size)

-    def _read_content(self, offset, size):
-        return self._read_raw(self.content_offset + offset, size)
+    def read_content(self, offset, size):
+        return self.read_raw(self.content_offset + offset, size)
    
-    def _read_secondary_header(self):
+    def read_secondary_header(self):
        offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
-        bytes = self._read_raw(offset, self.sec_hdr_len)
+        bytes = self.read_raw(offset, self.sec_hdr_len)
        offset = int32(bytes[4:])
        while offset < len(bytes):
            blocktype = bytes[offset:offset+4]
@ -516,21 +533,21 @@ class LitReader(object):
        if not hasattr(self, 'content_offset'):
            raise LitError('Could not figure out the content offset')
    
-    def _read_header_pieces(self):
+    def read_header_pieces(self):
        src = self.header[self.hdr_len:]
        for i in xrange(self.num_pieces):
            piece = src[i * self.PIECE_SIZE:(i + 1) * self.PIECE_SIZE]
            if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
                raise LitError('Piece %s has 64bit value' % repr(piece))
            offset, size = u32(piece), int32(piece[8:])
-            piece = self._read_raw(offset, size)
+            piece = self.read_raw(offset, size)
            if i == 0:
                continue # Dont need this piece
            elif i == 1:
                if u32(piece[8:])  != self.entry_chunklen or \
                   u32(piece[12:]) != self.entry_unknown:
                    raise LitError('Secondary header does not match piece')
-                self._read_directory(piece)
+                self.read_directory(piece)
            elif i == 2:
                if u32(piece[8:])  != self.count_chunklen or \
                   u32(piece[12:]) != self.count_unknown:
@ -541,12 +558,13 @@ class LitReader(object):
            elif i == 4:
                self.piece4_guid = piece
                
-    def _read_directory(self, piece):
+    def read_directory(self, piece):
        if not piece.startswith('IFCM'):
            raise LitError('Header piece #1 is not main directory.')
        chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28])
        if (32 + (num_chunks * chunk_size)) != len(piece):
-            raise LitError('IFCM HEADER has incorrect length')
+            raise LitError('IFCM header has incorrect length')
+        self.entries = {}
        for i in xrange(num_chunks):
            offset = 32 + (i * chunk_size)
            chunk = piece[offset:offset + chunk_size]
@ -580,17 +598,17 @@ class LitReader(object):
                entry = DirectoryEntry(name, section, offset, size)
                self.entries[name] = entry

-    def _read_section_names(self):
+    def read_section_names(self):
        if '::DataSpace/NameList' not in self.entries:
            raise LitError('Lit file does not have a valid NameList')
        raw = self.get_file('::DataSpace/NameList')
        if len(raw) < 4:
            raise LitError('Invalid Namelist section')
        pos = 4
-        self.num_sections = u16(raw[2:pos])
-        self.section_names = [""]*self.num_sections
-        self.section_data = [None]*self.num_sections
-        for section in xrange(self.num_sections):
+        num_sections = u16(raw[2:pos])
+        self.section_names = [""] * num_sections
+        self.section_data = [None] * num_sections
+        for section in xrange(num_sections):
            size = u16(raw[pos:pos+2])
            pos += 2
            size = size*2 + 2
@ -600,11 +618,12 @@ class LitReader(object):
                raw[pos:pos+size].decode('utf-16-le').rstrip('\000')
            pos += size

-    def _read_manifest(self):
+    def read_manifest(self):
        if '/manifest' not in self.entries:
            raise LitError('Lit file does not have a valid manifest')
        raw = self.get_file('/manifest')
        self.manifest = {}
+        self.paths = {self.opf_path: None}
        while raw:
            slen, raw = ord(raw[0]), raw[1:]
            if slen == 0: break
@ -645,28 +664,9 @@ class LitReader(object):
        for item in mlist:
            if item.path[0] == '/':
                item.path = os.path.basename(item.path)
+            self.paths[item.path] = item

-    def _pretty_print(self, xml):
-        f = cStringIO.StringIO(xml.encode('utf-8'))
-        doc = etree.parse(f, parser=self.XML_PARSER)
-        pretty = etree.tostring(doc, encoding='ascii', pretty_print=True)
-        return XML_DECL + unicode(pretty)
-                
-    def _read_meta(self):
-        path = 'content.opf'
-        raw = self.get_file('/meta')
-        xml = OPF_DECL
-        try:
-            xml += unicode(UnBinary(raw, path, self.manifest, OPF_MAP))
-        except LitError:
-            if 'PENGUIN group' not in raw: raise
-            print "WARNING: attempting PENGUIN malformed OPF fix"
-            raw = raw.replace(
-                'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
-            xml += unicode(UnBinary(raw, path, self.manifest, OPF_MAP))
-        self.meta = xml
-
-    def _read_drm(self):
+    def read_drm(self):
        self.drmlevel = 0
        if '/DRMStorage/Licenses/EUL' in self.entries:
            self.drmlevel = 5
@ -677,7 +677,7 @@ class LitReader(object):
        else:
            return
        if self.drmlevel < 5:
-            msdes.deskey(self._calculate_deskey(), msdes.DE1)
+            msdes.deskey(self.calculate_deskey(), msdes.DE1)
            bookkey = msdes.des(self.get_file('/DRMStorage/DRMSealed'))
            if bookkey[0] != '\000':
                raise LitError('Unable to decrypt title key!')
@ -685,7 +685,7 @@ class LitReader(object):
        else:
            raise DRMError("Cannot access DRM-protected book")

-    def _calculate_deskey(self):
+    def calculate_deskey(self):
        hashfiles = ['/meta', '/DRMStorage/DRMSource']
        if self.drmlevel == 3:
            hashfiles.append('/DRMStorage/DRMBookplate')
@ -709,18 +709,18 @@ class LitReader(object):
    def get_file(self, name):
        entry = self.entries[name]
        if entry.section == 0:
-            return self._read_content(entry.offset, entry.size)
+            return self.read_content(entry.offset, entry.size)
        section = self.get_section(entry.section)
        return section[entry.offset:entry.offset+entry.size]

    def get_section(self, section):
        data = self.section_data[section]
        if not data:
-            data = self._get_section(section)
+            data = self.get_section_uncached(section)
            self.section_data[section] = data
        return data

-    def _get_section(self, section):
+    def get_section_uncached(self, section):
        name = self.section_names[section]
        path = '::DataSpace/Storage/' + name
        transform = self.get_file(path + '/Transform/List')
@ -732,29 +732,29 @@ class LitReader(object):
                raise LitError("ControlData is too short")
            guid = msguid(transform)
            if guid == DESENCRYPT_GUID:
-                content = self._decrypt(content)
+                content = self.decrypt(content)
                control = control[csize:]
            elif guid == LZXCOMPRESS_GUID:
                reset_table = self.get_file(
                    '/'.join(('::DataSpace/Storage', name, 'Transform',
                              LZXCOMPRESS_GUID, 'InstanceData/ResetTable')))
-                content = self._decompress(content, control, reset_table)
+                content = self.decompress(content, control, reset_table)
                control = control[csize:]
            else:
                raise LitError("Unrecognized transform: %s." % repr(guid))
            transform = transform[16:]
        return content

-    def _decrypt(self, content):
+    def decrypt(self, content):
        length = len(content)
        extra = length & 0x7
        if extra > 0:
-            self._warn("content length not a multiple of block size")
+            self.warn("content length not a multiple of block size")
            content += "\0" * (8 - extra)
        msdes.deskey(self.bookkey, msdes.DE1)
        return msdes.des(content)

-    def _decompress(self, content, control, reset_table):
+    def decompress(self, content, control, reset_table):
        if len(control) < 32 or control[CONTROL_TAG:CONTROL_TAG+4] != "LZXC":
            raise LitError("Invalid ControlData tag value")
        if len(reset_table) < (RESET_INTERVAL + 8):
@ -795,7 +795,7 @@ class LitReader(object):
                        result.append(
                            lzx.decompress(content[base:size], window_bytes))
                    except lzx.LZXError:
-                        self._warn("LZX decompression error; skipping chunk")
+                        self.warn("LZX decompression error; skipping chunk")
                    bytes_remaining -= window_bytes
                    base = size
            accum += int32(reset_table[RESET_INTERVAL:])
@ -805,7 +805,7 @@ class LitReader(object):
            try:
                result.append(lzx.decompress(content[base:], bytes_remaining))
            except lzx.LZXError:
-                self._warn("LZX decompression error; skipping chunk")
+                self.warn("LZX decompression error; skipping chunk")
            bytes_remaining = 0
        if bytes_remaining > 0:
            raise LitError("Failed to completely decompress section")
@ -855,62 +855,51 @@ class LitReader(object):
                content = self._pretty_print(content)
            content = content.encode('utf-8')
        else:
-            name = '/'.join(('/data', entry.internal))
-            content = self.get_file(name)
+            internal = '/'.join(('/data', entry.internal))
+            content = self._litfile.get_file(internal)
        return content
-                    
-    def extract_content(self, output_dir=os.getcwdu(), pretty_print=False):
-        output_dir = os.path.abspath(output_dir)
+ 
+
+class LitContainer(object):
+    """Simple Container-interface, read-only accessor for LIT files."""
+
+    def __init__(self, filename_or_stream):
+        self._litfile = LitFile(filename_or_stream)
+    
+    def namelist(self):
+        return self._litfile.paths.keys()
+
+    def exists(self, name):
+        return urlunquote(name) in self._litfile.paths
+    
+    def read(self, name):
+        entry = self._litfile.paths[urlunquote(name)] if name else None
+        if entry is None:
+            content = OPF_DECL + self._read_meta()
+        elif 'spine' in entry.state:
+            internal = '/'.join(('/data', entry.internal, 'content'))
+            raw = self._litfile.get_file(internal)
+            unbin = UnBinary(raw, name, self._litfile.manifest, HTML_MAP)
+            content = HTML_DECL + str(unbin)
+   
+    def _read_meta(self):
+        path = 'content.opf'
+        raw = self._litfile.get_file('/meta')
        try:
-            opf_path = os.path.splitext(
-                os.path.basename(self._stream.name))[0] + '.opf'
-        except AttributeError:
-            opf_path = 'content.opf'
-        opf_path = os.path.join(output_dir, opf_path)
-        self._ensure_dir(opf_path)
-        with open(opf_path, 'wb') as f:
-            xml = self.meta
-            if pretty_print:
-                xml = self._pretty_print(xml)
-            f.write(xml.encode('utf-8'))
-        for entry in self.manifest.values():
-            path = os.path.join(output_dir, entry.path)
-            self._ensure_dir(path)
-            with open(path, 'wb') as f:
-                f.write(self.get_entry_content(entry, pretty_print))
+            unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
+        except LitError:
+            if 'PENGUIN group' not in raw: raise
+            print "WARNING: attempting PENGUIN malformed OPF fix"
+            raw = raw.replace(
+                'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
+            unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
+        return str(unbin)

-    def _ensure_dir(self, path):
-        dir = os.path.dirname(path)
-        if not os.path.isdir(dir):
-            os.makedirs(dir)

-    def _warn(self, msg):
-        print "WARNING: %s" % (msg,)
+class LitReader(OEBReader):
+    Container = LitContainer
+    DEFAULT_PROFILE = 'MSReader'

-def option_parser():
-    from calibre.utils.config import OptionParser
-    parser = OptionParser(usage=_('%prog [options] LITFILE'))
-    parser.add_option(
-        '-o', '--output-dir', default='.', 
-        help=_('Output directory. Defaults to current directory.'))
-    parser.add_option(
-        '-p', '--pretty-print', default=False, action='store_true',
-        help=_('Legibly format extracted markup. May modify meaningful whitespace.'))
-    parser.add_option(
-        '--verbose', default=False, action='store_true',
-        help=_('Useful for debugging.'))
-    return parser
-
-def main(args=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        return 1
-    lr = LitReader(args[1])
-    lr.extract_content(opts.output_dir, opts.pretty_print)
-    print _('OEB ebook created in'), opts.output_dir
-    return 0

 try:
    import psyco
@ -918,6 +907,3 @@ try:
    psyco.bind(UnBinary.binary_to_text)
 except ImportError:
    pass
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/lit/writer.py
+++ b/src/calibre/ebooks/lit/writer.py
@ -312,7 +312,7 @@ class LitWriter(object):
        cover = None
        if oeb.metadata.cover:
            id = str(oeb.metadata.cover[0])
-            cover = oeb.manifest[id]
+            cover = oeb.manifest.ids[id]
            for type, title in ALL_MS_COVER_TYPES:
                if type not in oeb.guide:
                    oeb.guide.add(type, title, cover.href)
--- a/src/calibre/ebooks/lrf/meta.py
+++ b/src/calibre/ebooks/lrf/meta.py
@ -229,6 +229,9 @@ def get_metadata(stream):
    mi.author = lrf.author.strip()
    mi.comments = lrf.free_text.strip()
    mi.category = lrf.category.strip()+', '+lrf.classification.strip()
+    tags = [x.strip() for x in mi.category.split(',') if x.strip()]
+    if tags:
+        mi.tags = tags
    mi.publisher = lrf.publisher.strip()
    mi.cover_data = lrf.get_cover()
    try:
@ -624,7 +627,9 @@ def set_metadata(stream, mi):
        lrf.title = mi.title
    if mi.authors:
        lrf.author = ', '.join(mi.authors)
-    if mi.category:
+    if mi.tags:
+        lrf.category = mi.tags[0]
+    if getattr(mi, 'category', False):
        lrf.category = mi.category
    if mi.comments:    
        lrf.free_text = mi.comments
--- a/src/calibre/ebooks/lrf/tags.py
+++ b/src/calibre/ebooks/lrf/tags.py
@ -207,32 +207,32 @@ class Tag(object):
        s += " at %08X, contents: %s" % (self.offset, repr(self.contents))
        return s
    
-    @apply
-    def byte():
+    @dynamic_property
+    def byte(self):
        def fget(self):
            if len(self.contents) != 1:
                raise LRFParseError("Bad parameter for tag ID: %04X" % self.id)
            return struct.unpack("<B", self.contents)[0]
        return property(fget=fget)
    
-    @apply
-    def word():
+    @dynamic_property
+    def word(self):
        def fget(self):
            if len(self.contents) != 2:
                raise LRFParseError("Bad parameter for tag ID: %04X" % self.id)
            return struct.unpack("<H", self.contents)[0]
        return property(fget=fget)
    
-    @apply
-    def sword():
+    @dynamic_property
+    def sword(self):
        def fget(self):
            if len(self.contents) != 2:
                raise LRFParseError("Bad parameter for tag ID: %04X" % self.id)
            return struct.unpack("<h", self.contents)[0]
        return property(fget=fget)
    
-    @apply
-    def dword():
+    @dynamic_property
+    def dword(self):
        def fget(self):
            if len(self.contents) != 4:
                raise LRFParseError("Bad parameter for tag ID: %04X" % self.id)
--- a/src/calibre/ebooks/metadata/init.py
+++ b/src/calibre/ebooks/metadata/init.py
@ -4,9 +4,9 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'

 """
-Provides abstraction for metadata reading.writing from a variety of ebook formats. 
+Provides abstraction for metadata reading.writing from a variety of ebook formats.
 """
-import os, mimetypes, sys
+import os, mimetypes, sys, re
 from urllib import unquote, quote
 from urlparse import urlparse

@ -36,32 +36,28 @@ def author_to_author_sort(author):
 def authors_to_sort_string(authors):
    return ' & '.join(map(author_to_author_sort, authors))

-def get_parser(extension):
-    ''' Return an option parser with the basic metadata options already setup'''
-    parser = OptionParser(usage='%prog [options] myfile.'+extension+'\n\nRead and write metadata from an ebook file.')
-    parser.add_option("-t", "--title", action="store", type="string", \
-                    dest="title", help=_("Set the book title"), default=None)
-    parser.add_option("-a", "--authors", action="store", type="string", \
-                    dest="authors", help=_("Set the authors"), default=None)
-    parser.add_option("-c", "--category", action="store", type="string", \
-                    dest="category", help=_("The category this book belongs to. E.g.: History"), default=None)
-    parser.add_option('--comment', dest='comment', default=None, action='store',
-                      help=_('Set the comment'))
-    return parser
+_title_pat = re.compile(r'^(A|The|An)\s+', re.IGNORECASE)
+def title_sort(title):
+    '''
+    Return the version of `title` to be used for sorting.
+
+    A leading English article (A, An or The, matched case-insensitively and
+    followed by whitespace) is moved to the end of the title, so that
+    ``The Hobbit`` becomes ``Hobbit, The``. A title without a leading article
+    is returned with surrounding whitespace stripped.
+    '''
+    match = _title_pat.search(title)
+    if match:
+        prep = match.group(1)
+        # Remove only the matched prefix. str.replace() would also delete
+        # every later occurrence of the article, even inside other words
+        # (e.g. 'The Theory' -> 'ory, The').
+        title = title[match.end():] + ', ' + prep
+    return title.strip()
+

 class Resource(object):
    '''
-    Represents a resource (usually a file on the filesystem or a URL pointing 
+    Represents a resource (usually a file on the filesystem or a URL pointing
    to the web. Such resources are commonly referred to in OPF files.
-    
+
    They have the interface:
-    
+
    :member:`path`
    :member:`mime_type`
    :method:`href`
-    
+
    '''
-    
+
    def __init__(self, href_or_path, basedir=os.getcwd(), is_path=True):
        self._href = None
        self._basedir = basedir
@ -91,13 +87,13 @@ class Resource(object):
                pc = unquote(pc).decode('utf-8')
                self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
                self.fragment = unquote(url[-1])
-        
-    
+
+
    def href(self, basedir=None):
        '''
        Return a URL pointing to this resource. If it is a file on the filesystem
        the URL is relative to `basedir`.
-        
+
        `basedir`: If None, the basedir of this resource is used (see :method:`set_basedir`).
        If this resource has no basedir, then the current working directory is used as the basedir.
        '''
@ -119,54 +115,54 @@ class Resource(object):
        if isinstance(rpath, unicode):
            rpath = rpath.encode('utf-8')
        return quote(rpath.replace(os.sep, '/'))+frag
-    
+
    def set_basedir(self, path):
        self._basedir = path
-        
+
    def basedir(self):
        return self._basedir
-    
+
    def __repr__(self):
        return 'Resource(%s, %s)'%(repr(self.path), repr(self.href()))
-        
-        
+
+
 class ResourceCollection(object):
-    
+
    def __init__(self):
        self._resources = []
-        
+
    def __iter__(self):
        for r in self._resources:
            yield r
-            
+
    def __len__(self):
        return len(self._resources)
-    
+
    def __getitem__(self, index):
        return self._resources[index]
-    
+
    def __bool__(self):
        return len(self._resources) > 0
-    
+
    def __str__(self):
        resources = map(repr, self)
        return '[%s]'%', '.join(resources)
-    
+
    def __repr__(self):
        return str(self)
-    
+
    def append(self, resource):
        if not isinstance(resource, Resource):
            raise ValueError('Can only append objects of type Resource')
        self._resources.append(resource)
-        
+
    def remove(self, resource):
        self._resources.remove(resource)
-    
+
    def replace(self, start, end, items):
        'Same as list[start:end] = items'
        self._resources[start:end] = items
-        
+
    @staticmethod
    def from_directory_contents(top, topdown=True):
        collection = ResourceCollection()
@ -176,30 +172,30 @@ class ResourceCollection(object):
            res.set_basedir(top)
            collection.append(res)
        return collection
-    
+
    def set_basedir(self, path):
        for res in self:
            res.set_basedir(path)
-        
+


 class MetaInformation(object):
    '''Convenient encapsulation of book metadata'''
-    
+
    @staticmethod
    def copy(mi):
        ans = MetaInformation(mi.title, mi.authors)
        for attr in ('author_sort', 'title_sort', 'comments', 'category',
                     'publisher', 'series', 'series_index', 'rating',
                     'isbn', 'tags', 'cover_data', 'application_id', 'guide',
-                     'manifest', 'spine', 'toc', 'cover', 'language', 
+                     'manifest', 'spine', 'toc', 'cover', 'language',
                     'book_producer', 'timestamp'):
            if hasattr(mi, attr):
                setattr(ans, attr, getattr(mi, attr))
-        
+
    def __init__(self, title, authors=[_('Unknown')]):
        '''
-        @param title: title or "Unknown" or a MetaInformation object
+        @param title: title or ``_('Unknown')`` or a MetaInformation object
        @param authors: List of strings or []
        '''
        mi = None
@ -214,14 +210,14 @@ class MetaInformation(object):
        self.tags = getattr(mi, 'tags', [])
        #: mi.cover_data = (ext, data)
        self.cover_data   = getattr(mi, 'cover_data', (None, None))
-        
+
        for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher',
                  'series', 'series_index', 'rating', 'isbn', 'language',
                  'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
                  'book_producer', 'timestamp'
                  ):
            setattr(self, x, getattr(mi, x, None))
-    
+
    def smart_update(self, mi):
        '''
        Merge the information in C{mi} into self. In case of conflicts, the information
@ -229,59 +225,66 @@ class MetaInformation(object):
        '''
        if mi.title and mi.title != _('Unknown'):
            self.title = mi.title
-            
+
        if mi.authors and mi.authors[0] != _('Unknown'):
            self.authors = mi.authors
-            
+
        for attr in ('author_sort', 'title_sort', 'comments', 'category',
                     'publisher', 'series', 'series_index', 'rating',
-                     'isbn', 'application_id', 'manifest', 'spine', 'toc', 
-                     'cover', 'language', 'guide', 'book_producer', 
+                     'isbn', 'application_id', 'manifest', 'spine', 'toc',
+                     'cover', 'language', 'guide', 'book_producer',
                     'timestamp'):
-            if hasattr(mi, attr):
-                val = getattr(mi, attr)
-                if val is not None:
-                    setattr(self, attr, val)
-                    
-        self.tags += mi.tags
+            val = getattr(mi, attr, None)
+            if val is not None:
+                setattr(self, attr, val)
+
+        if mi.tags:
+            self.tags += mi.tags
        self.tags = list(set(self.tags))
-        
+
        if getattr(mi, 'cover_data', None) and mi.cover_data[0] is not None:
            self.cover_data = mi.cover_data
-            
+
    def format_series_index(self):
        try:
            x = float(self.series_index)
        except ValueError:
            x = 1.0
        return '%d'%x if int(x) == x else '%.2f'%x
-            
+
    def __unicode__(self):
-        ans = u''
-        ans += u'Title    : ' + unicode(self.title) + u'\n'
+        '''
+        Return a multi-line unicode summary of the metadata: one
+        ``label: value`` line per populated field, with the label
+        left-justified in a 20 character column.
+        '''
+        ans = []
+        def fmt(x, y):
+            ans.append(u'%-20s: %s'%(unicode(x), unicode(y)))
+
+        fmt('Title', self.title)
+        if self.title_sort:
+            fmt('Title sort', self.title_sort)
        if self.authors:
-            ans += u'Author   : ' + (' & '.join(self.authors) if self.authors is not None else _('Unknown'))
-            ans += ((' [' + self.author_sort + ']') if self.author_sort else '') + u'\n'
+            fmt('Author(s)',  authors_to_string(self.authors) + \
+               ((' [' + self.author_sort + ']') if self.author_sort else ''))
        if self.publisher:
-            ans += u'Publisher: '+ unicode(self.publisher) + u'\n'
+            fmt('Publisher', self.publisher)
        if getattr(self, 'book_producer', False):
-            ans += u'Producer : '+ unicode(self.book_producer) + u'\n'
-        if self.category: 
+            fmt('Book Producer', self.book_producer)
+        if self.category:
-            ans += u'Category : ' + unicode(self.category) + u'\n'
+            # ans is a list now; ``ans += <string>`` would extend it one
+            # character at a time, so go through fmt() like every other field.
+            fmt('Category', self.category)
        if self.comments:
-            ans += u'Comments : ' + unicode(self.comments) + u'\n'
+            fmt('Comments', self.comments)
        if self.isbn:
-            ans += u'ISBN     : '     + unicode(self.isbn) + u'\n'
+            fmt('ISBN', self.isbn)
        if self.tags:
-            ans += u'Tags     : ' + u', '.join([unicode(t) for t in self.tags]) + '\n'
+            fmt('Tags', u', '.join([unicode(t) for t in self.tags]))
        if self.series:
-            ans += u'Series   : '+unicode(self.series) + ' #%s\n'%self.format_series_index()  
+            fmt('Series', self.series + ' #%s'%self.format_series_index())
        if self.language:
-            ans += u'Language : '     + unicode(self.language) + u'\n'
+            fmt('Language', self.language)
+        if self.rating is not None:
+            fmt('Rating', self.rating)
        if self.timestamp is not None:
-            ans += u'Timestamp : ' + self.timestamp.isoformat(' ')
-        return ans.strip()
-    
+            fmt('Timestamp', self.timestamp.isoformat(' '))
+        return u'\n'.join(ans)
+
    def to_html(self):
        ans = [(_('Title'), unicode(self.title))]
        ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]
@ -298,9 +301,9 @@ class MetaInformation(object):
        for i, x in enumerate(ans):
            ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
        return u'<table>%s</table>'%u'\n'.join(ans)
-        
+
    def __str__(self):
        return self.__unicode__().encode('utf-8')
-    
+
    def __nonzero__(self):
-        return bool(self.title or self.author or self.comments or self.category)
+        return bool(self.title or self.author or self.comments or self.tags)
--- a/src/calibre/ebooks/metadata/cli.py
+++ b/src/calibre/ebooks/metadata/cli.py
@ -0,0 +1,198 @@
+from __future__ import with_statement
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+'''
+ebook-meta
+'''
+USAGE='%%prog ebook_file [' + _('options') + ']\n' + \
+_('''
+Read/Write metadata from/to ebook files.
+
+Supported formats for reading metadata: %s
+
+Supported formats for writing metadata: %s
+
+Different file types support different kinds of metadata. If you try to set
+some metadata on a file type that does not support it, the metadata will be
+silently ignored.
+''')
+
+import sys, os
+
+from calibre.utils.config import StringConfig
+from calibre.customize.ui import metadata_readers, metadata_writers
+from calibre.ebooks.metadata.meta import get_metadata, set_metadata
+from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \
+                    title_sort, MetaInformation
+from calibre.ebooks.lrf.meta import LRFMetaFile
+from calibre import prints
+
+def config():
+    '''
+    Build the option set for ebook-meta.
+
+    Returns a StringConfig with one option per settable metadata field, plus
+    the cover/OPF import-export options and the LRF specific --lrf-bookid.
+    No defaults are given here; main() and do_set_metadata() treat an option
+    whose value is None as "not specified".
+    '''
+    c = StringConfig('')
+    c.add_opt('title', ['-t', '--title'],
+              help=_('Set the title.'))
+    c.add_opt('authors', ['-a', '--authors'],
+              help=_('Set the authors. Multiple authors should be separated '
+                     'by the & character. Author names should be in the order '
+                     'Firstname Lastname.'))
+    c.add_opt('title_sort', ['--title-sort'],
+              help=_('The version of the title to be used for sorting. '
+                     'If unspecified, and the title is specified, it will '
+                     'be auto-generated from the title.'))
+    c.add_opt('author_sort', ['--author-sort'],
+              help=_('String to be used when sorting by author. '
+                     'If unspecified, and the author(s) are specified, it will '
+                     'be auto-generated from the author(s).'))
+    c.add_opt('cover', ['--cover'],
+              help=_('Set the cover to the specified file.'))
+    c.add_opt('comments', ['-c', '--comments'],
+              help=_('Set the ebook description.'))
+    c.add_opt('publisher', ['-p', '--publisher'],
+              help=_('Set the ebook publisher.'))
+    c.add_opt('category', ['--category'],
+              help=_('Set the book category.'))
+    c.add_opt('series', ['-s', '--series'],
+              help=_('Set the series this ebook belongs to.'))
+    c.add_opt('series_index', ['-i', '--index'],
+              help=_('Set the index of the book in this series.'))
+    c.add_opt('rating', ['-r', '--rating'],
+              help=_('Set the rating. Should be a number between 1 and 5.'))
+    c.add_opt('isbn', ['--isbn'],
+              help=_('Set the ISBN of the book.'))
+    c.add_opt('tags', ['--tags'],
+              help=_('Set the tags for the book. Should be a comma separated list.'))
+    c.add_opt('book_producer', ['-k', '--book-producer'],
+              help=_('Set the book producer.'))
+    c.add_opt('language', ['-l', '--language'],
+              help=_('Set the language.'))
+
+    c.add_opt('get_cover', ['--get-cover'],
+              help=_('Get the cover from the ebook and save it as the '
+                     'specified file.'))
+    c.add_opt('to_opf', ['--to-opf'],
+              help=_('Specify the name of an OPF file. The metadata will '
+                     'be written to the OPF file.'))
+    # Adjacent string literals are concatenated verbatim, so every fragment
+    # that is followed by another one must end with a space.
+    c.add_opt('from_opf', ['--from-opf'],
+              help=_('Read metadata from the specified OPF file and use it to '
+                     'set metadata in the ebook. Metadata specified on the '
+                     'command line will override metadata read from the OPF file'))
+
+    c.add_opt('lrf_bookid', ['--lrf-bookid'],
+              help=_('Set the BookID in LRF files'))
+    return c
+
+def filetypes():
+    '''Return the set of all file types handled by the metadata reader plugins.'''
+    supported = set()
+    for plugin in metadata_readers():
+        supported.update(set(plugin.file_types))
+    return supported
+
+def option_parser():
+    '''
+    Return the command line parser for ebook-meta. The usage message lists
+    the file types that can be read and the file types that can be written.
+    '''
+    writable = set()
+    for plugin in metadata_writers():
+        writable.update(set(plugin.file_types))
+    usage = USAGE%(list(filetypes()), list(writable))
+    return config().option_parser(usage)
+
+def do_set_metadata(opts, mi, stream, stream_type):
+    '''
+    Merge the metadata given on the command line into `mi` and write the
+    result to `stream`.
+
+    :param opts: Parsed command line options (see :func:`config`). Options
+                 whose value is None are treated as not specified.
+    :param mi: The metadata currently stored in the ebook. It is copied, the
+               caller's object is not modified.
+    :param stream: The ebook file, passed through to set_metadata().
+    :param stream_type: The file type of `stream`, passed through to
+                        set_metadata().
+    '''
+    mi = MetaInformation(mi)
+    for x in ('guide', 'toc', 'manifest', 'spine'):
+        setattr(mi, x, None)
+
+    from_opf = getattr(opts, 'from_opf', None)
+    if from_opf is not None:
+        from calibre.ebooks.metadata.opf2 import OPF
+        # Close the OPF file once its metadata has been merged instead of
+        # leaving the handle to the garbage collector.
+        with open(from_opf, 'rb') as opf_stream:
+            opf_mi = MetaInformation(OPF(opf_stream))
+            mi.smart_update(opf_mi)
+
+    # Plain options are copied as is; the ones skipped here need special
+    # handling below or are not metadata fields at all.
+    for pref in config().option_set.preferences:
+        if pref.name in ('to_opf', 'from_opf', 'authors', 'title_sort',
+                         'author_sort', 'get_cover', 'cover', 'tags',
+                         'lrf_bookid'):
+            continue
+        val = getattr(opts, pref.name, None)
+        if val is not None:
+            setattr(mi, pref.name, val)
+    if getattr(opts, 'authors', None) is not None:
+        mi.authors = string_to_authors(opts.authors)
+        mi.author_sort = authors_to_sort_string(mi.authors)
+    # An explicit --author-sort wins over the auto-generated value
+    if getattr(opts, 'author_sort', None) is not None:
+        mi.author_sort = opts.author_sort
+    if getattr(opts, 'title_sort', None) is not None:
+        mi.title_sort = opts.title_sort
+    elif getattr(opts, 'title', None) is not None:
+        mi.title_sort = title_sort(opts.title)
+    if getattr(opts, 'tags', None) is not None:
+        mi.tags = [t.strip() for t in opts.tags.split(',')]
+
+    if getattr(opts, 'cover', None) is not None:
+        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
+        with open(opts.cover, 'rb') as cover_file:
+            mi.cover_data = (ext, cover_file.read())
+
+    set_metadata(stream, mi, stream_type)
+    
+
+def main(args=sys.argv):
+    '''
+    Entry point of ebook-meta.
+
+    Prints the metadata of the ebook named by ``args[1]``. If any metadata
+    option was given, the metadata is updated in the file and printed again.
+    Afterwards the metadata is optionally written to an OPF file (--to-opf)
+    and the cover saved (--get-cover).
+
+    :return: 1 if no ebook file was specified, 0 otherwise.
+    '''
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+    if len(args) < 2:
+        parser.print_help()
+        prints(_('No file specified'), file=sys.stderr)
+        return 1
+    path = args[1]
+    stream = open(path, 'r+b')
+    stream_type = os.path.splitext(path)[1].replace('.', '').lower()
+
+    # --to-opf and --get-cover only read from the ebook; any other option
+    # means the user wants to change the metadata.
+    trying_to_set = False
+    for pref in config().option_set.preferences:
+        if pref.name in ('to_opf', 'get_cover'):
+            continue
+        if getattr(opts, pref.name) is not None:
+            trying_to_set = True
+            break
+    mi = get_metadata(stream, stream_type)
+    if trying_to_set:
+        prints(_('Original metadata')+'::')
+    metadata = unicode(mi)
+    if trying_to_set:
+        metadata = '\t'+'\n\t'.join(metadata.split('\n'))
+    prints(metadata)
+
+    if trying_to_set:
+        stream.seek(0)
+        do_set_metadata(opts, mi, stream, stream_type)
+        stream.seek(0)
+        stream.flush()
+        lrf = None
+        if stream_type == 'lrf':
+            if opts.lrf_bookid is not None:
+                lrf = LRFMetaFile(stream)
+                lrf.book_id = opts.lrf_bookid
+        mi = get_metadata(stream, stream_type)
+        prints('\n' + _('Changed metadata') + '::')
+        metadata = unicode(mi)
+        metadata = '\t'+'\n\t'.join(metadata.split('\n'))
+        prints(metadata)
+        if lrf is not None:
+            prints('\tBookID:', lrf.book_id)
+
+    if opts.to_opf is not None:
+        from calibre.ebooks.metadata.opf2 import OPFCreator
+        opf = OPFCreator(os.getcwdu(), mi)
+        # The option is stored as ``to_opf``; there is no ``opts.opf``.
+        with open(opts.to_opf, 'wb') as f:
+            opf.render(f)
+        prints(_('OPF created in'), opts.to_opf)
+
+    if opts.get_cover is not None:
+        if mi.cover_data and mi.cover_data[1]:
+            with open(opts.get_cover, 'wb') as f:
+                f.write(mi.cover_data[1])
+                prints(_('Cover saved to'), f.name)
+        else:
+            prints(_('No cover found'), file=sys.stderr)
+
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
--- a/src/calibre/ebooks/metadata/epub.py
+++ b/src/calibre/ebooks/metadata/epub.py
@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

 '''Read meta information from epub files'''

-import sys, os, time
+import os, time
 from cStringIO import StringIO
 from contextlib import closing

@ -15,7 +15,7 @@ from PyQt4.QtWebKit import QWebPage

 from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
-from calibre.ebooks.metadata import get_parser, MetaInformation
+from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ptempfile import TemporaryDirectory
 from calibre import CurrentDir
@ -191,67 +191,10 @@ def get_metadata(stream, extract_cover=True):
 def set_metadata(stream, mi):
    stream.seek(0)
    reader = OCFZipReader(stream, root=os.getcwdu())
+    mi = MetaInformation(mi)
+    for x in ('guide', 'toc', 'manifest', 'spine'):
+        setattr(mi, x, None)
    reader.opf.smart_update(mi)
    newopf = StringIO(reader.opf.render())
    safe_replace(stream, reader.container[OPF.MIMETYPE], newopf)
    
-def option_parser():
-    parser = get_parser('epub')
-    parser.remove_option('--category')
-    parser.add_option('--tags', default=None, 
-                      help=_('A comma separated list of tags to set'))
-    parser.add_option('--series', default=None,
-                      help=_('The series to which this book belongs'))
-    parser.add_option('--series-index', default=None,
-                      help=_('The series index'))
-    parser.add_option('--language', default=None,
-                      help=_('The book language'))
-    parser.add_option('--get-cover', default=False, action='store_true',
-                      help=_('Extract the cover'))
-    return parser
-
-def main(args=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        return 1
-    with open(args[1], 'r+b') as stream:
-        mi = get_metadata(stream, extract_cover=opts.get_cover)
-        changed = False
-        if opts.title:
-            mi.title = opts.title
-            changed = True
-        if opts.authors:
-            mi.authors = opts.authors.split(',')
-            changed = True
-        if opts.tags:
-            mi.tags = opts.tags.split(',')
-            changed = True
-        if opts.comment:
-            mi.comments = opts.comment
-            changed = True
-        if opts.series:
-            mi.series = opts.series
-            changed = True
-        if opts.series_index:
-            mi.series_index = opts.series_index
-            changed = True
-        if opts.language is not None:
-            mi.language = opts.language
-            changed = True
-        
-        if changed:
-            set_metadata(stream, mi)
-        print unicode(get_metadata(stream, extract_cover=False)).encode('utf-8')
-        
-    if mi.cover_data[1] is not None:
-        cpath = os.path.splitext(os.path.basename(args[1]))[0] + '_cover.jpg'
-        with open(cpath, 'wb') as f:
-            f.write(mi.cover_data[1])
-            print 'Cover saved to', f.name
-    
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/metadata/fb2.py
+++ b/src/calibre/ebooks/metadata/fb2.py
@ -48,15 +48,3 @@ def get_metadata(stream):
    if cdata:
        mi.cover_data = cdata
    return mi
-
-def main(args=sys.argv):
-    if len(args) != 2 or '--help' in args or '-h' in args:
-        print >>sys.stderr, _('Usage:'), args[0], 'mybook.fb2'
-        return 1
-    
-    path = os.path.abspath(os.path.expanduser(args[1]))
-    print unicode(get_metadata(open(path, 'rb')))
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/metadata/imp.py
+++ b/src/calibre/ebooks/metadata/imp.py
@ -46,17 +46,3 @@ def get_metadata(stream):
        msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode(err))
        print >>sys.stderr, msg.encode('utf8')
    return mi
-        
-            
-def main(args=sys.argv):
-    if len(args) != 2:
-        print >>sys.stderr, _('Usage: imp-meta file.imp')
-        print >>sys.stderr, _('No filename specified.')
-        return 1
-    
-    path = os.path.abspath(os.path.expanduser(args[1]))
-    print get_metadata(open(path, 'rb'))
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/metadata/lit.py
+++ b/src/calibre/ebooks/metadata/lit.py
@ -30,21 +30,3 @@ def get_metadata(stream):
    mi.cover_data = ('jpg', covers[-1])
    return mi

-def main(args=sys.argv):
-    if len(args) != 2:
-        print >>sys.stderr, _('Usage: %s file.lit') % args[0]
-        return 1
-    fname = args[1]
-    mi = get_metadata(open(fname, 'rb'))
-    print unicode(mi)
-    if mi.cover_data[1]:
-        cover = os.path.abspath(
-            '.'.join((os.path.splitext(os.path.basename(fname))[0],
-                      mi.cover_data[0])))
-        open(cover, 'wb').write(mi.cover_data[1])
-        print _('Cover saved to'), cover
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
-
--- a/src/calibre/ebooks/metadata/lrx.py
+++ b/src/calibre/ebooks/metadata/lrx.py
@ -80,10 +80,3 @@ def get_metadata(f):
    else:
        raise ValueError('Not a LRX file')
    
-
-def main(args=sys.argv):
-    print get_metadata(open(args[1], 'rb'))
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/metadata/mobi.py
+++ b/src/calibre/ebooks/metadata/mobi.py
@ -13,7 +13,6 @@ import sys
 import os
 from struct import pack, unpack
 from cStringIO import StringIO
-from calibre.ebooks.metadata import get_parser
 from calibre.ebooks.mobi import MobiError
 from calibre.ebooks.mobi.reader import get_metadata
 from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
@ -178,63 +177,3 @@ def set_metadata(stream, mi):
    mu = MetadataUpdater(stream)
    mu.update(mi)
    return
-
-
-def option_parser():
-    parser = get_parser('mobi')
-    parser.remove_option('--category')
-    parser.add_option('--tags', default=None,
-                      help=_('Set the subject tags'))
-    parser.add_option('--language', default=None,
-                      help=_('Set the language'))
-    parser.add_option('--publisher', default=None,
-                      help=_('Set the publisher'))
-    parser.add_option('--isbn', default=None,
-                      help=_('Set the ISBN'))
-    return parser
-
-def main(args=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        print >>sys.stderr, 'Usage: %s file.mobi' % args[0]
-        return 1
-    fname = args[1]
-    changed = False
-    with open(fname, 'r+b') as stream:
-        mi = get_metadata(stream)
-        if opts.title:
-            mi.title = opts.title
-            changed = True
-        if opts.authors:
-            mi.authors = opts.authors.split(',')
-            changed = True
-        if opts.comment:
-            mi.comments = opts.comment
-            changed = True
-        if opts.tags is not None:
-            mi.tags = opts.tags.split(',')
-            changed = True
-        if opts.language is not None:
-            mi.language = opts.language
-            changed = True
-        if opts.publisher is not None:
-            mi.publisher = opts.publisher
-            changed = True
-        if opts.isbn is not None:
-            mi.isbn = opts.isbn
-            changed = True
-        if changed:
-            set_metadata(stream, mi)
-        print unicode(get_metadata(stream))
-    if not changed and mi.cover_data[1]:
-        cover = os.path.abspath(
-            '.'.join((os.path.splitext(os.path.basename(fname))[0],
-                      mi.cover_data[0].lower())))
-        open(cover, 'wb').write(mi.cover_data[1])
-        print _('Cover saved to'), cover
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/metadata/odt.py
+++ b/src/calibre/ebooks/metadata/odt.py
@ -164,103 +164,3 @@ def get_metadata(stream):
    
    return mi

-def main(args=sys.argv):
-    if len(args) != 2:
-        print 'Usage: %s file.odt'%args[0]
-        return 1
-    mi = get_metadata(open(args[1], 'rb'))
-    print mi
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
-
-#now = time.localtime()[:6]
-#outputfile = "-"
-#writemeta = False   # Do we change any meta data?
-#usenormalize = False
-#
-#try:
-#    opts, args = getopt.getopt(sys.argv[1:], "cdlI:A:a:o:x:X:")
-#except getopt.GetoptError:
-#    exitwithusage()
-#
-#if len(opts) == 0:
-#    opts = [ ('-l','') ]
-#
-#for o, a in opts:
-#    if o in ('-a','-A','-I'):
-#        writemeta = True
-#        if a.find(":") >= 0:
-#            k,v = a.split(":",1)
-#        else:
-#            k,v = (a, "")
-#        if len(k) == 0:
-#            exitwithusage()
-#        k = fields.get(k,k)
-#        addfields[k] = unicode(v,'utf-8')
-#    if o == '-a':
-#        yieldfields[k] = True
-#    if o == '-I':
-#        deletefields[k] = True
-#    if o == '-d':
-#        writemeta = True
-#        addfields[(DCNS,u'date')] = "%04d-%02d-%02dT%02d:%02d:%02d" % now
-#        deletefields[(DCNS,u'date')] = True
-#    if o == '-c':
-#        usenormalize = True
-#    if o == '-l':
-#        Xfields = fields.values()
-#    if o == "-x":
-#        xfields.append(fields.get(a,a))
-#    if o == "-X":
-#        Xfields.append(fields.get(a,a))
-#    if o == "-o":
-#        outputfile = a
-#
-## The specification says we should change the element to our own,
-## and must not export the original identifier.
-#if writemeta:
-#    addfields[(METANS,u'generator')] = TOOLSVERSION
-#    deletefields[(METANS,u'generator')] = True
-#
-#odfs = odfmetaparser()
-#parser = xml.sax.make_parser()
-#parser.setFeature(xml.sax.handler.feature_namespaces, 1)
-#parser.setContentHandler(odfs)
-#
-#if len(args) == 0:
-#    zin = zipfile.ZipFile(sys.stdin,'r')
-#else:
-#    if not zipfile.is_zipfile(args[0]):
-#        exitwithusage()
-#    zin = zipfile.ZipFile(args[0], 'r')
-#
-#content = zin.read('meta.xml')
-#parser.parse(StringIO(content))
-#
-#if writemeta:
-#    if outputfile == '-':
-#        if sys.stdout.isatty():
-#           sys.stderr.write("Won't write ODF file to terminal\n")
-#           sys.exit(1)
-#        zout = zipfile.ZipFile(sys.stdout,"w")
-#    else:
-#        zout = zipfile.ZipFile(outputfile,"w")
-#
-#
-#
-#    # Loop through the input zipfile and copy the content to the output until we
-#    # get to the meta.xml. Then substitute.
-#    for zinfo in zin.infolist():
-#        if zinfo.filename == "meta.xml":
-#            # Write meta
-#            zi = zipfile.ZipInfo("meta.xml", now)
-#            zi.compress_type = zipfile.ZIP_DEFLATED
-#            zout.writestr(zi,odfs.meta() )
-#        else:
-#            payload = zin.read(zinfo.filename)
-#            zout.writestr(zinfo, payload)
-#
-#    zout.close()
-#zin.close()
--- a/src/calibre/ebooks/metadata/opf.py
+++ b/src/calibre/ebooks/metadata/opf.py
@ -11,7 +11,7 @@ from calibre.constants import __appname__, __version__
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
 from calibre.ebooks.lrf import entity_to_unicode
-from calibre.ebooks.metadata import get_parser, Resource, ResourceCollection
+from calibre.ebooks.metadata import Resource, ResourceCollection
 from calibre.ebooks.metadata.toc import TOC

 class OPFSoup(BeautifulStoneSoup):
@ -38,8 +38,8 @@ class ManifestItem(Resource):
                res.mime_type = mt
            return res
    
-    @apply
-    def media_type():
+    @dynamic_property
+    def media_type(self):
        def fget(self):
            return self.mime_type
        def fset(self, val):
@ -242,14 +242,14 @@ class OPF(MetaInformation):
    def __init__(self):
        raise NotImplementedError('Abstract base class')
    
-    @apply
-    def package():
+    @dynamic_property
+    def package(self):
        def fget(self):
            return self.soup.find(re.compile('package'))
        return property(fget=fget)
    
-    @apply
-    def metadata():
+    @dynamic_property
+    def metadata(self):
        def fget(self):
            return self.package.find(re.compile('metadata'))
        return property(fget=fget)
@ -540,46 +540,4 @@ class OPFCreator(MetaInformation):
        if toc is not None and ncx_stream is not None:
            toc.render(ncx_stream, self.application_id)
            ncx_stream.flush()
-    
-def option_parser():
-    return get_parser('opf')
-
-def main(args=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        return 1
-    mi = MetaInformation(OPFReader(open(args[1], 'rb'), os.path.abspath(os.path.dirname(args[1]))))
-    write = False
-    if opts.title is not None:
-        mi.title = opts.title.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
-        write = True
-    if opts.authors is not None:
-        aus = [i.strip().replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') for i in opts.authors.split(',')]
-        mi.authors = aus
-        write = True
-    if opts.category is not None:
-        mi.category = opts.category.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
-        write = True
-    if opts.comment is not None:
-        mi.comments = opts.comment.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
-        write = True
-    if write:
-        mo = OPFCreator(os.path.dirname(args[1]), mi)
-        ncx = cStringIO.StringIO()
-        mo.render(open(args[1], 'wb'), ncx)
-        ncx = ncx.getvalue()
-        if ncx:
-            f = glob.glob(os.path.join(os.path.dirname(args[1]), '*.ncx'))
-            if f:
-                f = open(f[0], 'wb')
-            else:
-                f = open(os.path.splitext(args[1])[0]+'.ncx', 'wb')
-            f.write(ncx)
-            f.close()
-    print MetaInformation(OPFReader(open(args[1], 'rb'), os.path.abspath(os.path.dirname(args[1]))))
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
+    
--- a/src/calibre/ebooks/metadata/opf.xml
+++ b/src/calibre/ebooks/metadata/opf.xml
@ -12,7 +12,7 @@
        <dc:identifier opf:scheme="${__appname__}" id="${__appname__}_id">${mi.application_id}</dc:identifier>
        <dc:date py:if="getattr(mi, 'timestamp', None) is not None">${mi.timestamp.isoformat()}</dc:date>    
        <dc:language>${mi.language if mi.language else 'UND'}</dc:language>
-        <dc:type py:if="mi.category">${mi.category}</dc:type>
+        <dc:type py:if="getattr(mi, 'category', False)">${mi.category}</dc:type>
        <dc:description py:if="mi.comments">${mi.comments}</dc:description>
        <dc:publisher py:if="mi.publisher">${mi.publisher}</dc:publisher>
        <dc:identifier opf:scheme="ISBN" py:if="mi.isbn">${mi.isbn}</dc:identifier>
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@ -169,8 +169,8 @@ class ManifestItem(Resource):
                res.mime_type = mt
            return res

-    @apply
-    def media_type():
+    @dynamic_property
+    def media_type(self):
        def fget(self):
            return self.mime_type
        def fset(self, val):
@ -608,8 +608,8 @@ class OPF(object):
        for item in self.iterguide():
            item.set('href', get_href(item))

-    @apply
-    def authors():
+    @dynamic_property
+    def authors(self):

        def fget(self):
            ans = []
@ -628,8 +628,8 @@ class OPF(object):
            
        return property(fget=fget, fset=fset)

-    @apply
-    def author_sort():
+    @dynamic_property
+    def author_sort(self):

        def fget(self):
            matches = self.authors_path(self.metadata)
@ -651,8 +651,8 @@ class OPF(object):

        return property(fget=fget, fset=fset)

-    @apply
-    def title_sort():
+    @dynamic_property
+    def title_sort(self):

        def fget(self):
            matches = self.title_path(self.metadata)
@ -674,8 +674,28 @@ class OPF(object):

        return property(fget=fget, fset=fset)

-    @apply
-    def tags():
+    # NOTE(review): this appears to repeat the title_sort property defined
+    # immediately above (same decorator, same name, same opening lines). As the
+    # later binding in the class body, this copy is the one that takes effect --
+    # confirm the duplication is unintended and drop one of the two.
+    @dynamic_property
+    def title_sort(self):
+        # dynamic_property calls this function once, passing None, and binds
+        # the property object it returns to the name title_sort.
+
+        def fget(self):
+            # Return the first non-empty file-as value found on the results of
+            # self.title_path(self.metadata), trying the OPF-namespaced name
+            # before the bare one. Falls through to an implicit None when
+            # nothing is found.
+            matches = self.title_path(self.metadata)
+            if matches:
+                for match in matches:
+                    ans = match.get('{%s}file-as'%self.NAMESPACES['opf'], None)
+                    if not ans:
+                        ans = match.get('file-as', None)
+                    if ans:
+                        return ans
+
+        def fset(self, val):
+            # Only the first match is updated, and only the bare 'file-as'
+            # name is written; with no matches this is a silent no-op.
+            matches = self.title_path(self.metadata)
+            if matches:
+                matches[0].set('file-as', unicode(val))
+
+        return property(fget=fget, fset=fset)
+
+    @dynamic_property
+    def tags(self):

        def fget(self):
            ans = []
@ -692,8 +712,8 @@ class OPF(object):

        return property(fget=fget, fset=fset)

-    @apply
-    def isbn():
+    @dynamic_property
+    def isbn(self):

        def fget(self):
            for match in self.isbn_path(self.metadata):
@ -709,8 +729,8 @@ class OPF(object):

        return property(fget=fget, fset=fset)

-    @apply
-    def application_id():
+    @dynamic_property
+    def application_id(self):

        def fget(self):
            for match in self.application_id_path(self.metadata):
@ -726,8 +746,8 @@ class OPF(object):

        return property(fget=fget, fset=fset)

-    @apply
-    def book_producer():
+    @dynamic_property
+    def book_producer(self):

        def fget(self):
            for match in self.bkp_path(self.metadata):
@ -764,8 +784,8 @@ class OPF(object):
                            return cpath


-    @apply
-    def cover():
+    @dynamic_property
+    def cover(self):

        def fget(self):
            if self.guide is not None:
@ -1001,62 +1021,19 @@ class OPFTest(unittest.TestCase):
        self.opf.smart_update(MetaInformation(self.opf))
        self.testReading()

+    def testCreator(self):
+        # Render self.opf through OPFCreator into an in-memory buffer, parse
+        # the rendered text back with OPF, and run the testReading checks
+        # against the re-parsed object.
+        creator = OPFCreator(os.getcwd(), self.opf)
+        rendered = cStringIO.StringIO()
+        creator.render(rendered)
+        reparsed = OPF(cStringIO.StringIO(rendered.getvalue()), os.getcwd())
+        self.testReading(opf=reparsed)
+
+    # Calls smart_update with the OPF object as its own source and then
+    # re-runs the testReading checks.
+    # NOTE(review): the method that ends just above this hunk has a
+    # near-identical body (it wraps self.opf in MetaInformation first). If that
+    # method is also named testSmartUpdate, this definition replaces it and the
+    # earlier variant never runs -- confirm and rename or remove one.
+    def testSmartUpdate(self):
+        self.opf.smart_update(self.opf)
+        self.testReading()
+
 def suite():
    return unittest.TestLoader().loadTestsFromTestCase(OPFTest)

 def test():
-    unittest.TextTestRunner(verbosity=2).run(suite())
-
-
-def option_parser():
-    from calibre.ebooks.metadata import get_parser
-    parser = get_parser('opf')
-    parser.add_option('--language', default=None, help=_('Set the dc:language field'))
-    return parser
-
-def main(args=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        return 1
-    opfpath = os.path.abspath(args[1])
-    basedir = os.path.dirname(opfpath)
-    mi = MetaInformation(OPF(open(opfpath, 'rb'), basedir))
-    write = False
-    if opts.title is not None:
-        mi.title = opts.title
-        write = True
-    if opts.authors is not None:
-        aus = [i.strip() for i in opts.authors.split(',')]
-        mi.authors = aus
-        write = True
-    if opts.category is not None:
-        mi.category = opts.category
-        write = True
-    if opts.comment is not None:
-        mi.comments = opts.comment
-        write = True
-    if opts.language is not None:
-        mi.language = opts.language
-        write = True
-    if write:
-        mo = OPFCreator(basedir, mi)
-        ncx = cStringIO.StringIO()
-        mo.render(open(args[1], 'wb'), ncx)
-        ncx = ncx.getvalue()
-        if ncx:
-            f = glob.glob(os.path.join(os.path.dirname(args[1]), '*.ncx'))
-            if f:
-                f = open(f[0], 'wb')
-            else:
-                f = open(os.path.splitext(args[1])[0]+'.ncx', 'wb')
-            f.write(ncx)
-            f.close()
-    print MetaInformation(OPF(open(opfpath, 'rb'), basedir))
-    return 0
-
-
-
-if __name__ == '__main__':
-    sys.exit(main())
+    unittest.TextTestRunner(verbosity=2).run(suite())
--- a/src/calibre/ebooks/metadata/rb.py
+++ b/src/calibre/ebooks/metadata/rb.py
@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
 '''Read meta information from RB files'''

-import sys, os, struct
+import sys, struct

 from calibre.ebooks.metadata import MetaInformation

@ -53,16 +53,4 @@ def get_metadata(stream):
        raise
    return mi
        
-            
-def main(args=sys.argv):
-    if len(args) != 2:
-        print >>sys.stderr, _('Usage: rb-meta file.rb')
-        print >>sys.stderr, _('No filename specified.')
-        return 1
-    
-    path = os.path.abspath(os.path.expanduser(args[1]))
-    print get_metadata(open(path, 'rb'))
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
+         
--- a/src/calibre/ebooks/metadata/rtf.py
+++ b/src/calibre/ebooks/metadata/rtf.py
@ -5,7 +5,7 @@ Edit metadata in RTF files.
 """
 import re, cStringIO, sys

-from calibre.ebooks.metadata import MetaInformation, get_parser
+from calibre.ebooks.metadata import MetaInformation

 title_pat    = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
 author_pat   = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
@ -166,22 +166,3 @@ def set_metadata(stream, options):
        stream.write(src)
        stream.write(after)
    
-def option_parser():
-    return get_parser('rtf')
-
-def main(args=sys.argv):
-    parser = option_parser()
-    options, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        sys.exit(1)
-    stream = open(args[1], 'r+b')
-    if options.authors:
-        options.authors = options.authors.split(',')
-    options.comments = options.comment 
-    set_metadata(stream, options)
-    mi = get_metadata(stream)
-    return mi
-
-if __name__ == '__main__':
-    main()
--- a/src/calibre/ebooks/metadata/toc.py
+++ b/src/calibre/ebooks/metadata/toc.py
@ -86,8 +86,8 @@ class TOC(list):
            for i in obj.flat():
                yield i
    
-    @apply
-    def abspath():
+    @dynamic_property
+    def abspath(self):
        doc='Return the file this toc entry points to as a absolute path to a file on the system.'
        def fget(self):
            if self.href is None:
@ -208,4 +208,4 @@ class TOC(list):
        template = MarkupTemplate(ncx_template)
        raw = template.generate(uid=uid, toc=self, __appname__=__appname__)
        raw = raw.render(doctype=doctype)
-        stream.write(raw)
+        stream.write(raw)
--- a/src/calibre/ebooks/mobi/input.py
+++ b/src/calibre/ebooks/mobi/input.py
@ -0,0 +1,29 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.customize.conversion import InputFormatPlugin
+
+class MOBIInput(InputFormatPlugin):
+    '''
+    Input plugin for Mobipocket-family files (.mobi, .prc, .azw).
+
+    The unpacking itself is done by MobiReader; this class only drives it and
+    reports where the generated OPF file ended up.
+    '''
+
+    name        = 'MOBI Input'
+    author      = 'Kovid Goyal'
+    description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
+    file_types  = set(['mobi', 'prc', 'azw'])
+
+    def convert(self, stream, options, file_ext, parse_cache, log):
+        '''
+        Unpack ``stream`` into the current working directory.
+
+        :param stream: passed straight to MobiReader
+        :param options: supplies ``input_encoding`` and ``debug_input``
+        :param file_ext: not used by this plugin
+        :param parse_cache: mapping handed to ``extract_content``; also read
+                            here for the raw markup debugging entry
+        :param log: logger passed on to MobiReader
+        :return: ``mr.created_opf_path``, the OPF written by ``extract_content``
+        '''
+        from calibre.ebooks.mobi.reader import MobiReader
+        mr = MobiReader(stream, log, options.input_encoding,
+                        options.debug_input)
+        # Pass both arguments positionally: Python rejects a positional
+        # argument that follows a keyword argument at compile time.
+        mr.extract_content(os.getcwdu(), parse_cache)
+        raw = parse_cache.get('calibre_raw_mobi_markup', False)
+        if raw:
+            # Keep a copy of the unprocessed markup next to the output.
+            if isinstance(raw, unicode):
+                raw = raw.encode('utf-8')
+            # Context manager so the handle is flushed and closed promptly.
+            with open('debug-raw.html', 'wb') as f:
+                f.write(raw)
+
+        return mr.created_opf_path
+        
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@ -80,7 +80,20 @@ class MobiMLizer(object):
    def __init__(self, ignore_tables=False):
        self.ignore_tables = ignore_tables
    
-    def transform(self, oeb, context):
+    @classmethod
+    def config(cls, cfg):
+        '''
+        Register the 'mobiml' option group, with its single ignore_tables
+        option, on ``cfg`` and hand ``cfg`` back to the caller.
+        '''
+        add_option = cfg.add_group('mobiml', _('Mobipocket markup options.'))
+        ignore_tables_help = _(
+            'Render HTML tables as blocks of text instead of actual '
+            'tables. This is neccessary if the HTML contains very '
+            'large or complex tables.')
+        add_option('ignore_tables', ['--ignore-tables'], default=False,
+                   help=ignore_tables_help)
+        return cfg
+
+    @classmethod
+    def generate(cls, opts):
+        '''Construct an instance whose ignore_tables flag is taken from opts.'''
+        skip_tables = opts.ignore_tables
+        return cls(ignore_tables=skip_tables)
+    
+    def __call__(self, oeb, context):
        oeb.logger.info('Converting XHTML to Mobipocket markup...')
        self.oeb = oeb
        self.profile = profile = context.dest
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 Read data from .mobi files
 '''

-import sys, struct, os, cStringIO, re, functools
+import struct, os, cStringIO, re, functools

 try:
    from PIL import Image as PILImage
@ -35,8 +35,10 @@ class EXTHHeader(object):
        pos = 0
        self.mi = MetaInformation(_('Unknown'), [_('Unknown')])
        self.has_fake_cover = True
+        left = self.num_items
        
-        for i in range(self.num_items):
+        while left > 0:
+            left -= 1
            id, size = struct.unpack('>LL', raw[pos:pos+8])
            content = raw[pos+8:pos+size]
            pos += size
@ -76,7 +78,8 @@ class EXTHHeader(object):

 class BookHeader(object):
    
-    def __init__(self, raw, ident):
+    def __init__(self, raw, ident, user_encoding, log):
+        self.log = log
        self.compression_type = raw[:2]
        self.records, self.records_size = struct.unpack('>HH', raw[8:12])
        self.encryption_type, = struct.unpack('>H', raw[12:14])
@ -92,8 +95,8 @@ class BookHeader(object):
        else:
            self.ancient = False
            self.doctype = raw[16:20]
-            self.length, self.type, self.codepage, self.unique_id, self.version = \
-                     struct.unpack('>LLLLL', raw[20:40])
+            self.length, self.type, self.codepage, self.unique_id, \
+                self.version = struct.unpack('>LLLLL', raw[20:40])
                    
            
            try:
@ -102,8 +105,9 @@ class BookHeader(object):
                          65001 : 'utf-8',
                          }[self.codepage]
            except (IndexError, KeyError):
-                print '[WARNING] Unknown codepage %d. Assuming cp-1252'%self.codepage
-                self.codec = 'cp1252'
+                self.codec = 'cp1252' if user_encoding is None else user_encoding
+                log.warn('Unknown codepage %d. Assuming %s'%(self.codepage,
+                                                            self.codec))
            
            if ident == 'TEXTREAD' or self.length < 0xE4 or 0xE8 < self.length:
                self.extra_flags = 0
@ -138,9 +142,24 @@ class MobiReader(object):
    PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE)
    IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex')
    
-    def __init__(self, filename_or_stream, verbose=False):
-        self.verbose = verbose
+    def __init__(self, filename_or_stream, log, user_encoding=None, debug=None):
+        self.log = log
+        self.debug = debug
        self.embedded_mi = None
+        self.base_css_rules = '''
+                blockquote { margin: 0em 0em 0em 1.25em; text-align: justify }
+                
+                p { margin: 0em; text-align: justify }
+                
+                .bold { font-weight: bold }
+                
+                .italic { font-style: italic }
+                
+                .mbp_pagebreak {
+                    page-break-after: always; margin: 0; display: block
+                }
+                '''
+        self.tag_css_rules = []
        
        if hasattr(filename_or_stream, 'read'):
            stream = filename_or_stream
@ -177,17 +196,21 @@ class MobiReader(object):
            self.sections.append((section(i), self.section_headers[i])) 
         
            
-        self.book_header = BookHeader(self.sections[0][0], self.ident)
+        self.book_header = BookHeader(self.sections[0][0], self.ident, 
+                                      user_encoding, self.log)
        self.name = self.name.decode(self.book_header.codec, 'replace')
        
-    def extract_content(self, output_dir=os.getcwdu()):
+    def extract_content(self, output_dir, parse_cache):
        output_dir = os.path.abspath(output_dir)
        if self.book_header.encryption_type != 0:
            raise DRMError(self.name)
        
        processed_records = self.extract_text()
+        if self.debug is not None:
+            self.parse_cache['calibre_raw_mobi_markup'] = self.mobi_html
        self.add_anchors()
-        self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore')
+        self.processed_html = self.processed_html.decode(self.book_header.codec,
+                                                          'ignore')
        for pat in ENCODING_PATS:
            self.processed_html = pat.sub('', self.processed_html)
        e2u = functools.partial(entity_to_unicode, 
@ -203,16 +226,10 @@ class MobiReader(object):
        self.processed_html = \
            re.compile('<head>', re.IGNORECASE).sub(
                '\n<head>\n'
-                '<style type="text/css">\n'
-                'blockquote { margin: 0em 0em 0em 1.25em; text-align: justify; }\n'
-                'p { margin: 0em; text-align: justify; }\n'
-                '.bold { font-weight: bold; }\n'
-                '.italic { font-style: italic; }\n'
-                '</style>\n',
+                '\t<link type="text/css" href="styles.css" />\n',
                self.processed_html)
        
-        if self.verbose:
-            print 'Parsing HTML...'
+        self.log.debug('Parsing HTML...')
        root = html.fromstring(self.processed_html)
        self.upshift_markup(root)
        guides = root.xpath('//guide')
@ -230,25 +247,23 @@ class MobiReader(object):
                    ref.attrib['href'] = os.path.basename(htmlfile)+ref.attrib['href']
        except AttributeError:
            pass
-        if self.verbose:
-            print 'Serializing...'
-        with open(htmlfile, 'wb') as f:
-            raw = html.tostring(root, encoding='utf-8', method='xml', 
-                         include_meta_content_type=True, pretty_print=True)
-            raw = raw.replace('<head>', 
-            '<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n')
-            f.write(raw)
+        parse_cache[htmlfile] = root
        self.htmlfile = htmlfile
-        
-        if self.book_header.exth is not None or self.embedded_mi is not None:
-            if self.verbose:
-                print 'Creating OPF...'
-            ncx = cStringIO.StringIO()
-            opf = self.create_opf(htmlfile, guide, root)
-            opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx)
-            ncx = ncx.getvalue()
-            if ncx:
-                open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx)
+        self.log.debug('Creating OPF...')
+        ncx = cStringIO.StringIO()
+        opf = self.create_opf(htmlfile, guide, root)
+        self.created_opf_path = os.path.splitext(htmlfile)[0]+'.opf' 
+        opf.render(open(self.created_opf_path, 'wb'), ncx)
+        ncx = ncx.getvalue()
+        if ncx:
+            open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx)
+                
+        with open('styles.css', 'wb') as s:
+            s.write(self.base_css_rules+'\n\n')
+            for rule in self.tag_css_rules:
+                if isinstance(rule, unicode):
+                    rule = rule.encode('utf-8')
+                s.write(rule+'\n\n')
    
    def read_embedded_metadata(self, root, elem, guide):
        raw = '<package>'+html.tostring(elem, encoding='utf-8')+'</package>'
@ -274,11 +289,9 @@ class MobiReader(object):
                                elem.getparent().remove(elem)
                                break
                    break
-        
    
    def cleanup_html(self):
-        if self.verbose:
-            print 'Cleaning up HTML...'
+        self.log.debug('Cleaning up HTML...')
        self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
        if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
            self.processed_html = '<html><p>'+self.processed_html.replace('\n\n', '<p>')+'</html>'
@ -286,8 +299,7 @@ class MobiReader(object):
        self.processed_html = self.processed_html.replace('> <', '>\n<')
        
    def upshift_markup(self, root):
-        if self.verbose:
-            print 'Converting style information to CSS...'
+        self.log.debug('Converting style information to CSS...')
        size_map = {
                    'xx-small' : '0.5',
                    'x-small'  : '1',
@ -298,7 +310,7 @@ class MobiReader(object):
                    'xx-large' : '6',
                    }
        mobi_version = self.book_header.mobi_version
-        for tag in root.iter(etree.Element):
+        for i, tag in enumerate(root.iter(etree.Element)):
            if tag.tag in ('country-region', 'place', 'placetype', 'placename',
                           'state', 'city'):
                tag.tag = 'span'
@ -352,8 +364,7 @@ class MobiReader(object):
            elif tag.tag == 'pre':
                if not tag.text:
                    tag.tag = 'div'
-            if styles:
-                attrib['style'] = '; '.join(styles)
+            
            if 'filepos-id' in attrib:
                attrib['id'] = attrib.pop('filepos-id')
            if 'filepos' in attrib:
@ -362,15 +373,24 @@ class MobiReader(object):
                    attrib['href'] = "#filepos%d" % int(filepos)
                except ValueError:
                    pass
+            
+            if styles:
+                attrib['id'] = attrib.get('id', 'calibre_mr_gid%d'%i)
+                self.tag_css_rules.append('#%s {%s}'%(attrib['id'], 
+                                                      '; '.join(styles)))
+    
    
    def create_opf(self, htmlfile, guide=None, root=None):
        mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
+        if mi is None:
+            mi = MetaInformation(self.title, [_('Unknown')])
        opf = OPFCreator(os.path.dirname(htmlfile), mi)
        if hasattr(self.book_header.exth, 'cover_offset'):
            opf.cover = 'images/%05d.jpg'%(self.book_header.exth.cover_offset+1)
        elif mi.cover is not None:
            opf.cover = mi.cover
-        manifest = [(htmlfile, 'text/x-oeb1-document')]
+        manifest = [(htmlfile, 'text/x-oeb1-document'), 
+                    (os.path.abspath('styles.css'), 'text/css')]
        bp = os.path.dirname(htmlfile)
        for i in getattr(self, 'image_names', []):
            manifest.append((os.path.join(bp, 'images/', i), 'image/jpeg'))
@ -441,8 +461,7 @@ class MobiReader(object):
        return data[:len(data)-trail_size]
    
    def extract_text(self):
-        if self.verbose:
-            print 'Extracting text...'
+        self.log.debug('Extracting text...')
        text_sections = [self.text_section(i) for i in range(1, self.book_header.records+1)]
        processed_records = list(range(0, self.book_header.records+1))
        
@ -472,12 +491,11 @@ class MobiReader(object):
    
    def replace_page_breaks(self):
        self.processed_html = self.PAGE_BREAK_PAT.sub(
-            '<div class="mbp_pagebreak" style="page-break-after: always; margin: 0; display: block" />',
+            '<div class="mbp_pagebreak" />',
            self.processed_html)
    
    def add_anchors(self):
-        if self.verbose:
-            print 'Adding anchors...'
+        self.log.debug('Adding anchors...')
        positions = set([])
        link_pattern = re.compile(r'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''',
                                  re.IGNORECASE)
@ -507,8 +525,7 @@ class MobiReader(object):
        
    
    def extract_images(self, processed_records, output_dir):
-        if self.verbose:
-            print 'Extracting images...'
+        self.log.debug('Extracting images...')
        output_dir = os.path.abspath(os.path.join(output_dir, 'images'))
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
@ -535,14 +552,17 @@ class MobiReader(object):
            im.convert('RGB').save(open(path, 'wb'), format='JPEG')

 def get_metadata(stream):
-    mr = MobiReader(stream)
+    from calibre.utils.logging import Log
+    log = Log()
+    mr = MobiReader(stream, log)
    if mr.book_header.exth is None:
        mi = MetaInformation(mr.name, [_('Unknown')])
    else:
        mi = mr.create_opf('dummy.html')
        try:
            if hasattr(mr.book_header.exth, 'cover_offset'):
-                cover_index = mr.book_header.first_image_index + mr.book_header.exth.cover_offset
+                cover_index = mr.book_header.first_image_index + \
+                              mr.book_header.exth.cover_offset
                data  = mr.sections[int(cover_index)][0]
            else:
                data  = mr.sections[mr.book_header.first_image_index][0]
@ -552,42 +572,6 @@ def get_metadata(stream):
            im.convert('RGBA').save(obuf, format='JPEG')
            mi.cover_data = ('jpg', obuf.getvalue())
        except:
-            import traceback
-            traceback.print_exc()
+            log.exception()
    return mi
-        
-        
-def option_parser():
-    from calibre.utils.config import OptionParser
-    parser = OptionParser(usage=_('%prog [options] myebook.mobi'))
-    parser.add_option('-o', '--output-dir', default='.', 
-                      help=_('Output directory. Defaults to current directory.'))
-    parser.add_option('-v', '--verbose', default=False, action='store_true',
-                      help='Useful for debugging.')
-    return parser
-    

-def main(args=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        return 1
-    
-    mr = MobiReader(args[1], verbose=opts.verbose)
-    opts.output_dir = os.path.abspath(opts.output_dir)
-    mr.extract_content(opts.output_dir)
-    if opts.verbose:
-        oname = os.path.join(opts.output_dir, 'debug-raw.html')
-        dat = mr.mobi_html
-        if isinstance(dat, unicode):
-            dat = dat.encode('utf-8')
-        open(oname, 'wb').write(dat)
-        print _('Raw MOBI HTML saved in'), oname
-    
-    print _('OEB ebook created in'), opts.output_dir
-    
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -292,9 +292,29 @@ class Serializer(object):
                buffer.seek(hoff)
                buffer.write('%010d' % ioff)

-    
+
+class MobiFlattener(object):
+    """Transform wrapper that runs :class:`CSSFlattener` configured from the
+    destination profile of the conversion context.
+
+    ``config`` and ``generate`` are plain instance methods here; ``generate``
+    hands back the instance it is called on.
+    """
+
+    def config(self, cfg):
+        """Return :param:`cfg` unchanged; no options are registered."""
+        return cfg
+
+    def generate(self, opts):
+        """Return this instance; :param:`opts` is not consulted."""
+        return self
+
+    def __call__(self, oeb, context):
+        """Run a :class:`CSSFlattener` over :param:`oeb`.
+
+        ``fbase`` and ``fkey`` are taken from ``context.dest``; ``unfloat``
+        and ``untable`` are both switched on.
+        """
+        dest = context.dest
+        flatten = CSSFlattener(fbase=dest.fbase, fkey=dest.fnums.values(),
+                               unfloat=True, untable=True)
+        return flatten(oeb, context)
+
+                
 class MobiWriter(object):
    COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
+
+    DEFAULT_PROFILE = 'CybookG3'
+
+    TRANSFORMS = [HTMLTOCAdder, CaseMangler, MobiFlattener(), SVGRasterizer,
+                  ManifestTrimmer, MobiMLizer]
    
    def __init__(self, compression=None, imagemax=None,
                 prefer_author_sort=False):
@ -302,7 +322,32 @@ class MobiWriter(object):
        self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
        self._prefer_author_sort = prefer_author_sort

-    def dump(self, oeb, path):
+    @classmethod
+    def config(cls, cfg):
+        """Register the Mobipocket writer options on :param:`cfg`.
+
+        A ``mobipocket`` option group is created holding ``--compress``,
+        ``--rescale-images`` and ``--prefer-author-sort``, each defaulting
+        to ``False``.  :param:`cfg` is returned.
+        """
+        add_option = cfg.add_group(
+            'mobipocket', _('Mobipocket-specific options.'))
+        add_option('compress', ['--compress'], default=False,
+            help=_('Compress file text using PalmDOC compression. '
+                   'Results in smaller files, but takes a long time to run.'))
+        add_option('rescale_images', ['--rescale-images'], default=False,
+            help=_('Modify images to meet Palm device size limitations.'))
+        add_option('prefer_author_sort', ['--prefer-author-sort'],
+            default=False,
+            help=_('When present, use the author sorting information for '
+                   'generating the Mobipocket author metadata.'))
+        return cfg
+
+    @classmethod
+    def generate(cls, opts):
+        """Build a writer from the parsed command-line options :param:`opts`.
+
+        ``opts.compress`` selects ``PALMDOC`` over ``UNCOMPRESSED``;
+        ``opts.rescale_images`` selects ``PALM_MAX_IMAGE_SIZE`` as the image
+        size limit, otherwise ``None`` is passed.
+        """
+        if opts.compress:
+            compression = PALMDOC
+        else:
+            compression = UNCOMPRESSED
+        if opts.rescale_images:
+            imagemax = PALM_MAX_IMAGE_SIZE
+        else:
+            imagemax = None
+        return cls(compression=compression, imagemax=imagemax,
+                   prefer_author_sort=opts.prefer_author_sort)
+    
+    def __call__(self, oeb, path):
        if hasattr(path, 'write'):
            return self._dump_stream(oeb, path)
        with open(path, 'w+b') as stream:
@ -542,21 +587,6 @@ def config(defaults=None):
    else:
        c = StringConfig(defaults, desc)
        
-    mobi = c.add_group('mobipocket', _('Mobipocket-specific options.'))
-    mobi('compress', ['--compress'], default=False,
-         help=_('Compress file text using PalmDOC compression. '
-               'Results in smaller files, but takes a long time to run.'))
-    mobi('rescale_images', ['--rescale-images'], default=False, 
-        help=_('Modify images to meet Palm device size limitations.'))
-    mobi('toc_title', ['--toc-title'], default=None, 
-         help=_('Title for any generated in-line table of contents.'))
-    mobi('ignore_tables', ['--ignore-tables'], default=False,
-         help=_('Render HTML tables as blocks of text instead of actual '
-                'tables. This is neccessary if the HTML contains very large '
-                'or complex tables.'))
-    mobi('prefer_author_sort', ['--prefer-author-sort'], default=False,
-         help=_('When present, use the author sorting information for '
-                'generating the Mobipocket author metadata.'))
    profiles = c.add_group('profiles', _('Device renderer profiles. '
        'Affects conversion of font sizes, image rescaling and rasterization '
        'of tables. Valid profiles are: %s.') % ', '.join(_profiles))
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
--- a/src/calibre/ebooks/oeb/factory.py
+++ b/src/calibre/ebooks/oeb/factory.py
@ -0,0 +1,98 @@
+'''
+Registry associating file extensions with Reader classes.
+'''
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
+
+import sys, os, logging
+from itertools import chain
+from calibre.ebooks.oeb.base import OEBError
+from calibre.ebooks.oeb.reader import OEBReader
+from calibre.ebooks.oeb.writer import OEBWriter
+from calibre.ebooks.lit.reader import LitReader
+from calibre.ebooks.lit.writer import LitWriter
+from calibre.ebooks.mobi.reader import MobiReader
+from calibre.ebooks.mobi.writer import MobiWriter
+from calibre.ebooks.oeb.base import Logger, OEBBook
+from calibre.ebooks.oeb.profile import Context
+from calibre.utils.config import Config
+
+# Public names for ``from calibre.ebooks.oeb.factory import *``.  Every entry
+# must be defined in this module, otherwise the star-import raises
+# AttributeError.
+__all__ = ['ReaderFactory', 'WriterFactory']
+
+# Maps a lower-cased file extension to a ``(Reader, Writer)`` pair.  ``None``
+# in either slot means nothing is registered for that direction.
+REGISTRY = {
+    '.opf': (OEBReader, None),
+    '.lit': (LitReader, LitWriter),
+    '.mobi': (MobiReader, MobiWriter),
+    }
+
+def ReaderFactory(path):
+    """Return the Reader class to use for the book at :param:`path`.
+
+    A directory always maps to :class:`OEBReader`.  Anything else is looked
+    up in ``REGISTRY`` by lower-cased file extension; :class:`OEBError` is
+    raised when no Reader is registered for it.
+    """
+    if os.path.isdir(path):
+        return OEBReader
+    root, ext = os.path.splitext(path)
+    ext = ext.lower()
+    reader_cls, writer_cls = REGISTRY.get(ext, (None, None))
+    if reader_cls is not None:
+        return reader_cls
+    raise OEBError('Unknown e-book file extension %r' % ext)
+
+def WriterFactory(path):
+    """Return the Writer class to use for the output :param:`path`.
+
+    :class:`OEBWriter` is used when :param:`path` is an existing directory,
+    or when it does not exist and has no file extension.  Otherwise the
+    extension is looked up in ``REGISTRY``; :class:`OEBError` is raised when
+    no Writer is registered for it.
+    """
+    if os.path.isdir(path):
+        return OEBWriter
+    root, ext = os.path.splitext(path)
+    ext = ext.lower()
+    if not ext and not os.path.exists(path):
+        return OEBWriter
+    reader_cls, writer_cls = REGISTRY.get(ext, (None, None))
+    if writer_cls is not None:
+        return writer_cls
+    raise OEBError('Unknown e-book file extension %r' % ext)
+
+
+def option_parser(Reader, Writer):
+    """Build the command-line option parser for one Reader/Writer pair.
+
+    The Reader, every transform listed in ``Reader.TRANSFORMS`` and
+    ``Writer.TRANSFORMS``, and finally the Writer each get to register their
+    options on a shared :class:`Config`.  The generic ``--encoding``,
+    ``--output``, ``--pretty-print`` and ``--verbose`` options are then added
+    to the resulting parser, which is returned.
+    """
+    cfg = Config('ebook-convert', _('Options to control e-book conversion.'))
+    Reader.config(cfg)
+    for transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS):
+        transform.config(cfg)
+    Writer.config(cfg)
+    parser = cfg.option_parser()
+    add = parser.add_option
+    add('--encoding', default=None,
+        help=_('Character encoding for input. Default is to auto detect.'))
+    add('-o', '--output', default=None,
+        help=_('Output file. Default is derived from input filename.'))
+    add('-p', '--pretty-print', action='store_true', default=False,
+        help=_('Produce more human-readable XML output.'))
+    add('-v', '--verbose', default=0, action='count',
+        help=_('Useful for debugging.'))
+    return parser
+
+def main(argv=sys.argv):
+    """Command-line entry point: convert ``argv[1]`` into ``argv[2]``.
+
+    Returns 1 after printing usage or help when the arguments are
+    unusable, and 0 once the book has been read, transformed and written.
+    """
+    if len(argv) < 3:
+        print _("Usage: ebook-convert INFILE OUTFILE [OPTIONS..]")
+        return 1
+    inpath, outpath = argv[1:3]
+    Reader = ReaderFactory(inpath)
+    Writer = WriterFactory(outpath)
+    parser = option_parser(Reader, Writer)
+    opts, leftover = parser.parse_args(argv[3:])
+    if leftover:
+        parser.print_help()
+        return 1
+    logger = Logger(logging.getLogger('ebook-convert'))
+    logger.setup_cli_handler(opts.verbose)
+    oeb = OEBBook(encoding=opts.encoding, pretty_print=opts.pretty_print,
+                  logger=logger)
+    context = Context(Reader.DEFAULT_PROFILE, Writer.DEFAULT_PROFILE)
+    # Every pipeline stage is instantiated before any of them is run.
+    reader = Reader.generate(opts)
+    writer = Writer.generate(opts)
+    transforms = [Transform.generate(opts) for Transform in
+                  chain(Reader.TRANSFORMS, Writer.TRANSFORMS)]
+    reader(oeb, inpath)
+    for transform in transforms:
+        transform(oeb, context)
+    writer(oeb, outpath)
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@ -0,0 +1,562 @@
+"""
+Container-/OPF-based input OEBBook reader.
+"""
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
+
+import sys, os, uuid, copy
+from itertools import izip, chain
+from urlparse import urldefrag, urlparse
+from urllib import unquote as urlunquote
+from mimetypes import guess_type
+from collections import defaultdict
+from lxml import etree
+from calibre.ebooks.oeb.base import OPF1_NS, OPF2_NS, OPF2_NSMAP, DC11_NS, \
+    DC_NSES, OPF
+from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_IMAGES, \
+    PAGE_MAP_MIME, JPEG_MIME, NCX_MIME, SVG_MIME, BINARY_MIME
+from calibre.ebooks.oeb.base import XMLDECL_RE, COLLAPSE_RE, CSSURL_RE, \
+    ENTITY_RE, LINK_SELECTORS, MS_COVER_TYPE
+from calibre.ebooks.oeb.base import namespace, barename, qname, XPath, xpath
+from calibre.ebooks.oeb.base import urlnormalize, xml2str
+from calibre.ebooks.oeb.base import OEBError, OEBBook, DirContainer
+from calibre.ebooks.oeb.writer import OEBWriter
+from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
+from calibre.ebooks.metadata.epub import CoverRenderer
+from calibre.startup import get_lang
+from calibre.ptempfile import TemporaryDirectory
+
+__all__ = ['OEBReader']
+
+class OEBReader(object):
+    """Read an OEBPS 1.x or OPF/OPS 2.0 file collection."""
+    
+    COVER_SVG_XP    = XPath('h:body//svg:svg[position() = 1]')
+    COVER_OBJECT_XP = XPath('h:body//h:object[@data][position() = 1]')
+
+    Container = DirContainer
+    """Container type used to access book files.  Override in sub-classes."""
+    
+    DEFAULT_PROFILE = 'PRS505'
+    """Default renderer profile for content read with this Reader."""
+
+    TRANSFORMS = []
+    """List of transforms to apply to content read with this Reader."""
+
+    def __init__(self):
+        """Create a reader; nothing is stored on the instance here."""
+        pass
+    
+    @classmethod
+    def config(cls, cfg):
+        """Add any book-reading options to the :class:`Config` object
+        :param:`cfg`.
+
+        The base reader registers no options.  :param:`cfg` is returned so
+        that this hook behaves like the ``config`` hooks of the writers and
+        transforms, which hand back the object they were given.
+        """
+        return cfg
+
+    @classmethod
+    def generate(cls, opts):
+        """Generate a Reader instance from command-line options.
+
+        :param:`opts` is not consulted here; the class is instantiated with
+        no arguments.
+        """
+        return cls()
+
+    def __call__(self, oeb, path):
+        """Populate the :class:`OEBBook` object :param:`oeb` from the book
+        found at :param:`path` and return :param:`oeb`.
+        """
+        self.oeb = oeb
+        self.logger = oeb.logger
+        # The container must be in place before the OPF can be read from it.
+        oeb.container = self.Container(path)
+        self._all_from_opf(self._read_opf())
+        return oeb
+    
+    def _clean_opf(self, opf):
+        """Return a new ``package`` tree rebuilt from the parsed OPF
+        :param:`opf`.
+
+        Elements of :param:`opf` are re-tagged in place and then appended to
+        the new tree, so :param:`opf` itself is modified by this call (lxml
+        moves an element when it is appended to a different parent).
+        """
+        # Gather every namespace mapping declared anywhere in the document.
+        nsmap = {}
+        for elem in opf.iter(tag=etree.Element):
+            nsmap.update(elem.nsmap)
+        # Re-tag elements that are un-namespaced or in OPF1_NS via OPF()
+        # (presumably into the OPF 2 namespace -- OPF() is defined elsewhere).
+        for elem in opf.iter(tag=etree.Element):
+            if namespace(elem.tag) in ('', OPF1_NS):
+                elem.tag = OPF(barename(elem.tag))
+        nsmap.update(OPF2_NSMAP)
+        attrib = dict(opf.attrib)
+        nroot = etree.Element(OPF('package'),
+            nsmap={None: OPF2_NS}, attrib=attrib)
+        metadata = etree.SubElement(nroot, OPF('metadata'), nsmap=nsmap)
+        # The dc-metadata/x-metadata wrappers themselves are skipped; their
+        # descendants are still matched by ``//*`` and attached directly to
+        # the new <metadata> element.
+        ignored = (OPF('dc-metadata'), OPF('x-metadata'))
+        for elem in xpath(opf, 'o2:metadata//*'):
+            if elem.tag in ignored:
+                continue
+            # Elements in any of the DC_NSES namespaces are re-tagged into
+            # DC11_NS with a lower-cased local name.
+            if namespace(elem.tag) in DC_NSES:
+                tag = barename(elem.tag).lower()
+                elem.tag = '{%s}%s' % (DC11_NS, tag)
+            metadata.append(elem)
+        # NOTE(review): the loop above already appended every non-ignored
+        # descendant, so this query may find nothing left -- confirm.
+        for element in xpath(opf, 'o2:metadata//o2:meta'):
+            metadata.append(element)
+        # Carry the remaining top-level sections over to the new root.
+        for tag in ('o2:manifest', 'o2:spine', 'o2:tours', 'o2:guide'):
+            for element in xpath(opf, tag):
+                nroot.append(element)
+        return nroot
+    
+    def _read_opf(self):
+        """Read, parse and clean the book's OPF document.
+
+        Returns the root produced by :meth:`_clean_opf`.  Raises
+        :class:`OEBError` when the root element is in an unexpected
+        namespace.
+        """
+        raw = self.oeb.container.read(None)
+        raw = self.oeb.decode(raw)
+        raw = XMLDECL_RE.sub('', raw)
+        try:
+            root = etree.fromstring(raw)
+        except etree.XMLSyntaxError:
+            # Second attempt: substitute entities found in ENTITYDEFS and
+            # parse again; unknown entities are left as they were.
+            def replace_entity(match):
+                return ENTITYDEFS.get(match.group(1), match.group(0))
+            raw = ENTITY_RE.sub(replace_entity, raw)
+            root = etree.fromstring(raw)
+            self.logger.warn('OPF contains invalid HTML named entities')
+        ns = namespace(root.tag)
+        if ns not in ('', OPF1_NS, OPF2_NS):
+            raise OEBError('Invalid namespace %r for OPF document' % ns)
+        return self._clean_opf(root)
+    
+    def _metadata_from_opf(self, opf):
+        """Fill ``self.oeb.metadata`` and ``self.oeb.uid`` from :param:`opf`.
+
+        Every descendant of ``<metadata>`` becomes a metadata entry;
+        ``<meta name=... content=...>`` elements are unpacked into a
+        term/value pair.  A generated ``urn:uuid:`` identifier is added
+        unless the book already has both a ``urn:uuid:`` identifier and an
+        identifier carrying an ``id``.  Default language, creator and title
+        entries are added, with a warning, when those are missing.
+        """
+        uid = opf.get('unique-identifier', None)
+        self.oeb.uid = None
+        metadata = self.oeb.metadata
+        for elem in xpath(opf, '/o2:package/o2:metadata//*'):
+            term = elem.tag
+            value = elem.text
+            attrib = dict(elem.attrib)
+            nsmap = elem.nsmap
+            if term == OPF('meta'):
+                term = qname(attrib.pop('name', None), nsmap)
+                value = attrib.pop('content', None)
+            if value:
+                value = COLLAPSE_RE.sub(' ', value.strip())
+            if term and (value or attrib):
+                metadata.add(term, value, attrib, nsmap=nsmap)
+        haveuuid = haveid = False
+        for ident in metadata.identifier:
+            if unicode(ident).startswith('urn:uuid:'):
+                haveuuid = True
+            if 'id' in ident.attrib:
+                haveid = True
+        if not (haveuuid and haveid):
+            bookid = "urn:uuid:%s" % str(uuid.uuid4())
+            metadata.add('identifier', bookid, id='calibre-uuid')
+        if uid is None:
+            self.logger.warn(u'Unique-identifier not specified')
+        # Prefer the identifier named by the package's unique-identifier
+        # attribute; with no such attribute the first identifier that has an
+        # id is used.
+        for item in metadata.identifier:
+            if not item.id:
+                continue
+            if uid is None or item.id == uid:
+                self.oeb.uid = item
+                break
+        else:
+            self.logger.warn(u'Unique-identifier %r not found' % uid)
+            # Fall back to the first identifier that carries an id.  The
+            # matching identifier itself is used, not identifier[0], which
+            # need not have an id at all.
+            for ident in metadata.identifier:
+                if 'id' in ident.attrib:
+                    self.oeb.uid = ident
+                    break
+        if not metadata.language:
+            self.logger.warn(u'Language not specified')
+            metadata.add('language', get_lang())
+        if not metadata.creator:
+            self.logger.warn('Creator not specified')
+            metadata.add('creator', self.oeb.translate(__('Unknown')))
+        if not metadata.title:
+            self.logger.warn('Title not specified')
+            metadata.add('title', self.oeb.translate(__('Unknown')))
+
+    def _manifest_add_missing(self):
+        """Add to the manifest files which manifest items reference but
+        which the manifest does not list.
+
+        The manifest is processed as a worklist: each pass scans the
+        unchecked items for references, and items added during a pass are
+        themselves scanned on the next one.  Referenced files which the
+        container does not have are only warned about.
+        """
+        manifest = self.oeb.manifest
+        known = set(manifest.hrefs)
+        unchecked = set(manifest.values())
+        while unchecked:
+            new = set()
+            for item in unchecked:
+                # OEB documents and other XML media types with parsed data:
+                # references come from the LINK_SELECTORS queries.
+                if (item.media_type in OEB_DOCS or
+                    item.media_type[-4:] in ('/xml', '+xml')) and \
+                   item.data is not None:
+                    hrefs = [sel(item.data) for sel in LINK_SELECTORS]
+                    for href in chain(*hrefs):
+                        # ``_`` is a throwaway local for the fragment here.
+                        href, _ = urldefrag(href)
+                        if not href:
+                            continue
+                        href = item.abshref(urlnormalize(href))
+                        scheme = urlparse(href).scheme
+                        # Only references without a URL scheme are collected.
+                        if not scheme and href not in known:
+                            new.add(href)
+                elif item.media_type in OEB_STYLES:
+                    # Stylesheets: references are the ``url`` groups matched
+                    # by CSSURL_RE.
+                    for match in CSSURL_RE.finditer(item.data):
+                        href, _ = urldefrag(match.group('url'))
+                        href = item.abshref(urlnormalize(href))
+                        scheme = urlparse(href).scheme
+                        if not scheme and href not in known:
+                            new.add(href)
+            unchecked.clear()
+            for href in new:
+                # Marked as known even when missing, so each href is
+                # reported at most once.
+                known.add(href)
+                if not self.oeb.container.exists(href):
+                    self.logger.warn('Referenced file %r not found' % href)
+                    continue
+                self.logger.warn('Referenced file %r not in manifest' % href)
+                id, _ = manifest.generate(id='added')
+                guessed = guess_type(href)[0]
+                media_type = guessed or BINARY_MIME
+                added = manifest.add(id, href, media_type)
+                # Queue the new item so its own references get scanned too.
+                unchecked.add(added)
+    
+    def _manifest_from_opf(self, opf):
+        """Populate ``self.oeb.manifest`` from the ``<manifest>`` items of
+        :param:`opf`, then pull in referenced-but-unlisted files.
+
+        Items whose href is already in the manifest, or which the container
+        does not have, are skipped with a warning.  An item whose id is
+        already taken gets a generated id/href pair.
+        """
+        manifest = self.oeb.manifest
+        warn = self.logger.warn
+        for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'):
+            item_id = elem.get('id')
+            href = elem.get('href')
+            # Accept the misspelt ``mediatype`` attribute as a fallback.
+            media_type = elem.get('media-type', None)
+            if media_type is None:
+                media_type = elem.get('mediatype', None)
+            if media_type in (None, 'text/xml'):
+                media_type = guess_type(href)[0] or media_type or BINARY_MIME
+            fallback = elem.get('fallback')
+            if href in manifest.hrefs:
+                warn(u'Duplicate manifest entry for %r' % href)
+            elif not self.oeb.container.exists(href):
+                warn(u'Manifest item %r not found' % href)
+            else:
+                if item_id in manifest.ids:
+                    warn(u'Duplicate manifest id %r' % item_id)
+                    item_id, href = manifest.generate(item_id, href)
+                manifest.add(item_id, href, media_type, fallback)
+        self._manifest_add_missing()
+    
+    def _spine_add_extra(self):
+        """Append to the spine, as non-linear items, the OEB documents that
+        spine documents link to but which are not in the spine themselves.
+        """
+        manifest = self.oeb.manifest
+        spine = self.oeb.spine
+        unchecked = set(spine)
+        selector = XPath('h:body//h:a/@href')
+        extras = set()
+        # Links are followed transitively: each pass scans the documents
+        # discovered by the previous one, until nothing new turns up.
+        while unchecked:
+            new = set()
+            for item in unchecked:
+                if item.media_type not in OEB_DOCS:
+                    # TODO: handle fallback chains
+                    continue
+                for href in selector(item.data):
+                    href, _ = urldefrag(href)
+                    if not href:
+                        continue
+                    href = item.abshref(urlnormalize(href))
+                    if href not in manifest.hrefs:
+                        continue
+                    found = manifest.hrefs[href]
+                    # Only documents not yet in the spine or already queued.
+                    if found.media_type not in OEB_DOCS or \
+                       found in spine or found in extras:
+                        continue
+                    new.add(found)
+            extras.update(new)
+            unchecked = new
+        # First character of the package version string, as an integer.
+        version = int(self.oeb.version[0])
+        for item in sorted(extras):
+            # The warning is only emitted for version 2 and later packages;
+            # the item is added to the spine either way.
+            if version >= 2:
+                self.logger.warn(
+                    'Spine-referenced file %r not in spine' % item.href)
+            spine.add(item, linear=False)
+    
+    def _spine_from_opf(self, opf):
+        """Populate ``self.oeb.spine`` from the ``<spine>`` itemrefs of
+        :param:`opf`, then add linked-to documents missing from the spine.
+
+        Itemrefs naming an unknown manifest id are skipped with a warning.
+        Raises :class:`OEBError` when no item ends up in the spine.
+        """
+        spine = self.oeb.spine
+        ids = self.oeb.manifest.ids
+        for itemref in xpath(opf, '/o2:package/o2:spine/o2:itemref'):
+            idref = itemref.get('idref')
+            if idref in ids:
+                spine.add(ids[idref], itemref.get('linear'))
+            else:
+                self.logger.warn(u'Spine item %r not found' % idref)
+        if not len(spine):
+            raise OEBError("Spine is empty")
+        self._spine_add_extra()
+    
+    def _guide_from_opf(self, opf):
+        """Populate ``self.oeb.guide`` from the ``<guide>`` references of
+        :param:`opf`.
+
+        A reference is skipped with a warning when it has no ``href`` or
+        when its fragment-less path is not in the manifest.
+        """
+        guide = self.oeb.guide
+        manifest = self.oeb.manifest
+        for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
+            href = elem.get('href')
+            # A <reference> without an href attribute yields None, which
+            # urldefrag() cannot handle; treat it like an unknown target.
+            path = urldefrag(href)[0] if href else None
+            if path not in manifest.hrefs:
+                self.logger.warn(u'Guide reference %r not found' % href)
+                continue
+            guide.add(elem.get('type'), elem.get('title'), href)
+    
+    def _find_ncx(self, opf):
+        """Find the NCX manifest item, remove it from the manifest and
+        return it; return ``None`` when there is none.
+
+        The spine's ``toc`` attribute takes precedence.  Only when that
+        attribute is absent is the manifest searched for the first item of
+        type ``NCX_MIME``.
+        """
+        manifest = self.oeb.manifest
+        toc_ids = xpath(opf, '/o2:package/o2:spine/@toc')
+        found = None
+        if toc_ids:
+            ncx_id = toc_ids[0]
+            if ncx_id not in manifest.ids:
+                return None
+            found = manifest.ids[ncx_id]
+        else:
+            for item in manifest.values():
+                if item.media_type == NCX_MIME:
+                    found = item
+                    break
+            if found is None:
+                return None
+        manifest.remove(found)
+        return found
+    
+    def _toc_from_navpoint(self, item, toc, navpoint):
+        """Recursively add the ``navPoint`` children of :param:`navpoint` as
+        children of the TOC node :param:`toc`.
+
+        :param:`item` is the NCX manifest item; hrefs are resolved against
+        it.  Nav points lacking a label or a target, or whose target is not
+        in the manifest, are skipped together with their descendants.
+        """
+        for child in xpath(navpoint, 'ncx:navPoint'):
+            label = ''.join(xpath(child, 'ncx:navLabel/ncx:text/text()'))
+            label = COLLAPSE_RE.sub(' ', label.strip())
+            src = xpath(child, 'ncx:content/@src')
+            if not (label and src):
+                continue
+            href = item.abshref(urlnormalize(src[0]))
+            if urldefrag(href)[0] not in self.oeb.manifest.hrefs:
+                self.logger.warn('TOC reference %r not found' % href)
+                continue
+            node = toc.add(label, href, id=child.get('id'),
+                           klass=child.get('class'))
+            self._toc_from_navpoint(item, node, child)
+    
+    def _toc_from_ncx(self, item):
+        """Build the table of contents from the NCX manifest item
+        :param:`item`.
+
+        Returns ``False`` when :param:`item` is ``None`` and ``True``
+        otherwise.  The TOC title is the NCX ``docTitle`` text, falling back
+        to the first title in the book metadata.
+        """
+        if item is None:
+            return False
+        ncx = item.data
+        title = ''.join(xpath(ncx, 'ncx:docTitle/ncx:text/text()'))
+        title = COLLAPSE_RE.sub(' ', title.strip())
+        if not title:
+            title = unicode(self.oeb.metadata.title[0])
+        toc = self.oeb.toc
+        toc.title = title
+        for navmap in xpath(ncx, 'ncx:navMap'):
+            self._toc_from_navpoint(item, toc, navmap)
+        return True
+    
+    def _toc_from_tour(self, opf):
+        """Build the table of contents from the first ``<tour>`` of
+        :param:`opf`.
+
+        Returns ``False`` when there is no tour and ``True`` otherwise.
+        Sites lacking a title or href are skipped silently; sites whose
+        target is not in the manifest are skipped with a warning.
+        """
+        tours = xpath(opf, 'o2:tours/o2:tour')
+        if not tours:
+            return False
+        tour = tours[0]
+        toc = self.oeb.toc
+        toc.title = tour.get('title')
+        for site in xpath(tour, 'o2:site'):
+            title = site.get('title')
+            href = site.get('href')
+            if not (title and href):
+                continue
+            path = urldefrag(urlnormalize(href))[0]
+            if path in self.oeb.manifest.hrefs:
+                toc.add(title, href, id=site.get('id'))
+            else:
+                self.logger.warn('TOC reference %r not found' % href)
+        return True
+    
+    def _toc_from_html(self, opf):
+        """Build the table of contents from the HTML document named by the
+        guide's ``toc`` reference.
+
+        Returns ``False`` when the guide has no ``toc`` entry and ``True``
+        otherwise.  One TOC entry is added per distinct link target, in
+        order of first appearance; the texts of all links sharing a target
+        are joined with spaces.
+        """
+        if 'toc' not in self.oeb.guide:
+            return False
+        itempath, frag = urldefrag(self.oeb.guide['toc'].href)
+        item = self.oeb.manifest.hrefs[itempath]
+        html = item.data
+        if frag:
+            # Locate the element the fragment points at, by id and then by
+            # name, falling back to the whole document.
+            elems = xpath(html, './/*[@id="%s"]' % frag)
+            if not elems:
+                elems = xpath(html, './/*[@name="%s"]' % frag)
+            elem = elems[0] if elems else html
+            # Widen the scope towards the root until it contains a link.
+            while elem != html and not xpath(elem, './/h:a[@href]'):
+                elem = elem.getparent()
+            html = elem
+        titles = defaultdict(list)
+        order = []
+        for anchor in xpath(html, './/h:a[@href]'):
+            href = anchor.attrib['href']
+            href = item.abshref(urlnormalize(href))
+            path, frag = urldefrag(href)
+            # Links whose target is not in the manifest are ignored.
+            if path not in self.oeb.manifest.hrefs:
+                continue
+            title = ' '.join(xpath(anchor, './/text()'))
+            title = COLLAPSE_RE.sub(' ', title.strip())
+            # ``order`` remembers the first appearance of each target.
+            if href not in titles:
+                order.append(href)
+            titles[href].append(title)
+        toc = self.oeb.toc
+        for href in order:
+            toc.add(' '.join(titles[href]), href)
+        return True
+    
+    def _toc_from_spine(self, opf):
+        """Build a table of contents with one entry per linear spine item.
+
+        Each entry is labelled with the document's ``<title>``.  When two
+        documents share a title, every entry is labelled instead with the
+        document's first heading (``h1``-``h5``, then ``strong``), or a
+        placeholder when it has none.  A document without a title always
+        uses its heading.  Always returns ``True``.
+        """
+        toc = self.oeb.toc
+        # Title and heading are recorded per item so that a label can never
+        # be attached to a different document than the one it came from.
+        entries = []
+        for item in self.oeb.spine:
+            if not item.linear:
+                continue
+            html = item.data
+            title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
+            title = COLLAPSE_RE.sub(' ', title.strip())
+            header = '(unlabled)'
+            for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
+                expr = '/h:html/h:body//h:%s[position()=1]/text()'
+                text = ''.join(xpath(html, expr % tag))
+                text = COLLAPSE_RE.sub(' ', text.strip())
+                if text:
+                    header = text
+                    break
+            entries.append((item, title, header))
+        titles = [title for item, title, header in entries if title]
+        # Duplicate titles cannot tell entries apart; use headings instead.
+        use_headers = len(titles) > len(set(titles))
+        for item, title, header in entries:
+            label = title
+            if use_headers or not title:
+                label = header
+            toc.add(label, item.href)
+        return True
+    
+    def _toc_from_opf(self, opf, item):
+        """Build the table of contents from the first source that works.
+
+        Sources are tried in order: the NCX :param:`item`, the OPF tours,
+        the HTML document named by the guide and finally the spine.  A
+        warning is logged when neither of the first two is available.
+        """
+        if self._toc_from_ncx(item) or self._toc_from_tour(opf):
+            return
+        self.logger.warn('No metadata table of contents found')
+        if not self._toc_from_html(opf):
+            self._toc_from_spine(opf)
+    
+    def _pages_from_ncx(self, opf, item):
+        """Fill ``self.oeb.pages`` from the ``pageList`` of the NCX manifest
+        item :param:`item`.
+
+        Returns ``False`` when :param:`item` is ``None`` or has no page
+        targets, ``True`` otherwise.  :param:`opf` is not used.
+        """
+        if item is None:
+            return False
+        targets = xpath(item.data, 'ncx:pageList/ncx:pageTarget')
+        if not targets:
+            return False
+        pages = self.oeb.pages
+        for target in targets:
+            label = ''.join(xpath(target, 'ncx:navLabel/ncx:text/text()'))
+            label = COLLAPSE_RE.sub(' ', label.strip())
+            src = xpath(target, 'ncx:content/@src')
+            if src:
+                pages.add(label, item.abshref(urlnormalize(src[0])),
+                          type=target.get('type', 'normal'),
+                          id=target.get('id'), klass=target.get('class'))
+        return True
+    
+    def _find_page_map(self, opf):
+        """Find the page-map manifest item, remove it from the manifest and
+        return it; return ``None`` when there is none.
+
+        The spine's ``page-map`` attribute takes precedence.  Only when that
+        attribute is absent is the manifest searched for the first item of
+        type ``PAGE_MAP_MIME``.
+        """
+        manifest = self.oeb.manifest
+        map_ids = xpath(opf, '/o2:package/o2:spine/@page-map')
+        found = None
+        if map_ids:
+            map_id = map_ids[0]
+            if map_id not in manifest.ids:
+                return None
+            found = manifest.ids[map_id]
+        else:
+            for item in manifest.values():
+                if item.media_type == PAGE_MAP_MIME:
+                    found = item
+                    break
+            if found is None:
+                return None
+        manifest.remove(found)
+        return found
+    
+    def _pages_from_page_map(self, opf):
+        """Fill ``self.oeb.pages`` from the book's page-map document.
+
+        Returns ``False`` when no page map is found, ``True`` otherwise.
+        Pages without an href are skipped.  The page type is ``special`` for
+        an empty name, ``front`` for a name made up solely of the letters
+        ``ivxlcdm`` (in either case) and ``normal`` for anything else.
+        """
+        item = self._find_page_map(opf)
+        if item is None:
+            return False
+        pmap = item.data
+        pages = self.oeb.pages
+        for page in xpath(pmap, 'o2:page'):
+            href = page.get('href')
+            if not href:
+                continue
+            name = COLLAPSE_RE.sub(' ', page.get('name', '').strip())
+            href = item.abshref(urlnormalize(href))
+            if not name:
+                kind = 'special'
+            elif name.lower().strip('ivxlcdm') == '':
+                kind = 'front'
+            else:
+                kind = 'normal'
+            pages.add(name, href, type=kind)
+        return True
+    
+    def _pages_from_opf(self, opf, item):
+        """Fill the page list from the NCX :param:`item` or, failing that,
+        from the book's page map.  Nothing is returned.
+        """
+        if not self._pages_from_ncx(opf, item):
+            self._pages_from_page_map(opf)
+    
+    def _cover_from_html(self, hcover):
+        """Produce cover image data from the document :param:`hcover` and
+        return it as a new JPEG manifest item.
+        """
+        with TemporaryDirectory('_html_cover') as tdir:
+            # The whole book is written into a scratch directory and the
+            # renderer is pointed at the cover document inside it
+            # (presumably so the renderer can load it, and whatever it
+            # references, from disk).
+            writer = OEBWriter()
+            writer(self.oeb, tdir)
+            path = os.path.join(tdir, urlunquote(hcover.href))
+            renderer = CoverRenderer(path)
+            # Read the image data before the directory is cleaned up.
+            data = renderer.image_data
+        id, href = self.oeb.manifest.generate('cover', 'cover.jpeg')
+        item = self.oeb.manifest.add(id, href, JPEG_MIME, data=data)
+        return item
+        
+    def _locate_cover_image(self):
+        """Return a manifest item to use as the book's cover image.
+
+        Candidates are tried in order: the item named by the ``cover``
+        metadata entry, the guide's ``cover`` reference, the guide's
+        ``MS_COVER_TYPE`` reference, the first SVG in the cover document,
+        the first ``<object data=...>`` in the cover document and, last of
+        all, a rendering of the cover document itself.  The cover document
+        is the first spine item unless the guide's ``cover`` reference
+        names another document.
+        """
+        if self.oeb.metadata.cover:
+            id = str(self.oeb.metadata.cover[0])
+            item = self.oeb.manifest.ids.get(id, None)
+            if item is not None and item.media_type in OEB_IMAGES:
+                return item
+            else:
+                self.logger.warn('Invalid cover image @id %r' % id)
+        hcover = self.oeb.spine[0]
+        if 'cover' in self.oeb.guide:
+            # Guide hrefs are stored with their fragment while manifest
+            # lookups in this class use the fragment-less path, so strip it.
+            # A reference that cannot be resolved is ignored, not fatal.
+            path = urldefrag(self.oeb.guide['cover'].href)[0]
+            item = self.oeb.manifest.hrefs.get(path, None)
+            if item is not None:
+                media_type = item.media_type
+                if media_type in OEB_IMAGES:
+                    return item
+                elif media_type in OEB_DOCS:
+                    hcover = item
+        html = hcover.data
+        if MS_COVER_TYPE in self.oeb.guide:
+            path = urldefrag(self.oeb.guide[MS_COVER_TYPE].href)[0]
+            item = self.oeb.manifest.hrefs.get(path, None)
+            if item is not None and item.media_type in OEB_IMAGES:
+                return item
+        svgs = self.COVER_SVG_XP(html)
+        if svgs:
+            # The SVG is copied so the cover document itself stays intact.
+            svg = copy.deepcopy(svgs[0])
+            href = os.path.splitext(hcover.href)[0] + '.svg'
+            id, href = self.oeb.manifest.generate(hcover.id, href)
+            item = self.oeb.manifest.add(id, href, SVG_MIME, data=svg)
+            return item
+        objects = self.COVER_OBJECT_XP(html)
+        if objects:
+            href = hcover.abshref(objects[0].get('data'))
+            item = self.oeb.manifest.hrefs.get(href, None)
+            if item is not None and item.media_type in OEB_IMAGES:
+                return item
+        return self._cover_from_html(hcover)
+        
+    def _ensure_cover_image(self):
+        """Point the ``cover`` metadata entry at the located cover item,
+        creating that entry when the book has none.
+        """
+        cover = self._locate_cover_image()
+        metadata = self.oeb.metadata
+        if metadata.cover:
+            metadata.cover[0].value = cover.id
+        else:
+            metadata.add('cover', cover.id)
+    
+    def _all_from_opf(self, opf):
+        """Populate ``self.oeb`` from the cleaned OPF tree :param:`opf`.
+
+        The steps run in dependency order: the manifest is read before the
+        spine and guide, which look items up in it, and the NCX item is
+        located once and shared by the TOC and page-list steps.
+        """
+        # The package version defaults to '1.2' when the attribute is absent.
+        self.oeb.version = opf.get('version', '1.2')
+        self._metadata_from_opf(opf)
+        self._manifest_from_opf(opf)
+        self._spine_from_opf(opf)
+        self._guide_from_opf(opf)
+        # _find_ncx() also removes the NCX item from the manifest.
+        item = self._find_ncx(opf)
+        self._toc_from_opf(opf, item)
+        self._pages_from_opf(opf, item)
+        self._ensure_cover_image()
+    
+
+def main(argv=sys.argv):
+    """Read every book named in ``argv[1:]`` and print the documents
+    returned by ``to_opf1()`` and ``to_opf2(page_map=True)``.  Returns 0.
+    """
+    def dump(documents):
+        # Each value is a (name, document) pair; only the document is shown.
+        for name, doc in documents.values():
+            print etree.tostring(doc, pretty_print=True)
+    reader = OEBReader()
+    for path in argv[1:]:
+        oeb = reader(OEBBook(), path)
+        dump(oeb.to_opf1())
+        dump(oeb.to_opf2(page_map=True))
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
--- a/src/calibre/ebooks/oeb/transforms/flatcss.py
+++ b/src/calibre/ebooks/oeb/transforms/flatcss.py
@ -98,7 +98,15 @@ class CSSFlattener(object):
        self.unfloat = unfloat
        self.untable = untable

-    def transform(self, oeb, context):
+    @classmethod
+    def config(cls, cfg):
+        """Hook for registering this transform's options on `cfg`.
+
+        No options are added here; `cfg` is returned unchanged.
+        """
+        return cfg
+
+    @classmethod
+    def generate(cls, opts):
+        """Return an instance constructed with no arguments; `opts` is not
+        consulted.
+        """
+        return cls()
+    
+    def __call__(self, oeb, context):
        oeb.logger.info('Flattening CSS and remapping font sizes...')
        self.oeb = oeb
        self.context = context
--- a/src/calibre/ebooks/oeb/transforms/htmltoc.py
+++ b/src/calibre/ebooks/oeb/transforms/htmltoc.py
@ -52,7 +52,18 @@ class HTMLTOCAdder(object):
        self.title = title
        self.style = style
    
-    def transform(self, oeb, context):
+    @classmethod
+    def config(cls, cfg):
+        """Register the 'htmltoc' option group on `cfg` and return `cfg`.
+
+        The group holds a single option, ``toc_title`` (``--toc-title``),
+        which defaults to None.
+        """
+        group = cfg.add_group('htmltoc', _('HTML TOC generation options.'))
+        group('toc_title', ['--toc-title'], default=None, 
+              help=_('Title for any generated in-line table of contents.'))
+        return cfg
+
+    @classmethod
+    def generate(cls, opts):
+        """Return an instance whose ``title`` is taken from
+        ``opts.toc_title``.
+        """
+        return cls(title=opts.toc_title)
+    
+    def __call__(self, oeb, context):
        if 'toc' in oeb.guide:
            return
        oeb.logger.info('Generating in-line TOC...')
--- a/src/calibre/ebooks/oeb/transforms/manglecase.py
+++ b/src/calibre/ebooks/oeb/transforms/manglecase.py
@ -29,7 +29,15 @@ CASE_MANGLER_CSS = """
 TEXT_TRANSFORMS = set(['capitalize', 'uppercase', 'lowercase'])

 class CaseMangler(object):
-    def transform(self, oeb, context):
+    @classmethod
+    def config(cls, cfg):
+        """Hook for registering this transform's options on `cfg`.
+
+        No options are added here; `cfg` is returned unchanged.
+        """
+        return cfg
+
+    @classmethod
+    def generate(cls, opts):
+        """Return an instance constructed with no arguments; `opts` is not
+        consulted.
+        """
+        return cls()
+    
+    def __call__(self, oeb, context):
        oeb.logger.info('Applying case-transforming CSS...')
        self.oeb = oeb
        self.profile = context.source
--- a/src/calibre/ebooks/oeb/transforms/rasterize.py
+++ b/src/calibre/ebooks/oeb/transforms/rasterize.py
@ -34,7 +34,15 @@ class SVGRasterizer(object):
        if QApplication.instance() is None:
            QApplication([])

-    def transform(self, oeb, context):
+    @classmethod
+    def config(cls, cfg):
+        """Hook for registering this transform's options on `cfg`.
+
+        No options are added here; `cfg` is returned unchanged.
+        """
+        return cfg
+
+    @classmethod
+    def generate(cls, opts):
+        """Return an instance constructed with no arguments; `opts` is not
+        consulted.
+        """
+        return cls()
+    
+    def __call__(self, oeb, context):
        oeb.logger.info('Rasterizing SVG images...')
        self.oeb = oeb
        self.profile = context.dest
--- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py
+++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py
@ -13,7 +13,15 @@ from calibre.ebooks.oeb.base import LINK_SELECTORS, CSSURL_RE
 from calibre.ebooks.oeb.base import urlnormalize

 class ManifestTrimmer(object):
-    def transform(self, oeb, context):
+    @classmethod
+    def config(cls, cfg):
+        """Hook for registering this transform's options on `cfg`.
+
+        No options are added here; `cfg` is returned unchanged.
+        """
+        return cfg
+
+    @classmethod
+    def generate(cls, opts):
+        """Return an instance constructed with no arguments; `opts` is not
+        consulted.
+        """
+        return cls()
+    
+    def __call__(self, oeb, context):
        oeb.logger.info('Trimming unused files from manifest...')
        used = set()
        hrefs = oeb.manifest.hrefs
--- a/src/calibre/ebooks/oeb/writer.py
+++ b/src/calibre/ebooks/oeb/writer.py
@ -0,0 +1,75 @@
+'''
+Directory output OEBBook writer.
+'''
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
+
+import sys, os, logging
+from calibre.ebooks.oeb.base import OPF_MIME, xml2str
+from calibre.ebooks.oeb.base import Logger, DirContainer, OEBBook, OEBError
+
+__all__ = ['OEBWriter']
+
+class OEBWriter(object):
+    """Write an :class:`OEBBook` out as a directory of files.
+
+    Each manifest item is written through a :class:`DirContainer`, followed
+    by the metadata documents generated for the configured OPF version.
+    """
+
+    DEFAULT_PROFILE = 'PRS505'
+    """Default renderer profile for content written with this Writer."""
+
+    TRANSFORMS = []
+    """List of transforms to apply to content written with this Writer."""
+
+    def __init__(self, version='2.0', page_map=False, pretty_print=False):
+        """
+        :param version: OPF version string; only its first character (the
+            major version) is interpreted when writing.
+        :param page_map: Passed as ``page_map`` to :meth:`OEBBook.to_opf2`.
+        :param pretty_print: Passed as ``pretty_print`` to ``xml2str`` when
+            the metadata documents are serialized.
+        """
+        self.version = version
+        self.page_map = page_map
+        self.pretty_print = pretty_print
+
+    @classmethod
+    def config(cls, cfg):
+        """Add any book-writing options to the :class:`Config` object
+        :param:`cfg`.
+        """
+        oeb = cfg.add_group('oeb', _('OPF/NCX/etc. generation options.'))
+        versions = ['1.2', '2.0']
+        oeb('opf_version', ['--opf-version'], default='2.0', choices=versions,
+            help=_('OPF version to generate. Default is %default.'))
+        oeb('adobe_page_map', ['--adobe-page-map'], default=False,
+            help=_('Generate an Adobe "page-map" file if pagination '
+                   'information is available.'))
+        return cfg
+
+    @classmethod
+    def generate(cls, opts):
+        """Generate a Writer instance from command-line options."""
+        version = opts.opf_version
+        page_map = opts.adobe_page_map
+        # NOTE(review): `pretty_print` is not registered by config() above;
+        # presumably it comes from a shared option group -- confirm.
+        pretty_print = opts.pretty_print
+        return cls(version=version, page_map=page_map,
+                   pretty_print=pretty_print)
+
+    def __call__(self, oeb, path):
+        """Write the book in the :class:`OEBBook` object :param:`oeb` to the
+        directory at :param:`path`.
+
+        If :param:`path` ends in ``.opf``, its directory part is used as the
+        output directory and its base name as the file name of the generated
+        OPF document. Missing output directories are created.
+
+        Raises :class:`OEBError` if the configured OPF version is not a 1.x
+        or 2.x version; this is checked before anything is written.
+        """
+        try:
+            version = int(self.version[0])
+        except (TypeError, ValueError, IndexError):
+            version = None
+        if version not in (1, 2):
+            # Fail before touching the disk so a bad version does not leave a
+            # half-written book behind.
+            raise OEBError("Unrecognized OPF version %r" % (self.version,))
+        opfname = None
+        if os.path.splitext(path)[1].lower() == '.opf':
+            opfname = os.path.basename(path)
+            # A bare file name has an empty dirname; use the current
+            # directory instead of trying to create ''.
+            path = os.path.dirname(path) or os.curdir
+        if not os.path.isdir(path):
+            # makedirs also creates any missing parent directories.
+            os.makedirs(path)
+        output = DirContainer(path)
+        for item in oeb.manifest.values():
+            output.write(item.href, str(item))
+        if version == 1:
+            metadata = oeb.to_opf1()
+        else:
+            metadata = oeb.to_opf2(page_map=self.page_map)
+        for mime, (href, data) in metadata.items():
+            # Honour the caller-supplied OPF file name, if one was given.
+            if opfname and mime == OPF_MIME:
+                href = opfname
+            output.write(href, xml2str(data, pretty_print=self.pretty_print))
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@ -201,14 +201,14 @@ class Document(QWebPage):
    def bookmark(self):
        return self.javascript('calculate_bookmark(%d)'%(self.ypos+25), 'string')
    
-    @apply
-    def at_bottom():
+    @dynamic_property
+    def at_bottom(self):
        def fget(self):
            return self.height - self.ypos <= self.window_height
        return property(fget=fget)
    
-    @apply
-    def at_top():
+    @dynamic_property
+    def at_top(self):
        def fget(self):
            return self.ypos <= 0
        return property(fget=fget)
@ -217,32 +217,32 @@ class Document(QWebPage):
    def test(self):
        pass
    
-    @apply
-    def ypos():
+    @dynamic_property
+    def ypos(self):
        def fget(self):
            return self.javascript('window.pageYOffset', 'int')
        return property(fget=fget)
    
-    @apply
-    def window_height():
+    @dynamic_property
+    def window_height(self):
        def fget(self):
            return self.javascript('window.innerHeight', 'int')
        return property(fget=fget)
    
-    @apply
-    def window_width():
+    @dynamic_property
+    def window_width(self):
        def fget(self):
            return self.javascript('window.innerWidth', 'int')
        return property(fget=fget)
        
-    @apply
-    def xpos():
+    @dynamic_property
+    def xpos(self):
        def fget(self):
            return self.javascript('window.pageXOffset', 'int')
        return property(fget=fget)
    
-    @apply
-    def scroll_fraction():
+    @dynamic_property
+    def scroll_fraction(self):
        def fget(self):
            try:
                return float(self.ypos)/(self.height-self.window_height)
@ -250,20 +250,20 @@ class Document(QWebPage):
                return 0.
        return property(fget=fget)
    
-    @apply
-    def hscroll_fraction():
+    @dynamic_property
+    def hscroll_fraction(self):
        def fget(self):
            return float(self.xpos)/self.width
        return property(fget=fget)
    
-    @apply
-    def height():
+    @dynamic_property
+    def height(self):
        def fget(self):
            return self.javascript('document.body.offsetHeight', 'int') # contentsSize gives inaccurate results
        return property(fget=fget)
    
-    @apply
-    def width():
+    @dynamic_property
+    def width(self):
        def fget(self):
            return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results
        return property(fget=fget)
@ -342,20 +342,20 @@ class DocumentView(QWebView):
    def sizeHint(self):
        return self._size_hint
    
-    @apply
-    def scroll_fraction():
+    @dynamic_property
+    def scroll_fraction(self):
        def fget(self):
            return self.document.scroll_fraction
        return property(fget=fget)
    
-    @apply
-    def hscroll_fraction():
+    @dynamic_property
+    def hscroll_fraction(self):
        def fget(self):
            return self.document.hscroll_fraction
        return property(fget=fget)
    
-    @apply
-    def content_size():
+    @dynamic_property
+    def content_size(self):
        def fget(self):
            return self.document.width, self.document.height
        return property(fget=fget)
--- a/src/calibre/library/init.py
+++ b/src/calibre/library/init.py
@ -1,16 +1,8 @@
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 ''' Code to manage ebook library'''
-import re
 from calibre.utils.config import Config, StringConfig

-title_pat = re.compile('^(A|The|An)\s+', re.IGNORECASE)
-def title_sort(title):
-    match = title_pat.search(title)
-    if match:
-        prep = match.group(1)
-        title = title.replace(prep, '') + ', ' + prep
-    return title.strip()

 def server_config(defaults=None):
    desc=_('Settings to control the calibre content server')
--- a/src/calibre/library/database.py
+++ b/src/calibre/library/database.py
@ -814,8 +814,8 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
 #        _lock_file = None
        self.conn.close()

-    @apply
-    def user_version():
+    @dynamic_property
+    def user_version(self):
        doc = 'The user version of this database'
        def fget(self):
            return self.conn.get('pragma user_version;', all=False)
@ -1455,4 +1455,4 @@ def text_to_tokens(text):

 if __name__ == '__main__':
    sqlite.enable_callback_tracebacks(True)
-    db = LibraryDatabase('/home/kovid/temp/library1.db.orig')
+    db = LibraryDatabase('/home/kovid/temp/library1.db.orig')
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -33,14 +33,14 @@ from calibre.ebooks import BOOK_EXTENSIONS

 copyfile = os.link if hasattr(os, 'link') else shutil.copyfile

-FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5, 
+FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5,
             'size':6, 'tags':7, 'comments':8, 'series':9, 'series_index':10,
             'sort':11, 'author_sort':12, 'formats':13, 'isbn':14, 'path':15}
 INDEX_MAP = dict(zip(FIELD_MAP.values(), FIELD_MAP.keys()))


 class CoverCache(QThread):
-    
+
    def __init__(self, library_path, parent=None):
        QThread.__init__(self, parent)
        self.library_path = library_path
@ -52,7 +52,7 @@ class CoverCache(QThread):
        self.cache_lock = QReadWriteLock()
        self.id_map_stale = True
        self.keep_running = True
-        
+
    def build_id_map(self):
        self.id_map_lock.lockForWrite()
        self.id_map = {}
@ -65,8 +65,8 @@ class CoverCache(QThread):
                continue
        self.id_map_lock.unlock()
        self.id_map_stale = False
-            
-    
+
+
    def set_cache(self, ids):
        self.cache_lock.lockForWrite()
        already_loaded = set([])
@ -80,8 +80,8 @@ class CoverCache(QThread):
        self.load_queue_lock.lockForWrite()
        self.load_queue = collections.deque(ids)
        self.load_queue_lock.unlock()
-        
-    
+
+
    def run(self):
        while self.keep_running:
            if self.id_map is None or self.id_map_stale:
@ -94,7 +94,7 @@ class CoverCache(QThread):
                    break
                finally:
                    self.load_queue_lock.unlock()
-                
+
                self.cache_lock.lockForRead()
                need = True
                if id in self.cache.keys():
@ -121,19 +121,19 @@ class CoverCache(QThread):
                    self.cache_lock.lockForWrite()
                    self.cache[id] = img
                    self.cache_lock.unlock()
-             
+
            self.sleep(1)
-            
+
    def stop(self):
        self.keep_running = False
-        
+
    def cover(self, id):
        val = None
        if self.cache_lock.tryLockForRead(50):
            val = self.cache.get(id, None)
            self.cache_lock.unlock()
        return val
-    
+
    def clear_cache(self):
        self.cache_lock.lockForWrite()
        self.cache = {}
@ -148,24 +148,24 @@ class CoverCache(QThread):
        for id in ids:
            self.load_queue.appendleft(id)
        self.load_queue_lock.unlock()
-    
+
 class ResultCache(SearchQueryParser):
-    
+
    '''
    Stores sorted and filtered metadata in memory.
    '''
-    
+
    def __init__(self):
        self._map = self._map_filtered = self._data = []
        self.first_sort = True
        SearchQueryParser.__init__(self)
-        
+
    def __getitem__(self, row):
        return self._data[self._map_filtered[row]]
-    
+
    def __len__(self):
        return len(self._map_filtered)
-    
+
    def __iter__(self):
        for id in self._map_filtered:
            yield self._data[id]
@ -194,32 +194,32 @@ class ResultCache(SearchQueryParser):
                        matches.add(item[0])
                        break
        return matches
-            
+
    def remove(self, id):
        self._data[id] = None
        if id in self._map:
            self._map.remove(id)
        if id in self._map_filtered:
            self._map_filtered.remove(id)
-            
+
    def set(self, row, col, val, row_is_id=False):
-        id = row if row_is_id else self._map_filtered[row]  
+        id = row if row_is_id else self._map_filtered[row]
        self._data[id][col] = val
-        
+
    def index(self, id, cache=False):
        x = self._map if cache else self._map_filtered
        return x.index(id)
-        
+
    def row(self, id):
        return self.index(id)
-    
+
    def has_id(self, id):
        try:
            return self._data[id] is not None
        except IndexError:
            pass
        return False
-    
+
    def refresh_ids(self, conn, ids):
        '''
        Refresh the data in the cache for books identified by ids.
@ -232,7 +232,7 @@ class ResultCache(SearchQueryParser):
        except ValueError:
            pass
        return None
-    
+
    def books_added(self, ids, conn):
        if not ids:
            return
@ -241,16 +241,16 @@ class ResultCache(SearchQueryParser):
            self._data[id] = conn.get('SELECT * from meta WHERE id=?', (id,))[0]
        self._map[0:0] = ids
        self._map_filtered[0:0] = ids
-        
+
    def books_deleted(self, ids):
        for id in ids:
            self._data[id] = None
            if id in self._map: self._map.remove(id)
            if id in self._map_filtered: self._map_filtered.remove(id)
-    
+
    def count(self):
        return len(self._map)
-    
+
    def refresh(self, db, field=None, ascending=True):
        temp = db.conn.get('SELECT * FROM meta')
        self._data = list(itertools.repeat(None, temp[-1][0]+2)) if temp else []
@ -260,7 +260,7 @@ class ResultCache(SearchQueryParser):
        if field is not None:
            self.sort(field, ascending)
        self._map_filtered = list(self._map)
-    
+
    def seriescmp(self, x, y):
        try:
            ans = cmp(self._data[x][9].lower(), self._data[y][9].lower()) if str else\
@ -291,28 +291,28 @@ class ResultCache(SearchQueryParser):
            subsort = True
            self.first_sort = False
        fcmp = self.seriescmp if field == 'series' else \
-            functools.partial(self.cmp, FIELD_MAP[field], subsort=subsort, 
+            functools.partial(self.cmp, FIELD_MAP[field], subsort=subsort,
                              str=field not in ('size', 'rating', 'timestamp'))
-        
+
        self._map.sort(cmp=fcmp, reverse=not ascending)
        self._map_filtered = [id for id in self._map if id in self._map_filtered]
-                
+
    def search(self, query):
        if not query or not query.strip():
            self._map_filtered = list(self._map)
            return
        matches = sorted(self.parse(query))
        self._map_filtered = [id for id in self._map if id in matches]
-    
-    
+
+
 class Tag(unicode):
-    
+
    def __new__(cls, *args):
        obj = super(Tag, cls).__new__(cls, *args)
        obj.count = 0
        obj.state = 0
        return obj
-        
+
    def as_string(self):
        return u'[%d] %s'%(self.count, self)

@ -321,19 +321,19 @@ class LibraryDatabase2(LibraryDatabase):
    An ebook metadata database that stores references to ebook files on disk.
    '''
    PATH_LIMIT = 40 if 'win32' in sys.platform else 100
-    @apply
-    def user_version():
+    @dynamic_property
+    def user_version(self):
        doc = 'The user version of this database'
-        
+
        def fget(self):
            return self.conn.get('pragma user_version;', all=False)
-        
+
        def fset(self, val):
            self.conn.execute('pragma user_version=%d'%int(val))
            self.conn.commit()
-        
+
        return property(doc=doc, fget=fget, fset=fset)
-    
+
    def connect(self):
        if 'win32' in sys.platform and len(self.library_path) + 4*self.PATH_LIMIT + 10 > 259:
            raise ValueError('Path to library too long. Must be less than %d characters.'%(259-4*self.PATH_LIMIT-10))
@ -343,9 +343,9 @@ class LibraryDatabase2(LibraryDatabase):
            self.conn.close()
            os.remove(self.dbpath)
            self.conn = connect(self.dbpath, self.row_factory)
-        if self.user_version == 0: 
+        if self.user_version == 0:
            self.initialize_database()
-    
+
    def __init__(self, library_path, row_factory=False):
        if not os.path.exists(library_path):
            os.makedirs(library_path)
@ -358,7 +358,7 @@ class LibraryDatabase2(LibraryDatabase):
        self.connect()
        self.is_case_sensitive = not iswindows and not isosx and \
            not os.path.exists(self.dbpath.replace('metadata.db', 'MeTAdAtA.dB'))
-        # Upgrade database 
+        # Upgrade database
        while True:
            meth = getattr(self, 'upgrade_version_%d'%self.user_version, None)
            if meth is None:
@ -368,7 +368,7 @@ class LibraryDatabase2(LibraryDatabase):
                meth()
                self.conn.commit()
                self.user_version += 1
-        
+
        self.data    = ResultCache()
        self.search  = self.data.search
        self.refresh = functools.partial(self.data.refresh, self)
@ -378,24 +378,24 @@ class LibraryDatabase2(LibraryDatabase):
        self.row     = self.data.row
        self.has_id  = self.data.has_id
        self.count   = self.data.count
-        
+
        self.refresh()
-        
+
        def get_property(idx, index_is_id=False, loc=-1):
            row = self.data._data[idx] if index_is_id else self.data[idx]
            return row[loc]
-        
-        for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn', 
-                     'publisher', 'rating', 'series', 'series_index', 'tags', 
+
+        for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn',
+                     'publisher', 'rating', 'series', 'series_index', 'tags',
                     'title', 'timestamp'):
-            setattr(self, prop, functools.partial(get_property, 
+            setattr(self, prop, functools.partial(get_property,
                    loc=FIELD_MAP['comments' if prop == 'comment' else prop]))
-        
+
    def initialize_database(self):
        from calibre.resources import metadata_sqlite
        self.conn.executescript(metadata_sqlite)
        self.user_version = 1
-        
+
    def upgrade_version_1(self):
        '''
        Normalize indices.
@ -407,7 +407,7 @@ class LibraryDatabase2(LibraryDatabase):
        CREATE INDEX series_idx ON series (name COLLATE NOCASE);
        CREATE INDEX series_sort_idx ON books (series_index, id);
        '''))
-        
+
    def upgrade_version_2(self):
        ''' Fix Foreign key constraints for deleting from link tables. '''
        script = textwrap.dedent('''\
@ -426,7 +426,7 @@ class LibraryDatabase2(LibraryDatabase):
        self.conn.executescript(script%dict(ltable='publishers', table='publishers', ltable_col='publisher'))
        self.conn.executescript(script%dict(ltable='tags', table='tags', ltable_col='tag'))
        self.conn.executescript(script%dict(ltable='series', table='series', ltable_col='series'))
-    
+
    def upgrade_version_3(self):
        ' Add path to result cache '
        self.conn.executescript('''
@ -450,25 +450,25 @@ class LibraryDatabase2(LibraryDatabase):
        FROM books;
        ''')

-    
+
    def last_modified(self):
        ''' Return last modified time as a UTC datetime object'''
        return datetime.utcfromtimestamp(os.stat(self.dbpath).st_mtime)
-    
+
    def path(self, index, index_is_id=False):
        'Return the relative path to the directory containing this books files as a unicode string.'
        row = self.data._data[index] if index_is_id else self.data[index]
        return row[FIELD_MAP['path']].replace('/', os.sep)
-        
-    
+
+
    def abspath(self, index, index_is_id=False):
        'Return the absolute path to the directory containing this books files as a unicode string.'
        path = os.path.join(self.library_path, self.path(index, index_is_id=index_is_id))
        if not os.path.exists(path):
            os.makedirs(path)
        return path
-            
-    
+
+
    def construct_path_name(self, id):
        '''
        Construct the directory name for this book based on its metadata.
@ -480,7 +480,7 @@ class LibraryDatabase2(LibraryDatabase):
        title  = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
        path   = author + '/' + title + ' (%d)'%id
        return path
-    
+
    def construct_file_name(self, id):
        '''
        Construct the file name for this book based on its metadata.
@ -492,17 +492,17 @@ class LibraryDatabase2(LibraryDatabase):
        title  = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
        name   = title + ' - ' + author
        return name
-    
+
    def rmtree(self, path):
        if not self.normpath(self.library_path).startswith(self.normpath(path)):
            shutil.rmtree(path)
-    
+
    def normpath(self, path):
        path = os.path.abspath(os.path.realpath(path))
        if not self.is_case_sensitive:
            path = path.lower()
        return path
-    
+
    def set_path(self, index, index_is_id=False):
        '''
        Set the path to the directory containing this books files based on its
@ -524,12 +524,12 @@ class LibraryDatabase2(LibraryDatabase):
                break
        if path == current_path and not changed:
            return
-        
+
        tpath = os.path.join(self.library_path, *path.split('/'))
        if not os.path.exists(tpath):
            os.makedirs(tpath)
        spath = os.path.join(self.library_path, *current_path.split('/'))
-        
+
        if current_path and os.path.exists(spath): # Migrate existing files
            cdata = self.cover(id, index_is_id=True)
            if cdata is not None:
@ -551,14 +551,14 @@ class LibraryDatabase2(LibraryDatabase):
                parent  = os.path.dirname(spath)
                if len(os.listdir(parent)) == 0:
                    self.rmtree(parent)
-            
+
    def add_listener(self, listener):
        '''
        Add a listener. Will be called on change events with two arguments.
        Event name and list of affected ids.
        '''
        self.listeners.add(listener)
-    
+
    def notify(self, event, ids=[]):
        'Notify all listeners'
        for listener in self.listeners:
@ -567,12 +567,12 @@ class LibraryDatabase2(LibraryDatabase):
            except:
                traceback.print_exc()
                continue
-    
-    def cover(self, index, index_is_id=False, as_file=False, as_image=False, 
+
+    def cover(self, index, index_is_id=False, as_file=False, as_image=False,
              as_path=False):
        '''
        Return the cover image as a bytestring (in JPEG format) or None.
-        
+
        `as_file` : If True return the image as an open file object
        `as_image`: If True return the image as a QImage object
        '''
@ -587,7 +587,7 @@ class LibraryDatabase2(LibraryDatabase):
                img.loadFromData(f.read())
                return img
            return f if as_file else f.read()
-    
+
    def get_metadata(self, idx, index_is_id=False, get_cover=False):
        '''
        Convenience method to return metadata as a L{MetaInformation} object.
@ -612,7 +612,7 @@ class LibraryDatabase2(LibraryDatabase):
        if get_cover:
            mi.cover = self.cover(id, index_is_id=True, as_path=True)
        return mi
-    
+
    def has_book(self, mi):
        title = mi.title
        if title:
@ -620,16 +620,16 @@ class LibraryDatabase2(LibraryDatabase):
                title = title.decode(preferred_encoding, 'replace')
            return bool(self.conn.get('SELECT id FROM books where title=?', (title,), all=False))
        return False
-    
+
    def has_cover(self, index, index_is_id=False):
        id = index if  index_is_id else self.id(index)
        path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')
        return os.access(path, os.R_OK)
-    
+
    def set_cover(self, id, data):
        '''
        Set the cover for this book.
-        
+
        `data`: Can be either a QImage, QPixmap, file object or bytestring
        '''
        path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')
@ -644,13 +644,13 @@ class LibraryDatabase2(LibraryDatabase):
                data = data.read()
            p.loadFromData(data)
            p.save(path)
-    
+
    def all_formats(self):
        formats = self.conn.get('SELECT format from data')
        if not formats:
            return set([])
        return set([f[0] for f in formats])
-    
+
    def formats(self, index, index_is_id=False):
        ''' Return available formats as a comma separated list or None if there are no available formats '''
        id = index if index_is_id else self.id(index)
@ -667,7 +667,7 @@ class LibraryDatabase2(LibraryDatabase):
            if os.access(os.path.join(path, name+_format), os.R_OK|os.W_OK):
                ans.append(format)
        return ','.join(ans)
-                
+
    def has_format(self, index, format, index_is_id=False):
        id = index if index_is_id else self.id(index)
        name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
@ -677,7 +677,7 @@ class LibraryDatabase2(LibraryDatabase):
            path = os.path.join(path, name+format)
            return os.access(path, os.R_OK|os.W_OK)
        return False
-    
+
    def format_abspath(self, index, format, index_is_id=False):
        'Return absolute path to the ebook file of format `format`'
        id = index if index_is_id else self.id(index)
@ -688,13 +688,13 @@ class LibraryDatabase2(LibraryDatabase):
            path = os.path.join(path, name+format)
            if os.access(path, os.R_OK|os.W_OK):
                return path
-    
+
    def format(self, index, format, index_is_id=False, as_file=False, mode='r+b'):
        '''
        Return the ebook format as a bytestring or `None` if the format doesn't exist,
-        or we don't have permission to write to the ebook file. 
-        
-        `as_file`: If True the ebook format is returned as a file object opened in `mode` 
+        or we don't have permission to write to the ebook file.
+
+        `as_file`: If True the ebook format is returned as a file object opened in `mode`
        '''
        path = self.format_abspath(index, format, index_is_id=index_is_id)
        if path is not None:
@ -702,14 +702,14 @@ class LibraryDatabase2(LibraryDatabase):
            return f if as_file else f.read()
        if self.has_format(index, format, index_is_id):
            self.remove_format(id, format, index_is_id=True)
-        
-    def add_format_with_hooks(self, index, format, fpath, index_is_id=False, 
+
+    def add_format_with_hooks(self, index, format, fpath, index_is_id=False,
                              path=None, notify=True):
        npath = self.run_import_plugins(fpath, format)
        format = os.path.splitext(npath)[-1].lower().replace('.', '').upper()
-        return self.add_format(index, format, open(npath, 'rb'), 
+        return self.add_format(index, format, open(npath, 'rb'),
                               index_is_id=index_is_id, path=path, notify=notify)
-    
+
    def add_format(self, index, format, stream, index_is_id=False, path=None, notify=True):
        id = index if index_is_id else self.id(index)
        if path is None:
@ -768,7 +768,7 @@ class LibraryDatabase2(LibraryDatabase):
            self.refresh_ids([id])
            if notify:
                self.notify('metadata', [id])
-    
+
    def clean(self):
        '''
        Remove orphaned entries.
@ -779,13 +779,13 @@ class LibraryDatabase2(LibraryDatabase):
        self.conn.execute(st%dict(ltable='tags', table='tags', ltable_col='tag'))
        self.conn.execute(st%dict(ltable='series', table='series', ltable_col='series'))
        self.conn.commit()
-    
+
    def get_recipes(self):
        return self.conn.get('SELECT id, script FROM feeds')
-    
+
    def get_recipe(self, id):
        return self.conn.get('SELECT script FROM feeds WHERE id=?', (id,), all=False)
-    
+
    def get_categories(self, sort_on_count=False):
        categories = {}
        def get(name, category, field='name'):
@ -807,11 +807,11 @@ class LibraryDatabase2(LibraryDatabase):
                for tag in tags:
                    tag.count = self.conn.get('SELECT COUNT(format) FROM data WHERE format=?', (tag,), all=False)
            tags.sort(reverse=sort_on_count, cmp=(lambda x,y:cmp(x.count,y.count)) if sort_on_count else cmp)
-        for x in (('authors', 'author'), ('tags', 'tag'), ('publishers', 'publisher'), 
+        for x in (('authors', 'author'), ('tags', 'tag'), ('publishers', 'publisher'),
                  ('series', 'series')):
            get(*x)
        get('data', 'format', 'format')
-        
+
        categories['news'] = []
        newspapers = self.conn.get('SELECT name FROM tags WHERE id IN (SELECT DISTINCT tag FROM books_tags_link WHERE book IN (select book from books_tags_link where tag IN (SELECT id FROM tags WHERE name=?)))', (_('News'),))
        if newspapers:
@ -823,10 +823,10 @@ class LibraryDatabase2(LibraryDatabase):
            categories['news'] = list(map(Tag, newspapers))
            for tag in categories['news']:
                tag.count = self.conn.get('SELECT COUNT(id) FROM books_tags_link WHERE tag IN (SELECT DISTINCT id FROM tags WHERE name=?)', (tag,), all=False)
-                
+
        return categories
-        
-    
+
+
    def tags_older_than(self, tag, delta):
        tag = tag.lower().strip()
        now = datetime.now()
@ -836,9 +836,9 @@ class LibraryDatabase2(LibraryDatabase):
                    tags = r[FIELD_MAP['tags']]
                    if tags and tag in tags.lower():
                        yield r[FIELD_MAP['id']]
-                
-            
-    
+
+
+
    def set(self, row, column, val):
        '''
        Convenience method for setting the title, authors, publisher or rating
@ -861,10 +861,10 @@ class LibraryDatabase2(LibraryDatabase):
        self.data.refresh_ids(self.conn, [id])
        self.set_path(id, True)
        self.notify('metadata', [id])
-    
+
    def set_metadata(self, id, mi):
        '''
-        Set metadata for the book `id` from the `MetaInformation` object `mi` 
+        Set metadata for the book `id` from the `MetaInformation` object `mi`
        '''
        if mi.title:
            self.set_title(id, mi.title)
@ -898,7 +898,7 @@ class LibraryDatabase2(LibraryDatabase):
            self.set_timestamp(id, mi.timestamp, notify=False)
        self.set_path(id, True)
        self.notify('metadata', [id])
-        
+
    def set_authors(self, id, authors, notify=True):
        '''
        `authors`: A list of authors.
@ -929,14 +929,14 @@ class LibraryDatabase2(LibraryDatabase):
        self.conn.execute('UPDATE books SET author_sort=? WHERE id=?',
                          (ss, id))
        self.conn.commit()
-        self.data.set(id, FIELD_MAP['authors'], 
+        self.data.set(id, FIELD_MAP['authors'],
                      ','.join([a.replace(',', '|') for a in authors]), 
                      row_is_id=True)
        self.data.set(id, FIELD_MAP['author_sort'], ss, row_is_id=True) 
        self.set_path(id, True)
        if notify:
            self.notify('metadata', [id])
-        
+
    def set_title(self, id, title, notify=True):
        if not title:
            return
@ -949,7 +949,7 @@ class LibraryDatabase2(LibraryDatabase):
        self.conn.commit()
        if notify:
            self.notify('metadata', [id])
-            
+
    def set_timestamp(self, id, dt, notify=True):
        if dt:
            self.conn.execute('UPDATE books SET timestamp=? WHERE id=?', (dt, id))
@ -957,7 +957,7 @@ class LibraryDatabase2(LibraryDatabase):
            self.conn.commit()
            if notify:
                self.notify('metadata', [id])
-    
+
    def set_publisher(self, id, publisher, notify=True):
        self.conn.execute('DELETE FROM books_publishers_link WHERE book=?',(id,))
        self.conn.execute('DELETE FROM publishers WHERE (SELECT COUNT(id) FROM books_publishers_link WHERE publisher=publishers.id) < 1')
@ -974,7 +974,7 @@ class LibraryDatabase2(LibraryDatabase):
            self.data.set(id, FIELD_MAP['publisher'], publisher, row_is_id=True)
            if notify:
                self.notify('metadata', [id])
-    
+
    def set_tags(self, id, tags, append=False, notify=True):
        '''
        @param tags: list of strings
@ -1018,7 +1018,7 @@ class LibraryDatabase2(LibraryDatabase):
        self.data.set(id, FIELD_MAP['tags'], tags, row_is_id=True)
        if notify:
            self.notify('metadata', [id])
-            
+
    def unapply_tags(self, book_id, tags, notify=True):
        for tag in tags:
            id = self.conn.get('SELECT id FROM tags WHERE name=?', (tag,), all=False)
@ -1028,7 +1028,7 @@ class LibraryDatabase2(LibraryDatabase):
        self.data.refresh_ids(self.conn, [book_id])
        if notify:
            self.notify('metadata', [id])
-    
+
    def is_tag_used(self, tag):
        existing_tags = self.all_tags()
        lt = [t.lower() for t in existing_tags]
@ -1037,7 +1037,7 @@ class LibraryDatabase2(LibraryDatabase):
            return True
        except ValueError:
            return False
-        
+
    def delete_tag(self, tag):
        existing_tags = self.all_tags()
        lt = [t.lower() for t in existing_tags]
@ -1052,7 +1052,7 @@ class LibraryDatabase2(LibraryDatabase):
                self.conn.execute('DELETE FROM tags WHERE id=?', (id,))
                self.conn.commit()

-    
+
    def set_series(self, id, series, notify=True):
        self.conn.execute('DELETE FROM books_series_link WHERE book=?',(id,))
        self.conn.execute('DELETE FROM series WHERE (SELECT COUNT(id) FROM books_series_link WHERE series=series.id) < 1')
@ -1075,7 +1075,7 @@ class LibraryDatabase2(LibraryDatabase):
        self.data.set(id, FIELD_MAP['series'], series, row_is_id=True)
        if notify:
            self.notify('metadata', [id])
-            
+
    def set_series_index(self, id, idx, notify=True):
        if idx is None:
            idx = 1
@ -1091,7 +1091,7 @@ class LibraryDatabase2(LibraryDatabase):
        self.data.set(id, FIELD_MAP['series_index'], int(idx), row_is_id=True)
        if notify:
            self.notify('metadata', [id])
-            
+
    def set_rating(self, id, rating, notify=True):
        rating = int(rating)
        self.conn.execute('DELETE FROM books_ratings_link WHERE book=?',(id,))
@ -1102,7 +1102,7 @@ class LibraryDatabase2(LibraryDatabase):
        self.data.set(id, FIELD_MAP['rating'], rating, row_is_id=True)
        if notify:
            self.notify('metadata', [id])
-            
+
    def set_comment(self, id, text, notify=True):
        self.conn.execute('DELETE FROM comments WHERE book=?', (id,))
        self.conn.execute('INSERT INTO comments(book,text) VALUES (?,?)', (id, text))
@ -1110,21 +1110,21 @@ class LibraryDatabase2(LibraryDatabase):
        self.data.set(id, FIELD_MAP['comments'], text, row_is_id=True)
        if notify:
            self.notify('metadata', [id])
-            
+
    def set_author_sort(self, id, sort, notify=True):
        self.conn.execute('UPDATE books SET author_sort=? WHERE id=?', (sort, id))
        self.conn.commit()
        self.data.set(id, FIELD_MAP['author_sort'], sort, row_is_id=True)
        if notify:
            self.notify('metadata', [id])
-            
+
    def set_isbn(self, id, isbn, notify=True):
        self.conn.execute('UPDATE books SET isbn=? WHERE id=?', (isbn, id))
        self.conn.commit()
        self.data.set(id, FIELD_MAP['isbn'], isbn, row_is_id=True)
        if notify:
            self.notify('metadata', [id])
-        
+
    def add_news(self, path, recipe):
        format = os.path.splitext(path)[1][1:].lower()
        stream = path if hasattr(path, 'read') else open(path, 'rb')
@ -1133,21 +1133,21 @@ class LibraryDatabase2(LibraryDatabase):
        stream.seek(0)
        mi.series_index = 1
        mi.tags = [_('News'), recipe.title]
-        obj = self.conn.execute('INSERT INTO books(title, author_sort) VALUES (?, ?)', 
+        obj = self.conn.execute('INSERT INTO books(title, author_sort) VALUES (?, ?)',
                              (mi.title, mi.authors[0]))
        id = obj.lastrowid
        self.data.books_added([id], self.conn)
        self.set_path(id, index_is_id=True)
        self.conn.commit()
        self.set_metadata(id, mi)
-        
+
        self.add_format(id, format, stream, index_is_id=True)
        if not hasattr(path, 'read'):
            stream.close()
        self.conn.commit()
        self.data.refresh_ids(self.conn, [id]) # Needed to update format list and size
        return id
-    
+
    def run_import_plugins(self, path_or_stream, format):
        format = format.lower()
        if hasattr(path_or_stream, 'seek'):
@ -1185,7 +1185,7 @@ class LibraryDatabase2(LibraryDatabase):
                aus = aus.decode(preferred_encoding, 'replace')
            if isinstance(title, str):
                title = title.decode(preferred_encoding)
-            obj = self.conn.execute('INSERT INTO books(title, uri, series_index, author_sort) VALUES (?, ?, ?, ?)', 
+            obj = self.conn.execute('INSERT INTO books(title, uri, series_index, author_sort) VALUES (?, ?, ?, ?)',
                              (title, uri, series_index, aus))
            id = obj.lastrowid
            self.data.books_added([id], self.conn)
@ -1207,7 +1207,7 @@ class LibraryDatabase2(LibraryDatabase):
            uris     = list(duplicate[3] for duplicate in duplicates)
            return (paths, formats, metadata, uris), len(ids)
        return None, len(ids)
-     
+
    def import_book(self, mi, formats, notify=True):
        series_index = 1 if mi.series_index is None else mi.series_index
        if not mi.title:
@ -1234,7 +1234,7 @@ class LibraryDatabase2(LibraryDatabase):
        self.data.refresh_ids(self.conn, [id]) # Needed to update format list and size
        if notify:
            self.notify('add', [id])
-        
+
    def move_library_to(self, newloc, progress=None):
        header = _(u'<p>Copying books to %s<br><center>')%newloc
        books = self.conn.get('SELECT id, path, title FROM books')
@ -1263,7 +1263,7 @@ class LibraryDatabase2(LibraryDatabase):
            old_dirs.add(srcdir)
            if progress is not None:
                progress.setValue(i+1)
-        
+
        dbpath = os.path.join(newloc, os.path.basename(self.dbpath))
        shutil.copyfile(self.dbpath, dbpath)
        opath = self.dbpath
@ -1279,22 +1279,22 @@ class LibraryDatabase2(LibraryDatabase):
        if progress is not None:
            progress.reset()
            progress.hide()
-            
-    
+
+
    def __iter__(self):
        for record in self.data._data:
            if record is not None:
                yield record
-    
+
    def all_ids(self):
        for i in iter(self):
            yield i['id']
-            
+
    def get_data_as_dict(self, prefix=None, authors_as_string=False):
        '''
        Return all metadata stored in the database as a dict. Includes paths to
        the cover and each format.
-        
+
        :param prefix: The prefix for all paths. By default, the prefix is the absolute path
        to the library folder.
        '''
@ -1325,9 +1325,9 @@ class LibraryDatabase2(LibraryDatabase):
                    x['formats'].append(path%fmt.lower())
                    x['fmt_'+fmt.lower()] = path%fmt.lower()
                x['available_formats'] = [i.upper() for i in formats.split(',')]
-            
+
        return data
-    
+
    def migrate_old(self, db, progress):
        header = _(u'<p>Migrating old database to ebook library in %s<br><center>')%self.library_path
        progress.setValue(0)
@ -1338,23 +1338,23 @@ class LibraryDatabase2(LibraryDatabase):
        books = db.conn.get('SELECT id, title, sort, timestamp, uri, series_index, author_sort, isbn FROM books ORDER BY id ASC')
        progress.setAutoReset(False)
        progress.setRange(0, len(books))
-        
+
        for book in books:
            self.conn.execute('INSERT INTO books(id, title, sort, timestamp, uri, series_index, author_sort, isbn) VALUES(?, ?, ?, ?, ?, ?, ?, ?);', book)
-            
+
        tables = '''
-authors  ratings      tags    series    books_tags_link        
+authors  ratings      tags    series    books_tags_link
 comments               publishers
-books_authors_link     conversion_options     
-books_publishers_link                   
-books_ratings_link                        
+books_authors_link     conversion_options
+books_publishers_link
+books_ratings_link
 books_series_link      feeds
 '''.split()
        for table in tables:
-            rows = db.conn.get('SELECT * FROM %s ORDER BY id ASC'%table) 
+            rows = db.conn.get('SELECT * FROM %s ORDER BY id ASC'%table)
            for row in rows:
                self.conn.execute('INSERT INTO %s VALUES(%s)'%(table, ','.join(repeat('?', len(row)))), row)
-        
+
        self.conn.commit()
        self.refresh('timestamp', True)
        for i, book in enumerate(books):
@ -1379,7 +1379,7 @@ books_series_link      feeds
        self.vacuum()
        progress.reset()
        return len(books)
-    
+
    def export_to_dir(self, dir, indices, byauthor=False, single_dir=False,
                      index_is_id=False, callback=None):
        if not os.path.exists(dir):
@ -1425,7 +1425,7 @@ books_series_link      feeds
                opf = OPFCreator(base, mi)
                opf.render(f)
                f.close()
-                
+
                fmts = self.formats(idx, index_is_id=index_is_id)
                if not fmts:
                    fmts = ''
@ -1449,7 +1449,7 @@ books_series_link      feeds
                    if not callback(count, mi.title):
                        return

-    def export_single_format_to_dir(self, dir, indices, format, 
+    def export_single_format_to_dir(self, dir, indices, format,
                                    index_is_id=False, callback=None):
        dir = os.path.abspath(dir)
        if not index_is_id:
@ -1476,7 +1476,7 @@ books_series_link      feeds
            f.write(data)
            f.seek(0)
            try:
-                set_metadata(f, self.get_metadata(id, index_is_id=True, get_cover=True), 
+                set_metadata(f, self.get_metadata(id, index_is_id=True, get_cover=True),
                             stream_type=format.lower())
            except:
                pass
@ -1485,7 +1485,7 @@ books_series_link      feeds
                if not callback(count, title):
                    break
        return failures
-    
+
    def find_books_in_directory(self, dirpath, single_book_per_directory):
        dirpath = os.path.abspath(dirpath)
        if single_book_per_directory:
@ -1514,12 +1514,12 @@ books_series_link      feeds
                ext = ext[1:].lower()
                if ext not in BOOK_EXTENSIONS:
                    continue
-    
+
                key = os.path.splitext(path)[0]
                if not books.has_key(key):
                    books[key] = []
                books[key].append(path)
-            
+
            for formats in books.values():
                yield formats

@ -1543,7 +1543,7 @@ books_series_link      feeds
        formats = self.find_books_in_directory(dirpath, True)
        if not formats:
            return
-        
+
        mi = metadata_from_formats(formats)
        if mi.title is None:
            return
@ -1552,7 +1552,7 @@ books_series_link      feeds
        self.import_book(mi, formats)
        if callable(callback):
            callback(mi.title)
-            
+
    def recursive_import(self, root, single_book_per_directory=True, callback=None):
        root = os.path.abspath(root)
        duplicates  = []
@ -1565,8 +1565,5 @@ books_series_link      feeds
            if callable(callback):
                if callback(''):
                    break
-            
+
        return duplicates
-
-
-        
--- a/src/calibre/library/sqlite.py
+++ b/src/calibre/library/sqlite.py
@ -14,7 +14,7 @@ from Queue import Queue
 from threading import RLock
 from datetime import tzinfo, datetime, timedelta

-from calibre.library import title_sort
+from calibre.ebooks.metadata import title_sort

 global_lock = RLock()

--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -16,66 +16,56 @@ if os.environ.has_key('DESTDIR'):

 entry_points = {
        'console_scripts': [ \
-                             'prs500    = calibre.devices.prs500.cli.main:main',
-                             'lrf-meta  = calibre.ebooks.lrf.meta:main',
-                             'rtf-meta  = calibre.ebooks.metadata.rtf:main',
-                             'pdf-meta  = calibre.ebooks.metadata.pdf:main',
-                             'lit-meta  = calibre.ebooks.metadata.lit:main',
-                             'imp-meta  = calibre.ebooks.metadata.imp:main',
-                             'rb-meta   = calibre.ebooks.metadata.rb:main',
-                             'opf-meta  = calibre.ebooks.metadata.opf2:main',
-                             'odt-meta  = calibre.ebooks.metadata.odt:main',
-                             'epub-meta = calibre.ebooks.metadata.epub:main',
-                             'mobi-meta = calibre.ebooks.metadata.mobi:main',
-                             'txt2lrf   = calibre.ebooks.lrf.txt.convert_from:main',
-                             'html2lrf  = calibre.ebooks.lrf.html.convert_from:main',
-                             'html2oeb  = calibre.ebooks.html:main',
-                             'html2epub = calibre.ebooks.epub.from_html:main',
-                             'odt2oeb   = calibre.ebooks.odt.to_oeb:main',
-                             'markdown-calibre  = calibre.ebooks.markdown.markdown:main',
-                             'lit2lrf   = calibre.ebooks.lrf.lit.convert_from:main',
-                             'epub2lrf  = calibre.ebooks.lrf.epub.convert_from:main',
-                             'rtf2lrf   = calibre.ebooks.lrf.rtf.convert_from:main',
-                             'web2disk  = calibre.web.fetch.simple:main',
-                             'feeds2disk = calibre.web.feeds.main:main',
-                             'calibre-server = calibre.library.server:main',
-                             'feeds2lrf  = calibre.ebooks.lrf.feeds.convert_from:main',
-                             'feeds2epub = calibre.ebooks.epub.from_feeds:main',
-                             'feeds2mobi = calibre.ebooks.mobi.from_feeds:main',
-                             'web2lrf   = calibre.ebooks.lrf.web.convert_from:main',
-                             'pdf2lrf   = calibre.ebooks.lrf.pdf.convert_from:main',
-                             'mobi2lrf  = calibre.ebooks.lrf.mobi.convert_from:main',
-                             'fb22lrf   = calibre.ebooks.lrf.fb2.convert_from:main',
-                             'fb2-meta  = calibre.ebooks.metadata.fb2:main',
-                             'any2lrf   = calibre.ebooks.lrf.any.convert_from:main',
-                             'any2epub  = calibre.ebooks.epub.from_any:main',
-                             'any2lit   = calibre.ebooks.lit.from_any:main',
-                             'any2mobi  = calibre.ebooks.mobi.from_any:main',
-                             'any2pdf  = calibre.ebooks.pdf.from_any:main',
-                             'lrf2lrs   = calibre.ebooks.lrf.lrfparser:main',
-                             'lrs2lrf   = calibre.ebooks.lrf.lrs.convert_from:main',
-                             'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
-                             'isbndb    = calibre.ebooks.metadata.isbndb:main',
-                             'librarything = calibre.ebooks.metadata.library_thing:main',
-                             'mobi2oeb  = calibre.ebooks.mobi.reader:main',
-                             'oeb2mobi  = calibre.ebooks.mobi.writer:main',
-                             'lit2oeb   = calibre.ebooks.lit.reader:main',
-                             'oeb2lit   = calibre.ebooks.lit.writer:main',
-                             'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
-                             'comic2epub = calibre.ebooks.epub.from_comic:main',
-                             'comic2mobi = calibre.ebooks.mobi.from_comic:main',
-			     'comic2pdf  = calibre.ebooks.pdf.from_comic:main',
-                             'calibre-debug      = calibre.debug:main',
-                             'calibredb          = calibre.library.cli:main',
-                             'calibre-fontconfig = calibre.utils.fontconfig:main',
-                             'calibre-parallel   = calibre.parallel:main',
-                             'calibre-customize  = calibre.customize.ui:main',                             
-			     'pdftrim = calibre.ebooks.pdf.pdftrim:main' ,
-                           ],
+             'ebook-device       = calibre.devices.prs500.cli.main:main',
+             'ebook-meta         = calibre.ebooks.metadata.cli:main',
+             'txt2lrf            = calibre.ebooks.lrf.txt.convert_from:main',
+             'html2lrf           = calibre.ebooks.lrf.html.convert_from:main',
+             'html2oeb           = calibre.ebooks.html:main',
+             'html2epub          = calibre.ebooks.epub.from_html:main',
+             'odt2oeb            = calibre.ebooks.odt.to_oeb:main',
+             'markdown-calibre   = calibre.ebooks.markdown.markdown:main',
+             'lit2lrf            = calibre.ebooks.lrf.lit.convert_from:main',
+             'epub2lrf           = calibre.ebooks.lrf.epub.convert_from:main',
+             'rtf2lrf            = calibre.ebooks.lrf.rtf.convert_from:main',
+             'web2disk           = calibre.web.fetch.simple:main',
+             'feeds2disk         = calibre.web.feeds.main:main',
+             'calibre-server     = calibre.library.server:main',
+             'feeds2lrf          = calibre.ebooks.lrf.feeds.convert_from:main',
+             'feeds2epub         = calibre.ebooks.epub.from_feeds:main',
+             'feeds2mobi         = calibre.ebooks.mobi.from_feeds:main',
+             'web2lrf            = calibre.ebooks.lrf.web.convert_from:main',
+             'pdf2lrf            = calibre.ebooks.lrf.pdf.convert_from:main',
+             'mobi2lrf           = calibre.ebooks.lrf.mobi.convert_from:main',
+             'fb22lrf            = calibre.ebooks.lrf.fb2.convert_from:main',
+             'any2lrf            = calibre.ebooks.lrf.any.convert_from:main',
+             'any2epub           = calibre.ebooks.epub.from_any:main',
+             'any2lit            = calibre.ebooks.lit.from_any:main',
+             'any2mobi           = calibre.ebooks.mobi.from_any:main',
+             'lrf2lrs            = calibre.ebooks.lrf.lrfparser:main',
+             'lrs2lrf            = calibre.ebooks.lrf.lrs.convert_from:main',
+             'pdfreflow          = calibre.ebooks.lrf.pdf.reflow:main',
+             'isbndb             = calibre.ebooks.metadata.isbndb:main',
+             'librarything       = calibre.ebooks.metadata.library_thing:main',
+             'mobi2oeb           = calibre.ebooks.mobi.reader:main',
+             'oeb2mobi           = calibre.ebooks.mobi.writer:main',
+             'lit2oeb            = calibre.ebooks.lit.reader:main',
+             'oeb2lit            = calibre.ebooks.lit.writer:main',
+             'comic2lrf          = calibre.ebooks.lrf.comic.convert_from:main',
+             'comic2epub         = calibre.ebooks.epub.from_comic:main',
+             'comic2mobi         = calibre.ebooks.mobi.from_comic:main',
+             'comic2pdf          = calibre.ebooks.pdf.from_comic:main',
+             'calibre-debug      = calibre.debug:main',
+             'calibredb          = calibre.library.cli:main',
+             'calibre-fontconfig = calibre.utils.fontconfig:main',
+             'calibre-parallel   = calibre.parallel:main',
+             'calibre-customize  = calibre.customize.ui:main',
+             'pdftrim            = calibre.ebooks.pdf.pdftrim:main' ,
+             'any2pdf  = calibre.ebooks.pdf.from_any:main',
+        ],
        'gui_scripts'    : [
-                            __appname__+' = calibre.gui2.main:main',
-                            'lrfviewer = calibre.gui2.lrf_renderer.main:main',
-                            'ebook-viewer       = calibre.gui2.viewer.main:main',
+            __appname__+' = calibre.gui2.main:main',
+            'lrfviewer    = calibre.gui2.lrf_renderer.main:main',
+            'ebook-viewer = calibre.gui2.viewer.main:main',
                            ],
      }

@ -177,7 +167,7 @@ def setup_completion(fatal_errors):
        sys.stdout.flush()
        from calibre.ebooks.lrf.html.convert_from import option_parser as htmlop
        from calibre.ebooks.lrf.txt.convert_from import option_parser as txtop
-        from calibre.ebooks.lrf.meta import option_parser as metaop
+        from calibre.ebooks.metadata.cli import option_parser as metaop, filetypes as meta_filetypes
        from calibre.ebooks.lrf.lrfparser import option_parser as lrf2lrsop
        from calibre.gui2.lrf_renderer.main import option_parser as lrfviewerop
        from calibre.ebooks.lrf.pdf.reflow import option_parser as pdfhtmlop
@ -186,7 +176,6 @@ def setup_completion(fatal_errors):
        from calibre.web.feeds.main import option_parser as feeds2disk
        from calibre.web.feeds.recipes import titles as feed_titles
        from calibre.ebooks.lrf.feeds.convert_from import option_parser as feeds2lrf
-        from calibre.ebooks.metadata.epub import option_parser as epub_meta
        from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop
        from calibre.ebooks.epub.from_html import option_parser as html2epub
        from calibre.ebooks.html import option_parser as html2oeb
@ -225,15 +214,7 @@ def setup_completion(fatal_errors):
        f.write(opts_and_exts('any2mobi', any2mobi, any_formats))
        f.write(opts_and_exts('oeb2mobi', oeb2mobi, ['opf']))
        f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf']))
-        f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
-        f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
-        f.write(opts_and_exts('pdf-meta', metaop, ['pdf']))
-        f.write(opts_and_exts('lit-meta', metaop, ['lit']))
-        f.write(opts_and_exts('imp-meta', metaop, ['imp']))
-        f.write(opts_and_exts('rb-meta',  metaop, ['rb']))
-        f.write(opts_and_exts('opf-meta', metaop, ['opf']))
-        f.write(opts_and_exts('odt-meta', metaop, ['odt', 'ods', 'odf', 'odg', 'odp']))
-        f.write(opts_and_exts('epub-meta', epub_meta, ['epub']))
+        f.write(opts_and_exts('ebook-meta', metaop, list(meta_filetypes())))
        f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
        f.write(opts_and_exts('pdfrelow', pdfhtmlop, ['pdf']))
        f.write(opts_and_exts('mobi2oeb', mobioeb, ['mobi', 'prc']))
@ -423,10 +404,8 @@ def install_man_pages(fatal_errors):
    os.environ['PATH'] += ':'+os.path.expanduser('~/bin')
    for src in entry_points['console_scripts']:
        prog = src[:src.index('=')].strip()
-        if prog in ('prs500', 'pdf-meta', 'epub-meta', 'lit-meta',
-                    'markdown-calibre', 'calibre-debug', 'fb2-meta',
-                    'calibre-fontconfig', 'calibre-parallel', 'odt-meta',
-                    'rb-meta', 'imp-meta', 'mobi-meta'):
+        if prog in ('ebook-device', 'markdown-calibre', 
+                    'calibre-fontconfig', 'calibre-parallel'):
            continue

        help2man = ('help2man', prog, '--name', 'part of %s'%__appname__,
--- a/src/calibre/utils/logging.py
+++ b/src/calibre/utils/logging.py
@ -0,0 +1,92 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+'A simplified logging system'
+
+DEBUG = 0  # Severity levels are ordered ints: Log.prints drops any message whose level is below Log.filter_level
+INFO  = 1
+WARN  = 2
+ERROR = 3
+
+import sys, traceback
+from functools import partial
+
+from calibre import prints
+from calibre.utils.terminfo import TerminalController
+
+class ANSIStream:
+    # Log output that brackets every message with a per-level color string and tc.NORMAL.
+    def __init__(self, stream=sys.stdout):  # NOTE: the default is bound once, when the class is defined
+        self.stream = stream
+        tc = TerminalController(stream)
+        self.color = {  # level -> string written before the message (presumably terminal escape codes; confirm in calibre.utils.terminfo)
+                      DEBUG: tc.GREEN,
+                      INFO:'',  # INFO messages get no color prefix
+                      WARN: tc.YELLOW,
+                      ERROR: tc.RED
+                      }
+        self.normal = tc.NORMAL  # written after every message, INFO included
+    # prints(level, ...): write self.color[level], then the message via calibre.prints, then self.normal.
+    def prints(self, level, *args, **kwargs):
+        self.stream.write(self.color[level])
+        kwargs['file'] = self.stream  # always target the wrapped stream, replacing any caller-supplied 'file'
+        prints(*args, **kwargs)
+        self.stream.write(self.normal)
+    # flush(): flush the wrapped stream.
+    def flush(self):
+        self.stream.flush()
+        
+class HTMLStream:
+    # Log output that wraps every message in a <span> whose inline style depends on the level.
+    def __init__(self, stream=sys.stdout):  # NOTE: the default is bound once, when the class is defined
+        self.stream = stream
+        self.color = {  # level -> opening tag written before the message
+                      DEBUG: '<span style="color:green">',
+                      INFO:'<span>',  # unstyled, but still opened so the closing tag below always matches
+                      WARN: '<span style="color:yellow">',
+                      ERROR: '<span style="color:red">'
+                      }
+        self.normal = '</span>'  # closing tag written after every message
+    # prints(level, ...): write the opening tag, the message via calibre.prints, then '</span>'. Nothing in this class escapes HTML in the message.
+    def prints(self, level, *args, **kwargs):
+        self.stream.write(self.color[level])
+        kwargs['file'] = self.stream  # always target the wrapped stream, replacing any caller-supplied 'file'
+        prints(*args, **kwargs)
+        self.stream.write(self.normal)
+    # flush(): flush the wrapped stream.
+    def flush(self):
+        self.stream.flush()
+
+class Log(object):
+    # Minimal logger: forwards each message at or above filter_level to every object in self.outputs.
+    DEBUG = DEBUG
+    INFO  = INFO
+    WARN  = WARN
+    ERROR = ERROR
+    # __init__(level): messages below `level` are dropped by prints(); the only initial output is an ANSIStream().
+    def __init__(self, level=INFO):
+        self.filter_level = level
+        default_output = ANSIStream()
+        self.outputs = [default_output]
+        # Per-level shortcuts: log.debug(...) is log.prints(DEBUG, ...), and so on; warn and warning are the same callable.
+        self.debug = partial(self.prints, DEBUG) 
+        self.info  = partial(self.prints, INFO)
+        self.warn  = self.warning = partial(self.prints, WARN)
+        self.error = partial(self.prints, ERROR) 
+        
+    # prints(level, ...): pass the message to output.prints() of every output unless level < filter_level.
+    def prints(self, level, *args, **kwargs):
+        if level < self.filter_level:
+            return
+        for output in self.outputs:
+            output.prints(level, *args, **kwargs)
+    # exception(...): log the message at ERROR, then the current traceback at DEBUG; optional 'limit' kwarg goes to traceback.format_exc.
+    def exception(self, *args, **kwargs):
+        limit = kwargs.pop('limit', None)
+        self.prints(ERROR, *args, **kwargs)
+        self.prints(DEBUG, traceback.format_exc(limit))  # DEBUG < INFO, so the traceback is dropped at the default filter level
+    # Calling the Log object directly logs at INFO.
+    def __call__(self, *args, **kwargs):
+        self.prints(INFO, *args, **kwargs)
--- a/src/calibre/utils/terminfo.py
+++ b/src/calibre/utils/terminfo.py
@ -33,7 +33,7 @@ class TerminalController:
    
    >>> term = TerminalController()
    >>> if term.CLEAR_SCREEN:
-    ...     print 'This terminal supports clearning the screen.'
+    ...     print 'This terminal supports clearing the screen.'
    
    Finally, if the width and height of the terminal are known, then
    they will be stored in the `COLS` and `LINES` attributes.