diff --git a/setup.py b/setup.py
index b0ff04a983..003067b34f 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 
-import sys, re, os, shutil, cStringIO, tempfile, subprocess, time
+import sys, re, os, subprocess
 sys.path.append('src')
 iswindows = re.search('win(32|64)', sys.platform)
 isosx = 'darwin' in sys.platform
@@ -54,7 +54,7 @@ if __name__ == '__main__':
                         build_osx, upload_installers, upload_user_manual, \
                         upload_to_pypi, stage3, stage2, stage1, upload, \
                         upload_rss
-    
+
     entry_points['console_scripts'].append(
                             'calibre_postinstall = calibre.linux:post_install')
     ext_modules = [
@@ -65,12 +65,15 @@ if __name__ == '__main__':
                                       'src/calibre/utils/lzx/lzc.c',
                                       'src/calibre/utils/lzx/lzxc.c'],
                              include_dirs=['src/calibre/utils/lzx']),
-                   
+
                    Extension('calibre.plugins.msdes',
                              sources=['src/calibre/utils/msdes/msdesmodule.c',
                                       'src/calibre/utils/msdes/des.c'],
                              include_dirs=['src/calibre/utils/msdes']),
-                   
+
+                    Extension('calibre.plugins.cPalmdoc',
+                        sources=['src/calibre/ebooks/mobi/palmdoc.c']),
+
                     PyQtExtension('calibre.plugins.pictureflow',
                                   ['src/calibre/gui2/pictureflow/pictureflow.cpp',
                                    'src/calibre/gui2/pictureflow/pictureflow.h'],
@@ -81,7 +84,7 @@ if __name__ == '__main__':
         ext_modules.append(Extension('calibre.plugins.winutil',
                 sources=['src/calibre/utils/windows/winutil.c'],
                 libraries=['shell32', 'setupapi'],
-                include_dirs=os.environ.get('INCLUDE', 
+                include_dirs=os.environ.get('INCLUDE',
                         'C:/WinDDK/6001.18001/inc/api/;'
                         'C:/WinDDK/6001.18001/inc/crt/').split(';'),
                 extra_compile_args=['/X']
@@ -91,7 +94,7 @@ if __name__ == '__main__':
                 sources=['src/calibre/devices/usbobserver/usbobserver.c'],
                 extra_link_args=['-framework', 'IOKit'])
                            )
-    
+
     if not iswindows:
         plugins = ['plugins/%s.so'%(x.name.rpartition('.')[-1]) for x in ext_modules]
     else:
@@ -99,7 +102,7 @@ if __name__ == '__main__':
                   ['plugins/%s.pyd.manifest'%(x.name.rpartition('.')[-1]) \
                         for x in ext_modules if 'pictureflow' not in x.name]
 
-    
+
     setup(
           name           = APPNAME,
           packages       = find_packages('src'),
@@ -152,9 +155,9 @@ if __name__ == '__main__':
             'Topic :: System :: Hardware :: Hardware Drivers'
             ],
           cmdclass       = {
-                      'build_ext'     : build_ext, 
+                      'build_ext'     : build_ext,
                       'build'         : build,
-                      'build_py'      : build_py, 
+                      'build_py'      : build_py,
                       'pot'           : pot,
                       'manual'        : manual,
                       'resources'     : resources,
diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index 9e18af3cf9..5656079ead 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -2,7 +2,7 @@
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-import sys, os, re, logging, time, subprocess, atexit, mimetypes, \
+import sys, os, re, logging, time, subprocess, mimetypes, \
        __builtin__, warnings
 __builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
 from htmlentitydefs import name2codepoint
@@ -71,7 +71,7 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
     **WARNING:** This function also replaces path separators, so only pass file names
     and not full paths to it.
     *NOTE:* This function always returns byte strings, not unicode objects. The byte strings
-    are encoded in the filesystem encoding of the platform, or UTF-8. 
+    are encoded in the filesystem encoding of the platform, or UTF-8.
     '''
     if isinstance(name, unicode):
         name = name.encode(filesystem_encoding, 'ignore')
@@ -159,7 +159,7 @@ def extract(path, dir):
 
 def get_proxies():
     proxies = {}
-    
+
     for q in ('http', 'ftp'):
         proxy =  os.environ.get(q+'_proxy', None)
         if not proxy: continue
@@ -194,8 +194,8 @@ def get_proxies():
 def browser(honor_time=True, max_time=2, mobile_browser=False):
     '''
     Create a mechanize browser for web scraping. The browser handles cookies,
-    refresh requests and ignores robots.txt. Also uses proxy if avaialable.  
-    
+    refresh requests and ignores robots.txt. Also uses proxy if available.
+
     :param honor_time: If True honors pause time in refresh requests
     :param max_time: Maximum time in seconds to wait during a refresh request
     '''
@@ -232,16 +232,16 @@ def fit_image(width, height, pwidth, pheight):
     return scaled, int(width), int(height)
 
 class CurrentDir(object):
-    
+
     def __init__(self, path):
         self.path = path
         self.cwd = None
-        
+
     def __enter__(self, *args):
         self.cwd = os.getcwd()
         os.chdir(self.path)
         return self.cwd
-    
+
     def __exit__(self, *args):
         os.chdir(self.cwd)
 
diff --git a/src/calibre/constants.py b/src/calibre/constants.py
index 186eb37e34..ff641cfbeb 100644
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@@ -53,7 +53,7 @@ if plugins is None:
             plugin_path = getattr(pkg_resources, 'resource_filename')('calibre', 'plugins')
             sys.path.insert(0, plugin_path)
 
-        for plugin in ['pictureflow', 'lzx', 'msdes'] + \
+        for plugin in ['pictureflow', 'lzx', 'msdes', 'cPalmdoc'] + \
                     (['winutil'] if iswindows else []) + \
                     (['usbobserver'] if isosx else []):
             try:
diff --git a/src/calibre/ebooks/mobi/palmdoc.c b/src/calibre/ebooks/mobi/palmdoc.c
new file mode 100644
index 0000000000..29e9579140
--- /dev/null
+++ b/src/calibre/ebooks/mobi/palmdoc.c
@@ -0,0 +1,234 @@
+/*
+:mod:`cPalmdoc` -- Palmdoc compression/decompression
+=====================================================
+
+.. module:: cPalmdoc
+    :platform: All
+    :synopsis: Compression and decompression of Palmdoc implemented in C for speed
+
+.. moduleauthor:: Kovid Goyal <kovid@kovidgoyal.net> Copyright 2009
+
+*/
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <stdio.h>
+
+/* An unsigned byte (0-255), the unit the Palmdoc format works on */
+typedef unsigned char Byte;
+
+/* A read only view of a run of bytes */
+typedef struct {
+    const Byte *data;
+    Py_ssize_t len;
+} buffer;
+
+#ifdef	bool
+#undef	bool
+#endif
+#define	bool		int
+
+#ifdef	false
+#undef	false
+#endif
+#define	false		0
+
+#ifdef	true
+#undef	true
+#endif
+#define	true		1
+
+/* Map a value in the range 0-255 to the char with the same bit pattern */
+#define CHAR(x) (( (x) > 127 ) ? (x)-256 : (x))
+
+/*
+ * decompress(bytestring) -> decompressed bytestring
+ *
+ * The first byte c of every token selects its type:
+ *   0x00, 0x09-0x7F  the byte c itself
+ *   0x01-0x08        the next c bytes, copied verbatim
+ *   0x80-0xBF        two byte repeat sequence: 11 bit distance back into the
+ *                    output followed by 3 bits holding (length - 3)
+ *   0xC0-0xFF        a space followed by the byte c ^ 0x80
+ *
+ * A truncated verbatim run yields the bytes that are available. A truncated
+ * repeat sequence, or one whose distance points outside the output written
+ * so far, raises ValueError instead of accessing memory outside the buffers.
+ */
+static PyObject *
+cpalmdoc_decompress(PyObject *self, PyObject *args) {
+    const char *_input = NULL; Py_ssize_t input_len = 0;
+    const Byte *input; char *output;
+    Py_ssize_t i = 0, o = 0, di, n;
+    unsigned int c;
+    PyObject *ans;
+    if (!PyArg_ParseTuple(args, "t#", &_input, &input_len))
+        return NULL;
+    // View the input as unsigned bytes, no copy is needed
+    input = (const Byte *)_input;
+    // No token expands more than five fold (a two byte repeat sequence yields
+    // at most ten bytes), so a buffer of this size cannot overflow
+    if (input_len > (PY_SSIZE_T_MAX - 1) / 5) return PyErr_NoMemory();
+    output = (char *)PyMem_Malloc(5*input_len + 1);
+    if (output == NULL) return PyErr_NoMemory();
+
+    while (i < input_len) {
+        c = input[i++];
+        if (c >= 1 && c <= 8) { // copy 'c' bytes
+            while (c-- && i < input_len) output[o++] = _input[i++];
+        }
+        else if (c <= 0x7F)  // 0, 09-7F = self
+            output[o++] = (char)c;
+
+        else if (c >= 0xC0) { // space + ASCII char
+            output[o++] = ' ';
+            output[o++] = (char)(c ^ 0x80);
+        }
+        else { // 80-BF repeat sequences
+            if (i >= input_len) goto corrupt;
+            c = (c << 8) + input[i++];
+            di = (c & 0x3FFF) >> 3;
+            // The distance must point into the output written so far
+            if (di < 1 || di > o) goto corrupt;
+            for ( n = (c & 7) + 3; n--; ++o )
+                output[o] = output[o - di];
+        }
+    }
+    ans = Py_BuildValue("s#", output, o);
+    PyMem_Free(output);
+    return ans;
+
+corrupt:
+    PyMem_Free(output);
+    PyErr_SetString(PyExc_ValueError,
+            "Corrupt Palmdoc data: invalid repeat sequence");
+    return NULL;
+}
+
+/* Return true if the first len bytes of a and b are equal */
+static bool
+cpalmdoc_memcmp(const Byte *a, const Byte *b, Py_ssize_t len) {
+    Py_ssize_t i;
+    for (i = 0; i < len; i++) if (a[i] != b[i]) return false;
+    return true;
+}
+
+/*
+ * Search backwards for an earlier copy of the chunk_length bytes that start at
+ * pos, considering only copies that end at or before pos. Returns the offset
+ * of the closest copy, or pos if there is none.
+ */
+static Py_ssize_t
+cpalmdoc_rfind(const Byte *data, Py_ssize_t pos, Py_ssize_t chunk_length) {
+    Py_ssize_t i;
+    for (i = pos - chunk_length; i > -1; i--)
+        if (cpalmdoc_memcmp(data+i, data+pos, chunk_length)) return i;
+    return pos;
+}
+
+/*
+ * Palmdoc compress the bytes in b into output and return the number of bytes
+ * written. output must have room for 2 * b->len bytes: the only token that
+ * is larger than the data it encodes is a run of n (1 to 8) binary bytes,
+ * which is written as n + 1 bytes.
+ */
+static Py_ssize_t
+cpalmdoc_do_compress(buffer *b, char *output) {
+    Py_ssize_t i = 0, j, chunk_len, dist;
+    unsigned compound;
+    Byte c, n;
+    bool found;
+    char *head = output;
+    // A run of binary bytes is at most 8 long, so it fits on the stack
+    Byte temp[8]; Py_ssize_t temp_len;
+    while (i < b->len) {
+        c = b->data[i];
+        //do repeats
+        if ( i > 10 && (b->len - i) > 10) {
+            found = false;
+            for (chunk_len = 10; chunk_len > 2; chunk_len--) {
+                j = cpalmdoc_rfind(b->data, i, chunk_len);
+                dist = i - j;
+                if (j < i && dist <= 2047) {
+                    found = true;
+                    compound = (unsigned)((dist << 3) + chunk_len-3);
+                    *(output++) = CHAR(0x80 + (compound >> 8 ));
+                    *(output++) = CHAR(compound & 0xFF);
+                    i += chunk_len;
+                    break;
+                }
+            }
+            if (found) continue;
+        }
+
+        //write single character
+        i++;
+        if (c == 32 && i < b->len) {
+            n = b->data[i];
+            if ( n >= 0x40 && n <= 0x7F) {
+                *(output++) = CHAR(n^0x80); i++; continue;
+            }
+        }
+        if (c == 0 || (c > 8 && c < 0x80))
+            *(output++) = CHAR(c);
+        else { // Write binary data
+            j = i;
+            temp[0] = c; temp_len = 1;
+            while (j < b->len && temp_len < 8) {
+                c = b->data[j];
+                if (c == 0 || (c > 8 && c < 0x80)) break;
+                temp[temp_len++] = c; j++;
+            }
+            i += temp_len - 1;
+            *(output++) = (char)temp_len;
+            for (j=0; j < temp_len; j++) *(output++) = CHAR(temp[j]);
+        }
+    }
+    return output - head;
+}
+
+/* compress(bytestring) -> compressed bytestring */
+static PyObject *
+cpalmdoc_compress(PyObject *self, PyObject *args) {
+    const char *_input = NULL; Py_ssize_t input_len = 0;
+    Py_ssize_t j;
+    buffer b;
+    char *output;
+    PyObject *ans;
+    if (!PyArg_ParseTuple(args, "t#", &_input, &input_len))
+        return NULL;
+    // View the input as unsigned bytes, no copy is needed
+    b.data = (const Byte *)_input;
+    b.len = input_len;
+    // See cpalmdoc_do_compress for why twice the input size is enough
+    if (input_len > (PY_SSIZE_T_MAX - 1) / 2) return PyErr_NoMemory();
+    output = (char *)PyMem_Malloc(2*input_len + 1);
+    if (output == NULL) return PyErr_NoMemory();
+    j = cpalmdoc_do_compress(&b, output);
+    ans = Py_BuildValue("s#", output, j);
+    PyMem_Free(output);
+    return ans;
+}
+
+static PyMethodDef cPalmdocMethods[] = {
+    {"decompress", cpalmdoc_decompress, METH_VARARGS,
+    "decompress(bytestring) -> decompressed bytestring\n\n"
+    		"Decompress a palmdoc compressed byte string. "
+    },
+
+    {"compress", cpalmdoc_compress, METH_VARARGS,
+    "compress(bytestring) -> compressed bytestring\n\n"
+    		"Palmdoc compress a byte string. "
+    },
+    {NULL, NULL, 0, NULL}
+};
+
+PyMODINIT_FUNC
+initcPalmdoc(void) {
+    PyObject *m;
+    m = Py_InitModule3("cPalmdoc", cPalmdocMethods,
+    "Compress and decompress palmdoc strings."
+    );
+    if (m == NULL) return;
+}
+
diff --git a/src/calibre/ebooks/mobi/palmdoc.py b/src/calibre/ebooks/mobi/palmdoc.py
index eedab1c88f..90dabcb5a8 100644
--- a/src/calibre/ebooks/mobi/palmdoc.py
+++ b/src/calibre/ebooks/mobi/palmdoc.py
@@ -2,41 +2,46 @@
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 
 __license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
-    'and Marshall T. Vandegrift <llasram@gmail.com>'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 
 from cStringIO import StringIO
 from struct import pack
 
-COUNT_BITS = 3
+from calibre.constants import plugins
+cPalmdoc = plugins['cPalmdoc'][0]
+if not cPalmdoc:
+    raise RuntimeError(('Failed to load required cPalmdoc module: '
+            '%s')%plugins['cPalmdoc'][1])
 
 def decompress_doc(data):
-    buffer = [ord(i) for i in data]
-    res = []
-    i = 0
-    while i < len(buffer):
-        c = buffer[i]
-        i += 1
-        if c >= 1 and c <= 8:
-            res.extend(buffer[i:i+c])
-            i += c
-        elif c <= 0x7f:
-            res.append(c)
-        elif c >= 0xc0:
-            res.extend( (ord(' '), c^0x80) )
-        else:
-            c = (c << 8) + buffer[i]
-            i += 1
-            di = (c & 0x3fff) >> COUNT_BITS
-            j = len(res)
-            num = (c & ((1 << COUNT_BITS) - 1)) + 3
-
-            for k in range( num ):
-                res.append(res[j - di+k])
-
-    return ''.join([chr(i) for i in res])
+    return cPalmdoc.decompress(data)
 
 def compress_doc(data):
+    return cPalmdoc.compress(data)
+
+def test():
+    # Check the C compressor against py_compress_doc, then round trip the result
+    samples = [
+            'abc\x03\x04\x05\x06ms', # Test binary writing
+            'a b c \xfed ', # Test encoding of spaces
+            '0123456789axyz2bxyz2cdfgfo9iuyerh',
+            '0123456789asd0123456789asd|yyzzxxffhhjjkk',
+            ('ciewacnaq eiu743 r787q 0w%  ; sa fd\xef\ffdxosac wocjp acoiecowei '
+            'owaic jociowapjcivcjpoivjporeivjpoavca; p9aw8743y6r74%$^$^%8 ')
+            ]
+    for raw in samples:
+        print 'Test:', repr(raw)
+        print '\tTesting compression...'
+        expected, actual = py_compress_doc(raw), compress_doc(raw)
+        print '\t\tgood:',  repr(expected)
+        print '\t\tx   :',  repr(actual)
+        assert actual == expected
+        print '\tTesting decompression...'
+        print '\t\t', repr(decompress_doc(actual))
+        assert decompress_doc(actual) == raw
+        print
+
+def py_compress_doc(data):
     out = StringIO()
     i = 0
     ldata = len(data)
@@ -85,4 +90,4 @@ def compress_doc(data):
             out.write(''.join(binseq))
             i += len(binseq) - 1
     return out.getvalue()
-        
+
diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py
index b68263ab28..38de3476d1 100644
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -226,7 +226,7 @@ class MobiReader(object):
                     page-break-after: always; margin: 0; display: block
                 }
                 ''')
-        self.tag_css_rules = []
+        self.tag_css_rules = {}
 
         if hasattr(filename_or_stream, 'read'):
             stream = filename_or_stream
@@ -328,10 +328,10 @@ class MobiReader(object):
 
         with open('styles.css', 'wb') as s:
             s.write(self.base_css_rules+'\n\n')
-            for rule in self.tag_css_rules:
+            for cls, rule in self.tag_css_rules.items():
                 if isinstance(rule, unicode):
                     rule = rule.encode('utf-8')
-                s.write(rule+'\n\n')
+                s.write('.%s { %s }\n\n'%(cls, rule))
 
 
         if self.book_header.exth is not None or self.embedded_mi is not None:
@@ -389,6 +389,7 @@ class MobiReader(object):
                     'xx-large' : '6',
                     }
         mobi_version = self.book_header.mobi_version
+        style_map = {}
         for i, tag in enumerate(root.iter(etree.Element)):
             if tag.tag in ('country-region', 'place', 'placetype', 'placename',
                            'state', 'city', 'street', 'address', 'content'):
@@ -455,9 +456,18 @@ class MobiReader(object):
                 except ValueError:
                     pass
             if styles:
-                attrib['id'] = attrib.get('id', 'calibre_mr_gid%d'%i)
-                self.tag_css_rules.append('#%s {%s}'%(attrib['id'],
-                                                      '; '.join(styles)))
+                ncls = None  # name of the class that carries this exact rule
+                rule = '; '.join(styles)
+                for sel, srule in self.tag_css_rules.items():
+                    if srule == rule:  # reuse the class of an identical rule
+                        ncls = sel
+                        break
+                if ncls is None:
+                    ncls = 'calibre_%d'%i
+                    self.tag_css_rules[ncls] = rule
+                cls = attrib.get('class', '')
+                cls = cls + (' ' if cls else '') + ncls
+                attrib['class'] = cls
 
     def create_opf(self, htmlfile, guide=None, root=None):
         mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index 81120aaf2e..783f09e5cc 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -726,6 +726,7 @@ class Manifest(object):
                 % (self.id, self.href, self.media_type)
 
         def _parse_xhtml(self, data):
+            self.oeb.log.debug('Parsing', self.href, '...')
             # Convert to Unicode and normalize line endings
             data = self.oeb.decode(data)
             data = self.oeb.html_preprocessor(data)
@@ -804,6 +805,7 @@ class Manifest(object):
             return data
 
         def _parse_css(self, data):
+            self.oeb.log.debug('Parsing', self.href, '...')
             data = self.oeb.decode(data)
             data = self.oeb.css_preprocessor(data)
             data = XHTML_CSS_NAMESPACE + data
diff --git a/src/calibre/ebooks/oeb/transforms/guide.py b/src/calibre/ebooks/oeb/transforms/guide.py
index 00830b1a8c..dc7123446b 100644
--- a/src/calibre/ebooks/oeb/transforms/guide.py
+++ b/src/calibre/ebooks/oeb/transforms/guide.py
@@ -41,10 +41,12 @@ class Clean(object):
 
         for x in list(self.oeb.guide):
             href = urldefrag(self.oeb.guide[x].href)[0]
-            if x.lower() != ('cover', 'titlepage'):
+            if x.lower() not in ('cover', 'titlepage'):
                 try:
                     if href not in protected_hrefs:
-                        self.oeb.manifest.remove(self.oeb.manifest.hrefs[href])
+                        item = self.oeb.manifest.hrefs[href]
+                        if item not in self.oeb.spine:  # spine items are kept
+                            self.oeb.manifest.remove(item)
                 except KeyError:
                     pass
                 self.oeb.guide.remove(x)
diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py
index d3505a5fd9..21d71da5bb 100644
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@@ -44,14 +44,14 @@ class Split(object):
         self.split_on_page_breaks = split_on_page_breaks
         self.page_breaks_xpath = page_breaks_xpath
         self.max_flow_size = max_flow_size
+        self.page_break_selectors = None
         if self.page_breaks_xpath is not None:
-            self.page_breaks_xpath = XPath(self.page_breaks_xpath)
+            self.page_break_selectors = [(XPath(self.page_breaks_xpath), False)]
 
     def __call__(self, oeb, context):
         self.oeb = oeb
         self.log = oeb.log
         self.map = {}
-        self.page_break_selectors = None
         for item in list(self.oeb.manifest.items):
             if item.spine_position is not None and etree.iselement(item.data):
                 self.split_item(item)
@@ -60,10 +60,7 @@ class Split(object):
 
     def split_item(self, item):
         if self.split_on_page_breaks:
-            if self.page_breaks_xpath is None:
-                page_breaks, page_break_ids = self.find_page_breaks(item)
-            else:
-                page_breaks, page_break_ids = self.page_breaks_xpath(item.data)
+            page_breaks, page_break_ids = self.find_page_breaks(item)
 
         splitter = FlowSplitter(item, page_breaks, page_break_ids,
                 self.max_flow_size, self.oeb)