diff --git a/src/calibre/ebooks/epub/split.py b/src/calibre/ebooks/epub/split.py
index 33c6360495..38f51ae3be 100644
--- a/src/calibre/ebooks/epub/split.py
+++ b/src/calibre/ebooks/epub/split.py
@@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
 Split the flows in an epub file to conform to size limitations.
 '''
 
-import os, math, copy, logging, functools
+import os, math, copy, logging, functools, collections
 
 from lxml.etree import XPath as _XPath
 from lxml import etree, html
@@ -234,7 +234,7 @@ class Splitter(LoggingInterface):
         all anchors in the original tree. Internal links are re-directed. The
         original file is deleted and the split files are saved.
         '''
-        self.anchor_map = {None:self.base%0}
+        self.anchor_map = collections.defaultdict(lambda :self.base%0)
         self.files = []
         
         for i, tree in enumerate(self.trees):
diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py
index 147e4a8a79..1433976113 100644
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@@ -252,15 +252,7 @@ def opf_traverse(opf_reader, verbose=0, encoding=None):
 
 class PreProcessor(object):
     PREPROCESS = []
-    # Fix Baen markup
-    BAEN = [ 
-                     (re.compile(r'page-break-before:\s*\w+([\s;\}])', re.IGNORECASE), 
-                      lambda match: match.group(1)),
-                     (re.compile(r'<p>\s*(<a id.*?>\s*</a>)\s*</p>', re.IGNORECASE), 
-                      lambda match: match.group(1)),
-                     (re.compile(r'<\s*a\s+id="p[0-9]+"\s+name="p[0-9]+"\s*>\s*</a>', re.IGNORECASE), 
-                      lambda match: ''),
-                     ]
+                     
     # Fix pdftohtml markup
     PDFTOHTML  = [
                   # Remove <hr> tags
@@ -275,6 +267,9 @@ class PreProcessor(object):
                   # Remove hyphenation
                   (re.compile(r'-\n\r?'), lambda match: ''),
                   
+                  # Remove gray background by stripping all attributes from the <BODY ...> tag
+                  (re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>')
+                  
                   ]
     
     # Fix Book Designer markup
@@ -305,7 +300,7 @@ class PreProcessor(object):
                           
     def preprocess(self, html):
         if self.is_baen(html):
-            rules = self.BAEN
+            rules = []
         elif self.is_book_designer(html):
             rules = self.BOOK_DESIGNER
         elif self.is_pdftohtml(html):