diff --git a/resources/recipes/ajc.recipe b/resources/recipes/ajc.recipe index ccd0efebdd..ea989b4b4c 100644 --- a/resources/recipes/ajc.recipe +++ b/resources/recipes/ajc.recipe @@ -1,6 +1,6 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__author__ = 'Tony Stegall' +__author__ = 'Tony Stegall' __copyright__ = '2010, Tony Stegall or Tonythebookworm on mobiread.com' __version__ = '1.03' __date__ = '27, September 2010' @@ -9,6 +9,8 @@ __docformat__ = 'restructuredtext en' import datetime +from calibre.web.feeds.news import BasicNewsRecipe + class AdvancedUserRecipe1282101454(BasicNewsRecipe): now = datetime.datetime.now() title = 'The AJC' @@ -20,39 +22,39 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): oldest_article = 1 max_articles_per_feed = 100 no_stylesheets = True - + masthead_url = 'http://gawand.org/wp-content/uploads/2010/06/ajc-logo.gif' extra_css = ''' h1.articleHeadline{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} h2.articleSubheadline{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} - + p.byline{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;} p.organization{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;} - - + + p{font-family:Helvetica,Arial,sans-serif;font-size:small;} ''' - - + + keep_only_tags = [ dict(name='div', attrs={'class':['cxArticleHeader']}) ,dict(attrs={'id':['cxArticleText']}) ] - - + + remove_tags = [ dict(name='div' , attrs={'class':'cxArticleList' }) ,dict(name='div' , attrs={'class':'cxFeedTease' }) ,dict(name='div' , attrs={'class':'cxElementEnlarge' }) ,dict(name='div' , attrs={'id':'cxArticleTools' }) ] - - - + + + feeds = [ ('Breaking News', 'http://www.ajc.com/genericList-rss.do?source=61499'), # ------------------------------------------------------------------- - # Here are the different area feeds. Choose which ever one you wish to + # Here are the different area feeds. Choose which ever one you wish to # read by simply removing the pound sign from it. I currently have it # set to only get the Cobb area # -------------------------------------------------------------------- @@ -70,7 +72,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): ('Opinions', 'http://www.ajc.com/section-rss.do?source=opinion'), ('Ga Politics', 'http://www.ajc.com/section-rss.do?source=georgia-politics-elections'), # ------------------------------------------------------------------------ - # Here are the different sports feeds. I only follow the Falcons, and Highschool + # Here are the different sports feeds. 
I only follow the Falcons, and Highschool # but again # You can enable which ever team you like by removing the pound sign # ------------------------------------------------------------------------ @@ -85,25 +87,25 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): ('Music', 'http://www.accessatlanta.com/section-rss.do?source=music'), ] - + def postprocess_html(self, soup, first): for credit_tag in soup.findAll('span', attrs={'class':['imageCredit rightFloat']}): credit_tag.extract() - + return soup - + #def print_version(self, url): # return url.partition('?')[0] +'?printArticle=y' - - - - - - + + + + + + diff --git a/resources/recipes/boortz.recipe b/resources/recipes/boortz.recipe index dfb624c4bc..b281798ac8 100644 --- a/resources/recipes/boortz.recipe +++ b/resources/recipes/boortz.recipe @@ -1,6 +1,6 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__author__ = 'Tony Stegall' +__author__ = 'Tony Stegall' __copyright__ = '2010, Tony Stegall or Tonythebookworm on mobiread.com' __version__ = '1.04' __date__ = '27, September 2010' @@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en' from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup, re + class AdvancedUserRecipe1282101454(BasicNewsRecipe): title = 'Nealz Nuze' language = 'en' @@ -18,7 +18,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): category = 'news, politics, USA, talkshow' oldest_article = 1 max_articles_per_feed = 100 - + no_stylesheets = True remove_javascript = True use_embedded_content = True @@ -26,5 +26,5 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): conversion_options = {'linearize_tables' : True} feeds = [ ('NUZE', 'http://boortz.com/nealz_nuze_rss/rss.xml') - + ] diff --git a/resources/recipes/popscience.recipe b/resources/recipes/popscience.recipe index 1527a1bb71..5f66d048a6 100644 --- a/resources/recipes/popscience.recipe +++ b/resources/recipes/popscience.recipe @@ -1,5 +1,5 @@ from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import re +import re class AdvancedUserRecipe1282101454(BasicNewsRecipe): title = 'Popular Science' diff --git a/resources/recipes/telegraph_uk.recipe b/resources/recipes/telegraph_uk.recipe index 2c261987b2..f79f0fa50c 100644 --- a/resources/recipes/telegraph_uk.recipe +++ b/resources/recipes/telegraph_uk.recipe @@ -1,6 +1,5 @@ -#!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' ''' telegraph.co.uk ''' @@ -8,14 +7,16 @@ telegraph.co.uk from calibre.web.feeds.news import BasicNewsRecipe class TelegraphUK(BasicNewsRecipe): - title = u'Telegraph.co.uk' + title = 'Telegraph.co.uk' __author__ = 'Darko Miletic and Sujata Raman' description = 'News from United Kingdom' - oldest_article = 7 + oldest_article = 2 + category = 'news, politics, UK' + publisher = 'Telegraph Media Group ltd.' 
max_articles_per_feed = 100 no_stylesheets = True - language = 'en' - + language = 'en_GB' + remove_empty_feeds = True use_embedded_content = False extra_css = ''' @@ -27,13 +28,20 @@ class TelegraphUK(BasicNewsRecipe): .imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;} ''' + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + keep_only_tags = [ - dict(name='div', attrs={'class':'storyHead'}) - ,dict(name='div', attrs={'class':'story' }) - #,dict(name='div', attrs={'class':['slideshowHD gutterUnder',"twoThirds gutter","caption" ] }) + dict(name='div', attrs={'class':['storyHead','byline']}) + ,dict(name='div', attrs={'id':'mainBodyArea' }) ] - remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder",'ssImgHide','imageExtras','ssImg hide']}) - #,dict(name='div', attrs={'class':['toolshideoneQuarter']}) + remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder",'ssImgHide','imageExtras','ssImg hide','related_links_video']}) + ,dict(name='ul' , attrs={'class':['shareThis shareBottom']}) ,dict(name='span', attrs={'class':['num','placeComment']}) ] @@ -51,24 +59,7 @@ class TelegraphUK(BasicNewsRecipe): ] def get_article_url(self, article): - - url = article.get('guid', None) - + url = article.get('link', None) if 'picture-galleries' in url or 'pictures' in url or 'picturegalleries' in url : url = None - return url - - - def postprocess_html(self,soup,first): - - for bylineTag in soup.findAll(name='div', attrs={'class':'byline'}): - for pTag in bylineTag.findAll(name='p'): - if getattr(pTag.contents[0],"Comments",True): - pTag.extract() - return soup - - - - - diff --git a/resources/recipes/twtfb.recipe b/resources/recipes/twtfb.recipe new file mode 100644 index 0000000000..bb2bfe2348 --- /dev/null +++ b/resources/recipes/twtfb.recipe @@ -0,0 +1,40 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Darko Miletic ' +''' +www.thewaythefutureblogs.com +Frederik Pohl's Blog +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class TheWayTheFutureBlogs(BasicNewsRecipe): + title = 'The Way the Future Blogs' + __author__ = 'Darko Miletic' + description = "Frederik Pohl's blog" + publisher = 'Frederik Pohl' + category = 'news, SF, books' + oldest_article = 30 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'utf8' + use_embedded_content = False + language = 'en' + remove_empty_feeds = True + extra_css = ' body{font-family: Georgia,serif } ' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + remove_tags =[dict(name=['meta','object','embed','iframe','base','link'])] + keep_only_tags=[dict(attrs={'class':['post','commentlist']})] + remove_attributes=['width','height','lang','border'] + + feeds = [(u'Posts', u'http://www.thewaythefutureblogs.com/feed/')] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index cf6995d3bb..d4d4ee5d4e 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -446,7 +446,7 @@ from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \ BOOQ, ELONEX, POCKETBOOK301, MENTOR from calibre.devices.iliad.driver import ILIAD from calibre.devices.irexdr.driver import 
IREXDR1000, IREXDR800 -from calibre.devices.jetbook.driver import JETBOOK, MIBUK +from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX from calibre.devices.nook.driver import NOOK from calibre.devices.prs505.driver import PRS505 @@ -468,14 +468,14 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \ LibraryThing from calibre.ebooks.metadata.douban import DoubanBooks from calibre.ebooks.metadata.covers import OpenLibraryCovers, \ - LibraryThingCovers + LibraryThingCovers, DoubanCovers from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX from calibre.ebooks.epub.fix.unmanifested import Unmanifested from calibre.ebooks.epub.fix.epubcheck import Epubcheck plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested, - Epubcheck, OpenLibraryCovers, LibraryThingCovers] + Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers] plugins += [ ComicInput, EPUBInput, @@ -520,6 +520,7 @@ plugins += [ IREXDR1000, IREXDR800, JETBOOK, + JETBOOK_MINI, MIBUK, SHINEBOOK, POCKETBOOK360, diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index 2b5eb5011e..0310f09242 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -1,3 +1,4 @@ +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai from __future__ import with_statement __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' @@ -251,6 +252,9 @@ class OutputProfile(Plugin): #: The character used to represent a star in ratings ratings_char = u'*' + #: Unsupported unicode characters to be replaced during preprocessing + unsupported_unicode_chars = [] + @classmethod def tags_to_string(cls, tags): return escape(', '.join(tags)) @@ -422,6 +426,8 @@ class SonyReaderOutput(OutputProfile): dpi = 168.451 fbase = 12 fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24] + unsupported_unicode_chars = [u'\u201f', u'\u201b'] + class KoboReaderOutput(OutputProfile): diff --git a/src/calibre/customize/ui.py b/src/calibre/customize/ui.py index 231cc0e225..844269e453 100644 --- a/src/calibre/customize/ui.py +++ b/src/calibre/customize/ui.py @@ -120,7 +120,7 @@ def enable_plugin(plugin_or_name): config['enabled_plugins'] = ep default_disabled_plugins = set([ - 'Douban Books', + 'Douban Books', 'Douban.com covers', ]) def is_disabled(plugin): diff --git a/src/calibre/devices/__init__.py b/src/calibre/devices/__init__.py index 956d18e903..1918a36cc8 100644 --- a/src/calibre/devices/__init__.py +++ b/src/calibre/devices/__init__.py @@ -56,6 +56,7 @@ def get_connected_device(): return dev def debug(ioreg_to_tmp=False, buf=None): + import textwrap from calibre.customize.ui import device_plugins from calibre.devices.scanner import DeviceScanner, win_pnp_drives from calibre.constants import iswindows, isosx, __version__ @@ -95,13 +96,19 @@ def debug(ioreg_to_tmp=False, buf=None): ioreg += 'Output from osx_get_usb_drives:\n'+drives+'\n\n' ioreg += Device.run_ioreg() connected_devices = [] - for dev in sorted(device_plugins(), cmp=lambda - x,y:cmp(x.__class__.__name__, y.__class__.__name__)): - out('Looking for', dev.__class__.__name__) + devplugins = list(sorted(device_plugins(), cmp=lambda + x,y:cmp(x.__class__.__name__, y.__class__.__name__))) + out('Available plugins:', textwrap.fill(' '.join([x.__class__.__name__ for x in + devplugins]))) + out(' ') + out('Looking for devices...') + for dev in devplugins: connected, 
det = s.is_device_connected(dev, debug=True) if connected: + out('\t\tDetected possible device', dev.__class__.__name__) connected_devices.append((dev, det)) + out(' ') errors = {} success = False out('Devices possibly connected:', end=' ') diff --git a/src/calibre/devices/jetbook/driver.py b/src/calibre/devices/jetbook/driver.py index 6ee1c07464..f108de3347 100644 --- a/src/calibre/devices/jetbook/driver.py +++ b/src/calibre/devices/jetbook/driver.py @@ -99,4 +99,30 @@ class MIBUK(USBMS): VENDOR_NAME = 'LINUX' WINDOWS_MAIN_MEM = 'WOLDERMIBUK' +class JETBOOK_MINI(USBMS): + + ''' + ['0x4b8', + '0x507', + '0x100', + 'ECTACO', + 'ECTACO ATA/ATAPI Bridge (Bulk-Only)', + 'Rev.0.20'] + ''' + FORMATS = ['fb2', 'txt'] + + gui_name = 'JetBook Mini' + name = 'JetBook Mini Device Interface' + description = _('Communicate with the JetBook Mini reader.') + author = 'Kovid Goyal' + + VENDOR_ID = [0x4b8] + PRODUCT_ID = [0x507] + BCD = [0x100] + VENDOR_NAME = 'ECTACO' + WINDOWS_MAIN_MEM = '' # Matches PROD_ + MAIN_MEMORY_VOLUME_LABEL = 'Jetbook Mini' + + SUPPORTS_SUB_DIRS = True + diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 92c2fe5954..bb5c26a50c 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -62,49 +62,104 @@ def wrap_lines(match): else: return ital+' ' -def line_length(format, raw, percent): +class DocAnalysis(object): ''' - raw is the raw text to find the line length to use for wrapping. - percentage is a decimal number, 0 - 1 which is used to determine - how far in the list of line lengths to use. The list of line lengths is - ordered smallest to larged and does not include duplicates. 0.5 is the - median value. + Provides various text analysis functions to determine how the document is structured. + format is the type of document analysis will be done against. + raw is the raw text to determine the line length to use for wrapping. + Blank lines are excluded from analysis ''' - raw = raw.replace(' ', ' ') - if format == 'html': - linere = re.compile('(?<=)', re.DOTALL) - elif format == 'pdf': - linere = re.compile('(?<=
<br>).*?(?=<br>
)', re.DOTALL) - elif format == 'spanned_html': - linere = re.compile('(?<=)', re.DOTALL) - lines = linere.findall(raw) - lengths = [] - for line in lines: - if len(line) > 0: - lengths.append(len(line)) + def __init__(self, format='html', raw=''): + raw = raw.replace(' ', ' ') + if format == 'html': + linere = re.compile('(?<=]*>\s*
</p>).*?(?=</p>)', re.DOTALL)
+        elif format == 'pdf':
+            linere = re.compile('(?<=<br>)(?!\s*<br>).*?(?=<br>
)', re.DOTALL) + elif format == 'spanned_html': + linere = re.compile('(?<=)', re.DOTALL) + self.lines = linere.findall(raw) - if not lengths: - return 0 + def line_length(self, percent): + ''' + Analyses the document to find the median line length. + percentage is a decimal number, 0 - 1 which is used to determine + how far in the list of line lengths to use. The list of line lengths is + ordered smallest to larged and does not include duplicates. 0.5 is the + median value. + ''' + lengths = [] + for line in self.lines: + if len(line) > 0: + lengths.append(len(line)) - lengths = list(set(lengths)) - total = sum(lengths) - avg = total / len(lengths) - max_line = avg * 2 + if not lengths: + return 0 - lengths = sorted(lengths) - for i in range(len(lengths) - 1, -1, -1): - if lengths[i] > max_line: - del lengths[i] + lengths = list(set(lengths)) + total = sum(lengths) + avg = total / len(lengths) + max_line = avg * 2 - if percent > 1: - percent = 1 - if percent < 0: - percent = 0 + lengths = sorted(lengths) + for i in range(len(lengths) - 1, -1, -1): + if lengths[i] > max_line: + del lengths[i] - index = int(len(lengths) * percent) - 1 + if percent > 1: + percent = 1 + if percent < 0: + percent = 0 - return lengths[index] + index = int(len(lengths) * percent) - 1 + + return lengths[index] + + def line_histogram(self, percent): + ''' + Creates a broad histogram of the document to determine whether it incorporates hard + line breaks. Lines are sorted into 20 'buckets' based on length. + percent is the percentage of lines that should be in a single bucket to return true + The majority of the lines will exist in 1-2 buckets in typical docs with hard line breaks + ''' + minLineLength=20 # Ignore lines under 20 chars (typical of spaces) + maxLineLength=1900 # Discard larger than this to stay in range + buckets=20 # Each line is divided into a bucket based on length + + #print "there are "+str(len(lines))+" lines" + #max = 0 + #for line in self.lines: + # l = len(line) + # if l > max: + # max = l + #print "max line found is "+str(max) + # Build the line length histogram + hRaw = [ 0 for i in range(0,buckets) ] + for line in self.lines: + l = len(line) + if l > minLineLength and l < maxLineLength: + l = int(l/100) + #print "adding "+str(l) + hRaw[l]+=1 + + # Normalize the histogram into percents + totalLines = len(self.lines) + h = [ float(count)/totalLines for count in hRaw ] + #print "\nhRaw histogram lengths are: "+str(hRaw) + #print " percents are: "+str(h)+"\n" + + # Find the biggest bucket + maxValue = 0 + for i in range(0,len(h)): + if h[i] > maxValue: + maxValue = h[i] + + if maxValue < percent: + #print "Line lengths are too variable. Not unwrapping." 
+ return False + else: + #print str(maxValue)+" of the lines were in one bucket" + return True class Dehyphenator(object): ''' @@ -117,42 +172,62 @@ class Dehyphenator(object): def __init__(self): # Add common suffixes to the regex below to increase the likelihood of a match - # don't add suffixes which are also complete words, such as 'able' or 'sex' - self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)ion(s|al(ly)?)?|ings?|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE) + self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE) # remove prefixes if the prefix was not already the point of hyphenation - self.prefixes = re.compile(r'^(un|in|ex)$', re.IGNORECASE) - self.removeprefix = re.compile(r'^(un|in|ex)', re.IGNORECASE) + self.prefixes = re.compile(r'^(dis|re|un|in|ex)$', re.IGNORECASE) + self.removeprefix = re.compile(r'^(dis|re|un|in|ex)', re.IGNORECASE) def dehyphenate(self, match): firsthalf = match.group('firstpart') secondhalf = match.group('secondpart') + try: + wraptags = match.group('wraptags') + except: + wraptags = '' hyphenated = str(firsthalf) + "-" + str(secondhalf) dehyphenated = str(firsthalf) + str(secondhalf) lookupword = self.removesuffixes.sub('', dehyphenated) if self.prefixes.match(firsthalf) is None: lookupword = self.removeprefix.sub('', lookupword) - booklookup = re.compile(u'%s' % lookupword, re.IGNORECASE) #print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated) - match = booklookup.search(self.html) - if match: - #print "returned dehyphenated word: " + str(dehyphenated) - return dehyphenated - else: - #print "returned hyphenated word: " + str(hyphenated) + try: + searchresult = self.html.find(str.lower(lookupword)) + except: return hyphenated + if self.format == 'html_cleanup': + if self.html.find(lookupword) != -1 or searchresult != -1: + #print "Cleanup:returned dehyphenated word: " + str(dehyphenated) + return dehyphenated + elif self.html.find(hyphenated) != -1: + #print "Cleanup:returned hyphenated word: " + str(hyphenated) + return hyphenated + else: + #print "Cleanup:returning original text "+str(firsthalf)+" + linefeed "+str(secondhalf) + return firsthalf+u'\u2014'+wraptags+secondhalf + + else: + if self.html.find(lookupword) != -1 or searchresult != -1: + #print "returned dehyphenated word: " + str(dehyphenated) + return dehyphenated + else: + #print " returned hyphenated word: " + str(hyphenated) + return hyphenated def __call__(self, html, format, length=1): self.html = html + self.format = format if format == 'html': - intextmatch = re.compile(u'(?<=.{%i})(?P[^“"\s>]+)-\s*(?=<)(\s*(\s*<[iubp][^>]*>\s*)?]*>|\s*<[iubp][^>]*>)?\s*(?P[\w\d]+)' % length) + intextmatch = re.compile(u'(?<=.{%i})(?P[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)-\s*(?=<)(?P\s*(\s*<[iubp][^>]*>\s*)?]*>|\s*<[iubp][^>]*>)?\s*(?P[\w\d]+)' % length) elif format == 'pdf': - intextmatch = re.compile(u'(?<=.{%i})(?P[^“"\s>]+)-\s*(
<p>|</[iub]>\s*<p>\s*<[iub]>)\s*(?P<secondpart>[\w\d]+)'% length)
+            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)-\s*(?P<wraptags><p>|</[iub]>\s*<p>
\s*<[iub]>)\s*(?P[\w\d]+)'% length) elif format == 'individual_words': - intextmatch = re.compile('>[^<]*\b(?P[^"\s>]+)-(?P[^<]*\b(?P[^\[\]\\\^\$\.\|\?\*\+\(\)"\s>]+)-(?P[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)-\s*(?=<)(?P\s*(\s*<[iubp][^>]*>\s*)?]*>|\s*<[iubp][^>]*>)?\s*(?P[\w\d]+)') html = intextmatch.sub(self.dehyphenate, html) return html - class CSSPreProcessor(object): PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}') @@ -286,7 +361,7 @@ class HTMLPreProcessor(object): (re.compile(r']+>'), lambda match : ''), # Detect Chapters to match default XPATH in GUI - (re.compile(r'
<br>\s*(?P<chap>(<[ibu]>){0,2}\s*.?(Introduction|Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(</[ibu]>){0,2})\s*(<br>
\s*){1,3}\s*(?P(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<br>)?', re.IGNORECASE), chap_head), + (re.compile(r'<br>\s*(?P<chap>(<[ibu]>){0,2}\s*.?(Introduction|Chapter|Kapitel|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(</[ibu]>){0,2})\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<br>)?', re.IGNORECASE), chap_head), # Cover the case where every letter in a chapter title is separated by a space (re.compile(r'<br>\s*(?P<chap>([A-Z]\s+){4,}\s*([\d\w-]+\s*){0,3}\s*)\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*(<br>))?'), chap_head), @@ -374,10 +449,8 @@ class HTMLPreProcessor(object): print 'Failed to parse remove_footer regexp' traceback.print_exc() - # unwrap em/en dashes, delete soft hyphens - moved here so it's executed after header/footer removal + # delete soft hyphens - moved here so it's executed after header/footer removal if is_pdftohtml: - # unwrap em/en dashes - end_rules.append((re.compile(u'(?<=[–—])\s*<p>\s*(?=[[a-z\d])'), lambda match: '')) # unwrap/delete soft hyphens end_rules.append((re.compile(u'[­](\s*<p>)+\s*(?=[[a-z\d])'), lambda match: '')) # unwrap/delete soft hyphens with formatting @@ -391,12 +464,15 @@ class HTMLPreProcessor(object): length = -1 if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01: - length = line_length('pdf', html, getattr(self.extra_opts, 'unwrap_factor')) + docanalysis = DocAnalysis('pdf', html) + length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor')) if length: - # print "The pdf line length returned is " + str(length) + #print "The pdf line length returned is " + str(length) + # unwrap em/en dashes + end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*<p>\s*(?=[[a-z\d])' % length), lambda match: '')) end_rules.append( # Un wrap using punctuation - (re.compile(r'(?<=.{%i}([a-z,:)\IA]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), + (re.compile(u'(?<=.{%i}([a-z,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), ) for rule in self.PREPROCESS + start_rules: @@ -454,6 +530,14 @@ class HTMLPreProcessor(object): if getattr(self.extra_opts, 'smarten_punctuation', False): html = self.smarten_punctuation(html) + unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars + if unsupported_unicode_chars: + from calibre.ebooks.unidecode.unidecoder import Unidecoder + unidecoder = Unidecoder() + for char in unsupported_unicode_chars: + asciichar = unidecoder.decode(char) + html = html.replace(char, asciichar) + return html def smarten_punctuation(self, html): diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 6a5eaa4a34..5f5c12a703 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -6,7 +6,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' import re -from calibre.ebooks.conversion.preprocess import line_length, Dehyphenator +from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator from calibre.utils.logging import default_log class PreProcessor(object): @@ -77,13 +77,18 @@ class PreProcessor(object): def __call__(self, html): self.log("********* Preprocessing HTML *********") + + # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly + 
html = re.sub(r"\s*</p>", "</p>\n", html) + html = re.sub(r"\s*<p>\s*", "\n<p>", html) + ###### Check Markup ###### # # some lit files don't have any <p> tags or equivalent (generally just plain text between # <pre> tags), check and mark up line endings if required before proceeding if self.no_markup(html, 0.1): self.log("not enough paragraph markers, adding now") - # check if content is in pre tags, use txt procesor to mark up if so + # check if content is in pre tags, use txt processor to mark up if so pre = re.compile(r'<pre>', re.IGNORECASE) if len(pre.findall(html)) == 1: self.log("Running Text Processing") @@ -113,47 +118,77 @@ class PreProcessor(object): # Get rid of empty <o:p> tags to simplify other processing html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html) # Get rid of empty span, bold, & italics tags - html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html) + html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html) html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html) html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html) - # If more than 40% of the lines are empty paragraphs then delete them to clean up spacing + # If more than 40% of the lines are empty paragraphs and the user has enabled remove + # paragraph spacing then delete blank lines to clean up spacing linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL) blankreg = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE) #multi_blank = re.compile(r'(\s*<p[^>]*>\s*(<(b|i|u)>)?\s*(</(b|i|u)>)?\s*</p>){2,}', re.IGNORECASE) blanklines = blankreg.findall(html) lines = linereg.findall(html) + blanks_between_paragraphs = False if len(lines) > 1: self.log("There are " + str(len(blanklines)) + " blank lines. " + str(float(len(blanklines)) / float(len(lines))) + " percent blank") if float(len(blanklines)) / float(len(lines)) > 0.40 and getattr(self.extra_opts, 'remove_paragraph_spacing', False): self.log("deleting blank lines") html = blankreg.sub('', html) - # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly - html = re.sub(r"\s*</p>", "</p>\n", html) - html = re.sub(r"\s*<p>\s*", "\n<p>", html) + elif float(len(blanklines)) / float(len(lines)) > 0.40: + blanks_between_paragraphs = True + #print "blanks between paragraphs is marked True" + else: + blanks_between_paragraphs = False + #self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n") # detect chapters/sections to match xpath or splitting logic + # + # Build the Regular Expressions in pieces + lookahead = "(?=<(p|div))" + chapter_line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*" + chapter_header_open = r"(?P<chap>" + chapter_header_close = ")\s*" + chapter_line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)\s[^>]*>)?\s*</(?P=outer)>\s*" + if blanks_between_paragraphs: + blank_lines = "(\s*<p[^>]*>\s*</p>){0,2}\s*" + else: + blank_lines = "" + opt_title_open = "(" + title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*" + title_header_open = "(?P<title>" + title_header_close = ")\s*" + title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)\s[^>]*>)?\s*</(?P=outer2)>" + opt_title_close = ")?" 
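# Illustration (not part of the patch): the fragments above are concatenated
# below into one verbose chapter_marker regex. Assembled with typical_chapters
# it matches a chapter heading line such as <p><b>Chapter 12</b></p>, optionally
# followed by a title line like <p><i>The Long Road</i></p>, exposing the named
# groups 'chap' and 'title' that self.chapter_head consumes.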
+ + default_title = r"(\s*[\w\'\"-]+){1,5}(?!<)" + typical_chapters = r".?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,4}" + numeric_chapters = r".?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*" + uppercase_chapters = r"\s*.?([A-Z#]+(\s|-){0,3}){1,5}\s*" + + chapter_marker = lookahead+chapter_line_open+chapter_header_open+typical_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close + #print chapter_marker heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE) self.html_preprocess_sections = len(heading.findall(html)) self.log("found " + str(self.html_preprocess_sections) + " pre-existing headings") # # Start with most typical chapter headings, get more aggressive until one works if self.html_preprocess_sections < 10: - chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</[ibu]>){0,2})\s*(</span>)?s*(</[ibu]>){0,2}\s*(</span>)?\s*(</(p|/?br)>)\s*\s*(\s*<p[^>]*>\s*</p>){0,2}\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE|re.VERBOSE) + chapdetect = re.compile(r'%s' % chapter_marker, re.IGNORECASE) html = chapdetect.sub(self.chapter_head, html) if self.html_preprocess_sections < 10: self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying numeric chapters") - chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) + chapter_marker = lookahead+chapter_line_open+chapter_header_open+numeric_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close + chapdetect2 = re.compile(r'%s' % chapter_marker, re.IGNORECASE) html = chapdetect2.sub(self.chapter_head, html) if self.html_preprocess_sections < 10: self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying with uppercase words") - chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?([A-Z#\-\s]+)\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) + chapter_marker = lookahead+chapter_line_open+chapter_header_open+uppercase_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close + chapdetect2 = re.compile(r'%s' % chapter_marker, re.UNICODE) html = chapdetect2.sub(self.chapter_head, html) - ###### Unwrap 
lines ###### # - self.log("Unwrapping Lines") # Some OCR sourced files have line breaks in the html using a combination of span & p tags # span are used for hard line breaks, p for new paragraphs. Determine which is used so # that lines can be un-wrapped across page boundaries @@ -168,25 +203,40 @@ class PreProcessor(object): format = 'html' else: format = 'html' - + # Check Line histogram to determine if the document uses hard line breaks, If 50% or + # more of the lines break in the same region of the document then unwrapping is required + docanalysis = DocAnalysis(format, html) + hardbreaks = docanalysis.line_histogram(.50) + self.log("Hard line breaks check returned "+str(hardbreaks)) # Calculate Length - length = line_length(format, html, getattr(self.extra_opts, - 'html_unwrap_factor', 0.4)) + unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4) + length = docanalysis.line_length(unwrap_factor) self.log("*** Median line length is " + str(length) + ", calculated with " + format + " format ***") - max_length = length * 1.4 - min_max = str("(?<=.{"+str(length)+"})(?<!.{"+str(max_length)+"})") - # - # Unwrap em/en dashes, delete soft-hyphens - #self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n") - html = re.sub(u'\xad\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html) - html = re.sub(u'%s(?<=[\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])' % min_max, '', html) - # Dehyphenate - dehyphenator = Dehyphenator() - html = dehyphenator(html,'html', length) + # only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor + if hardbreaks or unwrap_factor < 0.4: + self.log("Unwrapping required, unwrapping Lines") + # Unwrap em/en dashes + html = re.sub(u'(?<=.{%i}[\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])' % length, '', html) + # Dehyphenate + self.log("Unwrapping/Removing hyphens") + dehyphenator = Dehyphenator() + html = dehyphenator(html,'html', length) + self.log("Done dehyphenating") + # Unwrap lines using punctation and line length + unwrap = re.compile(u"(?<=.{%i}([a-z,:)\IA\u00DF]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) + html = unwrap.sub(' ', html) + #check any remaining hyphens, but only unwrap if there is a match + dehyphenator = Dehyphenator() + html = dehyphenator(html,'html_cleanup', length) + else: + # dehyphenate in cleanup mode to fix anything previous conversions/editing missed + self.log("Cleaning up hyphenation") + dehyphenator = Dehyphenator() + html = dehyphenator(html,'html_cleanup', length) + self.log("Done dehyphenating") - # Unwrap lines using punctation and line length - unwrap = re.compile(r"(?<=.{%i}([a-z,;):\IA]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) - html = unwrap.sub(' ', html) + # delete soft hyphens + html = re.sub(u'\xad\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html) # If still no sections after unwrapping mark split points on 
lines with no punctuation if self.html_preprocess_sections < 10: diff --git a/src/calibre/ebooks/metadata/covers.py b/src/calibre/ebooks/metadata/covers.py index b05444c1c6..2f6fb46540 100644 --- a/src/calibre/ebooks/metadata/covers.py +++ b/src/calibre/ebooks/metadata/covers.py @@ -9,6 +9,7 @@ import traceback, socket, re, sys from functools import partial from threading import Thread, Event from Queue import Queue, Empty +from lxml import etree import mechanize @@ -216,6 +217,68 @@ def download_covers(mi, result_queue, max_covers=50, timeout=5.): # {{{ # }}} +class DoubanCovers(CoverDownload): # {{{ + 'Download covers from Douban.com' + + DOUBAN_ISBN_URL = 'http://api.douban.com/book/subject/isbn/' + CALIBRE_DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d' + name = 'Douban.com covers' + description = _('Download covers from Douban.com') + author = 'Li Fanxi' + + def get_cover_url(self, isbn, br, timeout=5.): + try: + url = self.DOUBAN_ISBN_URL + isbn + "?apikey=" + self.CALIBRE_DOUBAN_API_KEY + src = br.open(url, timeout=timeout).read() + except Exception, err: + if isinstance(getattr(err, 'args', [None])[0], socket.timeout): + err = Exception(_('Douban.com API timed out. Try again later.')) + raise err + else: + feed = etree.fromstring(src) + NAMESPACES = { + 'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/', + 'atom' : 'http://www.w3.org/2005/Atom', + 'db': 'http://www.douban.com/xmlns/' + } + XPath = partial(etree.XPath, namespaces=NAMESPACES) + entries = XPath('//atom:entry')(feed) + if len(entries) < 1: + return None + try: + cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href") + u = cover_url(entries[0])[0].replace('/spic/', '/lpic/'); + # If URL contains "book-default", the book doesn't have a cover + if u.find('book-default') != -1: + return None + except: + return None + return u + + def has_cover(self, mi, ans, timeout=5.): + if not mi.isbn: + return False + br = browser() + try: + if self.get_cover_url(mi.isbn, br, timeout=timeout) != None: + self.debug('cover for', mi.isbn, 'found') + ans.set() + except Exception, e: + self.debug(e) + + def get_covers(self, mi, result_queue, abort, timeout=5.): + if not mi.isbn: + return + br = browser() + try: + url = self.get_cover_url(mi.isbn, br, timeout=timeout) + cover_data = br.open_novisit(url).read() + result_queue.put((True, cover_data, 'jpg', self.name)) + except Exception, e: + result_queue.put((False, self.exception_to_string(e), + traceback.format_exc(), self.name)) +# }}} + def download_cover(mi, timeout=5.): # {{{ results = Queue() download_covers(mi, results, max_covers=1, timeout=timeout) diff --git a/src/calibre/ebooks/metadata/meta.py b/src/calibre/ebooks/metadata/meta.py index 68deca5e10..b02ae2dbff 100644 --- a/src/calibre/ebooks/metadata/meta.py +++ b/src/calibre/ebooks/metadata/meta.py @@ -181,7 +181,7 @@ def metadata_from_filename(name, pat=None): mi.isbn = si except (IndexError, ValueError): pass - if not mi.title: + if mi.is_null('title'): mi.title = name return mi diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index bd9728989b..cc74b3c515 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -184,7 +184,7 @@ class EditMetadataAction(InterfaceAction): self.gui.tags_view.blockSignals(True) try: changed = MetadataBulkDialog(self.gui, rows, - self.gui.library_view.model().db).changed + self.gui.library_view.model()).changed finally: self.gui.tags_view.blockSignals(False) if changed: diff 
--git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py index 9c83b3aee5..b0ce0a1e6d 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.py +++ b/src/calibre/gui2/dialogs/metadata_bulk.py @@ -142,12 +142,13 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): _('Append to field'), ] - def __init__(self, window, rows, db): + def __init__(self, window, rows, model): QDialog.__init__(self, window) Ui_MetadataBulkDialog.__init__(self) self.setupUi(self) - self.db = db - self.ids = [db.id(r) for r in rows] + self.model = model + self.db = model.db + self.ids = [self.db.id(r) for r in rows] self.box_title.setText('<p>' + _('Editing meta information for <b>%d books</b>') % len(rows)) @@ -170,7 +171,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): self.tag_editor_button.clicked.connect(self.tag_editor) self.autonumber_series.stateChanged[int].connect(self.auto_number_changed) - if len(db.custom_field_keys(include_composites=False)) == 0: + if len(self.db.custom_field_keys(include_composites=False)) == 0: self.central_widget.removeTab(1) else: self.create_custom_column_editors() @@ -617,8 +618,15 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): self.worker = Worker(args, self.db, self.ids, getattr(self, 'custom_column_widgets', []), Dispatcher(bb.accept, parent=bb)) - self.worker.start() - bb.exec_() + + # The metadata backup thread causes database commits + # which can slow down bulk editing of large numbers of books + self.model.stop_metadata_backup() + try: + self.worker.start() + bb.exec_() + finally: + self.model.start_metadata_backup() if self.worker.error is not None: return error_dialog(self, _('Failed'), diff --git a/src/calibre/gui2/dialogs/scheduler.py b/src/calibre/gui2/dialogs/scheduler.py index fd8184933f..30f4a2d8a2 100644 --- a/src/calibre/gui2/dialogs/scheduler.py +++ b/src/calibre/gui2/dialogs/scheduler.py @@ -57,6 +57,10 @@ class SchedulerDialog(QDialog, Ui_Dialog): self.old_news.setValue(gconf['oldest_news']) + def keyPressEvent(self, ev): + if ev.key() not in (Qt.Key_Enter, Qt.Key_Return): + return QDialog.keyPressEvent(self, ev) + def break_cycles(self): self.disconnect(self.recipe_model, SIGNAL('searched(PyQt_PyObject)'), self.search_done) diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index b2a7f08055..9da5420681 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -159,17 +159,24 @@ class BooksModel(QAbstractTableModel): # {{{ # do something on the GUI thread. Deadlock. self.cover_cache = CoverCache(db, FunctionDispatcher(self.db.cover)) self.cover_cache.start() - if self.metadata_backup is not None: - self.metadata_backup.stop() - # Would like to to a join here, but the thread might be waiting to - # do something on the GUI thread. Deadlock. - self.metadata_backup = MetadataBackup(db) - self.metadata_backup.start() + self.stop_metadata_backup() + self.start_metadata_backup() def refresh_cover(event, ids): if event == 'cover' and self.cover_cache is not None: self.cover_cache.refresh(ids) db.add_listener(refresh_cover) + def start_metadata_backup(self): + self.metadata_backup = MetadataBackup(self.db) + self.metadata_backup.start() + + def stop_metadata_backup(self): + if getattr(self, 'metadata_backup', None) is not None: + self.metadata_backup.stop() + # Would like to to a join here, but the thread might be waiting to + # do something on the GUI thread. Deadlock. 
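# Usage sketch (not part of the patch): callers bracket long-running bulk jobs
# with this pair so the backup thread's periodic database commits don't slow
# the operation down, mirroring the metadata_bulk.py and misc.py changes above:
#
#     m = gui.library_view.model()
#     m.stop_metadata_backup()
#     try:
#         run_bulk_edit(m.db)   # hypothetical long-running operation
#     finally:
#         m.start_metadata_backup()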
+ + def refresh_ids(self, ids, current_row=-1): rows = self.db.refresh_ids(ids) if rows: diff --git a/src/calibre/gui2/preferences/misc.py b/src/calibre/gui2/preferences/misc.py index 865115c2ed..582d110c6c 100644 --- a/src/calibre/gui2/preferences/misc.py +++ b/src/calibre/gui2/preferences/misc.py @@ -106,14 +106,13 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): d.exec_() def compact(self, *args): - from calibre.library.caches import MetadataBackup m = self.gui.library_view.model() - if m.metadata_backup is not None: - m.metadata_backup.stop() - d = CheckIntegrity(m.db, self) - d.exec_() - m.metadata_backup = MetadataBackup(m.db) - m.metadata_backup.start() + m.stop_metadata_backup() + try: + d = CheckIntegrity(m.db, self) + d.exec_() + finally: + m.start_metadata_backup() def open_config_dir(self, *args): from calibre.utils.config import config_dir diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py index 2f0452a773..c068168247 100644 --- a/src/calibre/gui2/tools.py +++ b/src/calibre/gui2/tools.py @@ -217,9 +217,12 @@ def fetch_scheduled_recipe(arg): if 'output_profile' in ps: recs.append(('output_profile', ps['output_profile'], OptionRecommendation.HIGH)) - if ps['output_profile'] == 'kindle': - recs.append(('no_inline_toc', True, - OptionRecommendation.HIGH)) + # Disabled since apparently some people use + # K4PC and, surprise, surprise, it doesn't support + # indexed MOBIs. + #if ps['output_profile'] == 'kindle': + # recs.append(('no_inline_toc', True, + # OptionRecommendation.HIGH)) lf = load_defaults('look_and_feel') if lf.get('base_font_size', 0.0) != 0.0: diff --git a/src/calibre/gui2/wizard/__init__.py b/src/calibre/gui2/wizard/__init__.py index f3234d48d5..37b7c7bd7c 100644 --- a/src/calibre/gui2/wizard/__init__.py +++ b/src/calibre/gui2/wizard/__init__.py @@ -73,6 +73,14 @@ class JetBook(Device): manufacturer = 'Ectaco' id = 'jetbook' +class JetBookMini(Device): + + output_profile = 'jetbook5' + output_format = 'FB2' + name = 'JetBook Mini' + manufacturer = 'Ectaco' + id = 'jetbookmini' + class KindleDX(Kindle): output_profile = 'kindle_dx' @@ -584,12 +592,42 @@ class LibraryPage(QWizardPage, LibraryUI): qt_app.load_translations() self.emit(SIGNAL('retranslate()')) self.init_languages() + try: + if prefs['language'].lower().startswith('zh'): + from calibre.customize.ui import enable_plugin + for name in ('Douban Books', 'Douban.com covers'): + enable_plugin(name) + except: + pass + + def is_library_dir_suitable(self, x): + return LibraryDatabase2.exists_at(x) or not os.listdir(x) + + def validatePage(self): + newloc = unicode(self.location.text()) + if not self.is_library_dir_suitable(newloc): + self.show_library_dir_error(newloc) + return False + return True def change(self): - dir = choose_dir(self, 'database location dialog', + x = choose_dir(self, 'database location dialog', _('Select location for books')) - if dir: - self.location.setText(dir) + if x: + if self.is_library_dir_suitable(x): + self.location.setText(x) + else: + self.show_library_dir_error(x) + + def show_library_dir_error(self, x): + if not isinstance(x, unicode): + try: + x = x.decode(filesystem_encoding) + except: + x = unicode(repr(x)) + error_dialog(self, _('Bad location'), + _('You must choose an empty folder for ' + 'the calibre library. %s is not empty.')%x, show=True) def initializePage(self): lp = prefs['library_path']
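For reference, a minimal usage sketch of the DocAnalysis and Dehyphenator classes added by this patch (not part of the patch itself; the sample string is hypothetical pdftohtml-style output, and the 0.50 histogram threshold mirrors the value used in utils.py above):

    from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator

    # Hypothetical pdftohtml-style output: one <br> per printed line
    raw = ('<br>The quick brown fox jumped over the'
           '<br>lazy dog, then trotted down the long'
           '<br>dusty road toward the riverbank.<br>')

    docanalysis = DocAnalysis('pdf', raw)
    length = docanalysis.line_length(0.5)          # 0.5 = median of the line-length list
    hardbreaks = docanalysis.line_histogram(0.50)  # True if >=50% of lines fall in one bucket

    if hardbreaks:
        # Hard line breaks detected: attempt to rejoin hyphenated words, as the
        # html path in utils.py does before unwrapping
        dehyphenator = Dehyphenator()
        raw = dehyphenator(raw, 'pdf', length)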