Merge

2025-08-11 09:13:57 -04:00 · 2011-01-18 22:47:31 +01:00 · 2011-01-18 22:47:31 +01:00 · 8a4df39897
commit 8a4df39897
parent 4412d11c87 f56b7453b1
90 changed files with 3283 additions and 1834 deletions
--- a/resources/calibre-portable.bat
+++ b/resources/calibre-portable.bat
@ -1,6 +1,4 @@
@echo OFF
-REM			CalibreRun.bat
-REM			~~~~~~~~~~~~~~
 REM Batch File to start a Calibre configuration on Windows
 REM giving explicit control of the location of:
 REM  - Calibe Program Files
@ -24,7 +22,10 @@ REM -------------------------------------
 REM Set up Calibre Config folder
 REM -------------------------------------

-If EXIST CalibreConfig SET CALIBRE_CONFIG_DIRECTORY=%cd%\CalibreConfig
+IF EXIST CalibreConfig (
+	SET CALIBRE_CONFIG_DIRECTORY=%cd%\CalibreConfig
+	ECHO CONFIG=%cd%\CalibreConfig
+)


 REM --------------------------------------------------------------
@ -38,24 +39,53 @@ REM drive letter of the USB stick.
 REM Comment out any of the following that are not to be used
 REM --------------------------------------------------------------

-SET CALIBRE_LIBRARY_DIRECTORY=U:\eBOOKS\CalibreLibrary
-IF EXIST CalibreLibrary SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreLibrary
-IF EXIST CalibreBooks SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreBooks
+IF EXIST U:\eBooks\CalibreLibrary (
+	SET CALIBRE_LIBRARY_DIRECTORY=U:\eBOOKS\CalibreLibrary
+	ECHO LIBRARY=U:\eBOOKS\CalibreLibrary
+)
+IF EXIST CalibreLibrary (
+	SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreLibrary
+	ECHO LIBRARY=%cd%\CalibreLibrary
+)
+IF EXIST CalibreBooks (
+	SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreBooks
+	ECHO LIBRARY=%cd%\CalibreBooks
+)


 REM --------------------------------------------------------------
-REM Specify Location of metadata database  (optional)
+REM Specify Location of metadata database (optional)
 REM
 REM Location where the metadata.db file is located.  If not set
 REM the same location as Books files will be assumed.  This.
 REM options is used to get better performance when the Library is
 REM on a (slow) network drive.  Putting the metadata.db file 
-REM locally gives a big performance improvement.
+REM locally makes a big performance improvement.
+REM
+REM NOTE.  If you use this option, then the ability to switch
+REM        libraries within Calibre will be disabled.  Therefore
+REM        you do not want to set it if the metadata.db file
+REM        is at the same location as the book files.
 REM --------------------------------------------------------------

-IF EXIST CalibreBooks SET SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreBooks\metadata.db
-IF EXIST CalibreMetadata SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreMetadata\metadata.db
-
+IF EXIST CalibreBooks (
+	IF NOT "%CALIBRE_LIBRARY_DIRECTORY%" == "%cd%\CalibreBooks" (
+		SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreBooks\metadata.db
+		ECHO DATABASE=%cd%\CalibreBooks\metadata.db
+		ECHO '
+		ECHO ***CAUTION*** Library Switching will be disabled 
+		ECHO '
+	)
+)
+IF EXIST CalibreMetadata (
+	IF NOT "%CALIBRE_LIBRARY_DIRECTORY%" == "%cd%\CalibreMetadata" (
+		SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreMetadata\metadata.db
+		ECHO DATABASE=%cd%\CalibreMetadata\metadata.db
+		ECHO '
+		ECHO ***CAUTION*** Library Switching will be disabled 
+		ECHO '
+	)
+)

 REM --------------------------------------------------------------
 REM Specify Location of source (optional)
@ -63,13 +93,20 @@ REM
 REM It is easy to run Calibre from source
 REM Just set the environment variable to where the source is located
 REM When running from source the GUI will have a '*' after the version.
+REM number that is displayed at the bottom of the Calibre main screen.
 REM --------------------------------------------------------------

-IF EXIST Calibre\src SET CALIBRE_DEVELOP_FROM=%cd%\Calibre\src
-
+IF EXIST Calibre\src (
+	SET CALIBRE_DEVELOP_FROM=%cd%\Calibre\src
+	ECHO SOURCE=%cd%\Calibre\src
+)
+IF EXIST D:\Calibre\Calibre\src (
+	SET CALIBRE_DEVELOP_FROM=D:\Calibre\Calibre\src
+	ECHO SOURCE=D:\Calibre\Calibre\src
+)

 REM --------------------------------------------------------------
-REM Specify Location of calibre binaries (optinal)
+REM Specify Location of calibre binaries (optional)
 REM
 REM To avoid needing Calibre to be set in the search path, ensure
 REM that Calibre Program Files is current directory when starting.
@ -78,21 +115,15 @@ REM This folder can be populated by cpying the Calibre2 folder from
 REM an existing isntallation or by isntalling direct to here.
 REM --------------------------------------------------------------

-IF EXIST Calibre2 CD Calibre2
-
-
-REM --------------------------------------------
-REM Display settings that will be used
-REM --------------------------------------------
-
-echo PROGRAMS=%cd%
-echo SOURCE=%CALIBRE_DEVELOP_FROM%
-echo CONFIG=%CALIBRE_CONFIG_DIRECTORY%
-echo LIBRARY=%CALIBRE_LIBRARY_DIRECTORY%
-echo DATABASE=%CALIBRE_OVERRIDE_DATABASE_PATH%
+IF EXIST Calibre2 (
+	CD Calibre2
+	ECHO PROGRAMS=%cd%\Calibre2
+)

+REM ----------------------------------------------------------
 REM  The following gives a chance to check the settings before
 REM  starting Calibre.  It can be commented out if not wanted.
+REM ----------------------------------------------------------

 echo "Press CTRL-C if you do not want to continue"
 pause
@ -111,4 +142,4 @@ REM Use with /WAIT to wait until Calibre completes to run a task on exit
 REM --------------------------------------------------------

 echo "Starting up Calibre"
-START /belownormal Calibre --with-library %CALIBRE_LIBRARY_DIRECTORY%
+START /belownormal Calibre --with-library "%CALIBRE_LIBRARY_DIRECTORY%"
--- a/resources/catalog/section_list_templates.py
+++ b/resources/catalog/section_list_templates.py
@ -0,0 +1,42 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+'''
+    These templates control the content of titles displayed in the various sections
+
+    Available fields:
+    {title}          Title of the book
+    {series}         Series name
+    {series_index}   Number of the book in the series
+    {rating}         Rating
+    {rating_parens}  Rating, in parentheses
+    {pubyear}        Year the book was published
+    {pubyear_parens} Year the book was published, in parentheses
+
+'''
+# Books by Author
+by_authors_normal_title_template = '{title} {pubyear_parens}'
+by_authors_series_title_template = '[{series_index}] {title} {pubyear_parens}'
+
+# Books by Title
+by_titles_normal_title_template = '{title}'
+by_titles_series_title_template = '{title} ({series} [{series_index}])'
+
+# Books by Series
+by_series_title_template = '[{series_index}] {title} {pubyear_parens}'
+
+# Books by Genre
+by_genres_normal_title_template = '{title} {pubyear_parens}'
+by_genres_series_title_template = '{series_index}. {title} {pubyear_parens}'
+
+# Recently Added
+by_recently_added_normal_title_template = '{title}'
+by_recently_added_series_title_template = '{title} ({series} [{series_index}])'
+
+# By Month added
+by_month_added_normal_title_template = '{title} {pubyear_parens}'
+by_month_added_series_title_template = '[{series_index}] {title} {pubyear_parens}'
--- a/resources/recipes/ars_technica.recipe
+++ b/resources/recipes/ars_technica.recipe
@ -1,6 +1,5 @@
-
 __license__   = 'GPL v3'
-__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 arstechnica.com
 '''
@ -9,19 +8,26 @@ import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

-class ArsTechnica2(BasicNewsRecipe):
+class ArsTechnica(BasicNewsRecipe):
    title                 = u'Ars Technica'
    language              = 'en'
-    __author__            = 'Darko Miletic and Sujata Raman'
+    __author__            = 'Darko Miletic, Sujata Raman, Alexis Rohou'
    description           = 'The art of technology'
    publisher             = 'Ars Technica'
    category              = 'news, IT, technology'
-    oldest_article        = 2
+    oldest_article        = 5
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
-    extra_css             = ' body {font-family: Arial,Helvetica,sans-serif} .title{text-align: left} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} '
+    extra_css             = 	'''
+				body {font-family: Arial,Helvetica,sans-serif}
+				.title{text-align: left}
+				.byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none}
+				.news-item-figure-caption-text{font-size:small; font-style:italic}
+				.news-item-figure-caption-byline{font-size:small; font-style:italic; font-weight:bold}
+				'''
+    ignoreEtcArticles     = True	# Etc feed items can be ignored, as they're not real stories

    conversion_options = {
                             'comments'  : description
@ -31,10 +37,10 @@ class ArsTechnica2(BasicNewsRecipe):
                         }


-    preprocess_regexps = [
-                (re.compile(r'<div class="news-item-figure', re.DOTALL|re.IGNORECASE),lambda match: '<div class="news-item-figure"')
-               ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
-                         ]
+    #preprocess_regexps = [
+    #            (re.compile(r'<div class="news-item-figure', re.DOTALL|re.IGNORECASE),lambda match: '<div class="news-item-figure"')
+    #           ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
+    #                     ]

    keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]

@ -42,7 +48,7 @@ class ArsTechnica2(BasicNewsRecipe):
                     dict(name=['object','link','embed'])
                    ,dict(name='div', attrs={'class':'read-more-link'})
                  ]
-    remove_attributes=['width','height']
+    #remove_attributes=['width','height']

    feeds = [
              (u'Infinite Loop (Apple content)'        , u'http://feeds.arstechnica.com/arstechnica/apple/'      )
@ -56,6 +62,7 @@ class ArsTechnica2(BasicNewsRecipe):
             ,(u'Law & Disorder (Tech policy content)' , u'http://feeds.arstechnica.com/arstechnica/tech-policy/')
            ]

+    # This deals with multi-page stories
    def append_page(self, soup, appendtag, position):
        pager = soup.find('div',attrs={'class':'pager'})
        if pager:
@ -81,6 +88,7 @@ class ArsTechnica2(BasicNewsRecipe):


    def preprocess_html(self, soup):
+	# Adds line breaks near the byline (not sure why this is needed)
        ftag = soup.find('div', attrs={'class':'byline'})
        if ftag:
           brtag = Tag(soup,'br')
@ -88,12 +96,33 @@ class ArsTechnica2(BasicNewsRecipe):
           ftag.insert(4,brtag)
           ftag.insert(5,brtag2)

+	# Remove style items
        for item in soup.findAll(style=True):
           del item['style']

+	# Remove id
+	for item in soup.findAll(id=True):
+		del item['id']
+
+	# For some reason, links to authors don't have the domainname
+	a_author = soup.find('a',{'href':re.compile("^/author")})
+	if a_author:
+		a_author['href'] = 'http://arstechnica.com'+a_author['href']
+
+	# within div class news-item-figure, we need to grab images
+
+	# Deal with multi-page stories
        self.append_page(soup, soup.body, 3)

        return soup

    def get_article_url(self, article):
+        # Skip "Etc:" items when ignoreEtcArticles is set; a missing title
+        # is treated as empty so re.match() is never handed None.
+        if self.ignoreEtcArticles:
+            article_title = article.get('title', None) or ''
+            if re.match('Etc: ', article_title) is not None:
+                return None
+        # The article URL is the guid with any ?query suffix stripped
        return article.get('guid',  None).rpartition('?')[0]
+
--- a/resources/recipes/dilbert.recipe
+++ b/resources/recipes/dilbert.recipe
@ -28,7 +28,7 @@ class DilbertBig(BasicNewsRecipe):
                            ,'publisher'       : publisher
                         }

-    feeds = [(u'Dilbert', u'http://feeds.dilbert.com/DilbertDailyStrip' )]
+    feeds = [(u'Dilbert', u'http://feed.dilbert.com/dilbert/daily_strip' )]

    def get_article_url(self, article):
        return article.get('feedburner_origlink', None)
--- a/resources/recipes/economist.recipe
+++ b/resources/recipes/economist.recipe
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.ebooks.BeautifulSoup import Tag, NavigableString

-import mechanize, string, urllib, time, re
+import string, time, re

 class Economist(BasicNewsRecipe):

@ -18,19 +18,19 @@ class Economist(BasicNewsRecipe):

    __author__ = "Kovid Goyal"
    INDEX = 'http://www.economist.com/printedition'
-    description = ('Global news and current affairs from a European perspective.'
-            ' Needs a subscription from ')+INDEX
+    description = 'Global news and current affairs from a European perspective.'

    oldest_article = 7.0
    cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
    remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
            dict(attrs={'class':['dblClkTrk', 'ec-article-info']})]
    keep_only_tags = [dict(id='ec-article-body')]
-    needs_subscription = True
+    needs_subscription = False
    no_stylesheets = True
    preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
        lambda x:'</html>')]

+    '''
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        br.open('http://www.economist.com')
@ -50,6 +50,7 @@ class Economist(BasicNewsRecipe):
                    }))
        br.open(req).read()
        return br
+    '''

    def parse_index(self):
        try:
--- a/resources/recipes/economist_free.recipe
+++ b/resources/recipes/economist_free.recipe
@ -7,12 +7,12 @@ from lxml import html

 class Economist(BasicNewsRecipe):

-    title = 'The Economist (free)'
+    title = 'The Economist (RSS)'
    language = 'en'

    __author__ = "Kovid Goyal"
    description = ('Global news and current affairs from a European perspective.'
-            ' Much slower than the subscription based version.')
+            ' Much slower than the print edition based version.')

    oldest_article = 7.0
    cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
--- a/resources/recipes/el_pais.recipe
+++ b/resources/recipes/el_pais.recipe
@ -9,13 +9,14 @@ __docformat__ = 'restructuredtext en'
 elpais.es
 '''

+from time import strftime
+
 from calibre.web.feeds.news import BasicNewsRecipe

 class ElPais(BasicNewsRecipe):
    __author__        = 'Kovid Goyal & Lorenzo Vigentini & Jordi Balcells'
    description   = 'Main daily newspaper from Spain'

-    cover_url      = 'http://www.elpais.com/im/tit_logo_global.gif'
    title          = u'El Pais'
    publisher      = u'Ediciones El Pa\xeds SL'
    category       = 'News, politics, culture, economy, general interest'
@ -62,6 +63,6 @@ class ElPais(BasicNewsRecipe):
                        (u'Vi\xf1etas', u'http://www.elpais.com/rss/feed.html?feedId=17058')
                        ]

-def print_version(self, url):
-    url = url+'?print=1'
-    return url
+    def get_cover_url(self):
+        return 'http://img5.kiosko.net/' + strftime("%Y/%m/%d") + '/es/elpais.750.jpg'
+
--- a/resources/recipes/freenature.recipe
+++ b/resources/recipes/freenature.recipe
@ -1,4 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
 import re

 class NatureNews(BasicNewsRecipe):
@ -10,17 +11,76 @@ class NatureNews(BasicNewsRecipe):
    max_articles_per_feed = 50

    no_stylesheets = True
-    remove_tags_before = dict(name='h1', attrs={'class':'heading entry-title'})
-    remove_tags_after  = dict(name='h2', attrs={'id':'comments'})
+    keep_only_tags = [dict(name='div', attrs={'id':'content'})]
+#    remove_tags_before = dict(name='h1', attrs={'class':'heading entry-title'})
+#    remove_tags_after  = dict(name='h2', attrs={'id':'comments'})
    remove_tags = [
       dict(name='h2', attrs={'id':'comments'}),
       dict(attrs={'alt':'Advertisement'}),
       dict(name='div', attrs={'class':'ad'}),
-    ] 
+       dict(attrs={'class':'Z3988'}),
+       dict(attrs={'class':['formatpublished','type-of-article','cleardiv','disclaimer','buttons','comments xoxo']}),
+       dict(name='a', attrs={'href':'#comments'}),
+       dict(name='h2',attrs={'class':'subheading plusicon icon-add-comment'})
+    ]

    preprocess_regexps = [
        (re.compile(r'<p>ADVERTISEMENT</p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
        ]

+    extra_css             = '''
+                            .author { text-align: right; font-size: small; line-height:1em; margin-top:0px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                            .imagedescription { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                            .imagecredit { font-size: x-small; font-style: normal; font-weight: bold}
+                            '''
+
    feeds = [('Nature News', 'http://feeds.nature.com/news/rss/most_recent')]

+    def preprocess_html(self,soup):
+        '''Simplify article markup; each step is skipped when its element is absent.'''
+        # The author name is slightly buried - dig it up
+        author = soup.find('p', {'class':'byline'})
+        authornamediv = author.find('span',{'class':'author fn'}) if author else None
+        if authornamediv:
+            authornamelink = authornamediv.find('a')
+            if authornamelink:
+                authorname = authornamelink.contents[0]
+            else:
+                authorname = authornamediv.contents[0]
+            # Stick the author's name in the byline tag
+            tag = Tag(soup,'div')
+            tag['class'] = 'author'
+            tag.insert(0,authorname.strip())
+            author.replaceWith(tag)
+
+        # Change the intro from a p to a div
+        intro = soup.find('p',{'class':'intro'})
+        if intro:
+            tag = Tag(soup,'div')
+            tag['class'] = 'intro'
+            tag.insert(0,intro.contents[0])
+            intro.replaceWith(tag)
+
+        # Change span class=imagedescription to div
+        descr = soup.find('span',{'class':'imagedescription'})
+        if descr:
+            tag = Tag(soup,'div')
+            tag['class'] = 'imagedescription'
+            tag.insert(0,descr.renderContents())
+            descr.replaceWith(tag)
+
+        # The references are in a list, let's make them simpler
+        reflistcont =  soup.find('ul',{'id':'article-refrences'})
+        if reflistcont:
+            reflist = reflistcont.li.renderContents()
+            tag = Tag(soup,'div')
+            tag['class'] = 'article-references'
+            tag.insert(0,reflist)
+            reflistcont.replaceWith(tag)
+
+        # Within the id=content div, we need to remove all the stuff after the end of the class=entry-content
+        entrycontent = soup.find('div',{'class':'entry-content'})
+        trailing = entrycontent.findNextSiblings() if entrycontent else []
+        for nextSibling in trailing:
+            nextSibling.extract()
+        return soup
--- a/resources/recipes/ihned.recipe
+++ b/resources/recipes/ihned.recipe
@ -0,0 +1,182 @@
+import re, time
+from calibre import strftime
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class IHNed(BasicNewsRecipe):
+
+
+    stahnout_vsechny = True
+        #True   = stahuje vsechny z homepage
+        #False  = stahuje pouze dnesni clanky (ze dne, kdy je skript spusten)
+
+    title       = 'iHNed'
+    __author__  = 'Karel Bílek'
+    language = 'cs'
+    description = 'Zprávy z iHNed.cz'
+    timefmt = ' [%a, %d %b, %Y]'
+    needs_subscription = False
+    remove_tags = [dict(attrs={'class':['borderbottom', 'web', 'foot', 'reklama', 'd-elm d-rellinks', 'd-elm']}),
+                 dict(style=['text-align: center;']),
+                 dict(id=['r-bfull']),
+                 dict(name=['script', 'noscript', 'style'])]
+    encoding = 'windows-1250'
+    no_stylesheets = True
+    remove_tags_before = dict(attrs={'class':'d-nadtit'})
+    remove_tags_after = dict(attrs={'class':'like'})
+
+    conversion_options = {
+      'linearize_tables' : True,
+    }
+
+
+
+    def preprocess_html(self, soup):
+        '''Unlink headline anchors and make site-relative hrefs absolute.'''
+        def makeurl(wat):
+            return "http://ihned.cz" + (wat if wat.startswith("/") else "/" + wat)
+
+        for h1 in soup.findAll('h1'):
+            a = h1.find('a')
+            if a:
+                string = a.string
+                if string:
+                    # replace this headline's own link, not the page's first <a>
+                    a.replaceWith(string)
+        for a in soup.findAll('a',  href=True) :
+            cil = str(a['href'])
+            if cil.startswith("/") or  cil.startswith("index"):
+                a['href'] = makeurl(cil)
+        return soup
+
+
+    def parse_index(self):
+
+        def makeurl(wat):
+            if wat.startswith("/") or  wat.startswith("index"):
+                return "http://ihned.cz"+wat;
+            else:
+                return wat
+
+
+        articles = {} #vysledek, asi
+        key = None #soucasna sekce
+        ans = [] #vsechny sekce
+
+        articles["Hlavní"] = []
+        ans.append("Hlavní")
+
+        was = {}
+
+        def parse_subpage(url, name):
+            articles[name] = []
+            ans.append(name)
+
+
+            soup = self.index_to_soup(url)
+            otvirak = soup.find(True, attrs={'class':['otv']})
+            if otvirak:
+
+                #the code is copypasted here because I don't know python. simple as that.
+                a = otvirak.find('a', href=True)
+                title = self.tag_to_string(a, use_alt=True).strip()
+                txt = otvirak.find(True, attrs={'class':['txt']})
+                description = ''
+                if txt:
+                    match = re.match(r'<div class="txt">\s*([^<]*)\s*<a', str(txt), re.L)
+                    if match:
+                        description = match.group(1)
+
+                pubdate = strftime('%d. %m.')
+                if not title in was:
+                    articles[name].append(
+                          dict(title=title, url=makeurl(a['href']), date=pubdate,
+                                description=description,
+                                content=''))
+
+            otv234 = soup.find(True, attrs={'class':['otv234', 'col2a']})
+            if otv234:
+                for ow in otv234.findAll(True, attrs={'class':['ow']}):
+                    a = ow.find('a', href=True)
+                    title = self.tag_to_string(a, use_alt=True).strip()
+                    description=''
+                    prx = ow.find(True, attrs={'class':['prx']});
+                    if prx:
+                        description = str(prx.string)
+                    nfo = ow.find(True, attrs={'class':['nfo']});
+                    pubdate = ''
+                    if nfo:
+                        dtime = time.localtime();
+                        day = dtime[2]
+                        month = dtime[1]
+
+                        pubdate = strftime('%d. %m.')
+
+                        match = re.search(r'([0-9]*)\.([0-9]*)\.', str(nfo))
+
+                        if self.stahnout_vsechny or (int(day) == int(match.group(1)) and int(month) == int(match.group(2))):
+                            if not title in was:
+                                articles[name].append(
+                                      dict(title=title, url=makeurl(a['href']), date=pubdate,
+                                            description=description,
+                                            content=''))
+
+
+
+
+
+
+        soup = self.index_to_soup('http://ihned.cz/')
+        otvirak = soup.find(True, attrs={'class':['otv']})
+        if otvirak:
+            a = otvirak.find('a', href=True)
+            title = self.tag_to_string(a, use_alt=True).strip()
+            txt = otvirak.find(True, attrs={'class':['txt']})
+            description = ''
+            if txt:
+                match = re.match(r'<div class="txt">\s*([^<]*)\s*<a', str(txt), re.L)
+                if match:
+                    description = match.group(1)
+
+            pubdate = strftime('%d. %m.')
+            feed = "Hlavní"
+            articles[feed].append(
+                      dict(title=title, url=(a['href']), date=pubdate,
+                            description=description,
+                            content=''))
+            was[title]=1
+
+        otvirak2345 = soup.find(True, attrs={'class':['otv2345']})
+        if otvirak2345:
+            for otv2 in otvirak2345.findAll(True, attrs={'class':['otv2-5']}):
+                a = otv2.find('a', attrs={'class':['tit2']}, href=True)
+                title = self.tag_to_string(a, use_alt=True).strip()
+                description=''
+                span = otv2.find('span');
+                if span:
+                    match = re.match(r'<span>\s*([^<]*)\s*<a', str(span), re.L)
+                    if match:
+                        description = match.group(1)
+                feed = "Hlavní"
+                pubdate = strftime('%d. %m.')
+                articles[feed].append(
+                          dict(title=title, url=(a['href']), date=pubdate,
+                                description=description,
+                                content=''))
+                was[title]=1
+
+
+        parse_subpage("http://komentare.ihned.cz/", "Komentáře")
+        parse_subpage("http://domaci.ihned.cz", "Domácí")
+        parse_subpage("http://ekonomika.ihned.cz", "Ekonomika")
+        parse_subpage("http://zahranicni.ihned.cz/", "Zahraničí");
+        parse_subpage("http://finweb.ihned.cz/", "Finance");
+        parse_subpage("http://digiweb.ihned.cz/", "DigiWeb");
+        parse_subpage("http://kultura.ihned.cz/", "Kultura")
+        parse_subpage("http://sport.ihned.cz/", "Sport");
+
+        #seradi kategorie
+        ans = self.sort_index_by(ans, {'Hlavni':1, 'Domácí':2, 'Ekonomika':5, 'Zahraničí':3, 'Finance':6, 'DigiWeb':7, 'Kultura':8, 'Sport':9, 'Komentáře':4})
+
+        #vrati, ale pouze, kdyz je v kategoriich...
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return ans
+
--- a/resources/recipes/kath_net.recipe
+++ b/resources/recipes/kath_net.recipe
@ -0,0 +1,16 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1295262156(BasicNewsRecipe):
+    title          = u'kath.net'
+    __author__     = 'Bobus'
+    oldest_article = 7
+    max_articles_per_feed = 100
+
+    feeds          = [(u'kath.net', u'http://www.kath.net/2005/xml/index.xml')]
+
+
+    def print_version(self, url):
+        return url+"&print=yes"
+
+    extra_css = 'td.textb {font-size: medium;}'
+
--- a/resources/recipes/nikkei_sub_economy.recipe
+++ b/resources/recipes/nikkei_sub_economy.recipe
@ -27,6 +27,9 @@ class NikkeiNet_sub_economy(BasicNewsRecipe):
                       {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
                       {'class':"cmn-article_keyword cmn-clearfix"},
                       {'class':"cmn-print_headline cmn-clearfix"},
+                       {'class':"cmn-article_list"},
+                       dict(id="ABOUT-NIKKEI"),
+                       {'class':"cmn-sub_market"},
                         ]
    remove_tags_after = {'class':"cmn-pr_list"}

--- a/resources/recipes/nytimes.recipe
+++ b/resources/recipes/nytimes.recipe
@ -586,105 +586,125 @@ class NYTimes(BasicNewsRecipe):
        return self.strip_anchors(soup)

    def postprocess_html(self,soup, True):
+		try:
+			if self.one_picture_per_article:
+				# Remove all images after first
+				largeImg = soup.find(True, {'class':'articleSpanImage'})
+				inlineImgs = soup.findAll(True, {'class':'inlineImage module'})
+				if largeImg:
+					for inlineImg in inlineImgs:
+						inlineImg.extract()
+				else:
+					if inlineImgs:
+						firstImg = inlineImgs[0]
+						for inlineImg in inlineImgs[1:]:
+							inlineImg.extract()
+						# Move firstImg before article body
+						cgFirst = soup.find(True, {'class':re.compile('columnGroup  *first')})
+						if cgFirst:
+							# Strip all sibling NavigableStrings: noise
+							navstrings = cgFirst.findAll(text=True, recursive=False)
+							[ns.extract() for ns in navstrings]
+							headline_found = False
+							tag = cgFirst.find(True)
+							insertLoc = 0
+							while True:
+								insertLoc += 1
+								if hasattr(tag,'class') and tag['class'] == 'articleHeadline':
+										headline_found = True
+										break
+								tag = tag.nextSibling
+								if not tag:
+									headline_found = False
+									break
+							if headline_found:
+								cgFirst.insert(insertLoc,firstImg)
+						else:
+							self.log(">>> No class:'columnGroup first' found <<<")
+		except:
+			self.log("ERROR: One picture per article in postprocess_html")

-        if self.one_picture_per_article:
-            # Remove all images after first
-            largeImg = soup.find(True, {'class':'articleSpanImage'})
-            inlineImgs = soup.findAll(True, {'class':'inlineImage module'})
-            if largeImg:
-                for inlineImg in inlineImgs:
-                    inlineImg.extract()
-            else:
-                if inlineImgs:
-                    firstImg = inlineImgs[0]
-                    for inlineImg in inlineImgs[1:]:
-                        inlineImg.extract()
-                    # Move firstImg before article body
-                    cgFirst = soup.find(True, {'class':re.compile('columnGroup  *first')})
-                    if cgFirst:
-                        # Strip all sibling NavigableStrings: noise
-                        navstrings = cgFirst.findAll(text=True, recursive=False)
-                        [ns.extract() for ns in navstrings]
-                        headline_found = False
-                        tag = cgFirst.find(True)
-                        insertLoc = 0
-                        while True:
-                            insertLoc += 1
-                            if hasattr(tag,'class') and tag['class'] == 'articleHeadline':
-                                    headline_found = True
-                                    break
-                            tag = tag.nextSibling
-                            if not tag:
-                                headline_found = False
-                                break
-                        if headline_found:
-                            cgFirst.insert(insertLoc,firstImg)
-                    else:
-                        self.log(">>> No class:'columnGroup first' found <<<")
+		try:
+			# Change captions to italic
+			for caption in soup.findAll(True, {'class':'caption'}) :
+				if caption and len(caption) > 0:
+					cTag = Tag(soup, "p", [("class", "caption")])
+					c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
+					mp_off = c.find("More Photos")
+					if mp_off >= 0:
+						c = c[:mp_off]
+					cTag.insert(0, c)
+					caption.replaceWith(cTag)
+		except:
+			self.log("ERROR:  Problem in change captions to italic")

-        # Change captions to italic
-        for caption in soup.findAll(True, {'class':'caption'}) :
-            if caption and caption.contents[0]:
-                cTag = Tag(soup, "p", [("class", "caption")])
-                c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
-                mp_off = c.find("More Photos")
-                if mp_off >= 0:
-                    c = c[:mp_off]
-                cTag.insert(0, c)
-                caption.replaceWith(cTag)
+		try:
+			# Change <nyt_headline> to <h2>
+			h1 = soup.find('h1')
+			if h1:
+				headline = h1.find("nyt_headline")
+				if headline:
+					tag = Tag(soup, "h2")
+					tag['class'] = "headline"
+					tag.insert(0, self.fixChars(headline.contents[0]))
+					h1.replaceWith(tag)
+			else:
+				# Blog entry - replace headline, remove <hr> tags
+				headline = soup.find('title')
+				if headline:
+					tag = Tag(soup, "h2")
+					tag['class'] = "headline"
+					tag.insert(0, self.fixChars(headline.contents[0]))
+					soup.insert(0, tag)
+					hrs = soup.findAll('hr')
+					for hr in hrs:
+						hr.extract()
+		except:
+			self.log("ERROR:  Problem in Change <nyt_headline> to <h2>")

-        # Change <nyt_headline> to <h2>
-        h1 = soup.find('h1')
-        if h1:
-            headline = h1.find("nyt_headline")
-            if headline:
-                tag = Tag(soup, "h2")
-                tag['class'] = "headline"
-                tag.insert(0, self.fixChars(headline.contents[0]))
-                h1.replaceWith(tag)
-        else:
-            # Blog entry - replace headline, remove <hr> tags
-            headline = soup.find('title')
-            if headline:
-                tag = Tag(soup, "h2")
-                tag['class'] = "headline"
-                tag.insert(0, self.fixChars(headline.contents[0]))
-                soup.insert(0, tag)
-                hrs = soup.findAll('hr')
-                for hr in hrs:
-                    hr.extract()
+		try:
+			# Change <h1> to <h3> - used in editorial blogs
+			masthead = soup.find("h1")
+			if masthead:
+				# Nuke the href
+				if masthead.a:
+					del(masthead.a['href'])
+				tag = Tag(soup, "h3")
+				tag.insert(0, self.fixChars(masthead.contents[0]))
+				masthead.replaceWith(tag)
+		except:
+			self.log("ERROR:  Problem in Change <h1> to <h3> - used in editorial blogs")

-        # Change <h1> to <h3> - used in editorial blogs
-        masthead = soup.find("h1")
-        if masthead:
-            # Nuke the href
-            if masthead.a:
-                del(masthead.a['href'])
-            tag = Tag(soup, "h3")
-            tag.insert(0, self.fixChars(masthead.contents[0]))
-            masthead.replaceWith(tag)
+		try:
+			# Change every element with class="bold" (e.g. <span class="bold">) to <b>
+			for subhead in soup.findAll(True, {'class':'bold'}) :
+				if subhead.contents:
+					bTag = Tag(soup, "b")
+					bTag.insert(0, subhead.contents[0]) # only the first child is carried over
+					subhead.replaceWith(bTag)
+		except:
+			self.log("ERROR:  Problem in Change <span class=bold> to <b>")

-        # Change <span class="bold"> to <b>
-        for subhead in soup.findAll(True, {'class':'bold'}) :
-            if subhead.contents:
-                bTag = Tag(soup, "b")
-                bTag.insert(0, subhead.contents[0])
-                subhead.replaceWith(bTag)
+		try:
+			divTag = soup.find('div',attrs={'id':'articleBody'})
+			if divTag:
+				divTag['class'] = divTag['id']
+		except:
+			self.log("ERROR:  Problem in soup.find(div,attrs={id:articleBody})")

-        divTag = soup.find('div',attrs={'id':'articleBody'})
-        if divTag:
-            divTag['class'] = divTag['id']
+		try:
+			# Add class="authorId" to <div> so we can format with CSS
+			divTag = soup.find('div',attrs={'id':'authorId'})
+			if divTag and divTag.contents[0]:
+				tag = Tag(soup, "p")
+				tag['class'] = "authorId"
+				tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
+								 use_alt=False)))
+				divTag.replaceWith(tag)
+		except:
+			self.log("ERROR:  Problem in Add class=authorId to <div> so we can format with CSS")

-        # Add class="authorId" to <div> so we can format with CSS
-        divTag = soup.find('div',attrs={'id':'authorId'})
-        if divTag and divTag.contents[0]:
-            tag = Tag(soup, "p")
-            tag['class'] = "authorId"
-            tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
-                             use_alt=False)))
-            divTag.replaceWith(tag)
-
-        return soup
+		return soup

    def populate_article_metadata(self, article, soup, first):
        shortparagraph = ""
--- a/resources/recipes/nytimes_sub.recipe
+++ b/resources/recipes/nytimes_sub.recipe
@ -1,4 +1,5 @@
 #!/usr/bin/env  python
+# -*- coding: utf-8 -*-

 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
@ -23,6 +24,10 @@ class NYTimes(BasicNewsRecipe):
    webEdition = False
    oldest_article = 7

+    # replace paid Kindle Version:  the name will be changed to "The New York Times" to cause
+    # previous paid versions of the New York Times to be sent to the back issues folder on the Kindle
+    replaceKindleVersion = False
+
    # includeSections: List of sections to include. If empty, all sections found will be included.
    # Otherwise, only the sections named will be included. For example,
    #
@ -94,6 +99,10 @@ class NYTimes(BasicNewsRecipe):
        title='New York Times (Web)'
        description = 'New York Times on the Web'
        needs_subscription = True
+    elif replaceKindleVersion:  # use the paid Kindle edition's title (see replaceKindleVersion above)
+        title='The New York Times'
+        description = 'Today\'s New York Times'
+        needs_subscription = True
    else:
        title='New York Times'
        description = 'Today\'s New York Times'
@ -150,6 +159,11 @@ class NYTimes(BasicNewsRecipe):
                            'relatedSearchesModule',
                            'side_tool',
                            'singleAd',
+                            'entry entry-utility', #added for DealBook
+                            'entry-tags', #added for DealBook
+                            'footer promos clearfix', #added for DealBook
+                            'footer links clearfix', #added for DealBook
+                            'inlineImage module', #added for DealBook
                            re.compile('^subNavigation'),
                            re.compile('^leaderboard'),
                            re.compile('^module'),
@ -183,6 +197,9 @@ class NYTimes(BasicNewsRecipe):
                            'side_index',
                            'side_tool',
                            'toolsRight',
+                            'skybox', #added for DealBook
+                            'TopAd', #added for DealBook
+                            'related-content', #added for DealBook
                            ]),
                   dict(name=['script', 'noscript', 'style','form','hr'])]
    no_stylesheets = True
@ -237,7 +254,7 @@ class NYTimes(BasicNewsRecipe):
    def exclude_url(self,url):
        if not url.startswith("http"):
            return True
-        if not url.endswith(".html"):
+        if not url.endswith(".html") and 'dealbook.nytimes.com' not in url: #added for DealBook
            return True
        if 'nytimes.com' not in url:
            return True
@ -560,7 +577,6 @@ class NYTimes(BasicNewsRecipe):


    def preprocess_html(self, soup):
-
        if self.webEdition & (self.oldest_article>0):
            date_tag = soup.find(True,attrs={'class': ['dateline','date']})
            if date_tag:
@ -583,106 +599,166 @@ class NYTimes(BasicNewsRecipe):
                img_div = soup.find('div','inlineImage module')
                if img_div:
                    img_div.extract()
+
+
        return self.strip_anchors(soup)

    def postprocess_html(self,soup, True):

-        if self.one_picture_per_article:
-            # Remove all images after first
-            largeImg = soup.find(True, {'class':'articleSpanImage'})
-            inlineImgs = soup.findAll(True, {'class':'inlineImage module'})
-            if largeImg:
-                for inlineImg in inlineImgs:
-                    inlineImg.extract()
-            else:
-                if inlineImgs:
-                    firstImg = inlineImgs[0]
-                    for inlineImg in inlineImgs[1:]:
-                        inlineImg.extract()
-                    # Move firstImg before article body
-                    cgFirst = soup.find(True, {'class':re.compile('columnGroup  *first')})
-                    if cgFirst:
-                        # Strip all sibling NavigableStrings: noise
-                        navstrings = cgFirst.findAll(text=True, recursive=False)
-                        [ns.extract() for ns in navstrings]
-                        headline_found = False
-                        tag = cgFirst.find(True)
-                        insertLoc = 0
-                        while True:
-                            insertLoc += 1
-                            if hasattr(tag,'class') and tag['class'] == 'articleHeadline':
-                                    headline_found = True
-                                    break
-                            tag = tag.nextSibling
-                            if not tag:
-                                headline_found = False
-                                break
-                        if headline_found:
-                            cgFirst.insert(insertLoc,firstImg)
-                    else:
-                        self.log(">>> No class:'columnGroup first' found <<<")
+        try:
+                if self.one_picture_per_article:
+                        # Remove all images after first
+                        largeImg = soup.find(True, {'class':'articleSpanImage'})
+                        inlineImgs = soup.findAll(True, {'class':'inlineImage module'})
+                        if largeImg:
+                                for inlineImg in inlineImgs:
+                                        inlineImg.extract()
+                        else:
+                                if inlineImgs:
+                                        firstImg = inlineImgs[0]
+                                        for inlineImg in inlineImgs[1:]:
+                                                inlineImg.extract()
+                                        # Move firstImg before article body
+                                        cgFirst = soup.find(True, {'class':re.compile('columnGroup  *first')})
+                                        if cgFirst:
+                                                # Strip all sibling NavigableStrings: noise
+                                                navstrings = cgFirst.findAll(text=True, recursive=False)
+                                                [ns.extract() for ns in navstrings]
+                                                headline_found = False
+                                                tag = cgFirst.find(True)
+                                                insertLoc = 0
+                                                while True:
+                                                        insertLoc += 1
+                                                        if hasattr(tag,'class') and tag['class'] == 'articleHeadline':
+                                                                        headline_found = True
+                                                                        break
+                                                        tag = tag.nextSibling
+                                                        if not tag:
+                                                                headline_found = False
+                                                                break
+                                                if headline_found:
+                                                        cgFirst.insert(insertLoc,firstImg)
+                                        else:
+                                                self.log(">>> No class:'columnGroup first' found <<<")
+        except:
+                self.log("ERROR: One picture per article in postprocess_html")

-        # Change captions to italic
-        for caption in soup.findAll(True, {'class':'caption'}) :
-            if caption and caption.contents[0]:
-                cTag = Tag(soup, "p", [("class", "caption")])
-                c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
-                mp_off = c.find("More Photos")
-                if mp_off >= 0:
-                    c = c[:mp_off]
-                cTag.insert(0, c)
-                caption.replaceWith(cTag)
+        try:
+                # Replace each class="caption" element with a text-only <p class="caption"> (italic styling presumably comes from CSS -- confirm)
+                for caption in soup.findAll(True, {'class':'caption'}) :
+                        if caption and len(caption) > 0:
+                                cTag = Tag(soup, "p", [("class", "caption")])
+                                c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
+                                mp_off = c.find("More Photos")
+                                if mp_off >= 0:
+                                        c = c[:mp_off] # cut the caption text at "More Photos"
+                                cTag.insert(0, c)
+                                caption.replaceWith(cTag)
+        except:
+                self.log("ERROR:  Problem in change captions to italic")

-        # Change <nyt_headline> to <h2>
-        h1 = soup.find('h1')
-        if h1:
-            headline = h1.find("nyt_headline")
-            if headline:
-                tag = Tag(soup, "h2")
-                tag['class'] = "headline"
-                tag.insert(0, self.fixChars(headline.contents[0]))
-                h1.replaceWith(tag)
-        else:
-            # Blog entry - replace headline, remove <hr> tags
-            headline = soup.find('title')
-            if headline:
-                tag = Tag(soup, "h2")
-                tag['class'] = "headline"
-                tag.insert(0, self.fixChars(headline.contents[0]))
-                soup.insert(0, tag)
-                hrs = soup.findAll('hr')
-                for hr in hrs:
-                    hr.extract()
+        try:
+                # Change <nyt_headline> (or, for DealBook posts, an 'entry-title' <h1>) to <h2 class="headline">
+                h1 = soup.find('h1')
+                blogheadline = str(h1) # serialized <h1> markup, searched for 'entry-title' below (added for dealbook)
+                if h1:
+                        headline = h1.find("nyt_headline")
+                        if headline:
+                                tag = Tag(soup, "h2")
+                                tag['class'] = "headline"
+                                tag.insert(0, self.fixChars(headline.contents[0]))
+                                h1.replaceWith(tag)
+                        elif 'entry-title' in blogheadline: # was .find(), whose -1 "not found" result is truthy (added for dealbook)
+                                tag = Tag(soup, "h2")#added for dealbook
+                                tag['class'] = "headline"#added for dealbook
+                                tag.insert(0, self.fixChars(h1.contents[0]))#added for dealbook
+                                h1.replaceWith(tag)#added for dealbook

-        # Change <h1> to <h3> - used in editorial blogs
-        masthead = soup.find("h1")
-        if masthead:
-            # Nuke the href
-            if masthead.a:
-                del(masthead.a['href'])
-            tag = Tag(soup, "h3")
-            tag.insert(0, self.fixChars(masthead.contents[0]))
-            masthead.replaceWith(tag)
+                else:
+                        # Blog entry - replace headline, remove <hr> tags  - BCC I think this is no longer functional 1-18-2011
+                        headline = soup.find('title')
+                        if headline:
+                                tag = Tag(soup, "h2")
+                                tag['class'] = "headline"
+                                tag.insert(0, self.fixChars(headline.renderContents()))
+                                soup.insert(0, tag)
+                                hrs = soup.findAll('hr')
+                                for hr in hrs:
+                                        hr.extract()
+        except:
+                self.log("ERROR:  Problem in Change <nyt_headline> to <h2>")

-        # Change <span class="bold"> to <b>
-        for subhead in soup.findAll(True, {'class':'bold'}) :
-            if subhead.contents:
-                bTag = Tag(soup, "b")
-                bTag.insert(0, subhead.contents[0])
-                subhead.replaceWith(bTag)
+        try:
+                #if this is from a blog (dealbook), fix the byline format: <address class="byline author vcard"> becomes <h6 class="byline">
+                bylineauthor = soup.find('address',attrs={'class':'byline author vcard'})
+                if bylineauthor:
+                    tag = Tag(soup, "h6")
+                    tag['class'] = "byline"
+                    tag.insert(0, self.fixChars(bylineauthor.renderContents()))
+                    bylineauthor.replaceWith(tag)
+        except:
+            self.log("ERROR:  fixing byline author format")

-        divTag = soup.find('div',attrs={'id':'articleBody'})
-        if divTag:
-            divTag['class'] = divTag['id']
+        try:
+                #if this is a blog (dealbook) fix the credit style for the pictures
+                blogcredit = soup.find('div',attrs={'class':'credit'})
+                if blogcredit:
+                    tag = Tag(soup, "h6")
+                    tag['class'] = "credit"
+                    tag.insert(0, self.fixChars(blogcredit.renderContents()))
+                    blogcredit.replaceWith(tag)
+        except:
+            self.log("ERROR:  fixing credit format")

-        # Add class="authorId" to <div> so we can format with CSS
-        divTag = soup.find('div',attrs={'id':'authorId'})
-        if divTag and divTag.contents[0]:
-            tag = Tag(soup, "p")
-            tag['class'] = "authorId"
-            tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
-                             use_alt=False)))
-            divTag.replaceWith(tag)
+
+        try:
+                # Change <h1> to <h3> - used in editorial blogs
+                masthead = soup.find("h1")
+                if masthead:
+                        # Nuke the href
+                        if masthead.a:
+                                del(masthead.a['href'])
+                        tag = Tag(soup, "h3")
+                        tag.insert(0, self.fixChars(masthead.contents[0]))
+                        masthead.replaceWith(tag)
+        except:
+                self.log("ERROR:  Problem in Change <h1> to <h3> - used in editorial blogs")
+
+        try:
+                # Change every element with class="bold" (e.g. <span class="bold">) to <b>
+                for subhead in soup.findAll(True, {'class':'bold'}) :
+                        if subhead.contents:
+                                bTag = Tag(soup, "b")
+                                bTag.insert(0, subhead.contents[0]) # only the first child is carried over
+                                subhead.replaceWith(bTag)
+        except:
+                self.log("ERROR:  Problem in Change <span class=bold> to <b>")
+        try:
+                #remove the "update" marker: the first <span class="update"> is replaced with an empty string
+                blogupdated = soup.find('span', {'class':'update'})
+                if blogupdated:
+                    blogupdated.replaceWith("")
+        except:
+                self.log("ERROR:  Removing strong tag")
+
+        try:
+                divTag = soup.find('div',attrs={'id':'articleBody'})
+                if divTag:
+                        divTag['class'] = divTag['id']
+        except:
+                self.log("ERROR:  Problem in soup.find(div,attrs={id:articleBody})")
+
+        try:
+                # Add class="authorId" to <div> so we can format with CSS
+                divTag = soup.find('div',attrs={'id':'authorId'})
+                if divTag and divTag.contents[0]:
+                        tag = Tag(soup, "p")
+                        tag['class'] = "authorId"
+                        tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
+                                                         use_alt=False)))
+                        divTag.replaceWith(tag)
+        except:
+                self.log("ERROR:  Problem in Add class=authorId to <div> so we can format with CSS")

        return soup
    def populate_article_metadata(self, article, soup, first):
--- a/resources/recipes/seattle_times.recipe
+++ b/resources/recipes/seattle_times.recipe
@ -21,16 +21,53 @@ class SeattleTimes(BasicNewsRecipe):
    encoding              = 'cp1252'
    language = 'en'

-
-    html2lrf_options = [
-                          '--comment'  , description
-                        , '--category' , category
-                        , '--publisher', publisher
-                        ]
-
-    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
-
-    feeds              = [(u'Articles', u'http://seattletimes.nwsource.com/rss/seattletimes.xml')]
+    feeds              = [
+                          (u'Top Stories',
+                              u'http://seattletimes.nwsource.com/rss/home.xml'),
+                          #(u'Articles', u'http://seattletimes.nwsource.com/rss/seattletimes.xml')
+                          (u'Business & Technology',
+                              u'http://seattletimes.nwsource.com/rss/businesstechnology.xml'),
+                          (u'Personal Technology',
+                              u'http://seattletimes.nwsource.com/rss/personaltechnology.xml'),
+                          (u'Entertainment & the Arts',
+                              u'http://seattletimes.nwsource.com/rss/artsentertainment.xml'),
+                          (u'Health',
+                              u'http://seattletimes.nwsource.com/rss/health.xml'),
+                          (u'Living',
+                              u'http://seattletimes.nwsource.com/rss/living.xml'),
+                          (u'Local News',
+                              u'http://seattletimes.nwsource.com/rss/localnews.xml'),
+                          (u'Nation & World',
+                              u'http://seattletimes.nwsource.com/rss/nationworld.xml'),
+                          (u'Opinion',
+                              u'http://seattletimes.nwsource.com/rss/opinion.xml'),
+                          (u'Politics',
+                              u'http://seattletimes.nwsource.com/rss/politics.xml'),
+                          (u'Sports',
+                              u'http://seattletimes.nwsource.com/rss/sports.xml'),
+                          (u'Nicole Brodeur',
+                              u'http://seattletimes.nwsource.com/rss/nicolebrodeur.xml'),
+                          (u'Danny Westneat',
+                              u'http://seattletimes.nwsource.com/rss/dannywestneat.xml'),
+                          (u'Jerry Large',
+                              u'http://seattletimes.nwsource.com/rss/jerrylarge.xml'),
+                          (u'Ron Judd',
+                              u'http://seattletimes.nwsource.com/rss/ronjudd.xml'),
+                          (u'Education',
+                              u'http://seattletimes.nwsource.com/rss/education.xml'),
+                          (u'Letters to the Editor',
+                              u'http://seattletimes.nwsource.com/rss/northwestvoices.xml'),
+                          (u'Travel',
+                              u'http://seattletimes.nwsource.com/rss/travel.xml'),
+                          (u'Outdoors',
+                              u'http://seattletimes.nwsource.com/rss/outdoors.xml'),
+                          (u'Steve Kelley',
+                              u'http://seattletimes.nwsource.com/rss/stevekelley.xml'),
+                          (u'Jerry Brewer',
+                              u'http://seattletimes.nwsource.com/rss/jerrybrewer.xml'),
+                          (u'Most Read Articles',
+                              u'http://seattletimes.nwsource.com/rss/mostreadarticles.xml'),
+                         ]

    remove_tags        = [
                             dict(name=['object','link','script'])
--- a/src/calibre/customize/conversion.py
+++ b/src/calibre/customize/conversion.py
@ -160,18 +160,6 @@ class InputFormatPlugin(Plugin):
        '''
        raise NotImplementedError()

-    def preprocess_html(self, opts, html):
-        '''
-        This method is called by the conversion pipeline on all HTML before it
-        is parsed. It is meant to be used to do any required preprocessing on
-        the HTML, like removing hard line breaks, etc.
-
-        :param html: A unicode string
-        :return: A unicode string
-        '''
-        return html
-
-
    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        This method must be implemented in sub-classes. It must return
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -36,7 +36,7 @@ class ANDROID(USBMS):

            # Google
            0x18d1 : { 0x4e11 : [0x0100, 0x226, 0x227], 0x4e12: [0x0100, 0x226,
-                0x227], 0x4e21: [0x0100, 0x226, 0x227]},
+                0x227], 0x4e21: [0x0100, 0x226, 0x227], 0xb058: [0x0222]},

            # Samsung
            0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400],
@ -64,12 +64,13 @@ class ANDROID(USBMS):
    EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN)

    VENDOR_NAME      = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
-            'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS']
+            'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
+            'TELECHIP']
    WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
            '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
            'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
            'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
-            'SGH-T849', '_MB300', 'A70S']
+            'SGH-T849', '_MB300', 'A70S', 'S_ANDROID']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S']
--- a/src/calibre/devices/eb600/driver.py
+++ b/src/calibre/devices/eb600/driver.py
@ -178,7 +178,7 @@ class INVESBOOK(EB600):

 class BOOQ(EB600):
    name = 'Booq Device Interface'
-    gui_name = 'Booq'
+    gui_name = 'bq Reader'

    FORMATS = ['epub', 'mobi', 'prc', 'fb2', 'pdf', 'doc', 'rtf', 'txt', 'html']

--- a/src/calibre/devices/misc.py
+++ b/src/calibre/devices/misc.py
@ -33,8 +33,8 @@ class PALMPRE(USBMS):

 class AVANT(USBMS):
    name           = 'Booq Avant Device Interface'
-    gui_name       = 'Avant'
-    description    = _('Communicate with the Booq Avant')
+    gui_name       = 'bq Avant'
+    description    = _('Communicate with the Bq Avant')
    author         = 'Kovid Goyal'
    supported_platforms = ['windows', 'osx', 'linux']

@ -193,6 +193,9 @@ class LUMIREAD(USBMS):

    THUMBNAIL_HEIGHT = 200

+    VENDOR_NAME = 'ACER'
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'LUMIREAD_600'
+
    def upload_cover(self, path, filename, metadata, filepath):
        if metadata.thumbnail and metadata.thumbnail[-1]:
            cfilepath = filepath.replace('/', os.sep)
--- a/src/calibre/devices/sne/driver.py
+++ b/src/calibre/devices/sne/driver.py
@ -33,6 +33,6 @@ class SNE(USBMS):
    STORAGE_CARD_VOLUME_LABEL = 'SNE Storage Card'

    EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Books'
-    SUPPORTS_SUB_DIRS = False
+    SUPPORTS_SUB_DIRS = True


--- a/src/calibre/ebooks/chm/input.py
+++ b/src/calibre/ebooks/chm/input.py
@ -75,7 +75,7 @@ class CHMInput(InputFormatPlugin):
    def _create_oebbook(self, hhcpath, basedir, opts, log, mi):
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import DirContainer
-        oeb = create_oebbook(log, None, opts, self,
+        oeb = create_oebbook(log, None, opts,
                encoding=opts.input_encoding, populate=False)
        self.oeb = oeb

--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@ -126,10 +126,29 @@ def add_pipeline_options(parser, plumber):
                      'margin_top', 'margin_left', 'margin_right',
                      'margin_bottom', 'change_justification',
                      'insert_blank_line', 'remove_paragraph_spacing','remove_paragraph_spacing_indent_size',
-                      'asciiize', 'remove_header', 'header_regex',
-                      'remove_footer', 'footer_regex',
+                      'asciiize',
                  ]
                  ),
+                  
+              'HEURISTIC PROCESSING' : (
+                  _('Modify the document text and structure using common patterns.'),
+                  [
+                      'enable_heuristics', 'markup_chapter_headings',
+                      'italicize_common_cases', 'fix_indents',
+                      'html_unwrap_factor', 'unwrap_lines',
+                      'delete_blank_paragraphs', 'format_scene_breaks',
+                      'dehyphenate', 'renumber_headings',
+                  ]
+                  ),
+                  
+              'SEARCH AND REPLACE' : (
+                 _('Modify the document text and structure using user defined patterns.'),
+                 [
+                      'sr1_search', 'sr1_replace',
+                      'sr2_search', 'sr2_replace',
+                      'sr3_search', 'sr3_replace',
+                 ]
+              ),

              'STRUCTURE DETECTION' : (
                  _('Control auto-detection of document structure.'),
@ -137,7 +156,6 @@ def add_pipeline_options(parser, plumber):
                      'chapter', 'chapter_mark',
                      'prefer_metadata_cover', 'remove_first_image',
                      'insert_metadata', 'page_breaks_before',
-                      'preprocess_html', 'html_unwrap_factor',
                  ]
                  ),

@ -164,7 +182,8 @@ def add_pipeline_options(parser, plumber):

              }

-    group_order = ['', 'LOOK AND FEEL', 'STRUCTURE DETECTION',
+    group_order = ['', 'LOOK AND FEEL', 'HEURISTIC PROCESSING',
+            'SEARCH AND REPLACE', 'STRUCTURE DETECTION',
            'TABLE OF CONTENTS', 'METADATA', 'DEBUG']

    for group in group_order:
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -376,23 +376,6 @@ OptionRecommendation(name='insert_metadata',
            )
        ),

-OptionRecommendation(name='preprocess_html',
-        recommended_value=False, level=OptionRecommendation.LOW,
-        help=_('Attempt to detect and correct hard line breaks and other '
-            'problems in the source file. This may make things worse, so use '
-            'with care.'
-            )
-        ),
-
-OptionRecommendation(name='html_unwrap_factor',
-        recommended_value=0.40, level=OptionRecommendation.LOW,
-        help=_('Scale used to determine the length at which a line should '
-            'be unwrapped if preprocess is enabled. Valid values are a decimal between 0 and 1. The '
-            'default is 0.40, just below the median line length. This will unwrap typical books '
-            ' with hard line breaks, but should be reduced if the line length is variable.'
-            )
-        ),
-
 OptionRecommendation(name='smarten_punctuation',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Convert plain quotes, dashes and ellipsis to their '
@ -401,32 +384,6 @@ OptionRecommendation(name='smarten_punctuation',
            )
        ),

-OptionRecommendation(name='remove_header',
-        recommended_value=False, level=OptionRecommendation.LOW,
-        help=_('Use a regular expression to try and remove the header.'
-            )
-        ),
-
-OptionRecommendation(name='header_regex',
-        recommended_value='(?i)(?<=<hr>)((\s*<a name=\d+></a>((<img.+?>)*<br>\s*)?\d+<br>\s*.*?\s*)|(\s*<a name=\d+></a>((<img.+?>)*<br>\s*)?.*?<br>\s*\d+))(?=<br>)',
-        level=OptionRecommendation.LOW,
-        help=_('The regular expression to use to remove the header.'
-            )
-        ),
-
-OptionRecommendation(name='remove_footer',
-        recommended_value=False, level=OptionRecommendation.LOW,
-        help=_('Use a regular expression to try and remove the footer.'
-            )
-        ),
-
-OptionRecommendation(name='footer_regex',
-        recommended_value='(?i)(?<=<hr>)((\s*<a name=\d+></a>((<img.+?>)*<br>\s*)?\d+<br>\s*.*?\s*)|(\s*<a name=\d+></a>((<img.+?>)*<br>\s*)?.*?<br>\s*\d+))(?=<br>)',
-        level=OptionRecommendation.LOW,
-        help=_('The regular expression to use to remove the footer.'
-            )
-        ),
-
 OptionRecommendation(name='read_metadata_from_opf',
            recommended_value=None, level=OptionRecommendation.LOW,
            short_switch='m',
@ -527,6 +484,89 @@ OptionRecommendation(name='timestamp',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the book timestamp (used by the date column in calibre).')),

+OptionRecommendation(name='enable_heuristics',
+    recommended_value=False, level=OptionRecommendation.LOW,
+    help=_('Enable heuristic processing. This option must be set for any '
+           'heuristic processing to take place.')),
+
+OptionRecommendation(name='markup_chapter_headings',
+    recommended_value=False, level=OptionRecommendation.LOW,
+    help=_('Detect unformatted chapter headings and sub headings. Change '
+           'them to h2 and h3 tags.  This setting will not create a TOC, '
+           'but can be used in conjunction with structure detection to create '
+           'one.')),
+
+OptionRecommendation(name='italicize_common_cases',
+    recommended_value=False, level=OptionRecommendation.LOW,
+    help=_('Look for common words and patterns that denote '
+           'italics and italicize them.')),
+
+OptionRecommendation(name='fix_indents',
+    recommended_value=False, level=OptionRecommendation.LOW,
+    help=_('Turn indentation created from multiple non-breaking space entities '
+           'into CSS indents.')),
+
+OptionRecommendation(name='html_unwrap_factor',
+    recommended_value=0.40, level=OptionRecommendation.LOW,
+    help=_('Scale used to determine the length at which a line should '
+            'be unwrapped. Valid values are a decimal between 0 and 1. The '
+            'default is 0.4, just below the median line length.  If only a '
+            'few lines in the document require unwrapping this value should '
+            'be reduced')),
+
+OptionRecommendation(name='unwrap_lines',
+    recommended_value=False, level=OptionRecommendation.LOW,
+    help=_('Unwrap lines using punctuation and other formatting clues.')),
+
+OptionRecommendation(name='delete_blank_paragraphs',
+    recommended_value=False, level=OptionRecommendation.LOW,
+    help=_('Remove empty paragraphs from the document when they exist between '
+           'every other paragraph')),
+
+OptionRecommendation(name='format_scene_breaks',
+    recommended_value=False, level=OptionRecommendation.LOW,
+    help=_('Left aligned scene break markers are center aligned. '
+           'Replace soft scene breaks that use multiple blank lines with '
+           'horizontal rules.')),
+
+OptionRecommendation(name='dehyphenate',
+    recommended_value=False, level=OptionRecommendation.LOW,
+    help=_('Analyze hyphenated words throughout the document.  The '
+           'document itself is used as a dictionary to determine whether hyphens '
+           'should be retained or removed.')),
+
+OptionRecommendation(name='renumber_headings',
+    recommended_value=False, level=OptionRecommendation.LOW,
+    help=_('Looks for occurrences of sequential <h1> or <h2> tags. '
+           'The tags are renumbered to prevent splitting in the middle '
+           'of chapter headings.')),
+
+OptionRecommendation(name='sr1_search',
+    recommended_value='', level=OptionRecommendation.LOW,
+    help=_('Search pattern (regular expression) to be replaced with '
+           'sr1-replace.')),
+
+OptionRecommendation(name='sr1_replace',
+    recommended_value='', level=OptionRecommendation.LOW,
+    help=_('Replacement to replace the text found with sr1-search.')),
+
+OptionRecommendation(name='sr2_search',
+    recommended_value='', level=OptionRecommendation.LOW,
+    help=_('Search pattern (regular expression) to be replaced with '
+           'sr2-replace.')),
+
+OptionRecommendation(name='sr2_replace',
+    recommended_value='', level=OptionRecommendation.LOW,
+    help=_('Replacement to replace the text found with sr2-search.')),
+
+OptionRecommendation(name='sr3_search',
+    recommended_value='', level=OptionRecommendation.LOW,
+    help=_('Search pattern (regular expression) to be replaced with '
+           'sr3-replace.')),
+
+OptionRecommendation(name='sr3_replace',
+    recommended_value='', level=OptionRecommendation.LOW,
+    help=_('Replacement to replace the text found with sr3-search.')),
 ]
        # }}}

@ -861,7 +901,6 @@ OptionRecommendation(name='timestamp',
                self.opts_to_mi(self.user_metadata)
            if not hasattr(self.oeb, 'manifest'):
                self.oeb = create_oebbook(self.log, self.oeb, self.opts,
-                        self.input_plugin,
                        encoding=self.input_plugin.output_encoding)
            self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
            self.opts.is_image_collection = self.input_plugin.is_image_collection
@ -971,14 +1010,13 @@ OptionRecommendation(name='timestamp',
        self.log(self.output_fmt.upper(), 'output written to', self.output)
        self.flush()

-def create_oebbook(log, path_or_stream, opts, input_plugin, reader=None,
+def create_oebbook(log, path_or_stream, opts, reader=None,
        encoding='utf-8', populate=True):
    '''
    Create an OEBBook.
    '''
    from calibre.ebooks.oeb.base import OEBBook
-    html_preprocessor = HTMLPreProcessor(input_plugin.preprocess_html,
-            opts.preprocess_html, opts)
+    html_preprocessor = HTMLPreProcessor(log, opts)
    if not encoding:
        encoding = None
    oeb = OEBBook(log, html_preprocessor,
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -174,13 +174,19 @@ class Dehyphenator(object):
    retain hyphens.
    '''

-    def __init__(self):
+    def __init__(self, verbose=0, log=None):
+        self.log = log
+        self.verbose = verbose
        # Add common suffixes to the regex below to increase the likelihood of a match -
        # don't add suffixes which are also complete words, such as 'able' or 'sex'
-        self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$", re.IGNORECASE)
+        # only remove if it's not already the point of hyphenation
+        self.suffix_string = "((ed)?ly|'?e?s||a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$"
+        self.suffixes = re.compile(r"^%s" % self.suffix_string, re.IGNORECASE)
+        self.removesuffixes = re.compile(r"%s" % self.suffix_string, re.IGNORECASE)
        # remove prefixes if the prefix was not already the point of hyphenation
-        self.prefixes = re.compile(r'^(dis|re|un|in|ex)$', re.IGNORECASE)
-        self.removeprefix = re.compile(r'^(dis|re|un|in|ex)', re.IGNORECASE)
+        self.prefix_string = '^(dis|re|un|in|ex)'
+        self.prefixes = re.compile(r'%s$' % self.prefix_string, re.IGNORECASE)
+        self.removeprefix = re.compile(r'%s' % self.prefix_string, re.IGNORECASE)

    def dehyphenate(self, match):
        firsthalf = match.group('firstpart')
@ -191,31 +197,44 @@ class Dehyphenator(object):
            wraptags = ''
        hyphenated = unicode(firsthalf) + "-" + unicode(secondhalf)
        dehyphenated = unicode(firsthalf) + unicode(secondhalf)
-        lookupword = self.removesuffixes.sub('', dehyphenated)
-        if self.prefixes.match(firsthalf) is None:
+        if self.suffixes.match(secondhalf) is None:
+            lookupword = self.removesuffixes.sub('', dehyphenated)
+        else:
+            lookupword = dehyphenated
+        if len(firsthalf) > 3 and self.prefixes.match(firsthalf) is None:
            lookupword = self.removeprefix.sub('', lookupword)
-        #print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
+        if self.verbose > 2:
+            self.log("lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated))
        try:
            searchresult = self.html.find(lookupword.lower())
        except:
            return hyphenated
        if self.format == 'html_cleanup' or self.format == 'txt_cleanup':
            if self.html.find(lookupword) != -1 or searchresult != -1:
-                #print "Cleanup:returned dehyphenated word: " + str(dehyphenated)
+                if self.verbose > 2:
+                    self.log("    Cleanup:returned dehyphenated word: " + str(dehyphenated))
                return dehyphenated
            elif self.html.find(hyphenated) != -1:
-                #print "Cleanup:returned hyphenated word: " + str(hyphenated)
+                if self.verbose > 2:
+                    self.log("        Cleanup:returned hyphenated word: " + str(hyphenated))
                return hyphenated
            else:
-                #print "Cleanup:returning original text "+str(firsthalf)+" + linefeed "+str(secondhalf)
+                if self.verbose > 2:
+                    self.log("            Cleanup:returning original text "+str(firsthalf)+" + linefeed "+str(secondhalf))
                return firsthalf+u'\u2014'+wraptags+secondhalf

        else:
+            if len(firsthalf) <= 2 and len(secondhalf) <= 2:
+                if self.verbose > 2:
+                    self.log("too short, returned hyphenated word: " + str(hyphenated))
+                return hyphenated
            if self.html.find(lookupword) != -1 or searchresult != -1:
-                #print "returned dehyphenated word: " + str(dehyphenated)
+                if self.verbose > 2:
+                    self.log("     returned dehyphenated word: " + str(dehyphenated))
                return dehyphenated
            else:
-                #print "           returned hyphenated word: " + str(hyphenated)
+                if self.verbose > 2:
+                    self.log("          returned hyphenated word: " + str(hyphenated))
                return hyphenated

    def __call__(self, html, format, length=1):
@ -228,7 +247,7 @@ class Dehyphenator(object):
        elif format == 'txt':
            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)(\u0020|\u0009)*(?P<wraptags>(\n(\u0020|\u0009)*)+)(?P<secondpart>[\w\d]+)'% length)
        elif format == 'individual_words':
-            intextmatch = re.compile(u'>[^<]*\b(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)"\s>]+)(-|‐)\u0020*(?P<secondpart>\w+)\b[^<]*<') # for later, not called anywhere yet
+            intextmatch = re.compile(u'(?!<)(?P<firstpart>\w+)(-|‐)\s*(?P<secondpart>\w+)(?![^<]*?>)')
        elif format == 'html_cleanup':
            intextmatch = re.compile(u'(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)')
        elif format == 'txt_cleanup':
@ -360,7 +379,7 @@ class HTMLPreProcessor(object):
                  (re.compile(r'((?<=</a>)\s*file:////?[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''),

                  # Center separator lines
-                  (re.compile(u'<br>\s*(?P<break>([*#•✦]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'),
+                  (re.compile(u'<br>\s*(?P<break>([*#•✦=]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'),

                  # Remove page links
                  (re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),
@ -397,10 +416,8 @@ class HTMLPreProcessor(object):
                     (re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
                      lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
                     ]
-    def __init__(self, input_plugin_preprocess, plugin_preprocess,
-            extra_opts=None):
-        self.input_plugin_preprocess = input_plugin_preprocess
-        self.plugin_preprocess = plugin_preprocess
+    def __init__(self, log=None, extra_opts=None):
+        self.log = log
        self.extra_opts = extra_opts

    def is_baen(self, src):
@ -436,27 +453,19 @@ class HTMLPreProcessor(object):
        if not getattr(self.extra_opts, 'keep_ligatures', False):
            html = _ligpat.sub(lambda m:LIGATURES[m.group()], html)

+        for search, replace in [['sr3_search', 'sr3_replace'], ['sr2_search', 'sr2_replace'], ['sr1_search', 'sr1_replace']]:
+            search_pattern = getattr(self.extra_opts, search, '')
+            if search_pattern:
+                try:
+                    search_re = re.compile(search_pattern)
+                    # An unset (None) replacement means the match is simply deleted
+                    replace_txt = getattr(self.extra_opts, replace, '') or ''
+                    rules.insert(0, (search_re, replace_txt))
+                except Exception as e:
+                    if self.log is not None:  # log defaults to None in __init__
+                        self.log.error('Failed to parse %s regexp because %s' % (search, e))
+
        end_rules = []
-        if getattr(self.extra_opts, 'remove_header', None):
-            try:
-                rules.insert(0,
-                    (re.compile(self.extra_opts.header_regex), lambda match : '')
-                )
-            except:
-                import traceback
-                print 'Failed to parse remove_header regexp'
-                traceback.print_exc()
-
-        if getattr(self.extra_opts, 'remove_footer', None):
-            try:
-                rules.insert(0,
-                    (re.compile(self.extra_opts.footer_regex), lambda match : '')
-                )
-            except:
-                import traceback
-                print 'Failed to parse remove_footer regexp'
-                traceback.print_exc()
-
        # delete soft hyphens - moved here so it's executed after header/footer removal
        if is_pdftohtml:
            # unwrap/delete soft hyphens
@ -464,12 +473,6 @@ class HTMLPreProcessor(object):
            # unwrap/delete soft hyphens with formatting
            end_rules.append((re.compile(u'[]\s*(</(i|u|b)>)+(</p>\s*<p>\s*)+\s*(<(i|u|b)>)+\s*(?=[[a-z\d])'), lambda match: ''))

-        # Make the more aggressive chapter marking regex optional with the preprocess option to
-        # reduce false positives and move after header/footer removal
-        if getattr(self.extra_opts, 'preprocess_html', None):
-            if is_pdftohtml:
-                end_rules.append((re.compile(r'<p>\s*(?P<chap>(<[ibu]>){0,2}\s*([A-Z \'"!]{3,})\s*([\dA-Z:]+\s){0,4}\s*(</[ibu]>){0,2})\s*<p>\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<p>)?'), chap_head),)
-
        length = -1
        if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01:
            docanalysis = DocAnalysis('pdf', html)
@ -480,7 +483,7 @@ class HTMLPreProcessor(object):
                end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*<p>\s*(?=[[a-z\d])' % length), lambda match: ''))
                end_rules.append(
                    # Un wrap using punctuation
-                    (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðß,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
+                    (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðßě,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
                )

        for rule in self.PREPROCESS + start_rules:
@ -512,15 +515,14 @@ class HTMLPreProcessor(object):

        if is_pdftohtml and length > -1:
            # Dehyphenate
-            dehyphenator = Dehyphenator()
+            dehyphenator = Dehyphenator(getattr(self.extra_opts, 'verbose', 0), self.log)
            html = dehyphenator(html,'html', length)

        if is_pdftohtml:
-            from calibre.ebooks.conversion.utils import PreProcessor
-            pdf_markup = PreProcessor(self.extra_opts, None)
+            from calibre.ebooks.conversion.utils import HeuristicProcessor
+            pdf_markup = HeuristicProcessor(self.extra_opts, None)
-            totalwords = 0
-            totalwords = pdf_markup.get_word_count(html)
-            if totalwords > 7000:
+            totalwords = pdf_markup.get_word_count(html)  # markup_chapters() sizes min_chapters from this
+            if totalwords > 7000:
                html = pdf_markup.markup_chapters(html, totalwords, True)

        #dump(html, 'post-preprocess')
@ -540,8 +542,10 @@ class HTMLPreProcessor(object):
            unidecoder = Unidecoder()
            html = unidecoder.decode(html)

-        if self.plugin_preprocess:
-            html = self.input_plugin_preprocess(self.extra_opts, html)
+        if getattr(self.extra_opts, 'enable_heuristics', False):
+            from calibre.ebooks.conversion.utils import HeuristicProcessor
+            preprocessor = HeuristicProcessor(self.extra_opts, self.log)
+            html = preprocessor(html)

        if getattr(self.extra_opts, 'smarten_punctuation', False):
            html = self.smarten_punctuation(html)
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@ -11,13 +11,22 @@ from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
 from calibre.utils.logging import default_log
 from calibre.utils.wordcount import get_wordcount_obj

-class PreProcessor(object):
+class HeuristicProcessor(object):

    def __init__(self, extra_opts=None, log=None):
        self.log = default_log if log is None else log
        self.html_preprocess_sections = 0
        self.found_indents = 0
        self.extra_opts = extra_opts
+        self.deleted_nbsps = False
+        self.totalwords = 0
+        self.min_chapters = 1
+        self.chapters_no_title = 0
+        self.chapters_with_title = 0
+        self.blanks_deleted = False
+        self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
+        self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sid=\"softbreak\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
+        self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}', re.IGNORECASE)

    def is_pdftohtml(self, src):
        return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
@ -27,12 +36,12 @@ class PreProcessor(object):
        title = match.group('title')
        if not title:
            self.html_preprocess_sections = self.html_preprocess_sections + 1
-            self.log("marked " + unicode(self.html_preprocess_sections) +
+            self.log.debug("marked " + unicode(self.html_preprocess_sections) +
                    " chapters. - " + unicode(chap))
            return '<h2>'+chap+'</h2>\n'
        else:
            self.html_preprocess_sections = self.html_preprocess_sections + 1
-            self.log("marked " + unicode(self.html_preprocess_sections) +
+            self.log.debug("marked " + unicode(self.html_preprocess_sections) +
                    " chapters & titles. - " + unicode(chap) + ", " + unicode(title))
            return '<h2>'+chap+'</h2>\n<h3>'+title+'</h3>\n'

@ -40,10 +49,18 @@ class PreProcessor(object):
        chap = match.group('section')
        styles = match.group('styles')
        self.html_preprocess_sections = self.html_preprocess_sections + 1
-        self.log("marked " + unicode(self.html_preprocess_sections) +
+        self.log.debug("marked " + unicode(self.html_preprocess_sections) +
                " section markers based on punctuation. - " + unicode(chap))
        return '<'+styles+' style="page-break-before:always">'+chap

+    def analyze_title_matches(self, match):
+        chap = match.group('chap')
+        title = match.group('title')
+        if not title:
+            self.chapters_no_title = self.chapters_no_title + 1
+        else:
+            self.chapters_with_title = self.chapters_with_title + 1
+
    def insert_indent(self, match):
        pstyle = match.group('formatting')
        span = match.group('span')
@ -75,8 +92,8 @@ class PreProcessor(object):
        line_end = line_end_ere.findall(raw)
        tot_htm_ends = len(htm_end)
        tot_ln_fds = len(line_end)
-        self.log("There are " + unicode(tot_ln_fds) + " total Line feeds, and " +
-                unicode(tot_htm_ends) + " marked up endings")
+        #self.log.debug("There are " + unicode(tot_ln_fds) + " total Line feeds, and " +
+        #        unicode(tot_htm_ends) + " marked up endings")

        if percent > 1:
            percent = 1
@ -84,7 +101,7 @@ class PreProcessor(object):
            percent = 0

        min_lns = tot_ln_fds * percent
-        self.log("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup")
+        #self.log.debug("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup")
        if min_lns > tot_htm_ends:
            return True

@ -112,16 +129,55 @@ class PreProcessor(object):
        wordcount = get_wordcount_obj(word_count_text)
        return wordcount.words

+    def markup_italicis(self, html):
+        '''Italicize common abbreviations and text wrapped in plain-text emphasis markers.'''
+        ITALICIZE_WORDS = [
+            'Etc.', 'etc.', 'viz.', 'ie.', 'i.e.', 'Ie.', 'I.e.', 'eg.',
+            'e.g.', 'Eg.', 'E.g.', 'et al.', 'et cetera', 'n.b.', 'N.b.',
+            'nota bene', 'Nota bene', 'Ste.', 'Mme.', 'Mdme.',
+            'Mlle.', 'Mons.', 'PS.', 'PPS.',
+        ]
+        ITALICIZE_STYLE_PATS = [
+            r'(?msu)(?<=\s)_(?P<words>\S[^_]{0,40}?\S)?_(?=\s)',
+            r'(?msu)(?<=\s)/(?P<words>\S[^/]{0,40}?\S)?/(?=\s)',
+            r'(?msu)(?<=\s)~~(?P<words>\S[^~]{0,40}?\S)?~~(?=\s)',
+            r'(?msu)(?<=\s)\*(?P<words>\S[^\*]{0,40}?\S)?\*(?=\s)',
+            r'(?msu)(?<=\s)~(?P<words>\S[^~]{0,40}?\S)?~(?=\s)',
+            r'(?msu)(?<=\s)_/(?P<words>\S[^/_]{0,40}?\S)?/_(?=\s)',
+            r'(?msu)(?<=\s)_\*(?P<words>\S[^\*_]{0,40}?\S)?\*_(?=\s)',
+            r'(?msu)(?<=\s)\*/(?P<words>\S[^/\*]{0,40}?\S)?/\*(?=\s)',
+            r'(?msu)(?<=\s)_\*/(?P<words>\S[^\*_]{0,40}?\S)?/\*_(?=\s)',
+            r'(?msu)(?<=\s)/:(?P<words>\S[^:/]{0,40}?\S)?:/(?=\s)',
+            r'(?msu)(?<=\s)\|:(?P<words>\S[^:\|]{0,40}?\S)?:\|(?=\s)',
+        ]
+        # Match whole words only, so that e.g. 'ie.' does not fire inside 'movie.' or 'PS.' inside 'PPS.'
+        for word in ITALICIZE_WORDS:
+            html = re.sub(r'(?u)(?<!\w)' + re.escape(word) + r'(?!\w)', '<i>%s</i>' % word, html)
+        # 'words' is optional in every pattern; keep the original text when it is empty
+        for pat in ITALICIZE_STYLE_PATS:
+            html = re.sub(pat, lambda mo: '<i>%s</i>' % mo.group('words') if mo.group('words') else mo.group(0), html)
+
+        return html
+
    def markup_chapters(self, html, wordcount, blanks_between_paragraphs):
+        '''
+        Searches for common chapter headings throughout the document
+        attempts multiple patterns based on likelihood of a match
+        with minimum false positives.  Exits after finding a successful pattern
+        '''
        # Typical chapters are between 2000 and 7000 words, use the larger number to decide the
-        # minimum of chapters to search for
-        self.min_chapters = 1
+        # minimum of chapters to search for.  A max limit is calculated to prevent things like OCR
+        # or pdf page numbers from being treated as TOC markers
+        max_chapters = 150
+        typical_chapters = 7000.
        if wordcount > 7000:
-            self.min_chapters = int(ceil(wordcount / 7000.))
-        #print "minimum chapters required are: "+str(self.min_chapters)
+            if wordcount > 200000:
+                typical_chapters = 15000.
+            self.min_chapters = int(ceil(wordcount / typical_chapters))
+        self.log.debug("minimum chapters required are: "+str(self.min_chapters))
        heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
        self.html_preprocess_sections = len(heading.findall(html))
-        self.log("found " + unicode(self.html_preprocess_sections) + " pre-existing headings")
+        self.log.debug("found " + unicode(self.html_preprocess_sections) + " pre-existing headings")

        # Build the Regular Expressions in pieces
        init_lookahead = "(?=<(p|div))"
@ -151,103 +207,160 @@ class PreProcessor(object):
        n_lookahead_open = "\s+(?!"
        n_lookahead_close = ")"

-        default_title = r"(<[ibu][^>]*>)?\s{0,3}([\w\'\"-]+\s{0,3}){1,5}?(</[ibu][^>]*>)?(?=<)"
+        default_title = r"(<[ibu][^>]*>)?\s{0,3}(?!Chapter)([\w\:\'’\"-]+\s{0,3}){1,5}?(</[ibu][^>]*>)?(?=<)"
+        simple_title = r"(<[ibu][^>]*>)?\s{0,3}(?!(Chapter|\s+<)).{0,65}?(</[ibu][^>]*>)?(?=<)"
+
+        analysis_result = []

        chapter_types = [
-            [r"[^'\"]?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication|Preface)\s*([\d\w-]+\:?\'?\s*){0,5}", True, "Searching for common Chapter Headings"],
-            [r"<b[^>]*>\s*(<span[^>]*>)?\s*(?!([*#•]+\s*)+)(\s*(?=[\d.\w#\-*\s]+<)([\d.\w#-*]+\s*){1,5}\s*)(?!\.)(</span>)?\s*</b>", True, "Searching for emphasized lines"], # Emphasized lines
-            [r"[^'\"]?(\d+(\.|:)|CHAPTER)\s*([\dA-Z\-\'\"#,]+\s*){0,7}\s*", True, "Searching for numeric chapter headings"],  # Numeric Chapters
-            [r"([A-Z]\s+){3,}\s*([\d\w-]+\s*){0,3}\s*", True, "Searching for letter spaced headings"],  # Spaced Lettering
-            [r"[^'\"]?(\d+\.?\s+([\d\w-]+\:?\'?-?\s?){0,5})\s*", True, "Searching for numeric chapters with titles"], # Numeric Titles
-            [r"[^'\"]?(\d+|CHAPTER)\s*([\dA-Z\-\'\"\?!#,]+\s*){0,7}\s*", True, "Searching for simple numeric chapter headings"],  # Numeric Chapters, no dot or colon
-            [r"\s*[^'\"]?([A-Z#]+(\s|-){0,3}){1,5}\s*", False, "Searching for chapters with Uppercase Characters" ] # Uppercase Chapters
+            [r"[^'\"]?(Introduction|Synopsis|Acknowledgements|Epilogue|CHAPTER|Kapitel|Volume\b|Prologue|Book\b|Part\b|Dedication|Preface)\s*([\d\w-]+\:?\'?\s*){0,5}", True, True, True, False, "Searching for common section headings", 'common'],
+            [r"[^'\"]?(CHAPTER|Kapitel)\s*([\dA-Z\-\'\"\?!#,]+\s*){0,7}\s*", True, True, True, False, "Searching for most common chapter headings", 'chapter'],  # Highest frequency headings which include titles
+            [r"<b[^>]*>\s*(<span[^>]*>)?\s*(?!([*#•=]+\s*)+)(\s*(?=[\d.\w#\-*\s]+<)([\d.\w#-*]+\s*){1,5}\s*)(?!\.)(</span>)?\s*</b>", True, True, True, False, "Searching for emphasized lines", 'emphasized'], # Emphasized lines
+            [r"[^'\"]?(\d+(\.|:))\s*([\dA-Z\-\'\"#,]+\s*){0,7}\s*", True, True, True, False, "Searching for numeric chapter headings", 'numeric'],  # Numeric Chapters
+            [r"([A-Z]\s+){3,}\s*([\d\w-]+\s*){0,3}\s*", True, True, True, False, "Searching for letter spaced headings", 'letter_spaced'],  # Spaced Lettering
+            [r"[^'\"]?(\d+\.?\s+([\d\w-]+\:?\'?-?\s?){0,5})\s*", True, True, True, False, "Searching for numeric chapters with titles", 'numeric_title'], # Numeric Titles
+            [r"[^'\"]?(\d+)\s*([\dA-Z\-\'\"\?!#,]+\s*){0,7}\s*", True, True, True, False, "Searching for simple numeric headings", 'plain_number'],  # Numeric Chapters, no dot or colon
+            [r"\s*[^'\"]?([A-Z#]+(\s|-){0,3}){1,5}\s*", False, True, False, False, "Searching for chapters with Uppercase Characters", 'uppercase' ] # Uppercase Chapters
            ]

-        # Start with most typical chapter headings, get more aggressive until one works
-        for [chapter_type, lookahead_ignorecase, log_message] in chapter_types:
-            if self.html_preprocess_sections >= self.min_chapters:
-                break
-            full_chapter_line = chapter_line_open+chapter_header_open+chapter_type+chapter_header_close+chapter_line_close
-            n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line)
-            self.log("Marked " + unicode(self.html_preprocess_sections) + " headings, " + log_message)
-            if lookahead_ignorecase:
-                chapter_marker = init_lookahead+full_chapter_line+blank_lines+n_lookahead_open+n_lookahead+n_lookahead_close+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
-                chapdetect = re.compile(r'%s' % chapter_marker, re.IGNORECASE)
-            else:
-                chapter_marker = init_lookahead+full_chapter_line+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close+n_lookahead_open+n_lookahead+n_lookahead_close
-                chapdetect = re.compile(r'%s' % chapter_marker, re.UNICODE)
-            html = chapdetect.sub(self.chapter_head, html)
+        def recurse_patterns(html, analyze):
+            # Start with most typical chapter headings, get more aggressive until one works
+            for [chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name] in chapter_types:
+                n_lookahead = ''
+                hits = 0
+                self.chapters_no_title = 0
+                self.chapters_with_title = 0
+
+                if n_lookahead_req:
+                    lp_n_lookahead_open = n_lookahead_open
+                    lp_n_lookahead_close = n_lookahead_close
+                else:
+                    lp_n_lookahead_open = ''
+                    lp_n_lookahead_close = ''
+
+                if strict_title:
+                    lp_title = default_title
+                else:
+                    lp_title = simple_title
+                 
+                if ignorecase:
+                    arg_ignorecase = r'(?i)'
+                else:
+                    arg_ignorecase = ''
+
+                if title_req:
+                    lp_opt_title_open = ''
+                    lp_opt_title_close = ''        
+                else:
+                    lp_opt_title_open = opt_title_open
+                    lp_opt_title_close = opt_title_close
+
+                if self.html_preprocess_sections >= self.min_chapters:
+                    break
+                full_chapter_line = chapter_line_open+chapter_header_open+chapter_type+chapter_header_close+chapter_line_close
+                if n_lookahead_req:
+                    n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line)
+                if not analyze:
+                    self.log.debug("Marked " + unicode(self.html_preprocess_sections) + " headings, " + log_message)
+
+                chapter_marker = arg_ignorecase+init_lookahead+full_chapter_line+blank_lines+lp_n_lookahead_open+n_lookahead+lp_n_lookahead_close+lp_opt_title_open+title_line_open+title_header_open+lp_title+title_header_close+title_line_close+lp_opt_title_close
+                chapdetect = re.compile(r'%s' % chapter_marker)
+
+                if analyze:
+                    hits = len(chapdetect.findall(html))
+                    if hits:
+                        chapdetect.sub(self.analyze_title_matches, html)
+                        if float(self.chapters_with_title) / float(hits) > .5:
+                            title_req = True
+                            strict_title = False
+                        self.log.debug(unicode(type_name)+" had "+unicode(hits)+" hits - "+unicode(self.chapters_no_title)+" chapters with no title, "+unicode(self.chapters_with_title)+" chapters with titles, "+unicode(float(self.chapters_with_title) / float(hits))+" percent. ")
+                        if type_name == 'common':
+                            analysis_result.append([chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name])
+                        elif self.min_chapters <= hits < max_chapters:
+                            analysis_result.append([chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name])
+                            break
+                else:
+                    html = chapdetect.sub(self.chapter_head, html)
+            return html
+
+        recurse_patterns(html, True)
+        chapter_types = analysis_result
+        html = recurse_patterns(html, False)

        words_per_chptr = wordcount
        if words_per_chptr > 0 and self.html_preprocess_sections > 0:
            words_per_chptr = wordcount / self.html_preprocess_sections
-        self.log("Total wordcount is: "+ str(wordcount)+", Average words per section is: "+str(words_per_chptr)+", Marked up "+str(self.html_preprocess_sections)+" chapters")
+        self.log.debug("Total wordcount is: "+ str(wordcount)+", Average words per section is: "+str(words_per_chptr)+", Marked up "+str(self.html_preprocess_sections)+" chapters")
        return html

    def punctuation_unwrap(self, length, content, format):
+        '''
+        Unwraps lines based on line length and punctuation
+        supports a range of html markup and text files
+        '''
        # define the pieces of the regex
-        lookahead = "(?<=.{"+str(length)+"}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðß,:)\IA\u00DF]|(?<!\&\w{4});))" # (?<!\&\w{4});) is a semicolon not part of an entity
-        line_ending = "\s*</(span|p|div)>\s*(</(p|span|div)>)?"
+        lookahead = "(?<=.{"+str(length)+u"}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðßě,:)\IA\u00DF]|(?<!\&\w{4});))" # (?<!\&\w{4});) is a semicolon not part of an entity
+        em_en_lookahead = "(?<=.{"+str(length)+u"}[\u2013\u2014])"
+        soft_hyphen = u"\xad"
+        line_ending = "\s*</(span|[iubp]|div)>\s*(</(span|[iubp]|div)>)?"
        blanklines = "\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*"
-        line_opening = "<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*"
+        line_opening = "<(span|[iubp]|div)[^>]*>\s*(<(span|[iubp]|div)[^>]*>)?\s*"
        txt_line_wrap = u"((\u0020|\u0009)*\n){1,4}"

        unwrap_regex = lookahead+line_ending+blanklines+line_opening
+        em_en_unwrap_regex = em_en_lookahead+line_ending+blanklines+line_opening
+        shy_unwrap_regex = soft_hyphen+line_ending+blanklines+line_opening
+
        if format == 'txt':
            unwrap_regex = lookahead+txt_line_wrap
+            em_en_unwrap_regex = em_en_lookahead+txt_line_wrap
+            shy_unwrap_regex = soft_hyphen+txt_line_wrap

        unwrap = re.compile(u"%s" % unwrap_regex, re.UNICODE)
+        em_en_unwrap = re.compile(u"%s" % em_en_unwrap_regex, re.UNICODE)
+        shy_unwrap = re.compile(u"%s" % shy_unwrap_regex, re.UNICODE)
+
        content = unwrap.sub(' ', content)
+        content = em_en_unwrap.sub('', content)
+        content = shy_unwrap.sub('', content)
        return content

+    def txt_process(self, match):
+        from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
+        separate_paragraphs_single_line
+        content = match.group('text')
+        content = separate_paragraphs_single_line(content)
+        content = preserve_spaces(content)
+        content = convert_basic(content, epub_split_size_kb=0)
+        return content

-    def __call__(self, html):
-        self.log("*********  Preprocessing HTML  *********")
+    def markup_pre(self, html):
+        pre = re.compile(r'<pre>', re.IGNORECASE)
+        if len(pre.findall(html)) >= 1:
+            self.log.debug("Running Text Processing")
+            outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*?)</pre>', re.IGNORECASE|re.DOTALL)
+            html = outerhtml.sub(self.txt_process, html)
+        else:
+            # Add markup naively
+            # TODO - find out if there are cases where there are more than one <pre> tag or
+            # other types of unmarked html and handle them in some better fashion
+            add_markup = re.compile('(?<!>)(\n)')
+            html = add_markup.sub('</p>\n<p>', html)
+        return html

-        # Count the words in the document to estimate how many chapters to look for and whether
-        # other types of processing are attempted
-        totalwords = 0
-        totalwords = self.get_word_count(html)
-
-        if totalwords < 50:
-            self.log("not enough text, not preprocessing")
-            return html
-
-        # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
+    def arrange_htm_line_endings(self, html):
        html = re.sub(r"\s*</(?P<tag>p|div)>", "</"+"\g<tag>"+">\n", html)
        html = re.sub(r"\s*<(?P<tag>p|div)(?P<style>[^>]*)>\s*", "\n<"+"\g<tag>"+"\g<style>"+">", html)
+        return html

-        ###### Check Markup ######
-        #
-        # some lit files don't have any <p> tags or equivalent (generally just plain text between
-        # <pre> tags), check and  mark up line endings if required before proceeding
-        if self.no_markup(html, 0.1):
-            self.log("not enough paragraph markers, adding now")
-            # check if content is in pre tags, use txt processor to mark up if so
-            pre = re.compile(r'<pre>', re.IGNORECASE)
-            if len(pre.findall(html)) == 1:
-                self.log("Running Text Processing")
-                from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
-                separate_paragraphs_single_line
-                outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*)(?=</pre>).*', re.IGNORECASE|re.DOTALL)
-                html = outerhtml.sub('\g<text>', html)
-                html = separate_paragraphs_single_line(html)
-                html = preserve_spaces(html)
-                html = convert_basic(html, epub_split_size_kb=0)
-            else:
-                # Add markup naively
-                # TODO - find out if there are cases where there are more than one <pre> tag or
-                # other types of unmarked html and handle them in some better fashion
-                add_markup = re.compile('(?<!>)(\n)')
-                html = add_markup.sub('</p>\n<p>', html)
-
-        ###### Mark Indents/Cleanup ######
-        #
-        # Replace series of non-breaking spaces with text-indent
+    def fix_nbsp_indents(self, html):
        txtindent = re.compile(ur'<p(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}', re.IGNORECASE)
        html = txtindent.sub(self.insert_indent, html)
        if self.found_indents > 1:
-            self.log("replaced "+unicode(self.found_indents)+ " nbsp indents with inline styles")
+            self.log.debug("replaced "+unicode(self.found_indents)+ " nbsp indents with inline styles")
+        return html
+
+    def cleanup_markup(self, html):
        # remove remaining non-breaking spaces
        html = re.sub(ur'\u00a0', ' ', html)
        # Get rid of various common microsoft specific tags which can cause issues later
@ -255,108 +368,166 @@ class PreProcessor(object):
        html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
        # Delete microsoft 'smart' tags
        html = re.sub('(?i)</?st1:\w+>', '', html)
-        # Get rid of empty span, bold, & italics tags
+        # Get rid of empty span, bold, font, em, & italics tags
        html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
-        html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html)
+        html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html)
        html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
-        # ADE doesn't render <br />, change to empty paragraphs
-        #html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html)
+        html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html)
+        self.deleted_nbsps = True
+        return html

-        # If more than 40% of the lines are empty paragraphs and the user has enabled remove
-        # paragraph spacing then delete blank lines to clean up spacing
-        linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
-        blankreg = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
-        #multi_blank = re.compile(r'(\s*<p[^>]*>\s*(<(b|i|u)>)?\s*(</(b|i|u)>)?\s*</p>){2,}', re.IGNORECASE)
-        blanklines = blankreg.findall(html)
-        lines = linereg.findall(html)
-        blanks_between_paragraphs = False
-        if len(lines) > 1:
-            self.log("There are " + unicode(len(blanklines)) + " blank lines. " +
-                    unicode(float(len(blanklines)) / float(len(lines))) + " percent blank")
-            if float(len(blanklines)) / float(len(lines)) > 0.40 and getattr(self.extra_opts,
-            'remove_paragraph_spacing', False):
-                self.log("deleting blank lines")
-                html = blankreg.sub('', html)
-            elif float(len(blanklines)) / float(len(lines)) > 0.40:
-                blanks_between_paragraphs = True
-                #print "blanks between paragraphs is marked True"
-            else:
-                blanks_between_paragraphs = False
-
-        #self.dump(html, 'before_chapter_markup')
-        # detect chapters/sections to match xpath or splitting logic
-        #
-
-        html = self.markup_chapters(html, totalwords, blanks_between_paragraphs)
-
-
-        ###### Unwrap lines ######
-        #
-        # Some OCR sourced files have line breaks in the html using a combination of span & p tags
-        # span are used for hard line breaks, p for new paragraphs.  Determine which is used so
-        # that lines can be un-wrapped across page boundaries
+    def analyze_line_endings(self, html):
+        '''
+        determines the type of html line ending used most commonly in a document
+        use before calling docanalysis functions
+        '''
        paras_reg = re.compile('<p[^>]*>', re.IGNORECASE)
        spans_reg = re.compile('<span[^>]*>', re.IGNORECASE)
        paras = len(paras_reg.findall(html))
        spans = len(spans_reg.findall(html))
        if spans > 1:
            if float(paras) / float(spans) < 0.75:
-                format = 'spanned_html'
+                return 'spanned_html'
            else:
-                format = 'html'
+                return 'html'
        else:
-            format = 'html'
+            return 'html'
+
+    def analyze_blanks(self, html):
+        blanklines = self.blankreg.findall(html)
+        lines = self.linereg.findall(html)
+        if len(lines) > 1:
+            self.log.debug("There are " + unicode(len(blanklines)) + " blank lines. " +
+                    unicode(float(len(blanklines)) / float(len(lines))) + " percent blank")
+                    
+            if float(len(blanklines)) / float(len(lines)) > 0.40:
+                return True
+            else:
+                return False
+
+    def cleanup_required(self):
+        for option in ['unwrap_lines', 'markup_chapter_headings', 'format_scene_breaks', 'delete_blank_paragraphs']:
+            if getattr(self.extra_opts, option, False):
+                return True
+        return False
+
+
+    def __call__(self, html):
+        self.log.debug("*********  Heuristic processing HTML  *********")
+
+        # Count the words in the document to estimate how many chapters to look for and whether
+        # other types of processing are attempted
+        try:
+            self.totalwords = self.get_word_count(html)
+        except:
+            self.log.warn("Can't get wordcount")
+
+        if self.totalwords < 50:
+            self.log.warn("flow is too short, not running heuristics")
+            return html
+
+        # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
+        html = self.arrange_htm_line_endings(html)
+
+        if self.cleanup_required():
+            ###### Check Markup ######
+            #
+            # some lit files don't have any <p> tags or equivalent (generally just plain text between
+            # <pre> tags), check and  mark up line endings if required before proceeding
+            # fix indents must run after this step
+            if self.no_markup(html, 0.1):
+                self.log.debug("not enough paragraph markers, adding now")
+                # markup using text processing
+                html = self.markup_pre(html)
+
+        # Replace series of non-breaking spaces with text-indent
+        if getattr(self.extra_opts, 'fix_indents', False):
+            html = self.fix_nbsp_indents(html)
+
+        if self.cleanup_required():
+            # fix indents must run before this step, as it removes non-breaking spaces
+            html = self.cleanup_markup(html)
+
+        # ADE doesn't render <br />, change to empty paragraphs
+        #html = re.sub('<br[^>]*>', u'<p>\u00a0</p>', html)
+
+        # Determine whether the document uses interleaved blank lines
+        blanks_between_paragraphs = self.analyze_blanks(html)
+
+        #self.dump(html, 'before_chapter_markup')
+        # detect chapters/sections to match xpath or splitting logic
+
+        if getattr(self.extra_opts, 'markup_chapter_headings', False):
+            html = self.markup_chapters(html, self.totalwords, blanks_between_paragraphs)
+
+        if getattr(self.extra_opts, 'italicize_common_cases', False): 
+            html = self.markup_italicis(html)
+
+        # If more than 40% of the lines are empty paragraphs and the user has enabled delete
+        # blank paragraphs then delete blank lines to clean up spacing
+        if blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
+            self.log.debug("deleting blank lines")
+            self.blanks_deleted = True
+            html = self.multi_blank.sub('\n<p id="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
+            html = self.blankreg.sub('', html)
+
+        # Determine line ending type
+        # Some OCR sourced files have line breaks in the html using a combination of span & p tags
+        # span are used for hard line breaks, p for new paragraphs.  Determine which is used so
+        # that lines can be un-wrapped across page boundaries
+        format = self.analyze_line_endings(html)
+
        # Check Line histogram to determine if the document uses hard line breaks, If 50% or
        # more of the lines break in the same region of the document then unwrapping is required
        docanalysis = DocAnalysis(format, html)
        hardbreaks = docanalysis.line_histogram(.50)
-        self.log("Hard line breaks check returned "+unicode(hardbreaks))
+        self.log.debug("Hard line breaks check returned "+unicode(hardbreaks))
+
        # Calculate Length
        unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
        length = docanalysis.line_length(unwrap_factor)
-        self.log("Median line length is " + unicode(length) + ", calculated with " + format + " format")
-        # only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor
-        if hardbreaks or unwrap_factor < 0.4:
-            self.log("Unwrapping required, unwrapping Lines")
-            # Unwrap em/en dashes
-            html = re.sub(u'(?<=.{%i}[\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])' % length, '', html)
-            # Dehyphenate
-            self.log("Unwrapping/Removing hyphens")
-            dehyphenator = Dehyphenator()
-            html = dehyphenator(html,'html', length)
-            self.log("Done dehyphenating")
-            # Unwrap lines using punctation and line length
-            #unwrap_quotes = re.compile(u"(?<=.{%i}\"')\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*(?=[a-z])" % length, re.UNICODE)
-            html = self.punctuation_unwrap(length, html, 'html')
-            #check any remaining hyphens, but only unwrap if there is a match
-            dehyphenator = Dehyphenator()
-            html = dehyphenator(html,'html_cleanup', length)
-        else:
-            # dehyphenate in cleanup mode to fix anything previous conversions/editing missed
-            self.log("Cleaning up hyphenation")
-            dehyphenator = Dehyphenator()
-            html = dehyphenator(html,'html_cleanup', length)
-            self.log("Done dehyphenating")
+        self.log.debug("Median line length is " + unicode(length) + ", calculated with " + format + " format")
+            
+        ###### Unwrap lines ######
+        if getattr(self.extra_opts, 'unwrap_lines', False):
+            # only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor
+            if hardbreaks or unwrap_factor < 0.4:
+                self.log.debug("Unwrapping required, unwrapping Lines")
+                # Dehyphenate with line length limiters
+                dehyphenator = Dehyphenator(self.extra_opts.verbose, self.log)
+                html = dehyphenator(html,'html', length)
+                html = self.punctuation_unwrap(length, html, 'html')

-        # delete soft hyphens
-        html = re.sub(u'\xad\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html)
+        if getattr(self.extra_opts, 'dehyphenate', False):
+            # dehyphenate in cleanup mode to fix anything previous conversions/editing missed
+            self.log.debug("Fixing hyphenated content")
+            dehyphenator = Dehyphenator(self.extra_opts.verbose, self.log)
+            html = dehyphenator(html,'html_cleanup', length)
+            html = dehyphenator(html, 'individual_words', length)

        # If still no sections after unwrapping mark split points on lines with no punctuation
-        if self.html_preprocess_sections < self.min_chapters:
-            self.log("Looking for more split points based on punctuation,"
+        if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False):
+            self.log.debug("Looking for more split points based on punctuation,"
                    " currently have " + unicode(self.html_preprocess_sections))
            chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([*#•]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
            html = chapdetect3.sub(self.chapter_break, html)
-        # search for places where a first or second level heading is immediately followed by another
-        # top level heading.  demote the second heading to h3 to prevent splitting between chapter
-        # headings and titles, images, etc
-        doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
-        html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)

-        # put back non-breaking spaces in empty paragraphs to preserve original formatting
-        html = blankreg.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
+        if getattr(self.extra_opts, 'renumber_headings', False):
+            # search for places where a first or second level heading is immediately followed by another
+            # top level heading.  demote the second heading to h3 to prevent splitting between chapter
+            # headings and titles, images, etc
+            doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
+            html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)

-        # Center separator lines
-        html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center">' + '\g<break>' + '</p>', html)
+        if getattr(self.extra_opts, 'format_scene_breaks', False):
+            # Center separator lines
+            html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em">' + '\g<break>' + '</p>', html)
+            if not self.blanks_deleted:
+                html = self.multi_blank.sub('\n<p id="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
+            html = re.sub('<p\s+id="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
+
+        if self.deleted_nbsps:
+            # put back non-breaking spaces in empty paragraphs to preserve original formatting
+            html = self.blankreg.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)

        return html
--- a/src/calibre/ebooks/fb2/input.py
+++ b/src/calibre/ebooks/fb2/input.py
@ -104,13 +104,17 @@ class FB2Input(InputFormatPlugin):
        entries = [(f, guess_type(f)[0]) for f in os.listdir('.')]
        opf.create_manifest(entries)
        opf.create_spine(['index.xhtml'])
-
-        for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
-            href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
-            if href is not None:
-                if href.startswith('#'):
-                    href = href[1:]
-                opf.guide.set_cover(os.path.abspath(href))
+        if mi.cover_data and mi.cover_data[1]:
+            with open('fb2_cover_calibre_mi.jpg', 'wb') as f:
+                f.write(mi.cover_data[1])
+            opf.guide.set_cover(os.path.abspath('fb2_cover_calibre_mi.jpg'))
+        else:
+            for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
+                href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
+                if href is not None:
+                    if href.startswith('#'):
+                        href = href[1:]
+                    opf.guide.set_cover(os.path.abspath(href))

        opf.render(open('metadata.opf', 'wb'))
        return os.path.join(os.getcwd(), 'metadata.opf')
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@ -21,10 +21,9 @@ from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.customize.conversion import OptionRecommendation
 from calibre.constants import islinux, isfreebsd, iswindows
-from calibre import unicode_path
+from calibre import unicode_path, as_unicode
 from calibre.utils.localization import get_lang
 from calibre.utils.filenames import ascii_filename
-from calibre.ebooks.conversion.utils import PreProcessor

 class Link(object):
    '''
@ -112,7 +111,7 @@ class HTMLFile(object):
            with open(self.path, 'rb') as f:
                src = f.read()
        except IOError, err:
-            msg = 'Could not read from file: %s with error: %s'%(self.path, unicode(err))
+            msg = 'Could not read from file: %s with error: %s'%(self.path, as_unicode(err))
            if level == 0:
                raise IOError(msg)
            raise IgnoreFile(msg, err.errno)
@ -296,7 +295,7 @@ class HTMLInput(InputFormatPlugin):
            return oeb

        from calibre.ebooks.conversion.plumber import create_oebbook
-        return create_oebbook(log, stream.name, opts, self,
+        return create_oebbook(log, stream.name, opts,
                encoding=opts.input_encoding)

    def is_case_sensitive(self, path):
@ -485,9 +484,3 @@ class HTMLInput(InputFormatPlugin):
            self.log.exception('Failed to read CSS file: %r'%link)
            return (None, None)
        return (None, raw)
-
-    def preprocess_html(self, options, html):
-        self.options = options
-        preprocessor = PreProcessor(self.options, log=getattr(self, 'log', None))
-        return preprocessor(html)
-
--- a/src/calibre/ebooks/lit/input.py
+++ b/src/calibre/ebooks/lit/input.py
@ -7,7 +7,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.conversion.utils import PreProcessor
+from calibre.ebooks.conversion.utils import HeuristicProcessor


 class LITInput(InputFormatPlugin):
@ -22,7 +22,7 @@ class LITInput(InputFormatPlugin):
        from calibre.ebooks.lit.reader import LitReader
        from calibre.ebooks.conversion.plumber import create_oebbook
        self.log = log
-        return create_oebbook(log, stream, options, self, reader=LitReader)
+        return create_oebbook(log, stream, options, reader=LitReader)

    def postprocess_book(self, oeb, opts, log):
        from calibre.ebooks.oeb.base import XHTML_NS, XPath, XHTML
@ -39,10 +39,13 @@ class LITInput(InputFormatPlugin):
                body = body[0]
                if len(body) == 1 and body[0].tag == XHTML('pre'):
                    pre = body[0]
-                    from calibre.ebooks.txt.processor import convert_basic
+                    from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
+                    separate_paragraphs_single_line
                    from lxml import etree
                    import copy
-                    html = convert_basic(pre.text).replace('<html>',
+                    html = separate_paragraphs_single_line(pre.text)
+                    html = preserve_spaces(html)
+                    html = convert_basic(html).replace('<html>',
                            '<html xmlns="%s">'%XHTML_NS)
                    root = etree.fromstring(html)
                    body = XPath('//h:body')(root)
@ -51,10 +54,3 @@ class LITInput(InputFormatPlugin):
                    for elem in body:
                        ne = copy.deepcopy(elem)
                        pre.append(ne)
-
-
-    def preprocess_html(self, options, html):
-        self.options = options
-        preprocessor = PreProcessor(self.options, log=getattr(self, 'log', None))
-        return preprocessor(html)
-
--- a/src/calibre/ebooks/lrf/input.py
+++ b/src/calibre/ebooks/lrf/input.py
@ -12,7 +12,6 @@ from copy import deepcopy
 from lxml import etree

 from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.conversion.utils import PreProcessor
 from calibre import guess_type

 class Canvas(etree.XSLTExtension):
@ -419,11 +418,3 @@ class LRFInput(InputFormatPlugin):
            f.write(result)
        styles.write()
        return os.path.abspath('content.opf')
-
-    def preprocess_html(self, options, html):
-        self.options = options
-        preprocessor = PreProcessor(self.options, log=getattr(self, 'log', None))
-        return preprocessor(html)
-
-
-
--- a/src/calibre/ebooks/mobi/input.py
+++ b/src/calibre/ebooks/mobi/input.py
@ -39,11 +39,3 @@ class MOBIInput(InputFormatPlugin):
                accelerators['pagebreaks'] = '//h:div[@class="mbp_pagebreak"]'
        return mr.created_opf_path

-    def preprocess_html(self, options, html):
-        # search for places where a first or second level heading is immediately followed by another
-        # top level heading.  demote the second heading to h3 to prevent splitting between chapter
-        # headings and titles, images, etc
-        doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
-        html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
-        return html
-
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -542,7 +542,17 @@ class MobiReader(object):
                        elif tag.tag == 'img':
                            tag.set('height', height)
                        else:
-                            styles.append('margin-top: %s' % self.ensure_unit(height))
+                            if tag.tag == 'div' and not tag.text and \
+                                    (not tag.tail or not tag.tail.strip()) and \
+                                    not len(list(tag.iterdescendants())):
+                                # Paragraph spacer
+                                # Insert nbsp so that the element is never
+                                # discarded by a renderer
+                                tag.text = u'\u00a0' # nbsp
+                                styles.append('height: %s' %
+                                        self.ensure_unit(height))
+                            else:
+                                styles.append('margin-top: %s' % self.ensure_unit(height))
            if attrib.has_key('width'):
                width = attrib.pop('width').strip()
                if width and re.search(r'\d+', width):
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -251,7 +251,7 @@ class Serializer(object):
        tag = prefixname(elem.tag, nsrmap)
        # Previous layers take care of @name
        id = elem.attrib.pop('id', None)
-        if id is not None:
+        if id:
            href = '#'.join((item.href, id))
            offset = self.anchor_offset or buffer.tell()
            self.id_offsets[urlnormalize(href)] = offset
--- a/src/calibre/ebooks/oeb/iterator.py
+++ b/src/calibre/ebooks/oeb/iterator.py
@ -199,8 +199,8 @@ class EbookIterator(object):
                    not hasattr(self.pathtoopf, 'manifest'):
                if hasattr(self.pathtoopf, 'manifest'):
                    self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
-                self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts,
-                        plumber.input_plugin)
+                self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
+                        plumber.opts)

        if hasattr(self.pathtoopf, 'manifest'):
            self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
@ -227,7 +227,7 @@ class EbookIterator(object):
                self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
-        if self.ebook_ext in ('lit', 'mobi', 'prc', 'opf') and cover:
+        if self.ebook_ext in ('lit', 'mobi', 'prc', 'opf', 'fb2') and cover:
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            chtml = (TITLEPAGE%os.path.relpath(cover, self.base).replace(os.sep,
                '/')).encode('utf-8')
--- a/src/calibre/ebooks/pdb/input.py
+++ b/src/calibre/ebooks/pdb/input.py
@ -9,7 +9,6 @@ import os
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.pdb.header import PdbHeaderReader
 from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
-from calibre.ebooks.conversion.utils import PreProcessor

 class PDBInput(InputFormatPlugin):

@ -32,8 +31,3 @@ class PDBInput(InputFormatPlugin):
        opf = reader.extract_content(os.getcwd())

        return opf
-
-    def preprocess_html(self, options, html):
-        self.options = options
-        preprocessor = PreProcessor(self.options, log=getattr(self, 'log', None))
-        return preprocessor(html)
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@ -34,18 +34,15 @@ class PML_HTMLizer(object):
        'ra',
        'c',
        'r',
-        't',
        's',
        'l',
        'k',
-        'T',
        'FN',
        'SB',
    ]

    STATES_VALUE_REQ = [
        'a',
-        'T',
        'FN',
        'SB',
    ]
@ -96,8 +93,6 @@ class PML_HTMLizer(object):
        'Sb': 'sb',
        'c': 'c',
        'r': 'r',
-        't': 't',
-        'T': 'T',
        'i': 'i',
        'I': 'i',
        'u': 'u',
@ -133,8 +128,6 @@ class PML_HTMLizer(object):
    DIV_STATES = [
        'c',
        'r',
-        't',
-        'T',
        'FN',
        'SB',
    ]
@ -255,8 +248,6 @@ class PML_HTMLizer(object):

        for key, val in self.state.items():
            if val[0]:
-                if key == 'T':
-                    self.state['T'][0] = False
                if key in self.DIV_STATES:
                    div.append(key)
                elif key in self.SPAN_STATES:
@ -506,6 +497,9 @@ class PML_HTMLizer(object):
        self.toc = TOC()
        self.file_name = file_name

+        indent_state = {'t': False, 'T': False}
+        adv_indent_val = ''
+
        for s in self.STATES:
            self.state[s] = [False, ''];

@ -515,6 +509,8 @@ class PML_HTMLizer(object):

            parsed = []
            empty = True
+            basic_indent = indent_state['t']
+            adv_indent = indent_state['T']

            # Must use StringIO, cStringIO does not support unicode
            line = StringIO.StringIO(line)
@ -527,7 +523,7 @@ class PML_HTMLizer(object):
                if c == '\\':
                    c = line.read(1)

-                    if c in 'qcrtTiIuobBlk':
+                    if c in 'qcriIuobBlk':
                        text = self.process_code(c, line)
                    elif c in 'FS':
                        l = line.read(1)
@ -574,6 +570,15 @@ class PML_HTMLizer(object):
                    elif c == 'w':
                        empty = False
                        text = '<hr width="%s" />' % self.code_value(line)
+                    elif c == 't':
+                        indent_state[c] = not indent_state[c]
+                        if indent_state[c]:
+                            basic_indent = True
+                    elif c == 'T':
+                        indent_state[c] = not indent_state[c]
+                        if indent_state[c]:
+                            adv_indent = True
+                            adv_indent_val = self.code_value(line)
                    elif c == '-':
                        empty = False
                        text = '&shy;'
@ -590,6 +595,16 @@ class PML_HTMLizer(object):
            if not empty:
                text = self.end_line()
                parsed.append(text)
+                
+                if basic_indent:
+                    parsed.insert(0, self.STATES_TAGS['t'][0])
+                    parsed.append(self.STATES_TAGS['t'][1])
+                elif adv_indent:
+                    parsed.insert(0, self.STATES_TAGS['T'][0] % adv_indent_val)
+                    parsed.append(self.STATES_TAGS['T'][1])
+                    indent_state['T'] = False
+                    adv_indent_val = ''
+                
                output.append(u''.join(parsed))
            line.close()

--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@ -7,7 +7,6 @@ import os, glob, re, textwrap
 from lxml import etree

 from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.conversion.utils import PreProcessor

 border_style_map = {
        'single' : 'solid',
@ -322,13 +321,9 @@ class RTFInput(InputFormatPlugin):
            res = transform.tostring(result)
            res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
            # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
-            if not getattr(self.opts, 'remove_paragraph_spacing', False):
-                res = re.sub('\s*<body>', '<body>', res)
-                res = re.sub('(?<=\n)\n{2}',
-                        u'<p>\u00a0</p>\n'.encode('utf-8'), res)
-            if self.opts.preprocess_html:
-                preprocessor = PreProcessor(self.opts, log=getattr(self, 'log', None))
-                res = preprocessor(res.decode('utf-8')).encode('utf-8')
+            res = re.sub('\s*<body>', '<body>', res)
+            res = re.sub('(?<=\n)\n{2}',
+                    u'<p>\u00a0</p>\n'.encode('utf-8'), res)
            f.write(res)
        self.write_inline_css(inline_class, border_styles)
        stream.seek(0)
--- a/src/calibre/ebooks/snb/input.py
+++ b/src/calibre/ebooks/snb/input.py
@ -41,7 +41,7 @@ class SNBInput(InputFormatPlugin):
            raise ValueError("Invalid SNB file")
        log.debug("Handle meta data ...")
        from calibre.ebooks.conversion.plumber import create_oebbook
-        oeb = create_oebbook(log, None, options, self,
+        oeb = create_oebbook(log, None, options,
                encoding=options.input_encoding, populate=False)
        meta = snbFile.GetFileStream('snbf/book.snbf')
        if meta != None:
--- a/src/calibre/ebooks/txt/heuristicprocessor.py
+++ b/src/calibre/ebooks/txt/heuristicprocessor.py
@ -1,58 +0,0 @@
-# -*- coding: utf-8 -*-
-
-__license__ = 'GPL 3'
-__copyright__ = '2011, John Schember <john@nachtimwald.com>'
-__docformat__ = 'restructuredtext en'
-
-import re
-
-from calibre import prepare_string_for_xml
-
-class TXTHeuristicProcessor(object):
-
-    def __init__(self):
-        self.ITALICIZE_WORDS = [
-            'Etc.', 'etc.', 'viz.', 'ie.', 'i.e.', 'Ie.', 'I.e.', 'eg.',
-            'e.g.', 'Eg.', 'E.g.', 'et al.', 'et cetra', 'n.b.', 'N.b.',
-            'nota bene', 'Nota bene', 'Ste.', 'Mme.', 'Mdme.',
-            'Mlle.', 'Mons.', 'PS.', 'PPS.',
-        ]
-        self.ITALICIZE_STYLE_PATS = [
-            r'(?msu)_(?P<words>.+?)_',
-            r'(?msu)/(?P<words>[^<>]+?)/',
-            r'(?msu)~~(?P<words>.+?)~~',
-            r'(?msu)\*(?P<words>.+?)\*',
-            r'(?msu)~(?P<words>.+?)~',
-            r'(?msu)_/(?P<words>[^<>]+?)/_',
-            r'(?msu)_\*(?P<words>.+?)\*_',
-            r'(?msu)\*/(?P<words>[^<>]+?)/\*',
-            r'(?msu)_\*/(?P<words>[^<>]+?)/\*_',
-            r'(?msu)/:(?P<words>[^<>]+?):/',
-            r'(?msu)\|:(?P<words>.+?):\|',
-        ]
-
-    def process_paragraph(self, paragraph):
-        for word in self.ITALICIZE_WORDS:
-            paragraph = paragraph.replace(word, '<i>%s</i>' % word)
-        for pat in self.ITALICIZE_STYLE_PATS:
-            paragraph = re.sub(pat, lambda mo: '<i>%s</i>' % mo.group('words'), paragraph)
-        return paragraph
-
-    def convert(self, txt, title='', epub_split_size_kb=0):
-        from calibre.ebooks.txt.processor import clean_txt, split_txt, HTML_TEMPLATE
-        txt = clean_txt(txt)
-        txt = split_txt(txt, epub_split_size_kb)
-
-        processed = []
-        for line in txt.split('\n\n'):
-            processed.append(u'<p>%s</p>' % self.process_paragraph(prepare_string_for_xml(line.replace('\n', ' '))))
-
-        txt = u'\n'.join(processed)
-        txt = re.sub('[ ]{2,}', ' ', txt)
-        html = HTML_TEMPLATE % (title, txt)
-
-        from calibre.ebooks.conversion.utils import PreProcessor
-        pp = PreProcessor()
-        html = pp.markup_chapters(html, pp.get_word_count(html), False)
-
-        return html
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@ -12,7 +12,7 @@ from calibre.ebooks.chardet import detect
 from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
    separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
    preserve_spaces, detect_paragraph_type, detect_formatting_type, \
-    convert_heuristic, normalize_line_endings, convert_textile
+    normalize_line_endings, convert_textile
 from calibre import _ent_pat, xml_entity_to_unicode

 class TXTInput(InputFormatPlugin):
@ -34,7 +34,7 @@ class TXTInput(InputFormatPlugin):
                   'starts a paragraph.'
                   '* unformatted: Most lines have hard line breaks, few/no blank lines or indents.')),
        OptionRecommendation(name='formatting_type', recommended_value='auto',
-            choices=['auto', 'none', 'heuristic', 'markdown'],
+            choices=['auto', 'none', 'heuristic', 'textile', 'markdown'],
            help=_('Formatting used within the document.'
                   '* auto: Automatically decide which formatting processor to use.\n'
                   '* none: Do not process the document formatting. Everything is a '
@ -106,7 +106,7 @@ class TXTInput(InputFormatPlugin):
                    log.debug('Auto detected paragraph type as %s' % options.paragraph_type)

            # Dehyphenate
-            dehyphenator = Dehyphenator()
+            dehyphenator = Dehyphenator(options.verbose, log=getattr(self, 'log', None))
            txt = dehyphenator(txt,'txt', length)

            # We don't check for block because the processor assumes block.
@ -118,24 +118,24 @@ class TXTInput(InputFormatPlugin):
                txt = separate_paragraphs_print_formatted(txt)

            if options.paragraph_type == 'unformatted':
-                from calibre.ebooks.conversion.utils import PreProcessor
+                from calibre.ebooks.conversion.utils import HeuristicProcessor
                # get length

                # unwrap lines based on punctuation
-                preprocessor = PreProcessor(options, log=getattr(self, 'log', None))
+                preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None))
                txt = preprocessor.punctuation_unwrap(length, txt, 'txt')

            flow_size = getattr(options, 'flow_size', 0)
+            html = convert_basic(txt, epub_split_size_kb=flow_size)

            if options.formatting_type == 'heuristic':
-                html = convert_heuristic(txt, epub_split_size_kb=flow_size)
-            else:
-                html = convert_basic(txt, epub_split_size_kb=flow_size)
-
-        # Dehyphenate in cleanup mode for missed txt and markdown conversion
-        dehyphenator = Dehyphenator()
-        html = dehyphenator(html,'txt_cleanup', length)
-        html = dehyphenator(html,'html_cleanup', length)
+                setattr(options, 'enable_heuristics', True)
+                setattr(options, 'markup_chapter_headings', True)
+                setattr(options, 'italicize_common_cases', True)
+                setattr(options, 'fix_indents', True)
+                setattr(options, 'delete_blank_paragraphs', True)
+                setattr(options, 'format_scene_breaks', True)
+                setattr(options, 'dehyphenate', True)

        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@ -12,8 +12,7 @@ import os, re

 from calibre import prepare_string_for_xml, isbytestring
 from calibre.ebooks.metadata.opf2 import OPFCreator
-from calibre.utils.cleantext import clean_ascii_chars
-from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor
+
 from calibre.ebooks.conversion.preprocess import DocAnalysis
 from calibre.utils.cleantext import clean_ascii_chars

@ -68,10 +67,6 @@ def convert_basic(txt, title='', epub_split_size_kb=0):

    return HTML_TEMPLATE % (title, u'\n'.join(lines))

-def convert_heuristic(txt, title='', epub_split_size_kb=0):
-    tp = TXTHeuristicProcessor()
-    return tp.convert(txt, title, epub_split_size_kb)
-
 def convert_markdown(txt, title='', disable_toc=False):
    from calibre.ebooks.markdown import markdown
    md = markdown.Markdown(
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@ -85,7 +85,7 @@ def _config():
    c.add_opt('LRF_ebook_viewer_options', default=None,
              help=_('Options for the LRF ebook viewer'))
    c.add_opt('internally_viewed_formats', default=['LRF', 'EPUB', 'LIT',
-        'MOBI', 'PRC', 'HTML', 'FB2', 'PDB', 'RB', 'SNB'],
+        'MOBI', 'PRC', 'AZW', 'HTML', 'FB2', 'PDB', 'RB', 'SNB'],
              help=_('Formats that are viewed using the internal viewer'))
    c.add_opt('column_map', default=ALL_COLUMNS,
              help=_('Columns to be displayed in the book list'))
--- a/src/calibre/gui2/actions/add.py
+++ b/src/calibre/gui2/actions/add.py
@ -91,13 +91,14 @@ class AddAction(InterfaceAction):
                self.gui.library_view.model().db.import_book(MetaInformation(None), [])
            self.gui.library_view.model().books_added(num)

-    def add_isbns(self, books):
+    def add_isbns(self, books, add_tags=[]):
        from calibre.ebooks.metadata import MetaInformation
        ids = set([])
+        db = self.gui.library_view.model().db
+
        for x in books:
            mi = MetaInformation(None)
            mi.isbn = x['isbn']
-            db = self.gui.library_view.model().db
            if x['path'] is not None:
                ids.add(db.import_book(mi, [x['path']]))
            else:
@ -109,6 +110,8 @@ class AddAction(InterfaceAction):
            self.gui.iactions['Edit Metadata'].do_download_metadata(ids)
        finally:
            config['overwrite_author_title_metadata'] = orig
+        if add_tags and ids:
+            db.bulk_modify_tags(ids, add=add_tags)


    def files_dropped(self, paths):
@ -166,7 +169,7 @@ class AddAction(InterfaceAction):
        from calibre.gui2.dialogs.add_from_isbn import AddFromISBN
        d = AddFromISBN(self.gui)
        if d.exec_() == d.Accepted:
-            self.add_isbns(d.books)
+            self.add_isbns(d.books, add_tags=d.set_tags)

    def add_books(self, *args):
        '''
--- a/src/calibre/gui2/actions/catalog.py
+++ b/src/calibre/gui2/actions/catalog.py
@ -5,11 +5,11 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os, shutil
+import re, os, shutil

 from PyQt4.Qt import QModelIndex

-from calibre.gui2 import error_dialog, choose_dir
+from calibre.gui2 import choose_dir, error_dialog, warning_dialog
 from calibre.gui2.tools import generate_catalog
 from calibre.utils.config import dynamic
 from calibre.gui2.actions import InterfaceAction
@ -55,10 +55,18 @@ class GenerateCatalogAction(InterfaceAction):

    def catalog_generated(self, job):
        if job.result:
-            # Error during catalog generation
-            return error_dialog(self.gui, _('Catalog generation terminated'),
-                    job.result,
-                    show=True)
+            # Problems during catalog generation
+            # job.result is a list - the first entry is the intended title for the dialog
+            # Subsequent strings are error messages
+            dialog_title = job.result.pop(0)
+            if re.match('warning:', job.result[0].lower()):
+                job.result.append("Catalog generation complete.")
+                warning_dialog(self.gui, dialog_title, '\n'.join(job.result), show=True)
+            else:
+                job.result.append("Catalog generation terminated.")
+                error_dialog(self.gui, dialog_title,'\n'.join(job.result),show=True)
+                return
+
        if job.failed:
            return self.gui.job_exception(job)
        id = self.gui.library_view.model().add_catalog(job.catalog_file_path, job.catalog_title)
--- a/src/calibre/gui2/comments_editor.py
+++ b/src/calibre/gui2/comments_editor.py
@ -593,6 +593,11 @@ class Editor(QWidget): # {{{
    def code_dirtied(self, *args):
        self.source_dirty = True

+    def hide_toolbars(self):
+        '''Make the three toolbars of this editor invisible.'''
+        for name in ('toolbar1', 'toolbar2', 'toolbar3'):
+            getattr(self, name).setVisible(False)
+
 # }}}

 if __name__ == '__main__':
--- a/src/calibre/gui2/convert/bulk.py
+++ b/src/calibre/gui2/convert/bulk.py
@ -11,6 +11,8 @@ from calibre.gui2.convert.single import Config, sort_formats_by_preference, \
 from calibre.customize.ui import available_output_formats
 from calibre.gui2 import ResizableDialog
 from calibre.gui2.convert.look_and_feel import LookAndFeelWidget
+from calibre.gui2.convert.heuristics import HeuristicsWidget
+from calibre.gui2.convert.search_and_replace import SearchAndReplaceWidget
 from calibre.gui2.convert.page_setup import PageSetupWidget
 from calibre.gui2.convert.structure_detection import StructureDetectionWidget
 from calibre.gui2.convert.toc import TOCWidget
@ -69,6 +71,8 @@ class BulkConfig(Config):

        self.setWindowTitle(_('Bulk Convert'))
        lf = widget_factory(LookAndFeelWidget)
+        hw = widget_factory(HeuristicsWidget)
+        sr = widget_factory(SearchAndReplaceWidget)
        ps = widget_factory(PageSetupWidget)
        sd = widget_factory(StructureDetectionWidget)
        toc = widget_factory(TOCWidget)
@ -90,7 +94,7 @@ class BulkConfig(Config):
            if not c: break
            self.stack.removeWidget(c)

-        widgets = [lf, ps, sd, toc]
+        widgets = [lf, hw, sr, ps, sd, toc]
        if output_widget is not None:
            widgets.append(output_widget)
        for w in widgets:
--- a/src/calibre/gui2/convert/heuristics.py
+++ b/src/calibre/gui2/convert/heuristics.py
@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2011, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+from PyQt4.Qt import Qt
+
+from calibre.gui2.convert.heuristics_ui import Ui_Form
+from calibre.gui2.convert import Widget
+
+class HeuristicsWidget(Widget, Ui_Form):
+    '''
+    Conversion settings page for the heuristic processing options.
+    '''
+
+    TITLE = _('Heuristic Processing')
+    HELP  = _('Modify the document text and structure using common patterns.')
+    COMMIT_NAME = 'heuristics'
+
+    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
+        Widget.__init__(self, parent,
+                ['enable_heuristics', 'markup_chapter_headings',
+                 'italicize_common_cases', 'fix_indents',
+                 'html_unwrap_factor', 'unwrap_lines',
+                 'delete_blank_paragraphs', 'format_scene_breaks',
+                 'dehyphenate', 'renumber_headings']
+                )
+        self.db, self.book_id = db, book_id
+        self.initialize_options(get_option, get_help, db, book_id)
+
+        self.opt_enable_heuristics.stateChanged.connect(self.enable_heuristics)
+        self.opt_unwrap_lines.stateChanged.connect(self.enable_unwrap)
+
+        # Bring the dependent widgets in line with the loaded option value.
+        self.enable_heuristics(self.opt_enable_heuristics.checkState())
+
+    def break_cycles(self):
+        '''Drop the signal connections made in __init__ (best effort).'''
+        Widget.break_cycles(self)
+
+        for name in ('opt_enable_heuristics', 'opt_unwrap_lines'):
+            # One try per signal: a failure while disconnecting the first
+            # check box must not leave the second one connected.
+            try:
+                getattr(self, name).stateChanged.disconnect()
+            except Exception:
+                pass
+
+    def set_value_handler(self, g, val):
+        '''Show an unset unwrap factor as 0.0. Returns True if handled, else None.'''
+        if val is None and g is self.opt_html_unwrap_factor:
+            g.setValue(0.0)
+            return True
+
+    def enable_heuristics(self, state):
+        '''Enable the individual options only while heuristics are switched on.'''
+        enabled = state == Qt.Checked
+        for box in (self.opt_markup_chapter_headings,
+                self.opt_italicize_common_cases, self.opt_fix_indents,
+                self.opt_delete_blank_paragraphs, self.opt_format_scene_breaks,
+                self.opt_dehyphenate, self.opt_renumber_headings,
+                self.opt_unwrap_lines):
+            box.setEnabled(enabled)
+
+        # The unwrap factor additionally needs 'Unwrap lines' to be checked.
+        self.opt_html_unwrap_factor.setEnabled(enabled and
+                self.opt_unwrap_lines.checkState() == Qt.Checked)
+
+    def enable_unwrap(self, state):
+        '''Enable the unwrap factor only while 'Unwrap lines' is checked.'''
+        self.opt_html_unwrap_factor.setEnabled(state == Qt.Checked)
--- a/src/calibre/gui2/convert/heuristics.ui
+++ b/src/calibre/gui2/convert/heuristics.ui
@ -0,0 +1,178 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>Form</class>
+ <widget class="QWidget" name="Form">
+  <property name="geometry">
+   <rect>
+    <x>0</x>
+    <y>0</y>
+    <width>938</width>
+    <height>470</height>
+   </rect>
+  </property>
+  <property name="windowTitle">
+   <string>Form</string>
+  </property>
+  <layout class="QVBoxLayout" name="verticalLayout">
+   <item>
+    <widget class="QCheckBox" name="opt_enable_heuristics">
+     <property name="text">
+      <string>&amp;Preprocess input file to possibly improve structure detection</string>
+     </property>
+    </widget>
+   </item>
+   <item>
+    <widget class="QGroupBox" name="groupBox">
+     <property name="title">
+      <string>Heuristic Processing</string>
+     </property>
+     <layout class="QGridLayout" name="gridLayout">
+      <item row="0" column="0" colspan="2">
+       <widget class="QCheckBox" name="opt_unwrap_lines">
+        <property name="text">
+         <string>Unwrap lines</string>
+        </property>
+       </widget>
+      </item>
+      <item row="1" column="1">
+       <widget class="QLabel" name="huf_label">
+        <property name="text">
+         <string>Line &amp;un-wrap factor during preprocess:</string>
+        </property>
+        <property name="buddy">
+         <cstring>opt_html_unwrap_factor</cstring>
+        </property>
+       </widget>
+      </item>
+      <item row="1" column="2">
+       <widget class="QDoubleSpinBox" name="opt_html_unwrap_factor">
+        <property name="toolTip">
+         <string/>
+        </property>
+        <property name="maximum">
+         <double>1.000000000000000</double>
+        </property>
+        <property name="singleStep">
+         <double>0.050000000000000</double>
+        </property>
+        <property name="value">
+         <double>0.400000000000000</double>
+        </property>
+       </widget>
+      </item>
+      <item row="1" column="3">
+       <spacer name="horizontalSpacer_2">
+        <property name="orientation">
+         <enum>Qt::Horizontal</enum>
+        </property>
+        <property name="sizeHint" stdset="0">
+         <size>
+          <width>40</width>
+          <height>20</height>
+         </size>
+        </property>
+       </spacer>
+      </item>
+      <item row="2" column="0" colspan="4">
+       <widget class="QCheckBox" name="opt_markup_chapter_headings">
+        <property name="text">
+         <string>Detect and markup unformatted chapter headings and sub headings</string>
+        </property>
+       </widget>
+      </item>
+      <item row="3" column="0" colspan="4">
+       <widget class="QCheckBox" name="opt_renumber_headings">
+        <property name="text">
+         <string>Renumber sequences of &lt;h1&gt; or &lt;h2&gt; tags to prevent splitting</string>
+        </property>
+       </widget>
+      </item>
+      <item row="4" column="0" colspan="2">
+       <widget class="QCheckBox" name="opt_delete_blank_paragraphs">
+        <property name="text">
+         <string>Delete blank lines between paragraphs</string>
+        </property>
+       </widget>
+      </item>
+      <item row="5" column="0" colspan="3">
+       <widget class="QCheckBox" name="opt_format_scene_breaks">
+        <property name="text">
+         <string>Ensure scene breaks are consistently formatted</string>
+        </property>
+       </widget>
+      </item>
+      <item row="6" column="0" colspan="2">
+       <widget class="QCheckBox" name="opt_dehyphenate">
+        <property name="text">
+         <string>Remove unnecessary hyphens</string>
+        </property>
+       </widget>
+      </item>
+      <item row="7" column="0" colspan="2">
+       <widget class="QCheckBox" name="opt_italicize_common_cases">
+        <property name="text">
+         <string>Italicize common words and patterns</string>
+        </property>
+       </widget>
+      </item>
+      <item row="8" column="0" colspan="2">
+       <widget class="QCheckBox" name="opt_fix_indents">
+        <property name="text">
+         <string>Replace entity indents with CSS indents</string>
+        </property>
+       </widget>
+      </item>
+      <item row="9" column="0" colspan="2">
+       <spacer name="verticalSpacer">
+        <property name="orientation">
+         <enum>Qt::Vertical</enum>
+        </property>
+        <property name="sizeHint" stdset="0">
+         <size>
+          <width>131</width>
+          <height>35</height>
+         </size>
+        </property>
+       </spacer>
+      </item>
+     </layout>
+    </widget>
+   </item>
+  </layout>
+ </widget>
+ <resources/>
+ <connections>
+  <connection>
+   <sender>opt_enable_heuristics</sender>
+   <signal>toggled(bool)</signal>
+   <receiver>opt_html_unwrap_factor</receiver>
+   <slot>setEnabled(bool)</slot>
+   <hints>
+    <hint type="sourcelabel">
+     <x>328</x>
+     <y>87</y>
+    </hint>
+    <hint type="destinationlabel">
+     <x>481</x>
+     <y>113</y>
+    </hint>
+   </hints>
+  </connection>
+  <connection>
+   <sender>opt_enable_heuristics</sender>
+   <signal>toggled(bool)</signal>
+   <receiver>huf_label</receiver>
+   <slot>setEnabled(bool)</slot>
+   <hints>
+    <hint type="sourcelabel">
+     <x>295</x>
+     <y>88</y>
+    </hint>
+    <hint type="destinationlabel">
+     <x>291</x>
+     <y>105</y>
+    </hint>
+   </hints>
+  </connection>
+ </connections>
+</ui>
--- a/src/calibre/gui2/convert/metadata.py
+++ b/src/calibre/gui2/convert/metadata.py
@ -18,6 +18,7 @@ from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.gui2.convert import Widget
 from calibre.utils.icu import sort_key
+from calibre.library.comments import comments_to_html

 def create_opf_file(db, book_id):
    mi = db.get_metadata(book_id, index_is_id=True)
@ -57,6 +58,7 @@ class MetadataWidget(Widget, Ui_Form):
            self.initialize_metadata_options()
        self.initialize_options(get_option, get_help, db, book_id)
        self.connect(self.cover_button, SIGNAL("clicked()"), self.select_cover)
+        self.comment.hide_toolbars()

    def deduce_author_sort(self, *args):
        au = unicode(self.author.currentText())
@ -68,6 +70,9 @@ class MetadataWidget(Widget, Ui_Form):
    def initialize_metadata_options(self):
        self.initialize_combos()
        self.author.editTextChanged.connect(self.deduce_author_sort)
+        self.author.set_separator('&')
+        self.author.set_space_before_sep(True)
+        self.author.update_items_cache(self.db.all_author_names())

        mi = self.db.get_metadata(self.book_id, index_is_id=True)
        self.title.setText(mi.title)
@ -75,8 +80,8 @@ class MetadataWidget(Widget, Ui_Form):
            self.publisher.setCurrentIndex(self.publisher.findText(mi.publisher))
        self.author_sort.setText(mi.author_sort if mi.author_sort else '')
        self.tags.setText(', '.join(mi.tags if mi.tags else []))
-        self.tags.update_tags_cache(self.db.all_tags())
-        self.comment.setPlainText(mi.comments if mi.comments else '')
+        self.tags.update_items_cache(self.db.all_tags())
+        self.comment.html = comments_to_html(mi.comments) if mi.comments else ''
        if mi.series:
            self.series.setCurrentIndex(self.series.findText(mi.series))
        if mi.series_index is not None:
@ -151,7 +156,7 @@ class MetadataWidget(Widget, Ui_Form):
        author_sort = unicode(self.author_sort.text()).strip()
        if author_sort:
            mi.author_sort = author_sort
-        comments = unicode(self.comment.toPlainText()).strip()
+        comments = self.comment.html
        if comments:
            mi.comments = comments
        mi.series_index = float(self.series_index.value())
--- a/src/calibre/gui2/convert/metadata.ui
+++ b/src/calibre/gui2/convert/metadata.ui
@ -20,30 +20,6 @@
      <string>Book Cover</string>
     </property>
     <layout class="QGridLayout" name="_2">
-      <item row="0" column="0">
-       <layout class="QHBoxLayout" name="_3">
-        <item>
-         <widget class="ImageView" name="cover" native="true">
-          <property name="sizePolicy">
-           <sizepolicy hsizetype="Expanding" vsizetype="Expanding">
-            <horstretch>0</horstretch>
-            <verstretch>0</verstretch>
-           </sizepolicy>
-          </property>
-         </widget>
-        </item>
-       </layout>
-      </item>
-      <item row="2" column="0">
-       <widget class="QCheckBox" name="opt_prefer_metadata_cover">
-        <property name="text">
-         <string>Use cover from &amp;source file</string>
-        </property>
-        <property name="checked">
-         <bool>true</bool>
-        </property>
-       </widget>
-      </item>
      <item row="1" column="0">
       <layout class="QVBoxLayout" name="_4">
        <property name="spacing">
@ -95,6 +71,30 @@
        </item>
       </layout>
      </item>
+      <item row="2" column="0">
+       <widget class="QCheckBox" name="opt_prefer_metadata_cover">
+        <property name="text">
+         <string>Use cover from &amp;source file</string>
+        </property>
+        <property name="checked">
+         <bool>true</bool>
+        </property>
+       </widget>
+      </item>
+      <item row="0" column="0">
+       <layout class="QHBoxLayout" name="_3">
+        <item>
+         <widget class="ImageView" name="cover" native="true">
+          <property name="sizePolicy">
+           <sizepolicy hsizetype="Expanding" vsizetype="Expanding">
+            <horstretch>0</horstretch>
+            <verstretch>0</verstretch>
+           </sizepolicy>
+          </property>
+         </widget>
+        </item>
+       </layout>
+      </item>
     </layout>
     <zorder>opt_prefer_metadata_cover</zorder>
     <zorder></zorder>
@ -190,7 +190,7 @@
        </widget>
       </item>
       <item row="4" column="1">
-        <widget class="TagsLineEdit" name="tags">
+        <widget class="CompleteLineEdit" name="tags">
         <property name="toolTip">
          <string>Tags categorize the book. This is particularly useful while searching. &lt;br&gt;&lt;br&gt;They can be any words or phrases, separated by commas.</string>
         </property>
@ -255,7 +255,7 @@
        </widget>
       </item>
       <item row="1" column="1">
-        <widget class="EnComboBox" name="author">
+        <widget class="CompleteComboBox" name="author">
         <property name="editable">
          <bool>true</bool>
         </property>
@ -264,35 +264,7 @@
      </layout>
     </item>
     <item>
-      <widget class="QGroupBox" name="groupBox_2">
-       <property name="sizePolicy">
-        <sizepolicy hsizetype="Minimum" vsizetype="Minimum">
-         <horstretch>0</horstretch>
-         <verstretch>0</verstretch>
-        </sizepolicy>
-       </property>
-       <property name="maximumSize">
-        <size>
-         <width>16777215</width>
-         <height>200</height>
-        </size>
-       </property>
-       <property name="title">
-        <string>Comments</string>
-       </property>
-       <layout class="QGridLayout" name="_8">
-        <item row="0" column="0">
-         <widget class="QTextEdit" name="comment">
-          <property name="maximumSize">
-           <size>
-            <width>16777215</width>
-            <height>180</height>
-           </size>
-          </property>
-         </widget>
-        </item>
-       </layout>
-      </widget>
+      <widget class="Editor" name="comment" native="true"/>
     </item>
    </layout>
   </item>
@ -310,7 +282,12 @@
   <header>widgets.h</header>
  </customwidget>
  <customwidget>
-   <class>TagsLineEdit</class>
+   <class>CompleteComboBox</class>
+   <extends>QComboBox</extends>
+   <header>widgets.h</header>
+  </customwidget>
+  <customwidget>
+   <class>CompleteLineEdit</class>
   <extends>QLineEdit</extends>
   <header>widgets.h</header>
  </customwidget>
@ -320,6 +297,12 @@
   <header>calibre/gui2/widgets.h</header>
   <container>1</container>
  </customwidget>
+  <customwidget>
+   <class>Editor</class>
+   <extends>QWidget</extends>
+   <header>calibre/gui2/comments_editor.h</header>
+   <container>1</container>
+  </customwidget>
 </customwidgets>
 <tabstops>
  <tabstop>title</tabstop>
@ -329,7 +312,6 @@
  <tabstop>tags</tabstop>
  <tabstop>series</tabstop>
  <tabstop>series_index</tabstop>
-  <tabstop>comment</tabstop>
  <tabstop>cover_path</tabstop>
  <tabstop>cover_button</tabstop>
  <tabstop>opt_prefer_metadata_cover</tabstop>
--- a/src/calibre/gui2/convert/pdb_output.py
+++ b/src/calibre/gui2/convert/pdb_output.py
@ -6,8 +6,6 @@ __docformat__ = 'restructuredtext en'

 from calibre.gui2.convert.pdb_output_ui import Ui_Form
 from calibre.gui2.convert import Widget
-from calibre.ebooks.pdb import FORMAT_WRITERS
-from calibre.gui2.widgets import BasicComboModel

 format_model = None

@ -21,17 +19,8 @@ class PluginWidget(Widget, Ui_Form):
    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        Widget.__init__(self, parent, ['format', 'inline_toc', 'pdb_output_encoding'])
        self.db, self.book_id = db, book_id
+
+        for x in get_option('format').option.choices:
+            self.opt_format.addItem(x)
+        
        self.initialize_options(get_option, get_help, db, book_id)
-
-        default = self.opt_format.currentText()
-
-        global format_model
-        if format_model is None:
-            format_model = BasicComboModel(FORMAT_WRITERS.keys())
-        self.format_model = format_model
-        self.opt_format.setModel(self.format_model)
-
-        default_index = self.opt_format.findText(default)
-        format_index = self.opt_format.findText('doc')
-        self.opt_format.setCurrentIndex(default_index if default_index != -1 else format_index if format_index != -1 else 0)
-
--- a/src/calibre/gui2/convert/pdf_output.py
+++ b/src/calibre/gui2/convert/pdf_output.py
@ -6,8 +6,6 @@ __docformat__ = 'restructuredtext en'

 from calibre.gui2.convert.pdf_output_ui import Ui_Form
 from calibre.gui2.convert import Widget
-from calibre.ebooks.pdf.pageoptions import PAPER_SIZES, ORIENTATIONS
-from calibre.gui2.widgets import BasicComboModel

 paper_size_model = None
 orientation_model = None
@ -23,28 +21,11 @@ class PluginWidget(Widget, Ui_Form):
        Widget.__init__(self, parent, ['paper_size',
            'orientation', 'preserve_cover_aspect_ratio'])
        self.db, self.book_id = db, book_id
+        
+        for x in get_option('paper_size').option.choices:
+            self.opt_paper_size.addItem(x)
+        for x in get_option('orientation').option.choices:
+            self.opt_orientation.addItem(x)
+        
        self.initialize_options(get_option, get_help, db, book_id)
-
-        default_paper_size = self.opt_paper_size.currentText()
-        default_orientation = self.opt_orientation.currentText()
-
-        global paper_size_model
-        if paper_size_model is None:
-            paper_size_model = BasicComboModel(PAPER_SIZES.keys())
-        self.paper_size_model = paper_size_model
-        self.opt_paper_size.setModel(self.paper_size_model)
-
-        default_paper_size_index = self.opt_paper_size.findText(default_paper_size)
-        letter_index = self.opt_paper_size.findText('letter')
-        self.opt_paper_size.setCurrentIndex(default_paper_size_index if default_paper_size_index != -1 else letter_index if letter_index != -1 else 0)
-
-        global orientation_model
-        if orientation_model is None:
-            orientation_model = BasicComboModel(ORIENTATIONS.keys())
-        self.orientation_model = orientation_model
-        self.opt_orientation.setModel(self.orientation_model)
-
-        default_orientation_index = self.opt_orientation.findText(default_orientation)
-        orientation_index = self.opt_orientation.findText('portrait')
-        self.opt_orientation.setCurrentIndex(default_orientation_index if default_orientation_index != -1 else orientation_index if orientation_index != -1 else 0)
-
+        
--- a/src/calibre/gui2/convert/search_and_replace.py
+++ b/src/calibre/gui2/convert/search_and_replace.py
@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2011, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import re
+
+from calibre.gui2.convert.search_and_replace_ui import Ui_Form
+from calibre.gui2.convert import Widget
+from calibre.gui2 import error_dialog
+
+class SearchAndReplaceWidget(Widget, Ui_Form):
+    '''
+    Conversion option page offering three search/replace pairs
+    (``sr1`` .. ``sr3``). Each pair is a regular expression editor for
+    the search pattern plus a field for the replacement text.
+    '''
+
+    TITLE = _('Search &\nReplace')
+    HELP  = _('Modify the document text and structure using user defined patterns.')
+    COMMIT_NAME = 'search_and_replace'
+
+    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
+        '''
+        Register the six search/replace options with the base widget,
+        load their values, then configure each search editor with its
+        label, the book id and the database.
+        '''
+        option_names = [
+            'sr1_search', 'sr1_replace',
+            'sr2_search', 'sr2_replace',
+            'sr3_search', 'sr3_replace',
+        ]
+        Widget.__init__(self, parent, option_names)
+        self.db = db
+        self.book_id = book_id
+        self.initialize_options(get_option, get_help, db, book_id)
+
+        # All three search editors receive exactly the same setup.
+        for editor in (self.opt_sr1_search, self.opt_sr2_search,
+                       self.opt_sr3_search):
+            editor.set_msg(_('Search Regular Expression'))
+            editor.set_book_id(book_id)
+            editor.set_db(db)
+
+    def break_cycles(self):
+        '''
+        Run the base class cleanup, then the cleanup of every search
+        editor.
+        '''
+        Widget.break_cycles(self)
+
+        for editor in (self.opt_sr1_search, self.opt_sr2_search,
+                       self.opt_sr3_search):
+            editor.break_cycles()
+
+    def pre_commit_check(self):
+        '''
+        Verify that every search pattern compiles as a regular expression.
+
+        Returns True when all three compile. At the first pattern that
+        fails, an error dialog is shown and False is returned.
+        '''
+        for name in ('sr1_search', 'sr2_search', 'sr3_search'):
+            editor = getattr(self, 'opt_' + name)
+            try:
+                re.compile(unicode(editor.regex))
+            except Exception as err:
+                dialog = error_dialog(self, _('Invalid regular expression'),
+                             _('Invalid regular expression: %s')%err)
+                dialog.exec_()
+                return False
+        return True
--- a/src/calibre/gui2/convert/search_and_replace.ui
+++ b/src/calibre/gui2/convert/search_and_replace.ui
@ -0,0 +1,191 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>Form</class>
+ <widget class="QWidget" name="Form">
+  <property name="geometry">
+   <rect>
+    <x>0</x>
+    <y>0</y>
+    <width>198</width>
+    <height>350</height>
+   </rect>
+  </property>
+  <property name="sizePolicy">
+   <sizepolicy hsizetype="Minimum" vsizetype="Preferred">
+    <horstretch>0</horstretch>
+    <verstretch>0</verstretch>
+   </sizepolicy>
+  </property>
+  <property name="windowTitle">
+   <string>Form</string>
+  </property>
+  <layout class="QGridLayout" name="gridLayout_4">
+   <property name="sizeConstraint">
+    <enum>QLayout::SetDefaultConstraint</enum>
+   </property>
+   <item row="0" column="0">
+    <widget class="QGroupBox" name="groupBox">
+     <property name="sizePolicy">
+      <sizepolicy hsizetype="Minimum" vsizetype="Preferred">
+       <horstretch>0</horstretch>
+       <verstretch>0</verstretch>
+      </sizepolicy>
+     </property>
+     <property name="title">
+      <string>1.</string>
+     </property>
+     <layout class="QGridLayout" name="gridLayout_2">
+      <property name="sizeConstraint">
+       <enum>QLayout::SetMinimumSize</enum>
+      </property>
+      <item row="0" column="0">
+       <widget class="RegexEdit" name="opt_sr1_search" native="true">
+        <property name="sizePolicy">
+         <sizepolicy hsizetype="Minimum" vsizetype="Preferred">
+          <horstretch>0</horstretch>
+          <verstretch>0</verstretch>
+         </sizepolicy>
+        </property>
+       </widget>
+      </item>
+      <item row="1" column="0">
+       <widget class="QLabel" name="label_4">
+        <property name="sizePolicy">
+         <sizepolicy hsizetype="Minimum" vsizetype="Preferred">
+          <horstretch>0</horstretch>
+          <verstretch>0</verstretch>
+         </sizepolicy>
+        </property>
+        <property name="text">
+         <string>Replacement Text</string>
+        </property>
+       </widget>
+      </item>
+      <item row="2" column="0">
+       <widget class="QLineEdit" name="opt_sr1_replace">
+        <property name="sizePolicy">
+         <sizepolicy hsizetype="Minimum" vsizetype="Fixed">
+          <horstretch>0</horstretch>
+          <verstretch>0</verstretch>
+         </sizepolicy>
+        </property>
+       </widget>
+      </item>
+     </layout>
+    </widget>
+   </item>
+   <item row="1" column="0">
+    <widget class="QGroupBox" name="groupBox_2">
+     <property name="sizePolicy">
+      <sizepolicy hsizetype="Minimum" vsizetype="Preferred">
+       <horstretch>0</horstretch>
+       <verstretch>0</verstretch>
+      </sizepolicy>
+     </property>
+     <property name="title">
+      <string>2.</string>
+     </property>
+     <layout class="QGridLayout" name="gridLayout">
+      <property name="sizeConstraint">
+       <enum>QLayout::SetMinimumSize</enum>
+      </property>
+      <item row="0" column="0">
+       <widget class="RegexEdit" name="opt_sr2_search" native="true">
+        <property name="sizePolicy">
+         <sizepolicy hsizetype="Minimum" vsizetype="Preferred">
+          <horstretch>0</horstretch>
+          <verstretch>0</verstretch>
+         </sizepolicy>
+        </property>
+       </widget>
+      </item>
+      <item row="1" column="0">
+       <widget class="QLabel" name="label_5">
+        <property name="sizePolicy">
+         <sizepolicy hsizetype="Minimum" vsizetype="Preferred">
+          <horstretch>0</horstretch>
+          <verstretch>0</verstretch>
+         </sizepolicy>
+        </property>
+        <property name="text">
+         <string>Replacement Text</string>
+        </property>
+       </widget>
+      </item>
+      <item row="2" column="0">
+       <widget class="QLineEdit" name="opt_sr2_replace">
+        <property name="sizePolicy">
+         <sizepolicy hsizetype="Minimum" vsizetype="Fixed">
+          <horstretch>0</horstretch>
+          <verstretch>0</verstretch>
+         </sizepolicy>
+        </property>
+       </widget>
+      </item>
+     </layout>
+    </widget>
+   </item>
+   <item row="2" column="0">
+    <widget class="QGroupBox" name="groupBox_3">
+     <property name="sizePolicy">
+      <sizepolicy hsizetype="Minimum" vsizetype="Preferred">
+       <horstretch>0</horstretch>
+       <verstretch>0</verstretch>
+      </sizepolicy>
+     </property>
+     <property name="title">
+      <string>3.</string>
+     </property>
+     <layout class="QGridLayout" name="gridLayout_3">
+      <property name="sizeConstraint">
+       <enum>QLayout::SetMinimumSize</enum>
+      </property>
+      <item row="0" column="0">
+       <widget class="RegexEdit" name="opt_sr3_search" native="true">
+        <property name="sizePolicy">
+         <sizepolicy hsizetype="Minimum" vsizetype="Preferred">
+          <horstretch>0</horstretch>
+          <verstretch>0</verstretch>
+         </sizepolicy>
+        </property>
+       </widget>
+      </item>
+      <item row="1" column="0">
+       <widget class="QLabel" name="label_6">
+        <property name="sizePolicy">
+         <sizepolicy hsizetype="Minimum" vsizetype="Preferred">
+          <horstretch>0</horstretch>
+          <verstretch>0</verstretch>
+         </sizepolicy>
+        </property>
+        <property name="text">
+         <string>Replacement Text</string>
+        </property>
+       </widget>
+      </item>
+      <item row="2" column="0">
+       <widget class="QLineEdit" name="opt_sr3_replace">
+        <property name="sizePolicy">
+         <sizepolicy hsizetype="Minimum" vsizetype="Fixed">
+          <horstretch>0</horstretch>
+          <verstretch>0</verstretch>
+         </sizepolicy>
+        </property>
+       </widget>
+      </item>
+     </layout>
+    </widget>
+   </item>
+  </layout>
+ </widget>
+ <customwidgets>
+  <customwidget>
+   <class>RegexEdit</class>
+   <extends>QWidget</extends>
+   <header>regex_builder.h</header>
+   <container>1</container>
+  </customwidget>
+ </customwidgets>
+ <resources/>
+ <connections/>
+</ui>
--- a/src/calibre/gui2/convert/single.py
+++ b/src/calibre/gui2/convert/single.py
@ -16,6 +16,8 @@ from calibre.ebooks.conversion.config import GuiRecommendations, save_specifics,
 from calibre.gui2.convert.single_ui import Ui_Dialog
 from calibre.gui2.convert.metadata import MetadataWidget
 from calibre.gui2.convert.look_and_feel import LookAndFeelWidget
+from calibre.gui2.convert.heuristics import HeuristicsWidget
+from calibre.gui2.convert.search_and_replace import SearchAndReplaceWidget
 from calibre.gui2.convert.page_setup import PageSetupWidget
 from calibre.gui2.convert.structure_detection import StructureDetectionWidget
 from calibre.gui2.convert.toc import TOCWidget
@ -170,6 +172,8 @@ class Config(ResizableDialog, Ui_Dialog):
        self.mw = widget_factory(MetadataWidget)
        self.setWindowTitle(_('Convert')+ ' ' + unicode(self.mw.title.text()))
        lf = widget_factory(LookAndFeelWidget)
+        hw = widget_factory(HeuristicsWidget)
+        sr = widget_factory(SearchAndReplaceWidget)
        ps = widget_factory(PageSetupWidget)
        sd = widget_factory(StructureDetectionWidget)
        toc = widget_factory(TOCWidget)
@ -203,7 +207,7 @@ class Config(ResizableDialog, Ui_Dialog):
            if not c: break
            self.stack.removeWidget(c)

-        widgets = [self.mw, lf, ps, sd, toc]
+        widgets = [self.mw, lf, hw, sr, ps, sd, toc]
        if input_widget is not None:
            widgets.append(input_widget)
        if output_widget is not None:
--- a/src/calibre/gui2/convert/structure_detection.py
+++ b/src/calibre/gui2/convert/structure_detection.py
@ -6,8 +6,6 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import re
-
 from calibre.gui2.convert.structure_detection_ui import Ui_Form
 from calibre.gui2.convert import Widget
 from calibre.gui2 import error_dialog
@ -24,12 +22,8 @@ class StructureDetectionWidget(Widget, Ui_Form):
        Widget.__init__(self, parent,
                ['chapter', 'chapter_mark',
                'remove_first_image',
-                'insert_metadata', 'page_breaks_before',
-                'preprocess_html', 'remove_header', 'header_regex',
-                'remove_footer', 'footer_regex','html_unwrap_factor']
+                'insert_metadata', 'page_breaks_before']
                )
-        self.opt_html_unwrap_factor.setEnabled(False)
-        self.huf_label.setEnabled(False)
        self.db, self.book_id = db, book_id
        for x in ('pagebreak', 'rule', 'both', 'none'):
            self.opt_chapter_mark.addItem(x)
@ -37,28 +31,11 @@ class StructureDetectionWidget(Widget, Ui_Form):
        self.opt_chapter.set_msg(_('Detect chapters at (XPath expression):'))
        self.opt_page_breaks_before.set_msg(_('Insert page breaks before '
            '(XPath expression):'))
-        self.opt_header_regex.set_msg(_('Header regular expression:'))
-        self.opt_header_regex.set_book_id(book_id)
-        self.opt_header_regex.set_db(db)
-        self.opt_footer_regex.set_msg(_('Footer regular expression:'))
-        self.opt_footer_regex.set_book_id(book_id)
-        self.opt_footer_regex.set_db(db)
-
+        
    def break_cycles(self):
        Widget.break_cycles(self)
-        self.opt_header_regex.break_cycles()
-        self.opt_footer_regex.break_cycles()

    def pre_commit_check(self):
-        for x in ('header_regex', 'footer_regex'):
-            x = getattr(self, 'opt_'+x)
-            try:
-                pat = unicode(x.regex)
-                re.compile(pat)
-            except Exception, err:
-                error_dialog(self, _('Invalid regular expression'),
-                             _('Invalid regular expression: %s')%err).exec_()
-                return False
        for x in ('chapter', 'page_breaks_before'):
            x = getattr(self, 'opt_'+x)
            if not x.check():
@ -66,8 +43,3 @@ class StructureDetectionWidget(Widget, Ui_Form):
                _('The XPath expression %s is invalid.')%x.text).exec_()
                return False
        return True
-
-    def set_value_handler(self, g, val):
-        if val is None and g is self.opt_html_unwrap_factor:
-            g.setValue(0.0)
-            return True
--- a/src/calibre/gui2/convert/structure_detection.ui
+++ b/src/calibre/gui2/convert/structure_detection.ui
@ -14,10 +14,10 @@
   <string>Form</string>
  </property>
  <layout class="QGridLayout" name="gridLayout">
-   <item row="0" column="1" colspan="2">
+   <item row="0" column="0" colspan="3">
    <widget class="XPathEdit" name="opt_chapter" native="true"/>
   </item>
-   <item row="1" column="0" colspan="2">
+   <item row="1" column="0">
    <widget class="QLabel" name="label">
     <property name="text">
      <string>Chapter &amp;mark:</string>
@ -27,7 +27,7 @@
     </property>
    </widget>
   </item>
-   <item row="1" column="2">
+   <item row="1" column="1">
    <widget class="QComboBox" name="opt_chapter_mark">
     <property name="minimumContentsLength">
      <number>20</number>
@ -41,17 +41,17 @@
     </property>
    </widget>
   </item>
-   <item row="5" column="0" colspan="2">
+   <item row="3" column="0" colspan="2">
    <widget class="QCheckBox" name="opt_insert_metadata">
     <property name="text">
      <string>Insert &amp;metadata as page at start of book</string>
     </property>
    </widget>
   </item>
-   <item row="11" column="0" colspan="3">
+   <item row="5" column="0" colspan="3">
    <widget class="XPathEdit" name="opt_page_breaks_before" native="true"/>
   </item>
-   <item row="12" column="0" colspan="3">
+   <item row="6" column="0" colspan="3">
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
@ -64,53 +64,7 @@
     </property>
    </spacer>
   </item>
-   <item row="8" column="0" colspan="2">
-    <widget class="QCheckBox" name="opt_remove_footer">
-     <property name="text">
-      <string>Remove F&amp;ooter</string>
-     </property>
-    </widget>
-   </item>
-   <item row="6" column="0" colspan="2">
-    <widget class="QCheckBox" name="opt_remove_header">
-     <property name="text">
-      <string>Remove H&amp;eader</string>
-     </property>
-    </widget>
-   </item>
-   <item row="7" column="0" colspan="3">
-    <widget class="RegexEdit" name="opt_header_regex" native="true"/>
-   </item>
-   <item row="9" column="0" colspan="3">
-    <widget class="RegexEdit" name="opt_footer_regex" native="true"/>
-   </item>
-   <item row="4" column="1">
-    <widget class="QLabel" name="huf_label">
-     <property name="text">
-      <string>Line &amp;un-wrap factor during preprocess:</string>
-     </property>
-     <property name="buddy">
-      <cstring>opt_html_unwrap_factor</cstring>
-     </property>
-    </widget>
-   </item>
-   <item row="4" column="2">
-    <widget class="QDoubleSpinBox" name="opt_html_unwrap_factor">
-     <property name="toolTip">
-      <string/>
-     </property>
-     <property name="maximum">
-      <double>1.000000000000000</double>
-     </property>
-     <property name="singleStep">
-      <double>0.050000000000000</double>
-     </property>
-     <property name="value">
-      <double>0.400000000000000</double>
-     </property>
-    </widget>
-   </item>
-   <item row="4" column="0">
+   <item row="1" column="2">
    <spacer name="horizontalSpacer">
     <property name="orientation">
      <enum>Qt::Horizontal</enum>
@ -123,13 +77,6 @@
     </property>
    </spacer>
   </item>
-   <item row="3" column="0" colspan="2">
-    <widget class="QCheckBox" name="opt_preprocess_html">
-     <property name="text">
-      <string>&amp;Preprocess input file to possibly improve structure detection</string>
-     </property>
-    </widget>
-   </item>
  </layout>
 </widget>
 <customwidgets>
@ -139,46 +86,7 @@
   <header>convert/xpath_wizard.h</header>
   <container>1</container>
  </customwidget>
-  <customwidget>
-   <class>RegexEdit</class>
-   <extends>QWidget</extends>
-   <header>regex_builder.h</header>
-   <container>1</container>
-  </customwidget>
 </customwidgets>
 <resources/>
- <connections>
-  <connection>
-   <sender>opt_preprocess_html</sender>
-   <signal>toggled(bool)</signal>
-   <receiver>opt_html_unwrap_factor</receiver>
-   <slot>setEnabled(bool)</slot>
-   <hints>
-    <hint type="sourcelabel">
-     <x>328</x>
-     <y>87</y>
-    </hint>
-    <hint type="destinationlabel">
-     <x>481</x>
-     <y>113</y>
-    </hint>
-   </hints>
-  </connection>
-  <connection>
-   <sender>opt_preprocess_html</sender>
-   <signal>toggled(bool)</signal>
-   <receiver>huf_label</receiver>
-   <slot>setEnabled(bool)</slot>
-   <hints>
-    <hint type="sourcelabel">
-     <x>295</x>
-     <y>88</y>
-    </hint>
-    <hint type="destinationlabel">
-     <x>291</x>
-     <y>105</y>
-    </hint>
-   </hints>
-  </connection>
- </connections>
+ <connections/>
 </ui>
--- a/src/calibre/gui2/convert/txt_output.py
+++ b/src/calibre/gui2/convert/txt_output.py
@ -4,10 +4,10 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

+from PyQt4.Qt import Qt
+
 from calibre.gui2.convert.txt_output_ui import Ui_Form
 from calibre.gui2.convert import Widget
-from calibre.ebooks.txt.newlines import TxtNewlines
-from calibre.gui2.widgets import BasicComboModel

 newline_model = None

@ -23,17 +23,27 @@ class PluginWidget(Widget, Ui_Form):
        ['newline', 'max_line_length', 'force_max_line_length',
        'inline_toc', 'markdown_format', 'keep_links', 'keep_image_references',
        'txt_output_encoding'])
-        self.db, self.book_id = db, book_id
+        self.db, self.book_id = db, book_id        
+        for x in get_option('newline').option.choices:
+            self.opt_newline.addItem(x)        
        self.initialize_options(get_option, get_help, db, book_id)

-        default = self.opt_newline.currentText()
+        self.opt_markdown_format.stateChanged.connect(self.enable_markdown_format)
+        self.enable_markdown_format(self.opt_markdown_format.checkState())

-        global newline_model
-        if newline_model is None:
-            newline_model = BasicComboModel(TxtNewlines.NEWLINE_TYPES.keys())
-        self.newline_model = newline_model
-        self.opt_newline.setModel(self.newline_model)
-
-        default_index = self.opt_newline.findText(default)
-        system_index = self.opt_newline.findText('system')
-        self.opt_newline.setCurrentIndex(default_index if default_index != -1 else system_index if system_index != -1 else 0)
+    def break_cycles(self):
+        '''
+        Run the base class cleanup, then drop whatever is connected to
+        ``opt_markdown_format.stateChanged``.
+        '''
+        Widget.break_cycles(self)
+
+        try:
+            self.opt_markdown_format.stateChanged.disconnect()
+        except Exception:
+            # Best effort: a failed disconnect is deliberately ignored.
+            # Unlike a bare ``except:``, this no longer swallows
+            # KeyboardInterrupt or SystemExit.
+            pass
+        
+    def enable_markdown_format(self, state):
+        '''
+        Enable the keep-links and keep-image-references options when
+        ``state`` equals ``Qt.Checked``; disable both otherwise.
+        '''
+        enabled = True if state == Qt.Checked else False
+        for option in (self.opt_keep_links, self.opt_keep_image_references):
+            option.setEnabled(enabled)
+        
--- a/src/calibre/gui2/convert/xexp_edit.ui
+++ b/src/calibre/gui2/convert/xexp_edit.ui
@ -6,8 +6,8 @@
   <rect>
    <x>0</x>
    <y>0</y>
-    <width>422</width>
-    <height>64</height>
+    <width>434</width>
+    <height>74</height>
   </rect>
  </property>
  <property name="windowTitle">
@ -53,13 +53,13 @@
   <item row="0" column="1">
    <widget class="QToolButton" name="button">
     <property name="toolTip">
-      <string>Use a wizard to help construct the XPath expression</string>
+      <string>Use a wizard to help construct the Regular expression</string>
     </property>
     <property name="text">
      <string>...</string>
     </property>
     <property name="icon">
-      <iconset resource="../../../../resources/images.qrc">
+      <iconset>
       <normaloff>:/images/wizard.png</normaloff>:/images/wizard.png</iconset>
     </property>
     <property name="iconSize">
@ -70,19 +70,6 @@
     </property>
    </widget>
   </item>
-   <item row="0" column="2">
-    <spacer name="horizontalSpacer">
-     <property name="orientation">
-      <enum>Qt::Horizontal</enum>
-     </property>
-     <property name="sizeHint" stdset="0">
-      <size>
-       <width>20</width>
-       <height>20</height>
-      </size>
-     </property>
-    </spacer>
-   </item>
  </layout>
 </widget>
 <customwidgets>
--- a/src/calibre/gui2/custom_column_widgets.py
+++ b/src/calibre/gui2/custom_column_widgets.py
@ -14,7 +14,7 @@ from PyQt4.Qt import QComboBox, QLabel, QSpinBox, QDoubleSpinBox, QDateEdit, \
        QPushButton

 from calibre.utils.date import qt_to_dt, now
-from calibre.gui2.widgets import TagsLineEdit, EnComboBox
+from calibre.gui2.widgets import CompleteLineEdit, EnComboBox
 from calibre.gui2.comments_editor import Editor as CommentsEditor
 from calibre.gui2 import UNDEFINED_QDATE, error_dialog
 from calibre.utils.config import tweaks
@ -212,7 +212,7 @@ class Text(Base):
        values = self.all_values = list(self.db.all_custom(num=self.col_id))
        values.sort(key=sort_key)
        if self.col_metadata['is_multiple']:
-            w = TagsLineEdit(parent, values)
+            w = CompleteLineEdit(parent, values)
            w.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred)
        else:
            w = EnComboBox(parent)
@ -226,7 +226,7 @@ class Text(Base):
        val = self.normalize_db_val(val)
        if self.col_metadata['is_multiple']:
            self.setter(val)
-            self.widgets[1].update_tags_cache(self.all_values)
+            self.widgets[1].update_items_cache(self.all_values)
        else:
            idx = None
            for i, c in enumerate(self.all_values):
@ -656,7 +656,7 @@ class RemoveTags(QWidget):
        layout.setSpacing(5)
        layout.setContentsMargins(0, 0, 0, 0)

-        self.tags_box = TagsLineEdit(parent, values)
+        self.tags_box = CompleteLineEdit(parent, values)
        layout.addWidget(self.tags_box, stretch = 1)
        # self.tags_box.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred)

@ -678,7 +678,7 @@ class BulkText(BulkBase):
        values = self.all_values = list(self.db.all_custom(num=self.col_id))
        values.sort(key=sort_key)
        if self.col_metadata['is_multiple']:
-            w = TagsLineEdit(parent, values)
+            w = CompleteLineEdit(parent, values)
            w.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred)
            self.widgets = [QLabel('&'+self.col_metadata['name']+': ' +
                                   _('tags to add'), parent), w]
@ -697,7 +697,7 @@ class BulkText(BulkBase):

    def initialize(self, book_ids):
        if self.col_metadata['is_multiple']:
-            self.widgets[1].update_tags_cache(self.all_values)
+            self.widgets[1].update_items_cache(self.all_values)
        else:
            val = self.get_initial_value(book_ids)
            self.initial_val = val = self.normalize_db_val(val)
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@ -1018,7 +1018,8 @@ class DeviceMixin(object): # {{{
        ids = [self.library_view.model().id(r) \
               for r in self.library_view.selectionModel().selectedRows()] \
                                if send_ids is None else send_ids
-        if not self.device_manager or not ids or len(ids) == 0:
+        if not self.device_manager or not ids or len(ids) == 0 or \
+                not self.device_manager.is_device_connected:
            return

        settings = self.device_manager.device.settings()
--- a/src/calibre/gui2/device_drivers/configwidget.ui
+++ b/src/calibre/gui2/device_drivers/configwidget.ui
@ -85,6 +85,9 @@
   </item>
   <item row="2" column="0">
    <widget class="QCheckBox" name="opt_use_subdirs">
+     <property name="toolTip">
+      <string>If checked, books are placed into sub directories based on their metadata on the device. If unchecked, books are all put into the top level directory.</string>
+     </property>
     <property name="text">
      <string>Use sub directories</string>
     </property>
--- a/src/calibre/gui2/dialogs/add_from_isbn.py
+++ b/src/calibre/gui2/dialogs/add_from_isbn.py
@ -12,6 +12,7 @@ from PyQt4.Qt import QDialog, QApplication
 from calibre.gui2.dialogs.add_from_isbn_ui import Ui_Dialog
 from calibre.ebooks.metadata import check_isbn
 from calibre.constants import iswindows
+from calibre.gui2 import gprefs

 class AddFromISBN(QDialog, Ui_Dialog):

@ -25,7 +26,9 @@ class AddFromISBN(QDialog, Ui_Dialog):

        self.isbns = []
        self.books = []
+        self.set_tags = []
        self.paste_button.clicked.connect(self.paste)
+        self.add_tags.setText(', '.join(gprefs.get('add from ISBN tags', [])))

    def paste(self, *args):
        app = QApplication.instance()
@ -37,6 +40,10 @@ class AddFromISBN(QDialog, Ui_Dialog):
            self.isbn_box.setPlainText(new)

    def accept(self, *args):
+        tags = unicode(self.add_tags.text()).strip().split(',')
+        tags = list(filter(None, [x.strip() for x in tags]))
+        gprefs['add from ISBN tags'] = tags
+        self.set_tags = tags
        for line in unicode(self.isbn_box.toPlainText()).strip().splitlines():
            line = line.strip()
            if not line:
--- a/src/calibre/gui2/dialogs/add_from_isbn.ui
+++ b/src/calibre/gui2/dialogs/add_from_isbn.ui
@ -18,8 +18,19 @@
    <normaloff>:/images/add_book.png</normaloff>:/images/add_book.png</iconset>
  </property>
  <layout class="QGridLayout" name="gridLayout">
-   <item row="0" column="0">
-    <widget class="QPlainTextEdit" name="isbn_box"/>
+   <item row="0" column="0" rowspan="2">
+    <layout class="QVBoxLayout" name="verticalLayout_2">
+     <item>
+      <widget class="QPlainTextEdit" name="isbn_box"/>
+     </item>
+     <item>
+      <widget class="QPushButton" name="paste_button">
+       <property name="text">
+        <string>&amp;Paste from clipboard</string>
+       </property>
+      </widget>
+     </item>
+    </layout>
   </item>
   <item row="0" column="1">
    <widget class="QLabel" name="label">
@ -34,6 +45,36 @@
     </property>
    </widget>
   </item>
+   <item row="1" column="1">
+    <layout class="QVBoxLayout" name="verticalLayout">
+     <item>
+      <widget class="QLabel" name="label_2">
+       <property name="text">
+        <string>&amp;Tags to set on created book entries:</string>
+       </property>
+       <property name="buddy">
+        <cstring>add_tags</cstring>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <widget class="QLineEdit" name="add_tags"/>
+     </item>
+     <item>
+      <spacer name="verticalSpacer">
+       <property name="orientation">
+        <enum>Qt::Vertical</enum>
+       </property>
+       <property name="sizeHint" stdset="0">
+        <size>
+         <width>20</width>
+         <height>40</height>
+        </size>
+       </property>
+      </spacer>
+     </item>
+    </layout>
+   </item>
   <item row="2" column="0" colspan="2">
    <widget class="QDialogButtonBox" name="buttonBox">
     <property name="orientation">
@ -44,13 +85,6 @@
     </property>
    </widget>
   </item>
-   <item row="1" column="0">
-    <widget class="QPushButton" name="paste_button">
-     <property name="text">
-      <string>&amp;Paste from clipboard</string>
-     </property>
-    </widget>
-   </item>
  </layout>
 </widget>
 <resources>
--- a/src/calibre/gui2/dialogs/metadata_bulk.py
+++ b/src/calibre/gui2/dialogs/metadata_bulk.py
@ -15,15 +15,16 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_string
 from calibre.ebooks.metadata.book.base import composite_formatter
 from calibre.ebooks.metadata.meta import get_metadata
 from calibre.gui2.custom_column_widgets import populate_metadata_page
-from calibre.gui2 import error_dialog, ResizableDialog
+from calibre.gui2 import error_dialog, ResizableDialog, UNDEFINED_QDATE
 from calibre.gui2.progress_indicator import ProgressIndicator
 from calibre.utils.config import dynamic
 from calibre.utils.titlecase import titlecase
 from calibre.utils.icu import sort_key, capitalize
-from calibre.utils.config import prefs
+from calibre.utils.config import prefs, tweaks
 from calibre.utils.magick.draw import identify_data
+from calibre.utils.date import qt_to_dt

-def get_cover_data(path):
+def get_cover_data(path): # {{{
    old = prefs['read_file_metadata']
    if not old:
        prefs['read_file_metadata'] = True
@ -46,8 +47,7 @@ def get_cover_data(path):
        prefs['read_file_metadata'] = old

    return cdata, area
-
-
+# }}}

 class MyBlockingBusy(QDialog): # {{{

@ -132,7 +132,8 @@ class MyBlockingBusy(QDialog): # {{{
        remove_all, remove, add, au, aus, do_aus, rating, pub, do_series, \
            do_autonumber, do_remove_format, remove_format, do_swap_ta, \
            do_remove_conv, do_auto_author, series, do_series_restart, \
-            series_start_value, do_title_case, cover_action, clear_series = self.args
+            series_start_value, do_title_case, cover_action, clear_series, \
+            pubdate, adddate = self.args


        # first loop: do author and title. These will commit at the end of each
@ -209,6 +210,12 @@ class MyBlockingBusy(QDialog): # {{{
            if clear_series:
                self.db.set_series(id, '', notify=False, commit=False)

+            if pubdate is not None:
+                self.db.set_pubdate(id, pubdate, notify=False, commit=False)
+
+            if adddate is not None:
+                self.db.set_timestamp(id, adddate, notify=False, commit=False)
+
            if do_series:
                if do_series_restart:
                    if self.series_start_value is None:
@ -274,8 +281,8 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
        self.changed = False

        all_tags = self.db.all_tags()
-        self.tags.update_tags_cache(all_tags)
-        self.remove_tags.update_tags_cache(all_tags)
+        self.tags.update_items_cache(all_tags)
+        self.remove_tags.update_items_cache(all_tags)

        self.initialize_combos()

@ -288,6 +295,17 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
        self.series.editTextChanged.connect(self.series_changed)
        self.tag_editor_button.clicked.connect(self.tag_editor)
        self.autonumber_series.stateChanged[int].connect(self.auto_number_changed)
+        self.pubdate.setMinimumDate(UNDEFINED_QDATE)
+        pubdate_format = tweaks['gui_pubdate_display_format']
+        if pubdate_format is not None:
+            self.pubdate.setDisplayFormat(pubdate_format)
+        self.pubdate.setSpecialValueText(_('Undefined'))
+        self.clear_pubdate_button.clicked.connect(self.clear_pubdate)
+        self.pubdate.dateChanged.connect(self.do_apply_pubdate)
+        self.adddate.setMinimumDate(UNDEFINED_QDATE)
+        self.adddate.setSpecialValueText(_('Undefined'))
+        self.clear_adddate_button.clicked.connect(self.clear_adddate)
+        self.adddate.dateChanged.connect(self.do_apply_adddate)

        if len(self.db.custom_field_keys(include_composites=False)) == 0:
            self.central_widget.removeTab(1)
@ -304,6 +322,18 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
        self.central_widget.setCurrentIndex(tab)
        self.exec_()

+    def do_apply_pubdate(self, *args): # slot wired to self.pubdate.dateChanged
+        self.apply_pubdate.setChecked(True) # pubdate is only written to the books when this box is checked
+
+    def clear_pubdate(self, *args): # slot wired to self.clear_pubdate_button.clicked
+        self.pubdate.setDate(UNDEFINED_QDATE) # the widget's minimum date, displayed via the 'Undefined' special value text; NOTE(review): presumably also emits dateChanged - confirm
+
+    def do_apply_adddate(self, *args): # slot wired to self.adddate.dateChanged
+        self.apply_adddate.setChecked(True) # adddate is only written (as the timestamp) when this box is checked
+
+    def clear_adddate(self, *args): # slot wired to self.clear_adddate_button.clicked
+        self.adddate.setDate(UNDEFINED_QDATE) # the widget's minimum date, displayed via the 'Undefined' special value text; NOTE(review): presumably also emits dateChanged - confirm
+
    def button_clicked(self, which):
        if which == self.button_box.button(QDialogButtonBox.Apply):
            self.do_again = True
@ -709,6 +739,10 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
            self.authors.addItem(name)
        self.authors.setEditText('')

+        self.authors.set_separator('&')
+        self.authors.set_space_before_sep(True)
+        self.authors.update_items_cache(self.db.all_author_names())
+
    def initialize_series(self):
        all_series = self.db.all_series()
        all_series.sort(key=lambda x : sort_key(x[1]))
@ -733,8 +767,8 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
        if d.result() == QDialog.Accepted:
            tag_string = ', '.join(d.tags)
            self.tags.setText(tag_string)
-            self.tags.update_tags_cache(self.db.all_tags())
-            self.remove_tags.update_tags_cache(self.db.all_tags())
+            self.tags.update_items_cache(self.db.all_tags())
+            self.remove_tags.update_items_cache(self.db.all_tags())

    def auto_number_changed(self, state):
        if state:
@ -783,6 +817,12 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
        do_remove_conv = self.remove_conversion_settings.isChecked()
        do_auto_author = self.auto_author_sort.isChecked()
        do_title_case = self.change_title_to_title_case.isChecked()
+        pubdate = adddate = None
+        if self.apply_pubdate.isChecked():
+            pubdate = qt_to_dt(self.pubdate.date())
+        if self.apply_adddate.isChecked():
+            adddate = qt_to_dt(self.adddate.date())
+
        cover_action = None
        if self.cover_remove.isChecked():
            cover_action = 'remove'
@ -794,7 +834,8 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
        args = (remove_all, remove, add, au, aus, do_aus, rating, pub, do_series,
                do_autonumber, do_remove_format, remove_format, do_swap_ta,
                do_remove_conv, do_auto_author, series, do_series_restart,
-                series_start_value, do_title_case, cover_action, clear_series)
+                series_start_value, do_title_case, cover_action, clear_series,
+                pubdate, adddate)

        bb = MyBlockingBusy(_('Applying changes to %d books.\nPhase {0} {1}%%.')
                %len(self.ids), args, self.db, self.ids,
--- a/src/calibre/gui2/dialogs/metadata_bulk.ui
+++ b/src/calibre/gui2/dialogs/metadata_bulk.ui
@ -75,13 +75,31 @@
             </property>
            </widget>
           </item>
-           <item row="1" column="1">
-            <widget class="QCheckBox" name="auto_author_sort">
-             <property name="text">
-              <string>A&amp;utomatically set author sort</string>
+           <item row="0" column="1">
+            <widget class="CompleteComboBox" name="authors">
+             <property name="editable">
+              <bool>true</bool>
             </property>
            </widget>
           </item>
+           <item row="1" column="1">
+            <layout class="QHBoxLayout" name="horizontalLayout_2">
+             <item>
+              <widget class="QCheckBox" name="auto_author_sort">
+               <property name="text">
+                <string>A&amp;utomatically set author sort</string>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <widget class="QCheckBox" name="swap_title_and_author">
+               <property name="text">
+                <string>&amp;Swap title and author</string>
+               </property>
+              </widget>
+             </item>
+            </layout>
+           </item>
           <item row="2" column="0">
            <widget class="QLabel" name="label_8">
             <property name="text">
@ -95,7 +113,7 @@
             </property>
            </widget>
           </item>
-           <item row="2" column="1" colspan="2">
+           <item row="2" column="1">
            <widget class="EnLineEdit" name="author_sort">
             <property name="toolTip">
              <string>Specify how the author(s) of this book should be sorted. For example Charles Dickens should be sorted as Dickens, Charles.</string>
@ -115,7 +133,7 @@
             </property>
            </widget>
           </item>
-           <item row="3" column="1" colspan="2">
+           <item row="3" column="1">
            <widget class="QSpinBox" name="rating">
             <property name="toolTip">
              <string>Rating of this book. 0-5 stars</string>
@ -156,7 +174,7 @@
             </property>
            </widget>
           </item>
-           <item row="4" column="1" colspan="2">
+           <item row="4" column="1">
            <widget class="EnComboBox" name="publisher">
             <property name="editable">
              <bool>true</bool>
@ -177,7 +195,7 @@
            </widget>
           </item>
           <item row="5" column="1">
-            <widget class="TagsLineEdit" name="tags">
+            <widget class="CompleteLineEdit" name="tags">
             <property name="toolTip">
              <string>Tags categorize the book. This is particularly useful while searching. &lt;br&gt;&lt;br&gt;They can be any words or phrases, separated by commas.</string>
             </property>
@ -202,13 +220,16 @@
             <property name="text">
              <string>&amp;Remove tags:</string>
             </property>
+             <property name="alignment">
+              <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+             </property>
             <property name="buddy">
              <cstring>remove_tags</cstring>
             </property>
            </widget>
           </item>
           <item row="6" column="1">
-            <widget class="TagsLineEdit" name="remove_tags">
+            <widget class="CompleteLineEdit" name="remove_tags">
             <property name="toolTip">
              <string>Comma separated list of tags to remove from the books. </string>
             </property>
@ -220,7 +241,7 @@
              <string>Check this box to remove all tags from the books.</string>
             </property>
             <property name="text">
-              <string>Remove all</string>
+              <string>Remove &amp;all</string>
             </property>
            </widget>
           </item>
@ -241,52 +262,44 @@
            </widget>
           </item>
           <item row="7" column="1">
-            <layout class="QHBoxLayout" name="HLayout_34">
-             <item>
-              <widget class="EnComboBox" name="series">
-               <property name="toolTip">
-                <string>List of known series. You can add new series.</string>
-               </property>
-               <property name="whatsThis">
-                <string>List of known series. You can add new series.</string>
-               </property>
-               <property name="editable">
-                <bool>true</bool>
-               </property>
-               <property name="insertPolicy">
-                <enum>QComboBox::InsertAlphabetically</enum>
-               </property>
-               <property name="sizeAdjustPolicy">
-                <enum>QComboBox::AdjustToContents</enum>
-               </property>
-              </widget>
-             </item>
-             <item>
-              <widget class="QCheckBox" name="clear_series">
-               <property name="toolTip">
-                <string>If checked, the series will be cleared</string>
-               </property>
-               <property name="text">
-                <string>Clear series</string>
-               </property>
-              </widget>
-             </item>
-             <item>
-              <spacer name="HSpacer_344">
-               <property name="orientation">
-                <enum>Qt::Horizontal</enum>
-               </property>
-               <property name="sizeHint" stdset="0">
-                <size>
-                 <width>20</width>
-                 <height>0</height>
-                </size>
-               </property>
-              </spacer>
-             </item>
-            </layout>
+            <widget class="EnComboBox" name="series">
+             <property name="sizePolicy">
+              <sizepolicy hsizetype="Preferred" vsizetype="Fixed">
+               <horstretch>0</horstretch>
+               <verstretch>0</verstretch>
+              </sizepolicy>
+             </property>
+             <property name="toolTip">
+              <string>List of known series. You can add new series.</string>
+             </property>
+             <property name="whatsThis">
+              <string>List of known series. You can add new series.</string>
+             </property>
+             <property name="editable">
+              <bool>true</bool>
+             </property>
+             <property name="insertPolicy">
+              <enum>QComboBox::InsertAlphabetically</enum>
+             </property>
+             <property name="sizeAdjustPolicy">
+              <enum>QComboBox::AdjustToMinimumContentsLengthWithIcon</enum>
+             </property>
+             <property name="minimumContentsLength">
+              <number>40</number>
+             </property>
+            </widget>
           </item>
-           <item row="8" column="1" colspan="2">
+           <item row="7" column="2">
+            <widget class="QCheckBox" name="clear_series">
+             <property name="toolTip">
+              <string>If checked, the series will be cleared</string>
+             </property>
+             <property name="text">
+              <string>&amp;Clear series</string>
+             </property>
+            </widget>
+           </item>
+           <item row="8" column="1">
            <layout class="QHBoxLayout" name="HLayout_3">
             <item>
              <widget class="QCheckBox" name="autonumber_series">
@ -297,7 +310,7 @@ you selected them. So if you selected Book A and then Book B,
 Book A will have series number 1 and Book B series number 2.</string>
               </property>
               <property name="text">
-                <string>Automatically number books in this series</string>
+                <string>&amp;Automatically number books in this series</string>
               </property>
              </widget>
             </item>
@ -312,7 +325,7 @@ for that series. Checking this box will tell calibre to start numbering
 from the value in the box</string>
               </property>
               <property name="text">
-                <string>Force numbers to start with </string>
+                <string>&amp;Force numbers to start with:</string>
               </property>
              </widget>
             </item>
@ -332,22 +345,102 @@ from the value in the box</string>
               </property>
              </widget>
             </item>
-             <item>
-              <spacer name="HSpacer_34">
-               <property name="orientation">
-                <enum>Qt::Horizontal</enum>
-               </property>
-               <property name="sizeHint" stdset="0">
-                <size>
-                 <width>20</width>
-                 <height>10</height>
-                </size>
-               </property>
-              </spacer>
-             </item>
            </layout>
           </item>
           <item row="9" column="0">
+            <widget class="QLabel" name="label_10">
+             <property name="text">
+              <string>&amp;Date:</string>
+             </property>
+             <property name="alignment">
+              <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+             </property>
+             <property name="buddy">
+              <cstring>adddate</cstring>
+             </property>
+            </widget>
+           </item>
+           <item row="9" column="1">
+            <layout class="QHBoxLayout" name="horizontalLayout_5">
+             <item>
+              <widget class="QDateEdit" name="adddate">
+               <property name="displayFormat">
+                <string>d MMM yyyy</string>
+               </property>
+               <property name="calendarPopup">
+                <bool>true</bool>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <widget class="QToolButton" name="clear_adddate_button">
+               <property name="text">
+                <string>...</string>
+               </property>
+               <property name="icon">
+                <iconset resource="../../../../resources/images.qrc">
+                 <normaloff>:/images/trash.png</normaloff>:/images/trash.png</iconset>
+               </property>
+              </widget>
+             </item>
+            </layout>
+           </item>
+           <item row="9" column="2">
+            <widget class="QCheckBox" name="apply_adddate">
+             <property name="text">
+              <string>&amp;Apply date</string>
+             </property>
+            </widget>
+           </item>
+           <item row="10" column="0">
+            <widget class="QLabel" name="label_9">
+             <property name="text">
+              <string>&amp;Published:</string>
+             </property>
+             <property name="alignment">
+              <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+             </property>
+             <property name="buddy">
+              <cstring>pubdate</cstring>
+             </property>
+            </widget>
+           </item>
+           <item row="10" column="1">
+            <layout class="QHBoxLayout" name="horizontalLayout_4">
+             <item>
+              <widget class="QDateEdit" name="pubdate">
+               <property name="displayFormat">
+                <string>MMM yyyy</string>
+               </property>
+               <property name="calendarPopup">
+                <bool>true</bool>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <widget class="QToolButton" name="clear_pubdate_button">
+               <property name="toolTip">
+                <string>Clear published date</string>
+               </property>
+               <property name="text">
+                <string>...</string>
+               </property>
+               <property name="icon">
+                <iconset resource="../../../../resources/images.qrc">
+                 <normaloff>:/images/trash.png</normaloff>:/images/trash.png</iconset>
+               </property>
+              </widget>
+             </item>
+            </layout>
+           </item>
+           <item row="10" column="2">
+            <widget class="QCheckBox" name="apply_pubdate">
+             <property name="text">
+              <string>&amp;Apply date</string>
+             </property>
+            </widget>
+           </item>
+           <item row="11" column="0">
            <widget class="QLabel" name="label_5">
             <property name="text">
              <string>Remove &amp;format:</string>
@ -357,60 +450,73 @@ from the value in the box</string>
             </property>
            </widget>
           </item>
-           <item row="9" column="1">
-            <widget class="QComboBox" name="remove_format"/>
-           </item>
-           <item row="0" column="1">
-            <widget class="EnComboBox" name="authors">
-             <property name="editable">
-              <bool>true</bool>
+           <item row="11" column="1">
+            <widget class="QComboBox" name="remove_format">
+             <property name="maximumSize">
+              <size>
+               <width>120</width>
+               <height>16777215</height>
+              </size>
             </property>
            </widget>
           </item>
-           <item row="11" column="0" colspan="2">
-            <widget class="QCheckBox" name="swap_title_and_author">
-             <property name="text">
-              <string>&amp;Swap title and author</string>
-             </property>
-            </widget>
-           </item>
-           <item row="12" column="0" colspan="2">
-            <widget class="QCheckBox" name="change_title_to_title_case">
-             <property name="toolTip">
-              <string>Force the title to be in title case. If both this and swap authors are checked,
-title and author are swapped before the title case is set</string>
-             </property>
-             <property name="text">
-              <string>Change title to title case</string>
-             </property>
-            </widget>
-           </item>
-           <item row="10" column="0" colspan="2">
-            <widget class="QCheckBox" name="remove_conversion_settings">
-             <property name="toolTip">
-              <string>Remove stored conversion settings for the selected books.
-
-Future conversion of these books will use the default settings.</string>
-             </property>
-             <property name="text">
-              <string>Remove &amp;stored conversion settings for the selected books</string>
-             </property>
-            </widget>
-           </item>
-           <item row="14" column="0" colspan="3">
-            <spacer name="verticalSpacer_2">
+           <item row="12" column="0">
+            <spacer name="verticalSpacer">
             <property name="orientation">
              <enum>Qt::Vertical</enum>
             </property>
+             <property name="sizeType">
+              <enum>QSizePolicy::Fixed</enum>
+             </property>
             <property name="sizeHint" stdset="0">
              <size>
               <width>20</width>
-               <height>40</height>
+               <height>15</height>
              </size>
             </property>
            </spacer>
           </item>
           <item row="13" column="0" colspan="3">
+            <layout class="QHBoxLayout" name="horizontalLayout_3">
+             <item>
+              <widget class="QCheckBox" name="change_title_to_title_case">
+               <property name="toolTip">
+                <string>Force the title to be in title case. If both this and swap authors are checked,
+title and author are swapped before the title case is set</string>
+               </property>
+               <property name="text">
+                <string>Change title to title &amp;case</string>
+               </property>
+              </widget>
+             </item>
+             <item>
+              <spacer name="horizontalSpacer">
+               <property name="orientation">
+                <enum>Qt::Horizontal</enum>
+               </property>
+               <property name="sizeHint" stdset="0">
+                <size>
+                 <width>40</width>
+                 <height>20</height>
+                </size>
+               </property>
+              </spacer>
+             </item>
+             <item>
+              <widget class="QCheckBox" name="remove_conversion_settings">
+               <property name="toolTip">
+                <string>Remove stored conversion settings for the selected books.
+
+Future conversion of these books will use the default settings.</string>
+               </property>
+               <property name="text">
+                <string>Remove &amp;stored conversion settings for the selected books</string>
+               </property>
+              </widget>
+             </item>
+            </layout>
+           </item>
+           <item row="14" column="0" colspan="3">
            <widget class="QGroupBox" name="groupBox">
             <property name="title">
              <string>Change &amp;cover</string>
@ -440,6 +546,19 @@ Future conversion of these books will use the default settings.</string>
             </layout>
            </widget>
           </item>
+           <item row="15" column="0">
+            <spacer name="verticalSpacer_2">
+             <property name="orientation">
+              <enum>Qt::Vertical</enum>
+             </property>
+             <property name="sizeHint" stdset="0">
+              <size>
+               <width>20</width>
+               <height>40</height>
+              </size>
+             </property>
+            </spacer>
+           </item>
          </layout>
         </widget>
         <widget class="QWidget" name="tab">
@ -881,7 +1000,12 @@ not multiple and the destination field is multiple</string>
   <header>widgets.h</header>
  </customwidget>
  <customwidget>
-   <class>TagsLineEdit</class>
+   <class>CompleteComboBox</class>
+   <extends>QComboBox</extends>
+   <header>widgets.h</header>
+  </customwidget>
+  <customwidget>
+   <class>CompleteLineEdit</class>
   <extends>QLineEdit</extends>
   <header>widgets.h</header>
  </customwidget>
@ -902,14 +1026,9 @@ not multiple and the destination field is multiple</string>
  <tabstop>remove_tags</tabstop>
  <tabstop>remove_all_tags</tabstop>
  <tabstop>series</tabstop>
-  <tabstop>clear_series</tabstop>
  <tabstop>autonumber_series</tabstop>
  <tabstop>series_numbering_restarts</tabstop>
  <tabstop>series_start_number</tabstop>
-  <tabstop>remove_format</tabstop>
-  <tabstop>remove_conversion_settings</tabstop>
-  <tabstop>swap_title_and_author</tabstop>
-  <tabstop>change_title_to_title_case</tabstop>
  <tabstop>button_box</tabstop>
  <tabstop>search_field</tabstop>
  <tabstop>search_mode</tabstop>
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@ -16,7 +16,7 @@ from PyQt4.Qt import SIGNAL, QObject, Qt, QTimer, QDate, \

 from calibre.gui2 import error_dialog, file_icon_provider, dynamic, \
                           choose_files, choose_images, ResizableDialog, \
-                           warning_dialog, question_dialog
+                           warning_dialog, question_dialog, UNDEFINED_QDATE
 from calibre.gui2.dialogs.metadata_single_ui import Ui_MetadataSingleDialog
 from calibre.gui2.dialogs.fetch_metadata import FetchMetadata
 from calibre.gui2.dialogs.tag_editor import TagEditor
@ -491,11 +491,15 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
        self.formats.setAcceptDrops(True)
        self.cover_changed = False
        self.cpixmap = None
-        self.pubdate.setMinimumDate(QDate(100,1,1))
+        self.pubdate.setMinimumDate(UNDEFINED_QDATE)
        pubdate_format = tweaks['gui_pubdate_display_format']
        if pubdate_format is not None:
            self.pubdate.setDisplayFormat(pubdate_format)
-        self.date.setMinimumDate(QDate(100,1,1))
+        self.date.setMinimumDate(UNDEFINED_QDATE)
+        self.pubdate.setSpecialValueText(_('Undefined'))
+        self.date.setSpecialValueText(_('Undefined'))
+        self.clear_pubdate_button.clicked.connect(self.clear_pubdate)
+

        self.connect(self.cover, SIGNAL('cover_changed(PyQt_PyObject)'), self.cover_dropped)
        QObject.connect(self.cover_button, SIGNAL("clicked(bool)"), \
@ -552,7 +556,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
        tags = self.db.tags(row)
        self.original_tags = ', '.join(tags.split(',')) if tags else ''
        self.tags.setText(self.original_tags)
-        self.tags.update_tags_cache(self.db.all_tags())
+        self.tags.update_items_cache(self.db.all_tags())
        rating = self.db.rating(row)
        if rating > 0:
            self.rating.setValue(int(rating/2.))
@ -615,6 +619,9 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):

        self.show()

+    def clear_pubdate(self, *args):  # slot for clear_pubdate_button.clicked; any signal arguments are ignored
+        self.pubdate.setDate(UNDEFINED_QDATE)  # same value as the widget's minimum date, so the special text 'Undefined' is shown
+
    def create_custom_column_editors(self):
        w = self.central_widget.widget(1)
        layout = w.layout()
@ -717,6 +724,10 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
            au = _('Unknown')
        au = ' & '.join([a.strip().replace('|', ',') for a in au.split(',')])
        self.authors.setEditText(au)
+        
+        self.authors.set_separator('&')
+        self.authors.set_space_before_sep(True)
+        self.authors.update_items_cache(self.db.all_author_names())

    def initialize_series(self):
        self.series.setSizeAdjustPolicy(self.series.AdjustToContentsOnFirstShow)
@ -769,7 +780,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
        if d.result() == QDialog.Accepted:
            tag_string = ', '.join(d.tags)
            self.tags.setText(tag_string)
-            self.tags.update_tags_cache(self.db.all_tags())
+            self.tags.update_items_cache(self.db.all_tags())


    def fetch_metadata(self):
--- a/src/calibre/gui2/dialogs/metadata_single.ui
+++ b/src/calibre/gui2/dialogs/metadata_single.ui
@ -100,246 +100,6 @@
                    </property>
                   </widget>
                  </item>
-                  <item row="1" column="0">
-                   <widget class="QLabel" name="label">
-                    <property name="text">
-                     <string>Title &amp;sort: </string>
-                    </property>
-                    <property name="alignment">
-                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
-                    </property>
-                    <property name="buddy">
-                     <cstring>title_sort</cstring>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="1" column="1">
-                   <widget class="EnLineEdit" name="title_sort">
-                    <property name="toolTip">
-                     <string>Specify how this book should be sorted when by title. For example, The Exorcist might be sorted as Exorcist, The.</string>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="2" column="0">
-                   <widget class="QLabel" name="label_2">
-                    <property name="text">
-                     <string>&amp;Author(s): </string>
-                    </property>
-                    <property name="alignment">
-                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
-                    </property>
-                    <property name="buddy">
-                     <cstring>authors</cstring>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="2" column="1">
-                   <widget class="EnComboBox" name="authors">
-                    <property name="editable">
-                     <bool>true</bool>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="3" column="0">
-                   <widget class="QLabel" name="label_8">
-                    <property name="text">
-                     <string>Author S&amp;ort: </string>
-                    </property>
-                    <property name="alignment">
-                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
-                    </property>
-                    <property name="buddy">
-                     <cstring>author_sort</cstring>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="3" column="1">
-                   <widget class="EnLineEdit" name="author_sort">
-                    <property name="toolTip">
-                     <string>Specify how the author(s) of this book should be sorted. For example Charles Dickens should be sorted as Dickens, Charles.
-If the box is colored green, then text matches the individual author's sort strings. If it is colored red, then the authors and this text do not match.</string>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="4" column="0">
-                   <widget class="QLabel" name="label_6">
-                    <property name="text">
-                     <string>&amp;Rating:</string>
-                    </property>
-                    <property name="alignment">
-                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
-                    </property>
-                    <property name="buddy">
-                     <cstring>rating</cstring>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="4" column="1" colspan="2">
-                   <widget class="QSpinBox" name="rating">
-                    <property name="toolTip">
-                     <string>Rating of this book. 0-5 stars</string>
-                    </property>
-                    <property name="whatsThis">
-                     <string>Rating of this book. 0-5 stars</string>
-                    </property>
-                    <property name="buttonSymbols">
-                     <enum>QAbstractSpinBox::PlusMinus</enum>
-                    </property>
-                    <property name="suffix">
-                     <string> stars</string>
-                    </property>
-                    <property name="maximum">
-                     <number>5</number>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="5" column="0">
-                   <widget class="QLabel" name="label_3">
-                    <property name="text">
-                     <string>&amp;Publisher: </string>
-                    </property>
-                    <property name="alignment">
-                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
-                    </property>
-                    <property name="buddy">
-                     <cstring>publisher</cstring>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="5" column="1" colspan="2">
-                   <widget class="EnComboBox" name="publisher">
-                    <property name="editable">
-                     <bool>true</bool>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="6" column="0">
-                   <widget class="QLabel" name="label_4">
-                    <property name="text">
-                     <string>Ta&amp;gs: </string>
-                    </property>
-                    <property name="alignment">
-                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
-                    </property>
-                    <property name="buddy">
-                     <cstring>tags</cstring>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="7" column="0">
-                   <widget class="QLabel" name="label_7">
-                    <property name="text">
-                     <string>&amp;Series:</string>
-                    </property>
-                    <property name="textFormat">
-                     <enum>Qt::PlainText</enum>
-                    </property>
-                    <property name="alignment">
-                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
-                    </property>
-                    <property name="buddy">
-                     <cstring>series</cstring>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="7" column="1">
-                   <layout class="QHBoxLayout" name="_3">
-                    <property name="spacing">
-                     <number>5</number>
-                    </property>
-                    <item>
-                     <widget class="EnComboBox" name="series">
-                      <property name="toolTip">
-                       <string>List of known series. You can add new series.</string>
-                      </property>
-                      <property name="whatsThis">
-                       <string>List of known series. You can add new series.</string>
-                      </property>
-                      <property name="editable">
-                       <bool>true</bool>
-                      </property>
-                      <property name="insertPolicy">
-                       <enum>QComboBox::InsertAlphabetically</enum>
-                      </property>
-                     </widget>
-                    </item>
-                   </layout>
-                  </item>
-                  <item row="8" column="1" colspan="2">
-                   <widget class="QDoubleSpinBox" name="series_index">
-                    <property name="enabled">
-                     <bool>false</bool>
-                    </property>
-                    <property name="prefix">
-                     <string>Book </string>
-                    </property>
-                    <property name="maximum">
-                     <double>9999.989999999999782</double>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="9" column="0">
-                   <widget class="QLabel" name="label_9">
-                    <property name="text">
-                     <string>IS&amp;BN:</string>
-                    </property>
-                    <property name="alignment">
-                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
-                    </property>
-                    <property name="buddy">
-                     <cstring>isbn</cstring>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="9" column="1" colspan="2">
-                   <widget class="QLineEdit" name="isbn"/>
-                  </item>
-                  <item row="10" column="0">
-                   <widget class="QLabel" name="label_11">
-                    <property name="text">
-                     <string>&amp;Date:</string>
-                    </property>
-                    <property name="alignment">
-                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
-                    </property>
-                    <property name="buddy">
-                     <cstring>date</cstring>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="10" column="1" colspan="2">
-                   <widget class="QDateEdit" name="date">
-                    <property name="displayFormat">
-                     <string>dd MMM yyyy</string>
-                    </property>
-                    <property name="calendarPopup">
-                     <bool>true</bool>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="11" column="0">
-                   <widget class="QLabel" name="label_10">
-                    <property name="text">
-                     <string>Publishe&amp;d:</string>
-                    </property>
-                    <property name="alignment">
-                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
-                    </property>
-                    <property name="buddy">
-                     <cstring>pubdate</cstring>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="11" column="1" colspan="2">
-                   <widget class="QDateEdit" name="pubdate">
-                    <property name="displayFormat">
-                     <string>MMM yyyy</string>
-                    </property>
-                    <property name="calendarPopup">
-                     <bool>true</bool>
-                    </property>
-                   </widget>
-                  </item>
                  <item row="0" column="2" rowspan="4">
                   <layout class="QVBoxLayout" name="verticalLayout_7">
                    <item>
@ -446,10 +206,136 @@ Using this button to create author sort will change author sort from red to gree
                    </item>
                   </layout>
                  </item>
+                  <item row="1" column="0">
+                   <widget class="QLabel" name="label">
+                    <property name="text">
+                     <string>Title &amp;sort: </string>
+                    </property>
+                    <property name="alignment">
+                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+                    </property>
+                    <property name="buddy">
+                     <cstring>title_sort</cstring>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="1" column="1">
+                   <widget class="EnLineEdit" name="title_sort">
+                    <property name="toolTip">
+                     <string>Specify how this book should be sorted when by title. For example, The Exorcist might be sorted as Exorcist, The.</string>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="2" column="0">
+                   <widget class="QLabel" name="label_2">
+                    <property name="text">
+                     <string>&amp;Author(s): </string>
+                    </property>
+                    <property name="alignment">
+                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+                    </property>
+                    <property name="buddy">
+                     <cstring>authors</cstring>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="2" column="1">
+                   <widget class="CompleteComboBox" name="authors">
+                    <property name="editable">
+                     <bool>true</bool>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="3" column="0">
+                   <widget class="QLabel" name="label_8">
+                    <property name="text">
+                     <string>Author S&amp;ort: </string>
+                    </property>
+                    <property name="alignment">
+                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+                    </property>
+                    <property name="buddy">
+                     <cstring>author_sort</cstring>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="3" column="1">
+                   <widget class="EnLineEdit" name="author_sort">
+                    <property name="toolTip">
+                     <string>Specify how the author(s) of this book should be sorted. For example Charles Dickens should be sorted as Dickens, Charles.
+If the box is colored green, then text matches the individual author's sort strings. If it is colored red, then the authors and this text do not match.</string>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="4" column="0">
+                   <widget class="QLabel" name="label_6">
+                    <property name="text">
+                     <string>&amp;Rating:</string>
+                    </property>
+                    <property name="alignment">
+                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+                    </property>
+                    <property name="buddy">
+                     <cstring>rating</cstring>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="4" column="1" colspan="2">
+                   <widget class="QSpinBox" name="rating">
+                    <property name="toolTip">
+                     <string>Rating of this book. 0-5 stars</string>
+                    </property>
+                    <property name="whatsThis">
+                     <string>Rating of this book. 0-5 stars</string>
+                    </property>
+                    <property name="buttonSymbols">
+                     <enum>QAbstractSpinBox::PlusMinus</enum>
+                    </property>
+                    <property name="suffix">
+                     <string> stars</string>
+                    </property>
+                    <property name="maximum">
+                     <number>5</number>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="5" column="0">
+                   <widget class="QLabel" name="label_3">
+                    <property name="text">
+                     <string>&amp;Publisher: </string>
+                    </property>
+                    <property name="alignment">
+                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+                    </property>
+                    <property name="buddy">
+                     <cstring>publisher</cstring>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="5" column="1" colspan="2">
+                   <widget class="EnComboBox" name="publisher">
+                    <property name="editable">
+                     <bool>true</bool>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="6" column="0">
+                   <widget class="QLabel" name="label_4">
+                    <property name="text">
+                     <string>Ta&amp;gs: </string>
+                    </property>
+                    <property name="alignment">
+                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+                    </property>
+                    <property name="buddy">
+                     <cstring>tags</cstring>
+                    </property>
+                   </widget>
+                  </item>
                  <item row="6" column="1">
                   <layout class="QHBoxLayout" name="_2">
                    <item>
-                     <widget class="TagsLineEdit" name="tags">
+                     <widget class="CompleteLineEdit" name="tags">
                      <property name="toolTip">
                       <string>Tags categorize the book. This is particularly useful while searching. &lt;br&gt;&lt;br&gt;They can be any words or phrases, separated by commas.</string>
                      </property>
@ -471,6 +357,45 @@ Using this button to create author sort will change author sort from red to gree
                    </property>
                   </widget>
                  </item>
+                  <item row="7" column="0">
+                   <widget class="QLabel" name="label_7">
+                    <property name="text">
+                     <string>&amp;Series:</string>
+                    </property>
+                    <property name="textFormat">
+                     <enum>Qt::PlainText</enum>
+                    </property>
+                    <property name="alignment">
+                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+                    </property>
+                    <property name="buddy">
+                     <cstring>series</cstring>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="7" column="1">
+                   <layout class="QHBoxLayout" name="_3">
+                    <property name="spacing">
+                     <number>5</number>
+                    </property>
+                    <item>
+                     <widget class="EnComboBox" name="series">
+                      <property name="toolTip">
+                       <string>List of known series. You can add new series.</string>
+                      </property>
+                      <property name="whatsThis">
+                       <string>List of known series. You can add new series.</string>
+                      </property>
+                      <property name="editable">
+                       <bool>true</bool>
+                      </property>
+                      <property name="insertPolicy">
+                       <enum>QComboBox::InsertAlphabetically</enum>
+                      </property>
+                     </widget>
+                    </item>
+                   </layout>
+                  </item>
                  <item row="7" column="2">
                   <widget class="QToolButton" name="remove_series_button">
                    <property name="toolTip">
@ -485,6 +410,92 @@ Using this button to create author sort will change author sort from red to gree
                    </property>
                   </widget>
                  </item>
+                  <item row="8" column="1" colspan="2">
+                   <widget class="QDoubleSpinBox" name="series_index">
+                    <property name="enabled">
+                     <bool>false</bool>
+                    </property>
+                    <property name="prefix">
+                     <string>Book </string>
+                    </property>
+                    <property name="maximum">
+                     <double>9999.989999999999782</double>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="9" column="0">
+                   <widget class="QLabel" name="label_9">
+                    <property name="text">
+                     <string>IS&amp;BN:</string>
+                    </property>
+                    <property name="alignment">
+                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+                    </property>
+                    <property name="buddy">
+                     <cstring>isbn</cstring>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="9" column="1" colspan="2">
+                   <widget class="QLineEdit" name="isbn"/>
+                  </item>
+                  <item row="10" column="0">
+                   <widget class="QLabel" name="label_11">
+                    <property name="text">
+                     <string>&amp;Date:</string>
+                    </property>
+                    <property name="alignment">
+                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+                    </property>
+                    <property name="buddy">
+                     <cstring>date</cstring>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="10" column="1" colspan="2">
+                   <widget class="QDateEdit" name="date">
+                    <property name="displayFormat">
+                     <string>dd MMM yyyy</string>
+                    </property>
+                    <property name="calendarPopup">
+                     <bool>true</bool>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="11" column="0">
+                   <widget class="QLabel" name="label_10">
+                    <property name="text">
+                     <string>Publishe&amp;d:</string>
+                    </property>
+                    <property name="alignment">
+                     <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+                    </property>
+                    <property name="buddy">
+                     <cstring>pubdate</cstring>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="11" column="1">
+                   <widget class="QDateEdit" name="pubdate">
+                    <property name="displayFormat">
+                     <string>MMM yyyy</string>
+                    </property>
+                    <property name="calendarPopup">
+                     <bool>true</bool>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="11" column="2">
+                   <widget class="QToolButton" name="clear_pubdate_button">
+                    <property name="toolTip">
+                     <string>Clear published date</string>
+                    </property>
+                    <property name="icon">
+                     <iconset resource="../../../../resources/images.qrc">
+                      <normaloff>:/images/trash.png</normaloff>:/images/trash.png</iconset>
+                    </property>
+                   </widget>
+                  </item>
                 </layout>
                </widget>
               </item>
@ -831,10 +842,15 @@ Using this button to create author sort will change author sort from red to gree
   <header>widgets.h</header>
  </customwidget>
  <customwidget>
-   <class>TagsLineEdit</class>
+   <class>CompleteLineEdit</class>
   <extends>QLineEdit</extends>
   <header>widgets.h</header>
  </customwidget>
+  <customwidget>
+   <class>CompleteComboBox</class>
+   <extends>QComboBox</extends>
+   <header>widgets.h</header>
+  </customwidget>
  <customwidget>
   <class>FormatList</class>
   <extends>QListWidget</extends>
--- a/src/calibre/gui2/dialogs/search.py
+++ b/src/calibre/gui2/dialogs/search.py
@ -31,6 +31,9 @@ class SearchDialog(QDialog, Ui_Dialog):
        self.authors_box.setEditText('')
        self.authors_box.completer().setCompletionMode(QCompleter.PopupCompletion)
        self.authors_box.setAutoCompletionCaseSensitivity(Qt.CaseInsensitive)
+        self.authors_box.set_separator('&')
+        self.authors_box.set_space_before_sep(True)
+        self.authors_box.update_items_cache(db.all_author_names())

        all_series = db.all_series()
        all_series.sort(key=lambda x : sort_key(x[1]))
@ -42,7 +45,7 @@ class SearchDialog(QDialog, Ui_Dialog):
        self.series_box.setAutoCompletionCaseSensitivity(Qt.CaseInsensitive)

        all_tags = db.all_tags()
-        self.tags_box.update_tags_cache(all_tags)
+        self.tags_box.update_items_cache(all_tags)

        self.box_last_values = copy.deepcopy(box_values)
        if self.box_last_values:
--- a/src/calibre/gui2/dialogs/search.ui
+++ b/src/calibre/gui2/dialogs/search.ui
@ -265,7 +265,7 @@
        </widget>
       </item>
       <item row="2" column="1">
-        <widget class="EnComboBox" name="authors_box">
+        <widget class="CompleteComboBox" name="authors_box">
         <property name="toolTip">
          <string>Enter an author's name. Only one author can be used.</string>
         </property>
@ -279,7 +279,7 @@
        </widget>
       </item>
       <item row="4" column="1">
-        <widget class="TagsLineEdit" name="tags_box">
+        <widget class="CompleteLineEdit" name="tags_box">
         <property name="toolTip">
          <string>Enter tags separated by spaces</string>
         </property>
@ -360,10 +360,15 @@
   <header>widgets.h</header>
  </customwidget>
  <customwidget>
-   <class>TagsLineEdit</class>
+   <class>CompleteLineEdit</class>
   <extends>QLineEdit</extends>
   <header>widgets.h</header>
  </customwidget>
+  <customwidget>
+   <class>CompleteComboBox</class>
+   <extends>QComboBox</extends>
+   <header>widgets.h</header>
+  </customwidget>
 </customwidgets>
 <tabstops>
  <tabstop>all</tabstop>
--- a/src/calibre/gui2/dialogs/template_dialog.py
+++ b/src/calibre/gui2/dialogs/template_dialog.py
@ -3,8 +3,11 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __license__   = 'GPL v3'

+import json
+
 from PyQt4.Qt import Qt, QDialog, QDialogButtonBox
 from calibre.gui2.dialogs.template_dialog_ui import Ui_TemplateDialog
+from calibre.utils.formatter_functions import formatter_functions

 class TemplateDialog(QDialog, Ui_TemplateDialog):

@ -17,9 +20,41 @@ class TemplateDialog(QDialog, Ui_TemplateDialog):
        self.setWindowFlags(self.windowFlags()&(~Qt.WindowContextHelpButtonHint))
        self.setWindowIcon(icon)

+        self.textbox.setTabStopWidth(10)
+        self.source_code.setTabStopWidth(10)
+        self.documentation.setReadOnly(True)
+        self.source_code.setReadOnly(True)
+
        if text is not None:
            self.textbox.setPlainText(text)
-        self.textbox.setTabStopWidth(50)
        self.buttonBox.button(QDialogButtonBox.Ok).setText(_('&OK'))
        self.buttonBox.button(QDialogButtonBox.Cancel).setText(_('&Cancel'))

+        try:  # best-effort load of the source text of the builtin template functions
+            with open(P('template-functions.json'), 'rb') as f:
+                self.builtin_source_dict = json.load(f, encoding='utf-8')
+        except Exception:  # missing/corrupt resource is non-fatal, but let KeyboardInterrupt/SystemExit propagate
+            self.builtin_source_dict = {}
+
+        self.funcs = formatter_functions.get_functions()
+        self.builtins = formatter_functions.get_builtins()
+
+        func_names = sorted(self.funcs)
+        self.function.clear()
+        self.function.addItem('')
+        self.function.addItems(func_names)
+        self.function.setCurrentIndex(0)
+        self.function.currentIndexChanged[str].connect(self.function_changed)
+
+    def function_changed(self, toWhat):
+        '''Show the documentation and source code of the function picked in the combo box.'''
+        name = unicode(toWhat)
+        self.source_code.clear()
+        self.documentation.clear()
+        if name in self.funcs:
+            self.documentation.setPlainText(self.funcs[name].doc)
+            if name not in self.builtins:  # not a builtin: the function object carries its own source
+                self.source_code.setPlainText(self.funcs[name].program_text)
+            elif name in self.builtin_source_dict:  # builtin: source comes from the JSON resource, if listed
+                self.source_code.setPlainText(self.builtin_source_dict[name])
+
--- a/src/calibre/gui2/dialogs/template_dialog.ui
+++ b/src/calibre/gui2/dialogs/template_dialog.ui
@ -6,8 +6,8 @@
   <rect>
    <x>0</x>
    <y>0</y>
-    <width>500</width>
-    <height>235</height>
+    <width>588</width>
+    <height>546</height>
   </rect>
  </property>
  <property name="sizePolicy">
@ -19,21 +19,77 @@
  <property name="windowTitle">
   <string>Edit Comments</string>
  </property>
-   <layout class="QVBoxLayout" name="verticalLayout">
-    <item>
-     <widget class="QPlainTextEdit" name="textbox"/>
-    </item>
-    <item>
-     <widget class="QDialogButtonBox" name="buttonBox">
-      <property name="orientation">
-       <enum>Qt::Horizontal</enum>
-      </property>
-      <property name="standardButtons">
-       <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
-      </property>
-     </widget>
-    </item>
-   </layout>
+  <layout class="QVBoxLayout" name="verticalLayout">
+   <item>
+    <widget class="QPlainTextEdit" name="textbox"/>
+   </item>
+   <item>
+    <widget class="QDialogButtonBox" name="buttonBox">
+     <property name="orientation">
+      <enum>Qt::Horizontal</enum>
+     </property>
+     <property name="standardButtons">
+      <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
+     </property>
+    </widget>
+   </item>
+   <item>
+    <layout class="QGridLayout" name="gridLayout">
+     <item row="0" column="0">
+      <widget class="QLabel" name="label">
+       <property name="text">
+        <string>Function &amp;name:</string>
+       </property>
+       <property name="buddy">
+        <cstring>function</cstring>
+       </property>
+      </widget>
+     </item>
+     <item row="0" column="1">
+      <widget class="QComboBox" name="function"/>
+     </item>
+     <item row="1" column="0">
+      <widget class="QLabel" name="label_2">
+       <property name="text">
+        <string>&amp;Documentation:</string>
+       </property>
+       <property name="alignment">
+        <set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignTop</set>
+       </property>
+       <property name="buddy">
+        <cstring>documentation</cstring>
+       </property>
+      </widget>
+     </item>
+     <item row="2" column="0">
+      <widget class="QLabel" name="label_3">
+       <property name="text">
+        <string>Python &amp;code:</string>
+       </property>
+       <property name="alignment">
+        <set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignTop</set>
+       </property>
+       <property name="buddy">
+        <cstring>source_code</cstring>
+       </property>
+      </widget>
+     </item>
+     <item row="1" column="1">
+      <widget class="QPlainTextEdit" name="documentation">
+       <property name="maximumSize">
+        <size>
+         <width>16777215</width>
+         <height>75</height>
+        </size>
+       </property>
+      </widget>
+     </item>
+     <item row="2" column="1">
+      <widget class="QPlainTextEdit" name="source_code"/>
+     </item>
+    </layout>
+   </item>
+  </layout>
 </widget>
 <resources/>
 <connections>
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@ -148,7 +148,6 @@ class StatusBar(QStatusBar): # {{{
                self.get_version() + ' ' + _('created by Kovid Goyal')
        self.device_string = ''
        self.update_label = QLabel('')
-        self.update_label.setOpenExternalLinks(True)
        self.addPermanentWidget(self.update_label)
        self.update_label.setVisible(False)
        self._font = QFont()
@ -174,8 +173,9 @@ class StatusBar(QStatusBar): # {{{
        self.clearMessage()

    def new_version_available(self, ver, url):
-        msg = (u'<span style="color:red; font-weight: bold">%s: <a href="%s">%s<a></span>') % (
-                _('Update found'), url, ver)
+        msg = (u'<span style="color:red; font-weight: bold">%s: <a'
+               ' href="update:%s">%s<a></span>') % (
+                _('Update found'), ver, ver)
        self.update_label.setText(msg)
        self.update_label.setCursor(Qt.PointingHandCursor)
        self.update_label.setVisible(True)
@ -240,6 +240,13 @@ class LayoutMixin(object): # {{{
            self.status_bar.addPermanentWidget(button)
        self.status_bar.addPermanentWidget(self.jobs_button)
        self.setStatusBar(self.status_bar)
+        self.status_bar.update_label.linkActivated.connect(self.update_link_clicked)
+
+    def update_link_clicked(self, url):
+        url = unicode(url)
+        if url.startswith('update:'):
+            version = url.partition(':')[-1]
+            self.update_found(version, force=True)

    def finalize_layout(self):
        self.status_bar.initialize(self.system_tray_icon)
--- a/src/calibre/gui2/library/delegates.py
+++ b/src/calibre/gui2/library/delegates.py
@ -16,7 +16,7 @@ from PyQt4.Qt import QColor, Qt, QModelIndex, QSize, \
                     QComboBox, QTextDocument

 from calibre.gui2 import UNDEFINED_QDATE, error_dialog
-from calibre.gui2.widgets import EnLineEdit, TagsLineEdit
+from calibre.gui2.widgets import EnLineEdit, CompleteLineEdit
 from calibre.utils.date import now, format_date
 from calibre.utils.config import tweaks
 from calibre.utils.formatter import validation_formatter
@ -173,9 +173,9 @@ class TagsDelegate(QStyledItemDelegate): # {{{
        if self.db:
            col = index.model().column_map[index.column()]
            if not index.model().is_custom_column(col):
-                editor = TagsLineEdit(parent, self.db.all_tags())
+                editor = CompleteLineEdit(parent, self.db.all_tags())
            else:
-                editor = TagsLineEdit(parent,
+                editor = CompleteLineEdit(parent,
                        sorted(list(self.db.all_custom(label=self.db.field_metadata.key_to_label(col))),
                               key=sort_key))
                return editor
@ -184,6 +184,31 @@ class TagsDelegate(QStyledItemDelegate): # {{{
        return editor
 # }}}

+class CompleteDelegate(QStyledItemDelegate): # {{{
+    def __init__(self, parent, sep, items_func_name, space_before_sep=False):
+        QStyledItemDelegate.__init__(self, parent)
+        self.sep = sep
+        self.items_func_name = items_func_name
+        self.space_before_sep = space_before_sep
+
+    def set_database(self, db):
+        self.db = db
+
+    def createEditor(self, parent, option, index):
+        if self.db and hasattr(self.db, self.items_func_name):
+            col = index.model().column_map[index.column()]
+            if not index.model().is_custom_column(col):
+                editor = CompleteLineEdit(parent, getattr(self.db, self.items_func_name)(),
+                    self.sep, self.space_before_sep)
+            else:
+                editor = CompleteLineEdit(parent,
+                    sorted(list(self.db.all_custom(label=self.db.field_metadata.key_to_label(col))),
+                    key=sort_key), self.sep, self.space_before_sep)
+        else:
+            editor = EnLineEdit(parent)
+        return editor
+# }}}
+
 class CcDateDelegate(QStyledItemDelegate): # {{{
    '''
    Delegate for custom columns dates. Because this delegate stores the
@ -267,7 +292,7 @@ class CcEnumDelegate(QStyledItemDelegate): # {{{
    def createEditor(self, parent, option, index):
        m = index.model()
        col = m.column_map[index.column()]
-        editor = QComboBox(parent)
+        editor = DelegateCB(parent)
        editor.addItem('')
        for v in m.custom_columns[col]['display']['enum_values']:
            editor.addItem(v)
@ -328,6 +353,17 @@ class CcCommentsDelegate(QStyledItemDelegate): # {{{
        model.setData(index, QVariant(editor.textbox.html), Qt.EditRole)
 # }}}

+class DelegateCB(QComboBox): # {{{
+
+    def __init__(self, parent):
+        QComboBox.__init__(self, parent)
+
+    def event(self, e):
+        if e.type() == e.ShortcutOverride:
+            e.accept()
+        return QComboBox.event(self, e)
+# }}}
+
 class CcBoolDelegate(QStyledItemDelegate): # {{{
    def __init__(self, parent):
        '''
@ -336,7 +372,7 @@ class CcBoolDelegate(QStyledItemDelegate): # {{{
        QStyledItemDelegate.__init__(self, parent)

    def createEditor(self, parent, option, index):
-        editor = QComboBox(parent)
+        editor = DelegateCB(parent)
        items = [_('Y'), _('N'), ' ']
        icons = [I('ok.png'), I('list_remove.png'), I('blank.png')]
        if tweaks['bool_custom_columns_are_tristate'] == 'no':
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@ -13,7 +13,7 @@ from PyQt4.Qt import QTableView, Qt, QAbstractItemView, QMenu, pyqtSignal, \
    QPoint, QPixmap, QUrl, QImage, QPainter, QColor, QRect

 from calibre.gui2.library.delegates import RatingDelegate, PubDateDelegate, \
-    TextDelegate, DateDelegate, TagsDelegate, CcTextDelegate, \
+    TextDelegate, DateDelegate, CompleteDelegate, CcTextDelegate, \
    CcBoolDelegate, CcCommentsDelegate, CcDateDelegate, CcTemplateDelegate, \
    CcEnumDelegate
 from calibre.gui2.library.models import BooksModel, DeviceBooksModel
@ -76,8 +76,8 @@ class BooksView(QTableView): # {{{
        self.rating_delegate = RatingDelegate(self)
        self.timestamp_delegate = DateDelegate(self)
        self.pubdate_delegate = PubDateDelegate(self)
-        self.tags_delegate = TagsDelegate(self)
-        self.authors_delegate = TextDelegate(self)
+        self.tags_delegate = CompleteDelegate(self, ',', 'all_tags')
+        self.authors_delegate = CompleteDelegate(self, '&', 'all_author_names', True)
        self.series_delegate = TextDelegate(self)
        self.publisher_delegate = TextDelegate(self)
        self.text_delegate = TextDelegate(self)
@ -410,8 +410,7 @@ class BooksView(QTableView): # {{{
        self.save_state()
        self._model.set_database(db)
        self.tags_delegate.set_database(db)
-        self.authors_delegate.set_auto_complete_function(
-                lambda: [(x, y.replace('|', ',')) for (x, y) in db.all_authors()])
+        self.authors_delegate.set_database(db)
        self.series_delegate.set_auto_complete_function(db.all_series)
        self.publisher_delegate.set_auto_complete_function(db.all_publishers)

--- a/src/calibre/gui2/preferences/conversion.py
+++ b/src/calibre/gui2/preferences/conversion.py
@ -12,6 +12,8 @@ from calibre.ebooks.conversion.plumber import Plumber
 from calibre.utils.logging import Log
 from calibre.gui2.preferences.conversion_ui import Ui_Form
 from calibre.gui2.convert.look_and_feel import LookAndFeelWidget
+from calibre.gui2.convert.heuristics import HeuristicsWidget
+from calibre.gui2.convert.search_and_replace import SearchAndReplaceWidget
 from calibre.gui2.convert.page_setup import PageSetupWidget
 from calibre.gui2.convert.structure_detection import StructureDetectionWidget
 from calibre.gui2.convert.toc import TOCWidget
@ -82,7 +84,8 @@ class Base(ConfigWidgetBase, Ui_Form):
 class CommonOptions(Base):

    def load_conversion_widgets(self):
-        self.conversion_widgets = [LookAndFeelWidget, PageSetupWidget,
+        self.conversion_widgets = [LookAndFeelWidget, HeuristicsWidget,
+                SearchAndReplaceWidget, PageSetupWidget,
                StructureDetectionWidget, TOCWidget]

 class InputOptions(Base):
--- a/src/calibre/gui2/preferences/plugins.py
+++ b/src/calibre/gui2/preferences/plugins.py
@ -196,6 +196,12 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
    def modify_plugin(self, op=''):
        index = self.plugin_view.currentIndex()
        if index.isValid():
+            if not index.parent().isValid():
+                name = unicode(index.data().toString())
+                return error_dialog(self, _('Error'), '<p>'+
+                        _('Select an actual plugin under <b>%s</b> to customize')%name,
+                        show=True, show_copy_button=False)
+
            plugin = self._plugin_model.index_to_plugin(index)
            if op == 'toggle':
                if not plugin.can_be_disabled:
--- a/src/calibre/gui2/preferences/template_functions.py
+++ b/src/calibre/gui2/preferences/template_functions.py
@ -5,7 +5,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import traceback
+import json, traceback

 from calibre.gui2 import error_dialog
 from calibre.gui2.preferences import ConfigWidgetBase, test_widget
@ -73,6 +73,12 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
        self.textBrowser.setHtml(help_text)

    def initialize(self):
+        try:
+            with open(P('template-functions.json'), 'rb') as f:
+                self.builtin_source_dict = json.load(f, encoding='utf-8')
+        except:
+            self.builtin_source_dict = {}
+
        self.funcs = formatter_functions.get_functions()
        self.builtins = formatter_functions.get_builtins()

@ -179,8 +185,13 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
        func = self.funcs[txt]
        self.argument_count.setValue(func.arg_count)
        self.documentation.setText(func.doc)
-        self.program.setPlainText(func.program_text)
        if txt in self.builtins:
+            if hasattr(func, 'program_text'):
+                self.program.setPlainText(func.program_text)
+            elif txt in self.builtin_source_dict:
+                self.program.setPlainText(self.builtin_source_dict[txt])
+            else:
+                self.program.setPlainText(_('function source code not available'))
            self.documentation.setReadOnly(True)
            self.argument_count.setReadOnly(True)
            self.program.setReadOnly(True)
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@ -485,7 +485,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
            if 'calibre.ebooks.DRMError' in job.details:
                if not minz:
                    from calibre.gui2.dialogs.drm_error import DRMErrorMessage
-                    d = DRMErrorMessage(self, job.description.split(':')[-1])
+                    d = DRMErrorMessage(self, _('Cannot convert') + ' ' +
+                        job.description.split(':')[-1].partition('(')[-1][:-1])
                    d.setModal(False)
                    d.show()
                    self._modeless_dialogs.append(d)
--- a/src/calibre/gui2/update.py
+++ b/src/calibre/gui2/update.py
@ -52,8 +52,7 @@ class UpdateNotification(QDialog):
        self.label = QLabel('<p>'+
            _('%s has been updated to version <b>%s</b>. '
            'See the <a href="http://calibre-ebook.com/whats-new'
-            '">new features</a>. Visit the download pa'
-            'ge?')%(__appname__, version))
+            '">new features</a>.')%(__appname__, version))
        self.label.setOpenExternalLinks(True)
        self.label.setWordWrap(True)
        self.setWindowTitle(_('Update available!'))
@ -94,13 +93,13 @@ class UpdateMixin(object):
                    type=Qt.QueuedConnection)
            self.update_checker.start()

-    def update_found(self, version):
+    def update_found(self, version, force=False):
        os = 'windows' if iswindows else 'osx' if isosx else 'linux'
        url = 'http://calibre-ebook.com/download_%s'%os
        self.status_bar.new_version_available(version, url)

-        if config.get('new_version_notification') and \
-                dynamic.get('update to version %s'%version, True):
+        if force or (config.get('new_version_notification') and \
+                dynamic.get('update to version %s'%version, True)):
            self._update_notification__ = UpdateNotification(version,
                    parent=self)
            self._update_notification__.show()
--- a/src/calibre/gui2/widgets.py
+++ b/src/calibre/gui2/widgets.py
@ -311,32 +311,6 @@ class FontFamilyModel(QAbstractListModel):
    def index_of(self, family):
        return self.families.index(family.strip())

-class BasicComboModel(QAbstractListModel):
-
-    def __init__(self, items, *args):
-        QAbstractListModel.__init__(self, *args)
-        self.items = [i for i in items]
-        self.items.sort()
-
-    def rowCount(self, *args):
-        return len(self.items)
-
-    def data(self, index, role):
-        try:
-            item = self.items[index.row()]
-        except:
-            traceback.print_exc()
-            return NONE
-        if role == Qt.DisplayRole:
-            return QVariant(item)
-        if role == Qt.FontRole:
-            return QVariant(QFont(item))
-        return NONE
-
-    def index_of(self, item):
-        return self.items.index(item.strip())
-
-
 class BasicListItem(QListWidgetItem):

    def __init__(self, text, user_data=None):
@ -426,46 +400,47 @@ class EnLineEdit(LineEditECM, QLineEdit):
    pass


-class TagsCompleter(QCompleter):
+class ItemsCompleter(QCompleter):

    '''
    A completer object that completes a list of tags. It is used in conjunction
    with a CompleterLineEdit.
    '''

-    def __init__(self, parent, all_tags):
-        QCompleter.__init__(self, all_tags, parent)
-        self.all_tags = set(all_tags)
+    def __init__(self, parent, all_items):
+        QCompleter.__init__(self, all_items, parent)
+        self.all_items = set(all_items)

-    def update(self, text_tags, completion_prefix):
-        tags = list(self.all_tags.difference(text_tags))
-        model = QStringListModel(tags, self)
+    def update(self, text_items, completion_prefix):
+        items = list(self.all_items.difference(text_items))
+        model = QStringListModel(items, self)
        self.setModel(model)

        self.setCompletionPrefix(completion_prefix)
        if completion_prefix.strip() != '':
            self.complete()

-    def update_tags_cache(self, tags):
-        self.all_tags = set(tags)
-        model = QStringListModel(tags, self)
+    def update_items_cache(self, items):
+        self.all_items = set(items)
+        model = QStringListModel(items, self)
        self.setModel(model)


-class TagsLineEdit(EnLineEdit):
+class CompleteLineEdit(EnLineEdit):

    '''
    A QLineEdit that can complete parts of text separated by separator.
    '''

-    def __init__(self, parent=0, tags=[]):
+    def __init__(self, parent=0, complete_items=[], sep=',', space_before_sep=False):
        EnLineEdit.__init__(self, parent)

-        self.separator = ','
+        self.separator = sep
+        self.space_before_sep = space_before_sep

        self.connect(self, SIGNAL('textChanged(QString)'), self.text_changed)

-        self.completer = TagsCompleter(self, tags)
+        self.completer = ItemsCompleter(self, complete_items)
        self.completer.setCaseSensitivity(Qt.CaseInsensitive)

        self.connect(self,
@ -476,32 +451,43 @@ class TagsLineEdit(EnLineEdit):

        self.completer.setWidget(self)

-    def update_tags_cache(self, tags):
-        self.completer.update_tags_cache(tags)
+    def update_items_cache(self, complete_items):
+        self.completer.update_items_cache(complete_items)
+
+    def set_separator(self, sep):
+        self.separator = sep
+
+    def set_space_before_sep(self, space_before):
+        self.space_before_sep = space_before

    def text_changed(self, text):
        all_text = unicode(text)
        text = all_text[:self.cursorPosition()]
-        prefix = text.split(',')[-1].strip()
+        prefix = text.split(self.separator)[-1].strip()

-        text_tags = []
+        text_items = []
        for t in all_text.split(self.separator):
            t1 = unicode(t).strip()
            if t1 != '':
-                text_tags.append(t)
-        text_tags = list(set(text_tags))
+                text_items.append(t)
+        text_items = list(set(text_items))

        self.emit(SIGNAL('text_changed(PyQt_PyObject, PyQt_PyObject)'),
-            text_tags, prefix)
+            text_items, prefix)

    def complete_text(self, text):
        cursor_pos = self.cursorPosition()
        before_text = unicode(self.text())[:cursor_pos]
        after_text = unicode(self.text())[cursor_pos:]
-        prefix_len = len(before_text.split(',')[-1].strip())
-        self.setText('%s%s%s %s' % (before_text[:cursor_pos - prefix_len],
-            text, self.separator, after_text))
-        self.setCursorPosition(cursor_pos - prefix_len + len(text) + 2)
+        prefix_len = len(before_text.split(self.separator)[-1].strip())
+        if self.space_before_sep:
+            complete_text_pat = '%s%s %s %s'
+            len_extra = 3
+        else:
+            complete_text_pat = '%s%s%s %s'
+            len_extra = 2
+        self.setText(complete_text_pat % (before_text[:cursor_pos - prefix_len], text, self.separator, after_text))
+        self.setCursorPosition(cursor_pos - prefix_len + len(text) + len_extra)


 class EnComboBox(QComboBox):
@ -515,7 +501,7 @@ class EnComboBox(QComboBox):
    def __init__(self, *args):
        QComboBox.__init__(self, *args)
        self.setLineEdit(EnLineEdit(self))
-        self.setAutoCompletionCaseSensitivity(Qt.CaseSensitive)
+        self.setAutoCompletionCaseSensitivity(Qt.CaseInsensitive)
        self.setMinimumContentsLength(20)

    def text(self):
@ -528,6 +514,22 @@ class EnComboBox(QComboBox):
            idx = 0
        self.setCurrentIndex(idx)

+class CompleteComboBox(EnComboBox):
+
+    def __init__(self, *args):
+        EnComboBox.__init__(self, *args)
+        self.setLineEdit(CompleteLineEdit(self))
+
+    def update_items_cache(self, complete_items):
+        self.lineEdit().update_items_cache(complete_items)
+
+    def set_separator(self, sep):
+        self.lineEdit().set_separator(sep)
+
+    def set_space_before_sep(self, space_before):
+        self.lineEdit().set_space_before_sep(space_before)
+
+
 class HistoryLineEdit(QComboBox):

    lost_focus = pyqtSignal()
--- a/src/calibre/gui2/wizard/init.py
+++ b/src/calibre/gui2/wizard/init.py
@ -111,7 +111,7 @@ class Kobo(Device):
    id = 'kobo'

 class Booq(Device):
-    name = 'Booq Reader'
+    name = 'bq Classic'
    manufacturer = 'Booq'
    output_profile = 'sony'
    output_format = 'EPUB'
@ -125,7 +125,18 @@ class TheBook(Device):
    id = 'thebook'

 class Avant(Booq):
-    name = 'Booq Avant'
+    name = 'bq Avant'
+
+class AvantXL(Booq):
+    name = 'bq Avant XL'
+    output_profile = 'ipad'
+
+class BooqPocketPlus(Booq):
+    name = 'bq Pocket Plus'
+    output_profile = 'sony300'
+
+class BooqCervantes(Booq):
+    name = 'bq Cervantes'

 class Sony300(Sony505):

--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
--- a/src/calibre/library/cli.py
+++ b/src/calibre/library/cli.py
@ -693,14 +693,7 @@ def command_catalog(args, dbpath):
                            }

    with plugin:
-        ret = plugin.run(args[1], opts, get_db(dbpath, opts))
-    if ret is None:
-        ret = 0
-    else:
-        ret = 1
-    return ret
-
-# end of GR additions
+        return int(bool(plugin.run(args[1], opts, get_db(dbpath, opts))))

 def parse_series_string(db, label, value):
    val = unicode(value).strip()
--- a/src/calibre/library/database.py
+++ b/src/calibre/library/database.py
@ -1060,6 +1060,10 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
        return [ (i[0], i[1]) for i in \
                self.conn.get('SELECT id, name FROM authors')]

+    def all_author_names(self):
+        return filter(None, [i[0].strip().replace('|', ',') for i in self.conn.get(
+            'SELECT name FROM authors')])
+
    def all_publishers(self):
        return [ (i[0], i[1]) for i in \
                self.conn.get('SELECT id, name FROM publishers')]
--- a/src/calibre/library/sqlite.py
+++ b/src/calibre/library/sqlite.py
@ -100,7 +100,7 @@ class AumSortedConcatenate(object):
        keys = self.ans.keys()
        l = len(keys)
        if l == 0:
-            return 'Unknown:::Unknown'
+            return None
        if l == 1:
            return self.ans[keys[0]]
        return ':#:'.join([self.ans[v] for v in sorted(keys)])
--- a/src/calibre/manual/conversion.rst
+++ b/src/calibre/manual/conversion.rst
@ -255,6 +255,98 @@ you are producing are meant for a particular device type, choose the correspondi

 The Output profile also controls the screen size. This will cause, for example, images to be auto-resized to be fit to the screen in some output formats. So choose a profile of a device that has a screen size similar to your device.

+.. _heuristic-processing:
+
+Heuristic Processing
+---------------------
+
+Heuristic Processing provides a variety of functions that try to detect and correct
+common problems in poorly formatted input documents.  Use these functions if your input document suffers 
+from bad formatting. Because these functions rely on common patterns, be aware that in some cases an 
+option may lead to worse results, so use with care.  As an example, several of these options will
+remove all non-breaking-space entities.
+
+:guilabel:`Preprocess input`
+    This option activates |app|'s Heuristic Processing stage of the conversion pipeline.
+    This must be enabled in order for the various sub-functions to be applied.
+
+:guilabel:`Unwrap lines`
+    Enabling this option will cause |app| to attempt to detect and correct hard line breaks that exist 
+    within a document using punctuation clues and line length.  |app| will first attempt to detect whether 
+    hard line breaks exist; if they do not appear to exist, |app| will not attempt to unwrap lines.  The
+    line-unwrap factor can be reduced if you want to 'force' |app| to unwrap lines.
+
+:guilabel:`Line-unwrap factor`
+    This option controls the algorithm |app| uses to remove hard line breaks. For example, if the value of this
+    option is 0.4, that means calibre will remove hard line breaks from the end of lines whose lengths are less
+    than the length of 40% of all lines in the document.  If your document only has a few line breaks which need
+    correction, then this value should be reduced to somewhere between 0.1 and 0.2.
+    
+:guilabel:`Detect and markup unformatted chapter headings and sub headings`
+    If your document does not have Chapter Markers and titles formatted differently from the rest of the text,
+    |app| can use this option to attempt to detect them and surround them with heading tags. <h2> tags are used
+    for chapter headings; <h3> tags are used for any titles that are detected.
+    
+    This function will not create a TOC, but in many cases it will cause |app|'s default chapter detection settings 
+    to correctly detect chapters and build a TOC.  Adjust the XPath under Structure Detection if a TOC is not automatically
+    created.  If there are no other headings used in the document then setting "//h:h2" under Structure Detection would
+    be the easiest way to create a TOC for the document.
+    
+    The inserted headings are not formatted; to apply formatting, use the 'extra_css' option under
+    the Look and Feel conversion settings.  For example, to center heading tags, use the following::
+
+        h2, h3 { text-align: center }
+
+:guilabel:`Renumber sequences of <h1> or <h2> tags`
+    Some publishers format chapter headings using multiple <h1> or <h2> tags sequentially.
+    |app|'s default conversion settings will cause such titles to be split into two pieces.  This option 
+    will re-number the heading tags to prevent splitting.
+
+:guilabel:`Delete blank lines between paragraphs`
+    This option will cause |app| to analyze blank lines included within the document.  If every paragraph is interleaved
+    with a blank line, then |app| will remove all those blank paragraphs.  Sequences of multiple blank lines will be
+    considered scene breaks and retained as a single paragraph.  This option differs from the 'Remove Paragraph Spacing' 
+    option under 'Look and Feel' in that it actually modifies the HTML content, while the other option modifies the document
+    styles.  This option can also remove paragraphs which were inserted using |app|'s 'Insert blank line' option.
+
+:guilabel:`Ensure scene breaks are consistently formatted`
+    With this option |app| will attempt to detect common scene-break markers and ensure that they are center aligned.  
+    It also attempts to detect scene breaks defined by white space and replace them with a horizontal rule 15% of the
+    page width.  Some readers may find this desirable as these 'soft' scene breaks often become page breaks on readers, and 
+    thus become difficult to distinguish.
+
+:guilabel:`Remove unnecessary hyphens`
+    |app| will analyze all hyphenated content in the document when this option is enabled.  The document itself is used
+    as a dictionary for analysis.  This allows |app| to accurately remove hyphens for any words in the document in any language, 
+    along with made-up and obscure scientific words.  The primary drawback is words appearing only a single time in the document 
+    will not be changed.  Analysis happens in two passes.  The first pass analyzes line endings: lines are only unwrapped if the
+    word exists with or without a hyphen in the document.  The second pass analyzes all hyphenated words throughout the document;
+    hyphens are removed if the word exists elsewhere in the document without a hyphen.
+
+:guilabel:`Italicize common words and patterns`
+    When enabled, |app| will look for common words and patterns that denote italics and italicize them.  Examples are common text
+    conventions such as ~word~ or phrases that should generally be italicized, e.g. Latin phrases like 'etc.' or 'et cetera'.
+
+:guilabel:`Replace entity indents with CSS indents`
+    Some documents use a convention of defining text indents using non-breaking space entities.  When this option is enabled |app| will
+    attempt to detect this sort of formatting and convert it to a 3% text indent using CSS.
+
+.. _search-replace:
+
+Search & Replace
+---------------------
+
+These options are useful primarily for conversion of PDF documents. Often, the conversion leaves
+behind page headers and footers in the text. These options use regular expressions to try and detect
+the headers and footers and remove them. Remember that they operate on the intermediate XHTML produced
+by the conversion pipeline. There is also a wizard to help you customize the regular expressions for
+your document.  These options can also be used for generic search and replace of any content by additionally 
+specifying a replacement expression.
+
+The search works by using a python regular expression. All matched text is simply removed from
+the document or replaced using the replacement pattern. You can learn more about regular expressions and 
+their syntax at http://docs.python.org/library/re.html.
+
 .. _structure-detection:

 Structure Detection
@ -298,21 +390,6 @@ which means that |app| will insert page breaks before every `<h1>` and `<h2>` ta
    
    The default expressions may change depending on the input format you are converting.

-Removing headers and footers
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-These options are useful primarily for conversion of PDF documents. Often, the conversion leaves
-behind page headers and footers in the text. These options use regular expressions to try and detect
-the headers and footers and remove them. Remember that they operate on the intermediate XHTML produced
-by the conversion pipeline. There is also a wizard to help you customize the regular expressions for
-your document.
-
-The header and footer regular expressions are used in conjunction with the remove header and footer options.
-If the remove option is not enabled the regular expression will not be applied to remove the matched text.
-The removal works by using a python regular expression. All matched text is simply removed from
-the document. You can learn more about regular expressions and their syntax at
-http://docs.python.org/library/re.html.
-
 Miscellaneous
 ~~~~~~~~~~~~~~

@ -330,16 +407,6 @@ There are a few more options in this section.
    two covers. This option will simply remove the first image from the source document, thereby
    ensuring that the converted book has only one cover, the one specified in |app|.

-:guilabel:`Preprocess input`
-    This option activates various algorithms that try to detect and correct common cases of
-    badly formatted input documents. Things like hard line breaks, large blocks of text with no formatting, etc.
-    Turn this option on if your input document suffers from bad formatting. But be aware that in
-    some cases, this option can lead to worse results, so use with care.
-
-:guilabel:`Line-unwrap factor`
-    This option control the algorithm |app| uses to remove hard line breaks. For example, if the value of this
-    option is 0.4, that means calibre will remove hard line breaks from the end of lines whose lengths are less
-    than the length of 40% of all lines in the document. 
    
 Table of Contents
 ------------------
@ -488,26 +555,33 @@ at `mobileread <http://www.mobileread.com/forums/showthread.php?t=28313>`_.
 Convert TXT documents
 ~~~~~~~~~~~~~~~~~~~~~~

-TXT documents have no well defined way to specify formatting like bold, italics, etc, or document structure like paragraphs, headings, sections and so on.
-Since TXT documents provide no way to explicitly mark parts of
-the text, by default |app| only groups lines in the input document into paragraphs. The default is to assume one or
-more blank lines are a paragraph boundary::
-
-    This is the first.
-    
-    This is the
-    second paragraph.
+TXT documents have no well defined way to specify formatting like bold, italics, etc, or document 
+structure like paragraphs, headings, sections and so on, but there are a variety of conventions commonly 
+used.  By default |app| attempts automatic detection of the correct formatting and markup based on those
+conventions.

 TXT input supports a number of options to differentiate how paragraphs are detected.

-    :guilabel:`Treat each line as a paragraph`
+    :guilabel:`Paragraph Style: Auto`
+        Analyzes the text file and attempts to automatically determine how paragraphs are defined.  This
+        option will generally work fine; if you get undesirable results, try one of the manual options.
+
+    :guilabel:`Paragraph Style: Block`
+        Assumes one or more blank lines are a paragraph boundary::
+        
+            This is the first.
+    
+            This is the
+            second paragraph.
+
+    :guilabel:`Paragraph Style: Single`
        Assumes that every line is a paragraph::

            This is the first.
            This is the second.
            This is the third.
        
-    :guilabel:`Assume print formatting`
+    :guilabel:`Paragraph Style: Print`
        Assumes that every paragraph starts with an indent (either a tab or 2+ spaces). Paragraphs end when
        the next line that starts with an indent is reached::

@ -518,13 +592,28 @@ TXT input supports a number of options to differentiate how paragraphs are detec
            This is the
            third.

-    :guilabel:`Process using markdown`
+    :guilabel:`Paragraph Style: Unformatted`
+        Assumes that the document has no formatting, but does use hard line breaks.  Punctuation
+        and median line length are used to attempt to re-create paragraphs.
+
+    :guilabel:`Formatting Style: Auto`
+        Attempts to detect the type of formatting markup being used.  If no markup is used then heuristic
+        formatting will be applied.
+
+    :guilabel:`Formatting Style: Heuristic`
+        Analyzes the document for common chapter headings, scene breaks, and italicized words and applies the
+        appropriate HTML markup during conversion.
+
+    :guilabel:`Formatting Style: Markdown`
        |app| also supports running TXT input though a transformation preprocessor known as markdown. Markdown
        allows for basic formatting to be added to TXT documents, such as bold, italics, section headings, tables,
        lists, a Table of Contents, etc. Marking chapter headings with a leading # and setting the chapter XPath detection
        expression to "//h:h1" is the easiest way to have a proper table of contents generated from a TXT document.
        You can learn more about the markdown syntax at `daringfireball <http://daringfireball.net/projects/markdown/syntax>`_.

+    :guilabel:`Formatting Style: None`
+        Applies no special formatting to the text; the document is converted to HTML with no other changes.
+

 Convert PDF documents
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -547,6 +636,7 @@ Some limitations of PDF input are:
    * Extraction of vector images and tables from within the document is also not supported.
    * Some PDFs use special glyphs to represent ll or ff or fi, etc. Conversion of these may or may not work depending on just how they are represented internally in the PDF.
    * Some PDFs store their images upside down with a rotation instruction, |app| currently doesn't support that instruction, so the images will be rotated in the output as well. 
+    * Links and Tables of Contents are not supported.

 To re-iterate **PDF is a really, really bad** format to use as input. If you absolutely must use PDF, then be prepared for an
 output ranging anywhere from decent to unusable, depending on the input PDF.
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@ -101,6 +101,17 @@ We just need some information from you:
 Once you send us the output for a particular operating system, support for the device in that operating system
 will appear in the next release of |app|.

+My device is not being detected by |app|?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Follow these steps to find the problem:
+
+    * Make sure that you are connecting only a single device to your computer at a time. Do not have another |app| supported device, like an iPhone/iPad etc., connected at the same time.
+    * Make sure you are running the latest version of |app|. The latest version can always be downloaded from `<http://calibre-ebook.com/download>`_.
+    * Ensure your operating system is seeing the device. That is, the device should be mounted as a disk that you can access using Windows Explorer or whatever the file management program on your computer is.
+    * In |app|, go to Preferences->Plugins->Device Interface plugin and make sure the plugin for your device is enabled.
+    * If all the above steps fail, go to Preferences->Miscellaneous and click "Debug device detection" with your device attached, then post the output as a ticket on `<http://bugs.calibre-ebook.com>`_.
+
 How does |app| manage collections on my SONY reader?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

@ -450,6 +461,11 @@ How do I use purchased EPUB books with |app|?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Most purchased EPUB books have `DRM <http://wiki.mobileread.com/wiki/DRM>`_. This prevents |app| from opening them. You can still use |app| to store and transfer them to your e-book reader. First, you must authorize your reader on a windows machine with Adobe Digital Editions. Once this is done, EPUB books transferred with |app| will work fine on your reader. When you purchase an epub book from a website, you will get an ".acsm" file. This file should be opened with Adobe Digital Editions, which will then download the actual ".epub" e-book. The e-book file will be stored in the folder "My Digital Editions", from where you can add it to |app|.

+I am getting a "Permission Denied" error?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A permission denied error can occur because of many possible reasons, none of them having anything to do with |app|. You can get permission denied errors if you are using an SD card with write protect enabled. Or if you, or some program you used, changed the file permissions of the files in question to read-only. Or if there is a filesystem error on the device which caused your operating system to mount the filesystem in read-only mode or mark a particular file as read-only pending recovery. Or if the files have their owner set to a user other than you. You will need to fix the underlying cause of the permissions error before you resume using |app|. Read the error message carefully, see what file it points to and fix the permissions on that file.
+
 Can I have the comment metadata show up on my reader?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

--- a/src/calibre/utils/date.py
+++ b/src/calibre/utils/date.py
@ -46,6 +46,17 @@ local_tz = _local_tz = SafeLocalTimeZone()

 UNDEFINED_DATE = datetime(101,1,1, tzinfo=utc_tz)

+def is_date_undefined(qt_or_dt):
+    d = qt_or_dt
+    if d is None:
+        return True
+    if hasattr(d, 'toString'):
+        d = datetime(d.year(), d.month(), d.day(), tzinfo=utc_tz)
+    return d.year < UNDEFINED_DATE.year or (
+            d.year == UNDEFINED_DATE.year and
+            d.month == UNDEFINED_DATE.month and
+            d.day == UNDEFINED_DATE.day)
+
 def parse_date(date_string, assume_utc=False, as_utc=True, default=None):
    '''
    Parse a date/time string into a timezone aware datetime object. The timezone
@ -148,6 +159,9 @@ def format_date(dt, format, assume_utc=False, as_utc=False):
        if len(mo.group(0)) == 2: return '%02d'%(dt.year % 100)
        return '%04d'%dt.year

+    if dt == UNDEFINED_DATE:
+        return ''
+
    format = re.sub('d{1,4}', format_day, format)
    format = re.sub('M{1,4}', format_month, format)
    return re.sub('yyyy|yy', format_year, format)
--- a/src/calibre/utils/formatter.py
+++ b/src/calibre/utils/formatter.py
@ -274,9 +274,9 @@ class TemplateFormatter(string.Formatter):
                    colon += 1

                funcs = formatter_functions.get_functions()
-                if fmt[colon:p] in funcs:
-                    field = fmt[colon:p]
-                    func = funcs[field]
+                fname = fmt[colon:p]
+                if fname in funcs:
+                    func = funcs[fname]
                    if func.arg_count == 2:
                        # only one arg expected. Don't bother to scan. Avoids need
                        # for escaping characters
@ -292,6 +292,8 @@ class TemplateFormatter(string.Formatter):
                    else:
                        val = func.eval_(self, self.kwargs, self.book, self.locals,
                                        val, *args).strip()
+                else:
+                    return _('%s: unknown function')%fname
        if val:
            val = self._do_format(val, dispfmt)
        if not val:
--- a/src/calibre/utils/smtplib.py
+++ b/src/calibre/utils/smtplib.py
@ -554,6 +554,8 @@ class SMTP:

        def encode_cram_md5(challenge, user, password):
            challenge = base64.decodestring(challenge)
+            if isinstance(password, unicode): # Added by Kovid, see http://bugs.python.org/issue5285
+                password = password.encode('utf-8')
            response = user + " " + hmac.HMAC(password, challenge).hexdigest()
            return encode_base64(response, eol="")

--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@ -839,7 +839,13 @@ class BasicNewsRecipe(Recipe):
        fetcher.image_url_processor = self.image_url_processor
        res, path, failures = fetcher.start_fetch(url), fetcher.downloaded_paths, fetcher.failed_links
        if not res or not os.path.exists(res):
-            raise Exception(_('Could not fetch article. Run with -vv to see the reason'))
+            msg = _('Could not fetch article.') + ' '
+            if self.debug:
+                msg += _('The debug traceback is available earlier in this log')
+            else:
+                msg += _('Run with -vv to see the reason')
+            raise Exception(msg)
+
        return res, path, failures

    def fetch_article(self, url, dir, f, a, num_of_feeds):
@ -902,9 +908,6 @@ class BasicNewsRecipe(Recipe):
            feeds = feeds[:2]
        self.has_single_feed = len(feeds) == 1

-        if self.use_embedded_content is None:
-            self.use_embedded_content = feeds[0].has_embedded_content()
-
        index = os.path.join(self.output_dir, 'index.html')

        html = self.feeds2index(feeds)
@ -939,7 +942,9 @@ class BasicNewsRecipe(Recipe):
                    url = None
                if not url:
                    continue
-                func, arg = (self.fetch_embedded_article, article) if self.use_embedded_content else \
+                func, arg = (self.fetch_embedded_article, article) \
+                            if self.use_embedded_content or (self.use_embedded_content == None and feed.has_embedded_content()) \
+                            else \
                            ((self.fetch_obfuscated_article if self.articles_are_obfuscated \
                              else self.fetch_article), url)
                req = WorkRequest(func, (arg, art_dir, f, a, len(feed)),