KG updates

GRiker 2011-01-11 08:24:02 -07:00
commit 319fde9c5a
27 changed files with 778 additions and 223 deletions

View File

@@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe

class Cicero(BasicNewsRecipe):
    timefmt = ' [%Y-%m-%d]'
    title = u'Cicero'
    __author__ = 'mad@sharktooth.de'
    description = u'Magazin f\xfcr politische Kultur'
    oldest_article = 7
    language = 'de'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    publisher = 'Ringier Publishing'
    category = 'news, politics, Germany'
    encoding = 'iso-8859-1'
    publication_type = 'magazine'
    masthead_url = 'http://www.cicero.de/img2/cicero_logo_rss.gif'
    feeds = [
        (u'Das gesamte Portfolio', u'http://www.cicero.de/rss/rss.php?ress_id='),
        #(u'Alle Heft-Inhalte', u'http://www.cicero.de/rss/rss.php?ress_id=heft'),
        #(u'Alle Online-Inhalte', u'http://www.cicero.de/rss/rss.php?ress_id=online'),
        #(u'Berliner Republik', u'http://www.cicero.de/rss/rss.php?ress_id=4'),
        #(u'Weltb\xfchne', u'http://www.cicero.de/rss/rss.php?ress_id=1'),
        #(u'Salon', u'http://www.cicero.de/rss/rss.php?ress_id=7'),
        #(u'Kapital', u'http://www.cicero.de/rss/rss.php?ress_id=6'),
        #(u'Netzst\xfccke', u'http://www.cicero.de/rss/rss.php?ress_id=9'),
        #(u'Leinwand', u'http://www.cicero.de/rss/rss.php?ress_id=12'),
        #(u'Bibliothek', u'http://www.cicero.de/rss/rss.php?ress_id=15'),
        (u'Kolumne - Alle Kolumnen', u'http://www.cicero.de/rss/rss2.php?ress_id='),
        #(u'Kolumne - Schreiber, Berlin', u'http://www.cicero.de/rss/rss2.php?ress_id=35'),
        #(u'Kolumne - TV Kritik', u'http://www.cicero.de/rss/rss2.php?ress_id=34')
    ]

    def print_version(self, url):
        return 'http://www.cicero.de/page_print.php?' + url.rpartition('?')[2]
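For reference: print_version keeps everything after the last '?' in the article URL and reattaches it to the print endpoint. A quick check (the article URL below is invented for illustration):

url = 'http://www.cicero.de/artikel.php?item=12345'  # made-up article URL
print('http://www.cicero.de/page_print.php?' + url.rpartition('?')[2])
# -> http://www.cicero.de/page_print.php?item=12345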

View File

@@ -11,7 +11,7 @@ class CNetJapan(BasicNewsRecipe):
         (u'CNet Blog', u'http://feed.japan.cnet.com/rss/blog/index.rdf')
     ]
     language = 'ja'
-    encoding = 'Shift_JIS'
+    encoding = 'utf-8'
     remove_javascript = True
     preprocess_regexps = [

View File

@@ -0,0 +1,122 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '08 January 2011, desUBIKado'
__author__ = 'desUBIKado'
__description__ = 'Daily newspaper from Biscay'
__version__ = 'v0.08'
__date__ = '08, January 2011'
'''
http://www.elcorreo.com/
'''

import time
import re
from calibre.web.feeds.news import BasicNewsRecipe

class heraldo(BasicNewsRecipe):
    __author__ = 'desUBIKado'
    description = 'Daily newspaper from Biscay'
    title = u'El Correo'
    publisher = 'Vocento'
    category = 'News, politics, culture, economy, general interest'
    oldest_article = 2
    delay = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    encoding = 'iso-8859-1'
    remove_empty_feeds = True
    remove_javascript = False

    feeds = [
        (u'Portada', u'http://www.elcorreo.com/vizcaya/portada.xml'),
        (u'Local', u'http://www.elcorreo.com/vizcaya/rss/feeds/vizcaya.xml'),
        (u'Internacional', u'http://www.elcorreo.com/vizcaya/rss/feeds/internacional.xml'),
        (u'Econom\xeda', u'http://www.elcorreo.com/vizcaya/rss/feeds/economia.xml'),
        (u'Pol\xedtica', u'http://www.elcorreo.com/vizcaya/rss/feeds/politica.xml'),
        (u'Opini\xf3n', u'http://www.elcorreo.com/vizcaya/rss/feeds/opinion.xml'),
        (u'Deportes', u'http://www.elcorreo.com/vizcaya/rss/feeds/deportes.xml'),
        (u'Sociedad', u'http://www.elcorreo.com/vizcaya/rss/feeds/sociedad.xml'),
        (u'Cultura', u'http://www.elcorreo.com/vizcaya/rss/feeds/cultura.xml'),
        (u'Televisi\xf3n', u'http://www.elcorreo.com/vizcaya/rss/feeds/television.xml'),
        (u'Gente', u'http://www.elcorreo.com/vizcaya/rss/feeds/gente.xml')
    ]

    keep_only_tags = [
        dict(name='div', attrs={'class':['grouphead','date','art_head','story-texto','text','colC_articulo','contenido_comentarios']}),
        dict(name='div', attrs={'id':['articulo','story-texto','story-entradilla']})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':['art_barra','detalles-opinion','formdenunciar','modulo calculadoras','nubetags','pie']}),
        dict(name='div', attrs={'class':['mod_lomas','bloque_lomas','blm_header','link-app3','link-app4','botones_listado']}),
        dict(name='div', attrs={'class':['navegacion_galeria','modulocanalpromocion','separa','separacion','compartir','tags_relacionados']}),
        dict(name='div', attrs={'class':['moduloBuscadorDeportes','modulo-gente','moddestacadopeq','OpcArt','articulopiniones']}),
        dict(name='div', attrs={'class':['modulo-especial','publiEspecial']}),
        dict(name='div', attrs={'id':['articulopina']}),
        dict(name='br', attrs={'class':'clear'}),
        dict(name='form', attrs={'name':'frm_conversor2'})
    ]

    remove_tags_before = dict(name='div', attrs={'class':'articulo '})
    remove_tags_after = dict(name='div', attrs={'class':'comentarios'})

    def get_cover_url(self):
        cover = None
        st = time.localtime()
        year = str(st.tm_year)
        month = "%.2d" % st.tm_mon
        day = "%.2d" % st.tm_mday
        # e.g. http://img.kiosko.net/2011/01/02/es/elcorreo.750.jpg
        # e.g. http://info.elcorreo.com/pdf/06012011-viz.pdf
        cover = 'http://info.elcorreo.com/pdf/' + day + month + year + '-viz.pdf'
        br = self.get_browser()
        try:
            br.open(cover)
        except:
            self.log("\nPortada no disponible")
            cover = 'http://www.elcorreo.com/vizcaya/noticias/201002/02/Media/logo-elcorreo-nuevo.png'
        return cover

    extra_css = '''
        h1, .headline {font-family:Arial,Helvetica,sans-serif; font-weight:bold; font-size:30px;}
        h2, .subhead {font-family:Arial,Helvetica,sans-serif; font-style:italic; font-weight:normal; font-size:18px;}
        h3, .overhead {font-family:Arial,Helvetica,sans-serif; font-weight:bold; font-size:16px;}
        h4 {font-family:Arial,Helvetica,sans-serif; font-weight:bold; font-size:16px;}
        h5 {font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-size:16px;}
        h6 {font-family:Arial,Helvetica,sans-serif; font-weight:bold; font-size:16px;}
        .date, .byline, .photo {font-family:Arial,Helvetica,sans-serif; font-weight:bold; font-size:14px;}
        img {margin-bottom: 0.4em}
    '''

    preprocess_regexps = [
        # To present the image of the embedded video
        (re.compile(r'var RUTA_IMAGEN', re.DOTALL|re.IGNORECASE), lambda match: '</script><img src'),
        (re.compile(r'.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
        (re.compile(r'var SITIO = "elcorreo";', re.DOTALL|re.IGNORECASE), lambda match: '<SCRIPT TYPE="text/JavaScript"'),
        # To separate paragraphs with a blank line
        (re.compile(r'<div class="p"', re.DOTALL|re.IGNORECASE), lambda match: '<p></p><div class="p"'),
        # To put a blank line between the subtitle and the date and time of the news
        (re.compile(r'<div class="date">', re.DOTALL|re.IGNORECASE), lambda match: '<br><div class="date">'),
        # To put a blank line between the intro of the embedded videos and the previous text
        (re.compile(r'<div class="video"', re.DOTALL|re.IGNORECASE), lambda match: '<br><div class="video"'),
        # To view photos from the first when these are presented as a gallery
        (re.compile(r'src="/img/shim.gif"', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'rel=', re.DOTALL|re.IGNORECASE), lambda match: 'src='),
        # To remove the link of the title
        (re.compile(r'<h1 class="headline">\n<a href="', re.DOTALL|re.IGNORECASE), lambda match: '<h1 class="'),
        (re.compile(r'</a>\n</h1>', re.DOTALL|re.IGNORECASE), lambda match: '</h1>'),
    ]
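The cover is the day's front-page PDF, addressed purely by date, with the logo as a fallback when the PDF is not up yet. The URL construction in isolation:

import time

st = time.localtime()
day, month, year = "%.2d" % st.tm_mday, "%.2d" % st.tm_mon, str(st.tm_year)
print('http://info.elcorreo.com/pdf/' + day + month + year + '-viz.pdf')
# e.g. -> http://info.elcorreo.com/pdf/11012011-viz.pdf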

View File

@@ -3,29 +3,31 @@ __license__ = 'GPL v3'
 __copyright__ = '04 December 2010, desUBIKado'
 __author__ = 'desUBIKado'
 __description__ = 'Daily newspaper from Aragon'
-__version__ = 'v0.03'
-__date__ = '11, December 2010'
+__version__ = 'v0.04'
+__date__ = '6, January 2011'
 '''
 http://www.heraldo.es/
 '''

 import time
+import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class heraldo(BasicNewsRecipe):
     __author__ = 'desUBIKado'
     description = 'Daily newspaper from Aragon'
     title = u'Heraldo de Aragon'
     publisher = 'OJD Nielsen'
     category = 'News, politics, culture, economy, general interest'
     language = 'es'
     timefmt = '[%a, %d %b, %Y]'
-    oldest_article = 1
+    oldest_article = 2
+    delay = 1
     max_articles_per_feed = 100
     use_embedded_content = False
     remove_javascript = True
     no_stylesheets = True
+    recursion = 10

     feeds = [
         (u'Portadas', u'http://www.heraldo.es/index.php/mod.portadas/mem.rss')
@@ -37,29 +39,39 @@ class heraldo(BasicNewsRecipe):
     remove_tags = [dict(name='a', attrs={'class':['com flo-r','enl-if','enl-df']}),
                    dict(name='div', attrs={'class':['brb-b-s con marg-btt','cnt-rel con']}),
-                   dict(name='form', attrs={'class':'form'})]
+                   dict(name='form', attrs={'class':'form'}),
+                   dict(name='ul', attrs={'id':['cont-tags','pag-1']})]

     remove_tags_before = dict(name='div', attrs={'id':'dts'})
     remove_tags_after = dict(name='div', attrs={'id':'com'})

     def get_cover_url(self):
         cover = None
         st = time.localtime()
         year = str(st.tm_year)
         month = "%.2d" % st.tm_mon
         day = "%.2d" % st.tm_mday
         # e.g. http://oldorigin-www.heraldo.es/20101211/primeras/portada_aragon.pdf
         cover = 'http://oldorigin-www.heraldo.es/' + year + month + day + '/primeras/portada_aragon.pdf'
         br = BasicNewsRecipe.get_browser()
         try:
             br.open(cover)
         except:
             self.log("\nPortada no disponible")
             cover = 'http://www.heraldo.es/MODULOS/global/publico/interfaces/img/logo-Heraldo.png'
         return cover

     extra_css = '''
-        h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold; font-size:xx-large;}
+        .con strong{font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-size:16px;}
+        .con h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold; font-size:30px;}
+        .con span{font-family:Arial,Helvetica,sans-serif; font-weight:bold; font-size:12px;}
+        .ent {font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-style:italic; font-size:18px;}
+        img{margin-bottom: 0.4em}
         '''
+
+    preprocess_regexps = [
+        # To separate the comments with a blank line
+        (re.compile(r'<div id="com"', re.DOTALL|re.IGNORECASE), lambda match: '<br><div id="com"')
+    ]

View File

@@ -0,0 +1,80 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2011, Miroslav Vasko zemiak@gmail.com'

'''
.tyzden, a weekly news magazine (a week old issue)
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import date
import re

class TyzdenRecipe(BasicNewsRecipe):
    __license__ = 'GPL v3'
    __author__ = 'zemiak'
    language = 'sk'
    version = 1

    publisher = u'www.tyzden.sk'
    category = u'Magazine'
    description = u'A conservative weekly magazine. The latest free issue'

    today = date.today()
    iso = today.isocalendar()
    year = iso[0]
    weeknum = iso[1]
    if weeknum > 1:
        weeknum -= 1

    title = u'.tyzden ' + str(weeknum) + '/' + str(year)

    base_url_path = 'http://www.tyzden.sk/casopis/' + str(year) + '/' + str(weeknum)
    base_url = base_url_path + '.html'

    oldest_article = 20
    max_articles_per_feed = 100
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True

    keep_only_tags = []
    keep_only_tags.append(dict(name='h1'))
    keep_only_tags.append(dict(name='div', attrs={'class': 'text_area top_nofoto'}))
    keep_only_tags.append(dict(name='div', attrs={'class': 'text_block'}))

    remove_tags_after = [dict(name='div', attrs={'class': 'text_block'})]

    def find_sections(self):
        soup = self.index_to_soup(self.base_url)
        # find cover pic
        imgdiv = soup.find('div', attrs={'class': 'foto'})
        if imgdiv is not None:
            img = imgdiv.find('img')
            if img is not None:
                self.cover_url = 'http://www.tyzden.sk/' + img['src']
        # end find cover pic

        for s in soup.findAll('a', attrs={'href': re.compile(r'rubrika/.*')}):
            yield (self.tag_to_string(s), s)

    def find_articles(self, soup):
        for art in soup.findAllNext('a'):
            if not art['href'].startswith('casopis/'):
                break

            url = art['href']
            title = self.tag_to_string(art)
            yield {
                'title': title, 'url': self.base_url_path + '/' + url, 'description': title,
                'date': strftime('%a, %d %b'),
            }

    def parse_index(self):
        feeds = []
        for title, soup in self.find_sections():
            feeds.append((title, list(self.find_articles(soup))))
        return feeds
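The issue number comes from the ISO calendar, backed up one week because the newest free issue is a week old. A standalone check with a fixed date:

from datetime import date

year, weeknum = date(2011, 1, 11).isocalendar()[:2]
if weeknum > 1:
    weeknum -= 1  # the latest free issue is one week old
print('http://www.tyzden.sk/casopis/%d/%d.html' % (year, weeknum))
# -> http://www.tyzden.sk/casopis/2011/1.html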

View File

@@ -117,7 +117,6 @@ if iswindows:
     poppler_inc_dirs = consolidate('POPPLER_INC_DIR',
             r'%s\poppler;%s'%(sw_inc_dir, sw_inc_dir))
-    popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[1]+r'\qt4']
     poppler_lib_dirs = consolidate('POPPLER_LIB_DIR', sw_lib_dir)
     popplerqt4_lib_dirs = poppler_lib_dirs
     poppler_libs = ['poppler']
@@ -131,7 +130,6 @@ elif isosx:
     fc_lib = '/sw/lib'
     poppler_inc_dirs = consolidate('POPPLER_INC_DIR',
             '/sw/build/poppler-0.14.5/poppler:/sw/build/poppler-0.14.5')
-    popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[0]+'/qt4']
     poppler_lib_dirs = consolidate('POPPLER_LIB_DIR',
             '/sw/lib')
     poppler_libs = ['poppler']
@@ -150,9 +148,6 @@ else:
     # Include directories
     poppler_inc_dirs = pkgconfig_include_dirs('poppler',
             'POPPLER_INC_DIR', '/usr/include/poppler')
-    popplerqt4_inc_dirs = pkgconfig_include_dirs('poppler-qt4', '', '')
-    if not popplerqt4_inc_dirs:
-        popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[0]+'/qt4']
     png_inc_dirs = pkgconfig_include_dirs('libpng', 'PNG_INC_DIR',
             '/usr/include')
     magick_inc_dirs = pkgconfig_include_dirs('MagickWand', 'MAGICK_INC', '/usr/include/ImageMagick')
@@ -187,20 +182,17 @@ if not poppler_inc_dirs or not os.path.exists(
     poppler_error = \
         ('Poppler not found on your system. Various PDF related',
         ' functionality will not work. Use the POPPLER_INC_DIR and',
-        ' POPPLER_LIB_DIR environment variables.')
-
-popplerqt4_error = None
-if not popplerqt4_inc_dirs or not os.path.exists(
-    os.path.join(popplerqt4_inc_dirs[-1], 'poppler-qt4.h')):
-    popplerqt4_error = \
-        ('Poppler Qt4 bindings not found on your system.')
+        ' POPPLER_LIB_DIR environment variables. calibre requires '
+        ' the poppler XPDF headers. If your distro does not '
+        ' include them you will have to re-compile poppler '
+        ' by hand with --enable-xpdf-headers')

 magick_error = None
 if not magick_inc_dirs or not os.path.exists(os.path.join(magick_inc_dirs[0],
     'wand')):
     magick_error = ('ImageMagick not found on your system. '
             'Try setting the environment variables MAGICK_INC '
-            'and MAGICK_LIB to help calibre locate the inclue and libbrary '
+            'and MAGICK_LIB to help calibre locate the include and library '
             'files.')

 podofo_lib = os.environ.get('PODOFO_LIB_DIR', podofo_lib)

View File

@@ -29,7 +29,7 @@ class ANDROID(USBMS):
     # Motorola
     0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100],
             0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
-            0x4286 : [0x216] },
+            0x4286 : [0x216], 0x42b3 : [0x216] },

     # Sony Ericsson
     0xfce : { 0xd12e : [0x0100]},
@@ -53,6 +53,9 @@ class ANDROID(USBMS):
     # LG
     0x1004 : { 0x61cc : [0x100] },

+    # Archos
+    0x0e79 : { 0x1420 : [0x0216]},
+
     }

     EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books']
     EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
@@ -61,18 +64,19 @@ class ANDROID(USBMS):
     EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN)

     VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
-            'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE']
+            'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS']
     WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
             '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
             'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
             'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
-            'SGH-T849', '_MB300']
+            'SGH-T849', '_MB300', 'A70S']
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
-            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD']
+            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
+            'A70S']

-    OSX_MAIN_MEM = 'HTC Android Phone Media'
+    OSX_MAIN_MEM = 'Android Device Main Memory'

-    MAIN_MEMORY_VOLUME_LABEL = 'Android Phone Internal Memory'
+    MAIN_MEMORY_VOLUME_LABEL = 'Android Device Main Memory'

     SUPPORTS_SUB_DIRS = True
View File

@@ -76,12 +76,23 @@ class PRS505(USBMS):
             'sending DRMed books in which you cannot change the cover.'
             ' WARNING: This option should only be used with newer '
             'SONY readers: 350, 650, 950 and newer.'),
+        _('Refresh separate covers when using automatic management (newer readers)') +
+        ':::' +
+        _('Set this option to have separate book covers uploaded '
+          'every time you connect your device. Unset this option if '
+          'you have so many books on the reader that performance is '
+          'unacceptable.')
     ]
     EXTRA_CUSTOMIZATION_DEFAULT = [
                 ', '.join(['series', 'tags']),
+                False,
                 False
     ]

+    OPT_COLLECTIONS = 0
+    OPT_UPLOAD_COVERS = 1
+    OPT_REFRESH_COVERS = 2
+
     plugboard = None
     plugboard_func = None
@@ -171,7 +182,7 @@ class PRS505(USBMS):
         opts = self.settings()
         if opts.extra_customization:
             collections = [x.strip() for x in
-                    opts.extra_customization[0].split(',')]
+                    opts.extra_customization[self.OPT_COLLECTIONS].split(',')]
         else:
             collections = []
         debug_print('PRS505: collection fields:', collections)
@@ -183,6 +194,20 @@ class PRS505(USBMS):
             c.update(blists, collections, pb)
             c.write()

+        if opts.extra_customization[self.OPT_REFRESH_COVERS]:
+            debug_print('PRS505: uploading covers in sync_booklists')
+            for idx, bl in blists.items():
+                prefix = self._card_a_prefix if idx == 1 else \
+                         self._card_b_prefix if idx == 2 \
+                         else self._main_prefix
+                for book in bl:
+                    p = os.path.join(prefix, book.lpath)
+                    self._upload_cover(os.path.dirname(p),
+                            os.path.splitext(os.path.basename(p))[0],
+                            book, p)
+        else:
+            debug_print('PRS505: NOT uploading covers in sync_booklists')
+
         USBMS.sync_booklists(self, booklists, end_session=end_session)
         debug_print('PRS505: finished sync_booklists')
@@ -199,11 +224,14 @@ class PRS505(USBMS):
     def upload_cover(self, path, filename, metadata, filepath):
         opts = self.settings()
-        if not opts.extra_customization[1]:
+        if not opts.extra_customization[self.OPT_UPLOAD_COVERS]:
             # Building thumbnails disabled
-            debug_print('PRS505: not uploading covers')
+            debug_print('PRS505: not uploading cover')
             return
-        debug_print('PRS505: uploading covers')
+        debug_print('PRS505: uploading cover')
+        self._upload_cover(path, filename, metadata, filepath)
+
+    def _upload_cover(self, path, filename, metadata, filepath):
         if metadata.thumbnail and metadata.thumbnail[-1]:
             path = path.replace('/', os.sep)
             is_main = path.startswith(self._main_prefix)
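The new OPT_* constants replace the magic indices previously used into the extra_customization list. A minimal sketch of the pattern (values invented):

extra_customization = ['series, tags', True, False]  # invented example values
OPT_COLLECTIONS, OPT_UPLOAD_COVERS, OPT_REFRESH_COVERS = 0, 1, 2

collections = [x.strip() for x in extra_customization[OPT_COLLECTIONS].split(',')]
print(collections)                              # -> ['series', 'tags']
print(extra_customization[OPT_REFRESH_COVERS])  # -> False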

View File

@@ -79,7 +79,7 @@ class DocAnalysis(object):
         elif format == 'spanned_html':
             linere = re.compile('(?<=<span).*?(?=</span>)', re.DOTALL)
         elif format == 'txt':
-            linere = re.compile('.*?\n', re.DOTALL)
+            linere = re.compile('.*?\n')
         self.lines = linere.findall(raw)

     def line_length(self, percent):
@@ -177,7 +177,7 @@ class Dehyphenator(object):
     def __init__(self):
         # Add common suffixes to the regex below to increase the likelihood of a match -
         # don't add suffixes which are also complete words, such as 'able' or 'sex'
-        self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE)
+        self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$", re.IGNORECASE)
         # remove prefixes if the prefix was not already the point of hyphenation
         self.prefixes = re.compile(r'^(dis|re|un|in|ex)$', re.IGNORECASE)
         self.removeprefix = re.compile(r'^(dis|re|un|in|ex)', re.IGNORECASE)
@@ -199,7 +199,7 @@ class Dehyphenator(object):
             searchresult = self.html.find(lookupword.lower())
         except:
             return hyphenated
-        if self.format == 'html_cleanup':
+        if self.format == 'html_cleanup' or self.format == 'txt_cleanup':
             if self.html.find(lookupword) != -1 or searchresult != -1:
                 #print "Cleanup:returned dehyphenated word: " + str(dehyphenated)
                 return dehyphenated
@@ -225,10 +225,15 @@ class Dehyphenator(object):
             intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)\s*(?=<)(?P<wraptags>(</span>)?\s*(</[iubp]>\s*){1,2}(?P<up2threeblanks><(p|div)[^>]*>\s*(<p[^>]*>\s*</p>\s*)?</(p|div)>\s+){0,3}\s*(<[iubp][^>]*>\s*){1,2}(<span[^>]*>)?)\s*(?P<secondpart>[\w\d]+)' % length)
         elif format == 'pdf':
             intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)\s*(?P<wraptags><p>|</[iub]>\s*<p>\s*<[iub]>)\s*(?P<secondpart>[\w\d]+)'% length)
+        elif format == 'txt':
+            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)(\u0020|\u0009)*(?P<wraptags>(\n(\u0020|\u0009)*)+)(?P<secondpart>[\w\d]+)'% length)
         elif format == 'individual_words':
-            intextmatch = re.compile(u'>[^<]*\b(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)"\s>]+)(-|‐)(?P<secondpart)\w+)\b[^<]*<') # for later, not called anywhere yet
+            intextmatch = re.compile(u'>[^<]*\b(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)"\s>]+)(-|‐)\u0020*(?P<secondpart>\w+)\b[^<]*<') # for later, not called anywhere yet
         elif format == 'html_cleanup':
             intextmatch = re.compile(u'(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)')
+        elif format == 'txt_cleanup':
+            intextmatch = re.compile(u'(?P<firstpart>\w+)(-|‐)(?P<wraptags>\s+)(?P<secondpart>[\w\d]+)')
+
         html = intextmatch.sub(self.dehyphenate, html)
         return html
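To see what the new 'txt' branch is after, here is a cut-down version of its regex (character class simplified; the real Dehyphenator also checks the joined word against the rest of the document before substituting):

import re

length = 6
pat = re.compile(
    u'(?<=.{%i})(?P<firstpart>[^\\s>]+)-[ \t]*(?P<wraptags>(\n[ \t]*)+)(?P<secondpart>[\\w\\d]+)' % length)
txt = u'This is a demon-\nstration of unwrapping.'
print(pat.sub(lambda m: m.group('firstpart') + m.group('secondpart'), txt))
# -> This is a demonstration of unwrapping.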

View File

@@ -190,16 +190,16 @@ class PreProcessor(object):
         line_ending = "\s*</(span|p|div)>\s*(</(p|span|div)>)?"
         blanklines = "\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*"
         line_opening = "<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*"
-        txt_line_wrap = u"(\u0020|\u0009)*\n"
+        txt_line_wrap = u"((\u0020|\u0009)*\n){1,4}"

         unwrap_regex = lookahead+line_ending+blanklines+line_opening
         if format == 'txt':
             unwrap_regex = lookahead+txt_line_wrap

         unwrap = re.compile(u"%s" % unwrap_regex, re.UNICODE)
         content = unwrap.sub(' ', content)
         return content

     def __call__(self, html):
         self.log("********* Preprocessing HTML *********")
@@ -357,6 +357,6 @@ class PreProcessor(object):
         html = blankreg.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)

         # Center separator lines
-        html = re.sub(u'<p>\s*(?P<break>([*#•]+\s*)+)\s*</p>', '<p style="text-align:center">' + '\g<break>' + '</p>', html)
+        html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center">' + '\g<break>' + '</p>', html)

         return html
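The widened scene-break rule now also centers separators wrapped in inline tags, not just bare <p>* * *</p>. A reduced version of the pattern, with one inner group instead of three:

import re

pat = (u'<(?P<outer>p|div)[^>]*>\\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\\s*'
       u'(?P<break>([*#]+\\s*)+)\\s*(</(?P=inner1)>)?\\s*</(?P=outer)>')
html = u'<p><i>* * *</i></p>'
print(re.sub(pat, u'<p style="text-align:center">\\g<break></p>', html))
# -> <p style="text-align:center">* * *</p>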

View File

@@ -324,14 +324,16 @@ class Metadata(object):
         if metadata is None:
             traceback.print_stack()
             return
-        metadata = copy.deepcopy(metadata)
-        if '#value#' not in metadata:
-            if metadata['datatype'] == 'text' and metadata['is_multiple']:
-                metadata['#value#'] = []
+        m = {}
+        for k in metadata:
+            m[k] = copy.copy(metadata[k])
+        if '#value#' not in m:
+            if m['datatype'] == 'text' and m['is_multiple']:
+                m['#value#'] = []
             else:
-                metadata['#value#'] = None
+                m['#value#'] = None
         _data = object.__getattribute__(self, '_data')
-        _data['user_metadata'][field] = metadata
+        _data['user_metadata'][field] = m

     def template_to_attribute(self, other, ops):
         '''
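The deepcopy of the whole user-metadata dict becomes a per-key shallow copy, much cheaper for the mostly-scalar values involved while still giving each book its own '#value#' slot. In outline:

import copy

metadata = {'datatype': 'text', 'is_multiple': True, 'table': 'tags'}
m = {}
for k in metadata:
    m[k] = copy.copy(metadata[k])   # shallow per-value copy, not deepcopy
if '#value#' not in m:
    m['#value#'] = [] if (m['datatype'] == 'text' and m['is_multiple']) else None
print(m['#value#'])  # -> []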

View File

@@ -296,7 +296,7 @@ class RTFInput(InputFormatPlugin):
                 u'<p>\u00a0</p>\n'.encode('utf-8'), res)
         if self.opts.preprocess_html:
             preprocessor = PreProcessor(self.opts, log=getattr(self, 'log', None))
-            res = preprocessor(res)
+            res = preprocessor(res.decode('utf-8')).encode('utf-8')
         f.write(res)
         self.write_inline_css(inline_class, border_styles)
         stream.seek(0)

View File

@@ -0,0 +1,58 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'

import re

from calibre import prepare_string_for_xml

class TXTHeuristicProcessor(object):

    def __init__(self):
        self.ITALICIZE_WORDS = [
            'Etc.', 'etc.', 'viz.', 'ie.', 'i.e.', 'Ie.', 'I.e.', 'eg.',
            'e.g.', 'Eg.', 'E.g.', 'et al.', 'et cetera', 'n.b.', 'N.b.',
            'nota bene', 'Nota bene', 'Ste.', 'Mme.', 'Mdme.',
            'Mlle.', 'Mons.', 'PS.', 'PPS.',
        ]
        self.ITALICIZE_STYLE_PATS = [
            r'(?msu)_(?P<words>.+?)_',
            r'(?msu)/(?P<words>[^<>]+?)/',
            r'(?msu)~~(?P<words>.+?)~~',
            r'(?msu)\*(?P<words>.+?)\*',
            r'(?msu)~(?P<words>.+?)~',
            r'(?msu)_/(?P<words>[^<>]+?)/_',
            r'(?msu)_\*(?P<words>.+?)\*_',
            r'(?msu)\*/(?P<words>[^<>]+?)/\*',
            r'(?msu)_\*/(?P<words>[^<>]+?)/\*_',
            r'(?msu)/:(?P<words>[^<>]+?):/',
            r'(?msu)\|:(?P<words>.+?):\|',
        ]

    def process_paragraph(self, paragraph):
        for word in self.ITALICIZE_WORDS:
            paragraph = paragraph.replace(word, '<i>%s</i>' % word)
        for pat in self.ITALICIZE_STYLE_PATS:
            paragraph = re.sub(pat, lambda mo: '<i>%s</i>' % mo.group('words'), paragraph)
        return paragraph

    def convert(self, txt, title='', epub_split_size_kb=0):
        from calibre.ebooks.txt.processor import clean_txt, split_txt, HTML_TEMPLATE
        txt = clean_txt(txt)
        txt = split_txt(txt, epub_split_size_kb)

        processed = []
        for line in txt.split('\n\n'):
            processed.append(u'<p>%s</p>' % self.process_paragraph(prepare_string_for_xml(line.replace('\n', ' '))))

        txt = u'\n'.join(processed)
        txt = re.sub('[ ]{2,}', ' ', txt)
        html = HTML_TEMPLATE % (title, txt)

        from calibre.ebooks.conversion.utils import PreProcessor
        pp = PreProcessor()
        html = pp.markup_chapters(html, pp.get_word_count(html), False)
        return html
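A quick check of what the style patterns do to a paragraph, applied exactly as process_paragraph applies them (two of the patterns shown):

import re

ITALICIZE_STYLE_PATS = [
    r'(?msu)_(?P<words>.+?)_',
    r'(?msu)/(?P<words>[^<>]+?)/',
]
paragraph = 'He was _very_ sure, /or so he said/.'
for pat in ITALICIZE_STYLE_PATS:
    paragraph = re.sub(pat, lambda mo: '<i>%s</i>' % mo.group('words'), paragraph)
print(paragraph)
# -> He was <i>very</i> sure, <i>or so he said</i>.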

View File

@@ -7,10 +7,12 @@ __docformat__ = 'restructuredtext en'
 import os

 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
+from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
 from calibre.ebooks.chardet import detect
 from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
     separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
-    preserve_spaces, detect_paragraph_type, detect_formatting_type
+    preserve_spaces, detect_paragraph_type, detect_formatting_type, \
+    convert_heuristic, normalize_line_endings
 from calibre import _ent_pat, xml_entity_to_unicode

 class TXTInput(InputFormatPlugin):
@@ -22,20 +24,24 @@ class TXTInput(InputFormatPlugin):
     options = set([
         OptionRecommendation(name='paragraph_type', recommended_value='auto',
-            choices=['auto', 'block', 'single', 'print'],
+            choices=['auto', 'block', 'single', 'print', 'unformatted'],
             help=_('Paragraph structure.\n'
-                   'choices are [\'auto\', \'block\', \'single\', \'print\', \'markdown\']\n'
+                   'choices are [\'auto\', \'block\', \'single\', \'print\', \'unformatted\']\n'
                    '* auto: Try to auto detect paragraph type.\n'
                    '* block: Treat a blank line as a paragraph break.\n'
                    '* single: Assume every line is a paragraph.\n'
                    '* print: Assume every line starting with 2+ spaces or a tab '
-                   'starts a paragraph.')),
+                   'starts a paragraph.'
+                   '* unformatted: Most lines have hard line breaks, few/no blank lines or indents.')),
         OptionRecommendation(name='formatting_type', recommended_value='auto',
-            choices=['auto', 'none', 'markdown'],
+            choices=['auto', 'none', 'heuristic', 'markdown'],
             help=_('Formatting used within the document.'
-                   '* auto: Try to auto detect the document formatting.\n'
-                   '* none: Do not modify the paragraph formatting. Everything is a paragraph.\n'
-                   '* markdown: Run the input though the markdown pre-processor. '
+                   '* auto: Automatically decide which formatting processor to use.\n'
+                   '* none: Do not process the document formatting. Everything is a '
+                   'paragraph and no styling is applied.\n'
+                   '* heuristic: Process using heuristics to determine formatting such '
+                   'as chapter headings and italic text.\n'
+                   '* markdown: Processing using markdown formatting. '
                    'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
         OptionRecommendation(name='preserve_spaces', recommended_value=False,
             help=_('Normally extra spaces are condensed into a single space. '
@@ -47,7 +53,7 @@ class TXTInput(InputFormatPlugin):
     def convert(self, stream, options, file_ext, log,
                 accelerators):
         log.debug('Reading text from file...')

         txt = stream.read()
         # Get the encoding of the document.
         if options.input_encoding:
@@ -67,7 +73,14 @@ class TXTInput(InputFormatPlugin):
         # followed by the &nbsp; entity.
         if options.preserve_spaces:
             txt = preserve_spaces(txt)

+        # Normalize line endings
+        txt = normalize_line_endings(txt)
+
+        # Get length for hyphen removal and punctuation unwrap
+        docanalysis = DocAnalysis('txt', txt)
+        length = docanalysis.line_length(.5)
+
         if options.formatting_type == 'auto':
             options.formatting_type = detect_formatting_type(txt)
@@ -86,27 +99,39 @@ class TXTInput(InputFormatPlugin):
                 log.debug('Could not reliably determine paragraph type using block')
                 options.paragraph_type = 'block'
             else:
                 log.debug('Auto detected paragraph type as %s' % options.paragraph_type)

+        # Dehyphenate
+        dehyphenator = Dehyphenator()
+        txt = dehyphenator(txt,'txt', length)
+
         # We don't check for block because the processor assumes block.
         # single and print are transformed to block for processing.
-        if options.paragraph_type == 'single' or 'unformatted':
+        if options.paragraph_type == 'single' or options.paragraph_type == 'unformatted':
             txt = separate_paragraphs_single_line(txt)
         elif options.paragraph_type == 'print':
             txt = separate_paragraphs_print_formatted(txt)

         if options.paragraph_type == 'unformatted':
             from calibre.ebooks.conversion.utils import PreProcessor
-            from calibre.ebooks.conversion.preprocess import DocAnalysis
-            # get length
-            docanalysis = DocAnalysis('txt', txt)
-            length = docanalysis.line_length(.5)
             # unwrap lines based on punctuation
             preprocessor = PreProcessor(options, log=getattr(self, 'log', None))
             txt = preprocessor.punctuation_unwrap(length, txt, 'txt')

         flow_size = getattr(options, 'flow_size', 0)
-        html = convert_basic(txt, epub_split_size_kb=flow_size)
+
+        if options.formatting_type == 'heuristic':
+            html = convert_heuristic(txt, epub_split_size_kb=flow_size)
+        else:
+            html = convert_basic(txt, epub_split_size_kb=flow_size)
+
+        # Dehyphenate in cleanup mode for missed txt and markdown conversion
+        dehyphenator = Dehyphenator()
+        html = dehyphenator(html,'txt_cleanup', length)
+        html = dehyphenator(html,'html_cleanup', length)

         from calibre.customize.ui import plugin_for_input_format
         html_input = plugin_for_input_format('html')

View File

@@ -9,6 +9,7 @@ import os, re
 from calibre import prepare_string_for_xml, isbytestring
 from calibre.ebooks.markdown import markdown
 from calibre.ebooks.metadata.opf2 import OPFCreator
+from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor
 from calibre.ebooks.conversion.preprocess import DocAnalysis

 __license__ = 'GPL v3'
@@ -17,7 +18,7 @@ __docformat__ = 'restructuredtext en'
 HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'

-def convert_basic(txt, title='', epub_split_size_kb=0):
+def clean_txt(txt):
     if isbytestring(txt):
         txt = txt.decode('utf-8', 'replace')
     # Strip whitespace from the beginning and end of the line. Also replace
@@ -36,6 +37,10 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
     chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19))
     illegal_chars = re.compile(u'|'.join(map(unichr, chars)))
     txt = illegal_chars.sub('', txt)

+    return txt
+
+def split_txt(txt, epub_split_size_kb=0):
     # Takes care of the case where there is no point in splitting
     if epub_split_size_kb > 0:
         if isinstance(txt, unicode):
@@ -50,6 +55,12 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
         if isbytestring(txt):
             txt = txt.decode('utf-8')

+    return txt
+
+def convert_basic(txt, title='', epub_split_size_kb=0):
+    txt = clean_txt(txt)
+    txt = split_txt(txt, epub_split_size_kb)
+
     lines = []
     # Split into paragraphs based on having a blank line between text.
     for line in txt.split('\n\n'):
@@ -58,6 +69,10 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
     return HTML_TEMPLATE % (title, u'\n'.join(lines))

+def convert_heuristic(txt, title='', epub_split_size_kb=0):
+    tp = TXTHeuristicProcessor()
+    return tp.convert(txt, title, epub_split_size_kb)
+
 def convert_markdown(txt, title='', disable_toc=False):
     md = markdown.Markdown(
             extensions=['footnotes', 'tables', 'toc'],
@@ -65,9 +80,12 @@ def convert_markdown(txt, title='', disable_toc=False):
             safe_mode=False)
     return HTML_TEMPLATE % (title, md.convert(txt))

-def separate_paragraphs_single_line(txt):
+def normalize_line_endings(txt):
     txt = txt.replace('\r\n', '\n')
     txt = txt.replace('\r', '\n')
+    return txt
+
+def separate_paragraphs_single_line(txt):
     txt = re.sub(u'(?<=.)\n(?=.)', '\n\n', txt)
     return txt
@@ -102,7 +120,7 @@ def detect_paragraph_type(txt):
         single: Each line is a paragraph.
         print: Each paragraph starts with 2+ spaces or a tab
                and ends when a new paragraph is reached.
-        unformatted: most lines have hard line breaks, few/no spaces or indents
+        unformatted: most lines have hard line breaks, few/no blank lines or indents

     returns block, single, print, unformatted
     '''
@@ -115,15 +133,21 @@ def detect_paragraph_type(txt):
     hardbreaks = docanalysis.line_histogram(.55)

     if hardbreaks:
-        # Check for print
+        # Determine print percentage
         tab_line_count = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt))
-        if tab_line_count / float(txt_line_count) >= .25:
-            return 'print'
+        print_percent = tab_line_count / float(txt_line_count)

-        # Check for block
+        # Determine block percentage
         empty_line_count = len(re.findall('(?mu)^\s*$', txt))
-        if empty_line_count / float(txt_line_count) >= .25:
-            return 'block'
+        block_percent = empty_line_count / float(txt_line_count)
+
+        # Compare the two: the type with the larger share wins; if either
+        # share dominates the document too heavily, neither wins.
+        if print_percent >= block_percent:
+            if .15 <= print_percent <= .75:
+                return 'print'
+        elif .15 <= block_percent <= .75:
+            return 'block'

         # Assume unformatted text with hardbreaks if nothing else matches
         return 'unformatted'
@@ -153,4 +177,4 @@ def detect_formatting_type(txt):
         if txt.count('\\'+c) > 10:
             return 'markdown'

-    return 'none'
+    return 'heuristic'
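The reworked detection makes print and block compete, and a ratio only wins inside the 15-75% band, so a document dominated by a single trait falls through to 'unformatted'. A toy version of the comparison:

def classify(print_percent, block_percent):
    # mirrors the new comparison in detect_paragraph_type
    if print_percent >= block_percent:
        if .15 <= print_percent <= .75:
            return 'print'
    elif .15 <= block_percent <= .75:
        return 'block'
    return 'unformatted'

print(classify(.40, .05))  # -> print
print(classify(.10, .50))  # -> block
print(classify(.05, .90))  # -> unformatted (blank lines dominate too heavily)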

View File

@@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'
 Transform OEB content into plain text
 '''

-import os
 import re

 from lxml import etree
@@ -33,6 +32,15 @@ BLOCK_STYLES = [
     'block',
 ]

+HEADING_TAGS = [
+    'h1',
+    'h2',
+    'h3',
+    'h4',
+    'h5',
+    'h6',
+]
+
 SPACE_TAGS = [
     'td',
     'br',
@@ -47,6 +55,10 @@ class TXTMLizer(object):
         self.log.info('Converting XHTML to TXT...')
         self.oeb_book = oeb_book
         self.opts = opts
+        self.toc_ids = []
+        self.last_was_heading = False
+
+        self.create_flat_toc(self.oeb_book.toc)
+
         return self.mlize_spine()
@@ -58,8 +70,11 @@ class TXTMLizer(object):
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
             content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
             content = self.remove_newlines(content)
-            output += self.dump_text(etree.fromstring(content), stylizer)
-        output = self.cleanup_text(u''.join(output))
+            output += self.dump_text(etree.fromstring(content), stylizer, item)
+            output += '\n\n\n\n\n\n'
+        output = u''.join(output)
+        output = u'\n'.join(l.rstrip() for l in output.splitlines())
+        output = self.cleanup_text(output)

         return output
@@ -68,6 +83,8 @@ class TXTMLizer(object):
         text = text.replace('\r\n', ' ')
         text = text.replace('\n', ' ')
         text = text.replace('\r', ' ')
+        # Condense redundant spaces created by replacing newlines with spaces.
+        text = re.sub(r'[ ]{2,}', ' ', text)

         return text
@@ -80,6 +97,14 @@ class TXTMLizer(object):
             toc.append(u'* %s\n\n' % item.title)
         return ''.join(toc)

+    def create_flat_toc(self, nodes):
+        '''
+        Turns a hierarchical list of TOC href's into a flat list.
+        '''
+        for item in nodes:
+            self.toc_ids.append(item.href)
+            self.create_flat_toc(item.nodes)
+
     def cleanup_text(self, text):
         self.log.debug('\tClean up text...')
         # Replace bad characters.
@@ -92,7 +117,7 @@ class TXTMLizer(object):
         text = text.replace('\f+', ' ')

         # Single line paragraph.
-        text = re.sub('(?<=.)%s(?=.)' % os.linesep, ' ', text)
+        text = re.sub('(?<=.)\n(?=.)', ' ', text)

         # Remove multiple spaces.
         text = re.sub('[ ]{2,}', ' ', text)
@@ -101,13 +126,19 @@ class TXTMLizer(object):
         text = re.sub('\n[ ]+\n', '\n\n', text)

         if self.opts.remove_paragraph_spacing:
             text = re.sub('\n{2,}', '\n', text)
-            text = re.sub('(?imu)^(?=.)', '\t', text)
+            text = re.sub(r'(?msu)^(?P<t>[^\t\n]+?)$', lambda mo: u'%s\n\n' % mo.group('t'), text)
+            text = re.sub(r'(?msu)(?P<b>[^\n])\n+(?P<t>[^\t\n]+?)(?=\n)', lambda mo: '%s\n\n\n\n\n\n%s' % (mo.group('b'), mo.group('t')), text)
         else:
-            text = re.sub('\n{3,}', '\n\n', text)
+            text = re.sub('\n{7,}', '\n\n\n\n\n\n', text)

         # Replace spaces at the beginning and end of lines
+        # We don't replace tabs because those are only added
+        # when remove paragraph spacing is enabled.
         text = re.sub('(?imu)^[ ]+', '', text)
         text = re.sub('(?imu)[ ]+$', '', text)

+        # Remove empty space and newlines at the beginning of the document.
+        text = re.sub(r'(?u)^[ \n]+', '', text)
+
         if self.opts.max_line_length:
             max_length = self.opts.max_line_length
@@ -145,13 +176,11 @@ class TXTMLizer(object):
         return text

-    def dump_text(self, elem, stylizer, end=''):
+    def dump_text(self, elem, stylizer, page):
         '''
         @elem: The element in the etree that we are working on.
         @stylizer: The style information attached to the element.
-        @end: The last two characters of the text from the previous element.
-              This is used to determine if a blank line is needed when starting
-              a new block element.
+        @page: OEB page used to determine absolute urls.
         '''

         if not isinstance(elem.tag, basestring) \
@@ -170,29 +199,45 @@ class TXTMLizer(object):
             return ['']

         tag = barename(elem.tag)
+        tag_id = elem.attrib.get('id', None)
         in_block = False
+        in_heading = False
+
+        # Are we in a heading?
+        # This can either be a heading tag or a TOC item.
+        if tag in HEADING_TAGS or '%s#%s' % (page.href, tag_id) in self.toc_ids:
+            in_heading = True
+            if not self.last_was_heading:
+                text.append('\n\n\n\n\n\n')

         # Are we in a paragraph block?
         if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
+            if self.opts.remove_paragraph_spacing and not in_heading:
+                text.append(u'\t')
             in_block = True
-            if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text:
-                text.append(u'\n\n')

         if tag in SPACE_TAGS:
             text.append(u' ')

+        # Scene breaks.
+        if tag == 'hr':
+            text.append('\n\n* * *\n\n')
+
         # Process tags that contain text.
         if hasattr(elem, 'text') and elem.text:
             text.append(elem.text)

+        # Recurse down into tags within the tag we are in.
         for item in elem:
-            en = u''
-            if len(text) >= 2:
-                en = text[-1][-2:]
-            text += self.dump_text(item, stylizer, en)
+            text += self.dump_text(item, stylizer, page)

         if in_block:
             text.append(u'\n\n')

+        if in_heading:
+            text.append(u'\n')
+            self.last_was_heading = True
+        else:
+            self.last_was_heading = False
+
         if hasattr(elem, 'tail') and elem.tail:
             text.append(elem.tail)

View File

@@ -637,7 +637,7 @@ class DeviceMixin(object): # {{{
         self.device_manager.mount_device(kls=FOLDER_DEVICE, kind='folder', path=dir)

     def connect_to_bambook(self):
         self.device_manager.mount_device(kls=BAMBOOKWifi, kind='bambook',
                                          path=BAMBOOK.settings().extra_customization)

     def connect_to_itunes(self):
@@ -1266,8 +1266,8 @@ class DeviceMixin(object): # {{{
         # Force a reset if the caches are not initialized
         if reset or not hasattr(self, 'db_book_title_cache'):
             # Build a cache (map) of the library, so the search isn't O(n**2)
-            self.db_book_title_cache = {}
-            self.db_book_uuid_cache = {}
+            db_book_title_cache = {}
+            db_book_uuid_cache = {}

             # It might be possible to get here without having initialized the
             # library view. In this case, simply give up
             try:
@@ -1278,8 +1278,8 @@ class DeviceMixin(object): # {{{
             for id in db.data.iterallids():
                 mi = db.get_metadata(id, index_is_id=True)
                 title = clean_string(mi.title)
-                if title not in self.db_book_title_cache:
-                    self.db_book_title_cache[title] = \
+                if title not in db_book_title_cache:
+                    db_book_title_cache[title] = \
                         {'authors':{}, 'author_sort':{}, 'db_ids':{}}
                 # If there are multiple books in the library with the same title
                 # and author, then remember the last one. That is OK, because as
@@ -1287,12 +1287,14 @@ class DeviceMixin(object): # {{{
                 # as another.
                 if mi.authors:
                     authors = clean_string(authors_to_string(mi.authors))
-                    self.db_book_title_cache[title]['authors'][authors] = mi
+                    db_book_title_cache[title]['authors'][authors] = mi
                 if mi.author_sort:
                     aus = clean_string(mi.author_sort)
-                    self.db_book_title_cache[title]['author_sort'][aus] = mi
-                self.db_book_title_cache[title]['db_ids'][mi.application_id] = mi
-                self.db_book_uuid_cache[mi.uuid] = mi
+                    db_book_title_cache[title]['author_sort'][aus] = mi
+                db_book_title_cache[title]['db_ids'][mi.application_id] = mi
+                db_book_uuid_cache[mi.uuid] = mi
+
+            self.db_book_title_cache = db_book_title_cache
+            self.db_book_uuid_cache = db_book_uuid_cache

             # Now iterate through all the books on the device, setting the
             # in_library field. If the UUID matches a book in the library, then
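The pattern behind this change, reduced to its essentials: build the caches in locals and publish them on self only when complete, so anything reading the caches mid-rebuild sees either the old complete maps or the new ones, never a half-built one (a sketch, not the calibre classes):

class Cache(object):
    def rebuild(self, books):
        title_cache, uuid_cache = {}, {}        # build locally first...
        for title, uuid in books:
            title_cache.setdefault(title, []).append(uuid)
            uuid_cache[uuid] = title
        self.title_cache = title_cache          # ...then publish whole,
        self.uuid_cache = uuid_cache            # never a partial map

c = Cache()
c.rebuild([('Dune', 'u1'), ('Dune', 'u2')])
print(c.title_cache)  # -> {'Dune': ['u1', 'u2']}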

View File

@@ -5,11 +5,11 @@ __license__ = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-from PyQt4 import QtGui
-from PyQt4.Qt import Qt
+from PyQt4.Qt import Qt, QLineEdit, QComboBox, SIGNAL, QListWidgetItem

 from calibre.gui2 import error_dialog
 from calibre.gui2.device import device_name_for_plugboards
+from calibre.gui2.dialogs.template_dialog import TemplateDialog
 from calibre.gui2.preferences import ConfigWidgetBase, test_widget
 from calibre.gui2.preferences.plugboard_ui import Ui_Form
 from calibre.customize.ui import metadata_writers, device_plugins
@@ -17,6 +17,27 @@ from calibre.library.save_to_disk import plugboard_any_format_value, \
     plugboard_any_device_value, plugboard_save_to_disk_value
 from calibre.utils.formatter import validation_formatter

+class LineEditWithTextBox(QLineEdit):
+
+    '''
+    Extend the context menu of a QLineEdit to include more actions.
+    '''
+
+    def contextMenuEvent(self, event):
+        menu = self.createStandardContextMenu()
+        menu.addSeparator()
+
+        action_open_editor = menu.addAction(_('Open Editor'))
+
+        self.connect(action_open_editor, SIGNAL('triggered()'),
+                self.open_editor)
+        menu.exec_(event.globalPos())
+
+    def open_editor(self):
+        t = TemplateDialog(self, self.text())
+        if t.exec_():
+            self.setText(t.textbox.toPlainText())
+
 class ConfigWidget(ConfigWidgetBase, Ui_Form):

     def genesis(self, gui):
@@ -72,10 +93,10 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
         self.source_widgets = []
         self.dest_widgets = []
         for i in range(0, len(self.dest_fields)-1):
-            w = QtGui.QLineEdit(self)
+            w = LineEditWithTextBox(self)
             self.source_widgets.append(w)
             self.fields_layout.addWidget(w, 5+i, 0, 1, 1)
-            w = QtGui.QComboBox(self)
+            w = QComboBox(self)
             self.dest_widgets.append(w)
             self.fields_layout.addWidget(w, 5+i, 1, 1, 1)
@@ -297,7 +318,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
             for op in self.current_plugboards[f][d]:
                 ops.append('([' + op[0] + '] -> ' + op[1] + ')')
             txt = '%s:%s = %s\n'%(f, d, ', '.join(ops))
-            item = QtGui.QListWidgetItem(txt)
+            item = QListWidgetItem(txt)
             item.setData(Qt.UserRole, (f, d))
             self.existing_plugboards.addItem(item)
         self.refilling = False
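The new line edit can be tried outside the preferences dialog; a minimal self-contained sketch (PyQt4 assumed installed; QInputDialog stands in for TemplateDialog so the snippet has no calibre dependencies):

    import sys
    from PyQt4.Qt import QApplication, QLineEdit, QInputDialog, SIGNAL

    class EditorLineEdit(QLineEdit):
        '''Hypothetical stand-in for LineEditWithTextBox: adds an
        "Open Editor" action to the standard context menu.'''

        def contextMenuEvent(self, event):
            menu = self.createStandardContextMenu()
            menu.addSeparator()
            action = menu.addAction('Open Editor')
            # old-style signal connection, matching the patch above
            self.connect(action, SIGNAL('triggered()'), self.open_editor)
            menu.exec_(event.globalPos())

        def open_editor(self):
            # QInputDialog replaces TemplateDialog for this sketch
            text, ok = QInputDialog.getText(self, 'Editor', 'Template:',
                    text=self.text())
            if ok:
                self.setText(text)

    if __name__ == '__main__':
        app = QApplication(sys.argv)
        w = EditorLineEdit()
        w.show()
        app.exec_()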

View File

@@ -181,7 +181,7 @@ class ResultCache(SearchQueryParser): # {{{
         self.search_restriction = ''
         self.field_metadata = field_metadata
         self.all_search_locations = field_metadata.get_search_terms()
-        SearchQueryParser.__init__(self, self.all_search_locations)
+        SearchQueryParser.__init__(self, self.all_search_locations, optimize=True)
         self.build_date_relop_dict()
         self.build_numeric_relop_dict()

@@ -264,7 +264,7 @@ class ResultCache(SearchQueryParser): # {{{
                     '<=':[2, relop_le]
                 }

-    def get_dates_matches(self, location, query):
+    def get_dates_matches(self, location, query, candidates):
         matches = set([])
         if len(query) < 2:
             return matches
@@ -274,13 +274,15 @@ class ResultCache(SearchQueryParser): # {{{
         loc = self.field_metadata[location]['rec_index']

         if query == 'false':
-            for item in self._data:
+            for id_ in candidates:
+                item = self._data[id_]
                 if item is None: continue
                 if item[loc] is None or item[loc] <= UNDEFINED_DATE:
                     matches.add(item[0])
             return matches
         if query == 'true':
-            for item in self._data:
+            for id_ in candidates:
+                item = self._data[id_]
                 if item is None: continue
                 if item[loc] is not None and item[loc] > UNDEFINED_DATE:
                     matches.add(item[0])
@@ -319,7 +321,8 @@ class ResultCache(SearchQueryParser): # {{{
             field_count = query.count('-') + 1
         else:
             field_count = query.count('/') + 1
-        for item in self._data:
+        for id_ in candidates:
+            item = self._data[id_]
             if item is None or item[loc] is None: continue
             if relop(item[loc], qd, field_count):
                 matches.add(item[0])
@@ -335,7 +338,7 @@ class ResultCache(SearchQueryParser): # {{{
                  '<=':[2, lambda r, q: r <= q]
                 }

-    def get_numeric_matches(self, location, query, val_func = None):
+    def get_numeric_matches(self, location, query, candidates, val_func = None):
         matches = set([])
         if len(query) == 0:
             return matches
@@ -381,7 +384,8 @@ class ResultCache(SearchQueryParser): # {{{
         except:
             return matches

-        for item in self._data:
+        for id_ in candidates:
+            item = self._data[id_]
             if item is None:
                 continue
             v = val_func(item)
@@ -393,8 +397,13 @@ class ResultCache(SearchQueryParser): # {{{
                 matches.add(item[0])
         return matches

-    def get_matches(self, location, query, allow_recursion=True):
+    def get_matches(self, location, query, allow_recursion=True, candidates=None):
         matches = set([])
+        if candidates is None:
+            candidates = self.universal_set()
+        if len(candidates) == 0:
+            return matches
+
         if query and query.strip():
             # get metadata key associated with the search term. Eliminates
             # dealing with plurals and other aliases
@@ -476,7 +485,8 @@ class ResultCache(SearchQueryParser): # {{{
             else:
                 q = query

-            for item in self._data:
+            for id_ in candidates:
+                item = self._data[id_]
                 if item is None: continue
                 if col_datatype[loc] == 'bool': # complexity caused by the two-/three-value tweak
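The pattern repeated in every hunk above — loop over candidate ids and look rows up by id, instead of scanning all of self._data — is what makes the optimize flag pay off: each successive term of an AND only examines rows the earlier terms kept alive. A toy, calibre-free illustration:

    # `data` maps a book id to a row whose first element is the id,
    # mirroring the shape ResultCache uses.
    data = {1: [1, 'tolstoy'], 2: [2, 'austen'], 3: [3, 'tolkien']}

    def get_matches(query, candidates):
        matches = set([])
        for id_ in candidates:        # only rows still in play
            item = data[id_]
            if item is None:
                continue
            if query in item[1]:
                matches.add(item[0])
        return matches

    # An AND query narrows the candidate set between terms:
    left = get_matches('tol', set(data))   # set([1, 3])
    result = get_matches('kien', left)     # second term scans 2 rows, not 3
    print result                           # set([3])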

View File

@@ -151,6 +151,8 @@ class CustomColumns(object):
             return v

         def adapt_number(x, d):
+            if x is None:
+                return None
             if isinstance(x, (str, unicode, bytes)):
                 if x.lower() == 'none':
                     return None
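The two added lines matter because None has no lower(): before the guard, adapting an unset numeric custom-column value could raise AttributeError. A standalone re-creation of the fixed behaviour (simplified signature, not the calibre function itself):

    def adapt_number(x):
        if x is None:
            return None          # new guard: empty values stay empty
        if isinstance(x, basestring):
            if x.lower() == 'none':
                return None
            return float(x)
        return x

    assert adapt_number(None) is None    # no AttributeError any more
    assert adapt_number('none') is None
    assert adapt_number('2.5') == 2.5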

View File

@@ -256,7 +256,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             'pubdate',
             'flags',
             'uuid',
-            'has_cover'
+            'has_cover',
+            ('au_map', 'authors', 'author', 'aum_sortconcat(link.id, authors.name, authors.sort)')
         ]
         lines = []
         for col in columns:
@@ -273,9 +274,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

         self.FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'timestamp':3,
                 'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8,
-                'publisher':9, 'series_index':10,
-                'sort':11, 'author_sort':12, 'formats':13, 'isbn':14, 'path':15,
-                'lccn':16, 'pubdate':17, 'flags':18, 'uuid':19, 'cover':20}
+                'publisher':9, 'series_index':10, 'sort':11, 'author_sort':12,
+                'formats':13, 'isbn':14, 'path':15, 'lccn':16, 'pubdate':17,
+                'flags':18, 'uuid':19, 'cover':20, 'au_map':21}

         for k,v in self.FIELD_MAP.iteritems():
             self.field_metadata.set_field_record_index(k, v, prefer_custom=False)
@@ -687,9 +688,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         Convenience method to return metadata as a :class:`Metadata` object.
         Note that the list of formats is not verified.
         '''
+        row = self.data._data[idx] if index_is_id else self.data[idx]
+        fm = self.FIELD_MAP
+
         self.gm_count += 1
-        mi = self.data.get(idx, self.FIELD_MAP['all_metadata'],
-                row_is_id = index_is_id)
+        mi = row[self.FIELD_MAP['all_metadata']]
         if mi is not None:
             if get_cover:
                 # Always get the cover, because the value can be wrong if the
@@ -699,49 +702,46 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

         self.gm_missed += 1
         mi = Metadata(None)
-        self.data.set(idx, self.FIELD_MAP['all_metadata'], mi,
-                row_is_id = index_is_id)
+        self.data.set(idx, fm['all_metadata'], mi, row_is_id = index_is_id)

-        aut_list = self.authors_with_sort_strings(idx, index_is_id=index_is_id)
+        aut_list = row[fm['au_map']]
+        aut_list = [p.split(':::') for p in aut_list.split(':#:')]
         aum = []
         aus = {}
         for (author, author_sort) in aut_list:
             aum.append(author)
-            aus[author] = author_sort
-        mi.title = self.title(idx, index_is_id=index_is_id)
+            aus[author] = author_sort.replace('|', ',')
+        mi.title = row[fm['title']]
         mi.authors = aum
-        mi.author_sort = self.author_sort(idx, index_is_id=index_is_id)
+        mi.author_sort = row[fm['author_sort']]
         mi.author_sort_map = aus
-        mi.comments = self.comments(idx, index_is_id=index_is_id)
-        mi.publisher = self.publisher(idx, index_is_id=index_is_id)
-        mi.timestamp = self.timestamp(idx, index_is_id=index_is_id)
-        mi.pubdate = self.pubdate(idx, index_is_id=index_is_id)
-        mi.uuid = self.uuid(idx, index_is_id=index_is_id)
-        mi.title_sort = self.title_sort(idx, index_is_id=index_is_id)
-        mi.formats = self.formats(idx, index_is_id=index_is_id,
-                verify_formats=False)
-        if hasattr(mi.formats, 'split'):
-            mi.formats = mi.formats.split(',')
-        else:
-            mi.formats = None
-        tags = self.tags(idx, index_is_id=index_is_id)
+        mi.comments = row[fm['comments']]
+        mi.publisher = row[fm['publisher']]
+        mi.timestamp = row[fm['timestamp']]
+        mi.pubdate = row[fm['pubdate']]
+        mi.uuid = row[fm['uuid']]
+        mi.title_sort = row[fm['sort']]
+        formats = row[fm['formats']]
+        if not formats:
+            formats = None
+        mi.formats = formats
+        tags = row[fm['tags']]
         if tags:
             mi.tags = [i.strip() for i in tags.split(',')]
-        mi.series = self.series(idx, index_is_id=index_is_id)
+        mi.series = row[fm['series']]
         if mi.series:
-            mi.series_index = self.series_index(idx, index_is_id=index_is_id)
-        mi.rating = self.rating(idx, index_is_id=index_is_id)
-        mi.isbn = self.isbn(idx, index_is_id=index_is_id)
+            mi.series_index = row[fm['series_index']]
+        mi.rating = row[fm['rating']]
+        mi.isbn = row[fm['isbn']]
         id = idx if index_is_id else self.id(idx)
         mi.application_id = id
         mi.id = id
-        for key,meta in self.field_metadata.iteritems():
-            if meta['is_custom']:
-                mi.set_user_metadata(key, meta)
-                mi.set(key, val=self.get_custom(idx, label=meta['label'],
-                                                index_is_id=index_is_id),
-                            extra=self.get_custom_extra(idx, label=meta['label'],
-                                                        index_is_id=index_is_id))
+        for key, meta in self.field_metadata.custom_iteritems():
+            mi.set_user_metadata(key, meta)
+            mi.set(key, val=self.get_custom(idx, label=meta['label'],
+                                            index_is_id=index_is_id),
+                        extra=self.get_custom_extra(idx, label=meta['label'],
+                                                    index_is_id=index_is_id))
         if get_cover:
             mi.cover = self.cover(id, index_is_id=True, as_path=True)
         return mi
@@ -877,18 +877,17 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

     def formats(self, index, index_is_id=False, verify_formats=True):
         ''' Return available formats as a comma separated list or None if there are no available formats '''
-        id = index if index_is_id else self.id(index)
-        try:
-            formats = self.conn.get('SELECT format FROM data WHERE book=?', (id,))
-            formats = map(lambda x:x[0], formats)
-        except:
+        id_ = index if index_is_id else self.id(index)
+        formats = self.data.get(id_, self.FIELD_MAP['formats'], row_is_id=True)
+        if not formats:
             return None
         if not verify_formats:
-            return ','.join(formats)
+            return formats
+        formats = formats.split(',')
         ans = []
-        for format in formats:
-            if self.format_abspath(id, format, index_is_id=True) is not None:
-                ans.append(format)
+        for fmt in formats:
+            if self.format_abspath(id_, fmt, index_is_id=True) is not None:
+                ans.append(fmt)
         if not ans:
             return None
         return ','.join(ans)
@@ -1607,6 +1606,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                       ','.join([a.replace(',', '|') for a in authors]),
                       row_is_id=True)
         self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True)
+        aum = self.authors_with_sort_strings(id, index_is_id=True)
+        self.data.set(id, self.FIELD_MAP['au_map'],
+                      ':#:'.join([':::'.join((au.replace(',', '|'), aus)) for (au, aus) in aum]),
+                      row_is_id=True)

     def set_authors(self, id, authors, notify=True, commit=True):
         '''
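With the au_map column in place, get_metadata()'s author handling becomes a pure string decode: ':::' separates an author from its sort value, ':#:' separates authors, and '|' stands in for commas inside a name. A quick sketch with an invented value:

    raw = 'Austen| Jane:::Austen, Jane:#:Dickens| Charles:::Dickens, Charles'

    aut_list = [p.split(':::') for p in raw.split(':#:')]
    aum = []
    aus = {}
    for (author, author_sort) in aut_list:
        aum.append(author)                             # name keeps '|' here,
        aus[author] = author_sort.replace('|', ',')    # exactly as in the hunk above

    print aum   # ['Austen| Jane', 'Dickens| Charles']
    print aus   # {'Austen| Jane': 'Austen, Jane', 'Dickens| Charles': 'Dickens, Charles'}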

View File

@@ -180,6 +180,15 @@ class FieldMetadata(dict):
                            'search_terms':['author_sort'],
                            'is_custom':False,
                            'is_category':False}),
+            ('au_map',   {'table':None,
+                          'column':None,
+                          'datatype':'text',
+                          'is_multiple':',',
+                          'kind':'field',
+                          'name':None,
+                          'search_terms':[],
+                          'is_custom':False,
+                          'is_category':False}),
             ('comments', {'table':None,
                           'column':None,
                           'datatype':'text',
@@ -400,6 +409,12 @@ class FieldMetadata(dict):
         for key in self._tb_cats:
             yield (key, self._tb_cats[key])

+    def custom_iteritems(self):
+        for key in self._tb_cats:
+            fm = self._tb_cats[key]
+            if fm['is_custom']:
+                yield (key, self._tb_cats[key])
+
     def items(self):
         return list(self.iteritems())
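custom_iteritems() packages the is_custom filter that get_metadata() previously open-coded; a toy illustration of the same generator over mock data (not the real field table):

    class MiniFieldMetadata(object):
        '''Hypothetical stand-in: _tb_cats maps field keys to metadata dicts.'''
        def __init__(self):
            self._tb_cats = {
                'title': {'is_custom': False},
                '#read': {'is_custom': True, 'label': 'read'},
            }

        def custom_iteritems(self):
            # yield only user-defined (custom) columns
            for key in self._tb_cats:
                fm = self._tb_cats[key]
                if fm['is_custom']:
                    yield (key, fm)

    print list(MiniFieldMetadata().custom_iteritems())
    # [('#read', {'is_custom': True, 'label': 'read'})]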

View File

@@ -756,7 +756,7 @@ class BrowseServer(object):
             sort = self.browse_sort_book_list(items, list_sort)
             ids = [x[0] for x in items]
             html = render_book_list(ids, self.opts.url_prefix,
-                    suffix=_('in search')+': '+query)
+                    suffix=_('in search')+': '+xml(query))
             return self.browse_template(sort, category=False, initial_search=query).format(
                     title=_('Matching books'),
                     script='booklist();', main=html)
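Escaping the echoed query stops a crafted search string from injecting markup into the browse page. The standard library shows the same effect (calibre's xml() helper is assumed to escape comparably):

    from xml.sax.saxutils import escape

    query = '<script>alert(1)</script>'

    unsafe = 'in search: ' + query          # browser would treat this as markup
    safe = 'in search: ' + escape(query)    # rendered as literal text instead

    print safe   # in search: &lt;script&gt;alert(1)&lt;/script&gt;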

View File

@@ -87,6 +87,24 @@ class SortedConcatenate(object):
 class SafeSortedConcatenate(SortedConcatenate):
     sep = '|'

+class AumSortedConcatenate(object):
+    '''String concatenation aggregator for the author sort map'''
+    def __init__(self):
+        self.ans = {}
+
+    def step(self, ndx, author, sort):
+        if author is not None:
+            self.ans[ndx] = author + ':::' + sort
+
+    def finalize(self):
+        keys = self.ans.keys()
+        l = len(keys)
+        if l == 0:
+            return 'Unknown:::Unknown'
+        if l == 1:
+            return self.ans[keys[0]]
+        return ':#:'.join([self.ans[v] for v in sorted(keys)])
+
 class Connection(sqlite.Connection):

     def get(self, *args, **kw):
@@ -155,6 +173,7 @@ class DBThread(Thread):
                     c_ext_loaded = load_c_extensions(self.conn)
                 self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row)
                 self.conn.create_aggregate('concat', 1, Concatenate)
+                self.conn.create_aggregate('aum_sortconcat', 3, AumSortedConcatenate)
                 if not c_ext_loaded:
                     self.conn.create_aggregate('sortconcat', 2, SortedConcatenate)
                     self.conn.create_aggregate('sort_concat', 2, SafeSortedConcatenate)
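The aggregate can be exercised with the stdlib sqlite3 driver directly; a sketch (finalize slightly condensed, table invented):

    import sqlite3

    class AumSortedConcatenate(object):
        '''Same aggregator as above, run against an in-memory table.'''
        def __init__(self):
            self.ans = {}
        def step(self, ndx, author, sort):
            if author is not None:
                self.ans[ndx] = author + ':::' + sort
        def finalize(self):
            keys = sorted(self.ans.keys())
            if not keys:
                return 'Unknown:::Unknown'
            return ':#:'.join([self.ans[k] for k in keys])

    conn = sqlite3.connect(':memory:')
    conn.create_aggregate('aum_sortconcat', 3, AumSortedConcatenate)
    conn.execute('CREATE TABLE au (id INTEGER, name TEXT, sort TEXT)')
    conn.executemany('INSERT INTO au VALUES (?,?,?)',
                     [(2, 'Dickens| Charles', 'Dickens, Charles'),
                      (1, 'Austen| Jane', 'Austen, Jane')])
    row = conn.execute('SELECT aum_sortconcat(id, name, sort) FROM au').fetchone()
    print row[0]
    # Austen| Jane:::Austen, Jane:#:Dickens| Charles:::Dickens, Charles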

View File

@@ -5,8 +5,8 @@
 msgid ""
 msgstr ""
 "Project-Id-Version: calibre 0.7.38\n"
-"POT-Creation-Date: 2011-01-07 13:12+MST\n"
-"PO-Revision-Date: 2011-01-07 13:12+MST\n"
+"POT-Creation-Date: 2011-01-08 18:40+MST\n"
+"PO-Revision-Date: 2011-01-08 18:40+MST\n"
 "Last-Translator: Automatically generated\n"
 "Language-Team: LANGUAGE\n"
 "MIME-Version: 1.0\n"
@@ -2905,28 +2905,29 @@ msgstr ""
 msgid " (Preface)"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:26
+#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:27
 msgid ""
 "Paragraph structure.\n"
-"choices are ['auto', 'block', 'single', 'print', 'markdown']\n"
+"choices are ['auto', 'block', 'single', 'print', 'unformatted']\n"
 "* auto: Try to auto detect paragraph type.\n"
 "* block: Treat a blank line as a paragraph break.\n"
 "* single: Assume every line is a paragraph.\n"
-"* print: Assume every line starting with 2+ spaces or a tab starts a paragraph."
+"* print: Assume every line starting with 2+ spaces or a tab starts a paragraph.* unformatted: Most lines have hard line breaks, few/no spaces or indents."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:35
+#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:37
 msgid ""
-"Formatting used within the document.* auto: Try to auto detect the document formatting.\n"
-"* none: Do not modify the paragraph formatting. Everything is a paragraph.\n"
-"* markdown: Run the input though the markdown pre-processor. To learn more about markdown see"
+"Formatting used within the document.* auto: Automatically decide which formatting processor to use.\n"
+"* none: Do not process the document formatting. Everything is a paragraph and no styling is applied.\n"
+"* heuristic: Process using heuristics to determine formatting such as chapter headings and italic text.\n"
+"* markdown: Processing using markdown formatting. To learn more about markdown see"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:41
+#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:46
 msgid "Normally extra spaces are condensed into a single space. With this option all spaces will be displayed."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:44
+#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:49
 msgid "Do not insert a Table of Contents into the output text."
 msgstr ""

@@ -7225,7 +7226,7 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/password_ui.py:65
 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/scheduler_ui.py:219
 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/server_ui.py:130
-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:169
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:172
 msgid "&Show password"
 msgstr ""

@@ -10621,48 +10622,56 @@ msgstr ""
 msgid "Mail successfully sent"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:136
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:95
+msgid "OK to proceed?"
+msgstr ""
+
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:96
+msgid "This will display your email password on the screen. Is it OK to proceed?"
+msgstr ""
+
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:139
 msgid "If you are setting up a new hotmail account, you must log in to it once before you will be able to send mails."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:147
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:150
 msgid "Setup sending email using"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:149
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:152
 msgid "If you don't have an account, you can sign up for a free {name} email account at <a href=\"http://{url}\">http://{url}</a>. {extra}"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:156
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:159
 msgid "Your %s &email address:"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:157
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:160
 msgid "Your %s &username:"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:158
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:161
 msgid "Your %s &password:"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:176
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:179
 msgid "If you plan to use email to send books to your Kindle, remember to add the your %s email address to the allowed email addresses in your Amazon.com Kindle management page."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:183
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:186
 msgid "Setup"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:198
-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:205
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:201
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:208
 msgid "Bad configuration"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:199
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:202
 msgid "You must set the From email address"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:206
+#: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:209
 msgid "You must set the username and password for the mail server."
 msgstr ""

View File

@@ -98,9 +98,10 @@ class _Parser(object):
         m = 'Formatter: ' + message + _(' near ')
         if self.lex_pos > 0:
             m = '{0} {1}'.format(m, self.prog[self.lex_pos-1][1])
-        m = '{0} {1}'.format(m, self.prog[self.lex_pos][1])
-        if self.lex_pos < len(self.prog):
+        elif self.lex_pos < len(self.prog):
             m = '{0} {1}'.format(m, self.prog[self.lex_pos+1][1])
+        else:
+            m = '{0} {1}'.format(m, _('end of program'))
         raise ValueError(m)

     def token(self):
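The reworked branching picks exactly one neighbourhood token for the error message and says "end of program" when the scanner ran off the end. A toy version of the fixed logic (token list invented):

    def error_context(prog, lex_pos, message='syntax error'):
        # mirrors the branching in _Parser.error() after the patch
        m = 'Formatter: ' + message + ' near'
        if lex_pos > 0:
            m = '{0} {1}'.format(m, prog[lex_pos-1][1])
        elif lex_pos < len(prog):
            m = '{0} {1}'.format(m, prog[lex_pos+1][1])
        else:
            m = '{0} {1}'.format(m, 'end of program')
        return m

    prog = [(0, 'field'), (1, '(')]
    print error_context(prog, 1)   # reports the token before the cursor
    print error_context([], 0)     # reports 'end of program'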

View File

@@ -118,8 +118,9 @@ class SearchQueryParser(object):
                 failed.append(test[0])
         return failed

-    def __init__(self, locations, test=False):
+    def __init__(self, locations, test=False, optimize=False):
         self._tests_failed = False
+        self.optimize = optimize
         # Define a token
         standard_locations = map(lambda x : CaselessLiteral(x)+Suppress(':'),
                                  locations)
@@ -182,38 +183,52 @@ class SearchQueryParser(object):
         # empty the list of searches used for recursion testing
         self.recurse_level = 0
         self.searches_seen = set([])
-        return self._parse(query)
+        candidates = self.universal_set()
+        return self._parse(query, candidates)

     # this parse is used internally because it doesn't clear the
     # recursive search test list. However, we permit seeing the
     # same search a few times because the search might appear within
     # another search.
-    def _parse(self, query):
+    def _parse(self, query, candidates=None):
         self.recurse_level += 1
         res = self._parser.parseString(query)[0]
-        t = self.evaluate(res)
+        if candidates is None:
+            candidates = self.universal_set()
+        t = self.evaluate(res, candidates)
         self.recurse_level -= 1
         return t

     def method(self, group_name):
         return getattr(self, 'evaluate_'+group_name)

-    def evaluate(self, parse_result):
-        return self.method(parse_result.getName())(parse_result)
+    def evaluate(self, parse_result, candidates):
+        return self.method(parse_result.getName())(parse_result, candidates)

-    def evaluate_and(self, argument):
-        return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
+    def evaluate_and(self, argument, candidates):
+        # RHS checks only those items matched by LHS
+        # returns result of RHS check: RHmatches(LHmatches(c))
+        #  return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
+        l = self.evaluate(argument[0], candidates)
+        return l.intersection(self.evaluate(argument[1], l))

-    def evaluate_or(self, argument):
-        return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
+    def evaluate_or(self, argument, candidates):
+        # RHS checks only those elements not matched by LHS
+        # returns LHS union RHS: LHmatches(c) + RHmatches(c-LHmatches(c))
+        #  return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
+        l = self.evaluate(argument[0], candidates)
+        return l.union(self.evaluate(argument[1], candidates.difference(l)))

-    def evaluate_not(self, argument):
-        return self.universal_set().difference(self.evaluate(argument[0]))
+    def evaluate_not(self, argument, candidates):
+        # unary op checks only candidates. Result: list of items matching
+        # returns: c - matches(c)
+        #  return self.universal_set().difference(self.evaluate(argument[0]))
+        return candidates.difference(self.evaluate(argument[0], candidates))

-    def evaluate_parenthesis(self, argument):
-        return self.evaluate(argument[0])
+    def evaluate_parenthesis(self, argument, candidates):
+        return self.evaluate(argument[0], candidates)

-    def evaluate_token(self, argument):
+    def evaluate_token(self, argument, candidates):
         location = argument[0]
         query = argument[1]
         if location.lower() == 'search':
@@ -224,17 +239,27 @@ class SearchQueryParser(object):
                     raise ParseException(query, len(query), 'undefined saved search', self)
             if self.recurse_level > 5:
                 self.searches_seen.add(query)
-            return self._parse(saved_searches().lookup(query))
+            return self._parse(saved_searches().lookup(query), candidates)
         except: # convert all exceptions (e.g., missing key) to a parse error
             raise ParseException(query, len(query), 'undefined saved search', self)
-        return self.get_matches(location, query)
+        return self._get_matches(location, query, candidates)
+
+    def _get_matches(self, location, query, candidates):
+        if self.optimize:
+            return self.get_matches(location, query, candidates=candidates)
+        else:
+            return self.get_matches(location, query)

-    def get_matches(self, location, query):
+    def get_matches(self, location, query, candidates=None):
         '''
         Should return the set of matches for :param:`location` and :param:`query`.
+
+        The search must be performed over all entries if :param:`candidates` is
+        None, otherwise only over the items in candidates.

         :param:`location` is one of the items in :member:`SearchQueryParser.DEFAULT_LOCATIONS`.
         :param:`query` is a string literal.
+        :param:`candidates` is None or a subset of the set returned by :meth:`universal_set`.
         '''
         return set([])
@@ -561,7 +586,7 @@ class Tester(SearchQueryParser):
     def universal_set(self):
         return self._universal_set

-    def get_matches(self, location, query):
+    def get_matches(self, location, query, candidates=None):
         location = location.lower()
         if location in self.fields.keys():
             getter = operator.itemgetter(self.fields[location])
@@ -573,8 +598,13 @@ class Tester(SearchQueryParser):
         if not query:
             return set([])
         query = query.lower()
-        return set(key for key, val in self.texts.items() \
-                   if query and query in getattr(getter(val), 'lower', lambda : '')())
+        if candidates:
+            return set(key for key, val in self.texts.items() \
+                if key in candidates and query and query
+                        in getattr(getter(val), 'lower', lambda : '')())
+        else:
+            return set(key for key, val in self.texts.items() \
+                if query and query in getattr(getter(val), 'lower', lambda : '')())
@@ -592,6 +622,7 @@ class Tester(SearchQueryParser):

 def main(args=sys.argv):
+    print 'testing unoptimized'
     tester = Tester(['authors', 'author', 'series', 'formats', 'format',
         'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
         'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',
@@ -601,6 +632,16 @@ def main(args=sys.argv):
         print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
         return 1

+    print '\n\ntesting optimized'
+    tester = Tester(['authors', 'author', 'series', 'formats', 'format',
+        'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
+        'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',
+        'all', 'search'], test=True, optimize=True)
+    failed = tester.run_tests()
+    if tester._tests_failed or failed:
+        print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
+        return 1
+
     return 0

 if __name__ == '__main__':
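Tying the SearchQueryParser changes together: a subclass opts in by passing optimize=True and accepting the candidates keyword, falling back to a full scan when it is None. A minimal sketch against the interface shown in the hunks above (the corpus and field handling are invented; explicit 'all:' prefixes are used to stay within the declared locations):

    from calibre.utils.search_query_parser import SearchQueryParser

    class TinyParser(SearchQueryParser):
        '''Hypothetical three-book corpus with a single searchable field.'''

        TEXTS = {1: 'war and peace', 2: 'peace talks', 3: 'war stories'}

        def __init__(self):
            SearchQueryParser.__init__(self, ['all'], optimize=True)

        def universal_set(self):
            return set(self.TEXTS)

        def get_matches(self, location, query, candidates=None):
            if candidates is None:          # unoptimized callers still work
                candidates = self.universal_set()
            query = query.lower()
            return set(k for k in candidates if query in self.TEXTS[k])

    p = TinyParser()
    print p.parse('all:war and all:peace')
    # set([1]) -- the second term only scanned the rows the first term kept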