Merge branch 'kovidgoyal/master'

2025-07-09 03:04:10 -04:00 · 2013-11-12 13:35:08 +01:00 · 2013-11-12 13:35:08 +01:00 · 726ff97098
commit 726ff97098
parent b17ea76f4c 8265e95b15
36 changed files with 38030 additions and 213 deletions
--- a/4
+++ b/4
@ -34,6 +34,10 @@ License: LGPL-2.1
 The full text of the LGPL is distributed as in
 /usr/share/common-licenses/LGPL-2.1 on Debian systems.

+Files: src/regex/*
+Copyright: Matthew Barnett
+License: Python Software Foundation License
+
 Files: src/calibre/ebooks/hyphenate.py
 Copyright: Copyright (C) 1990, 2004, 2005 Gerard D.C. Kuiken.
 License: other
--- a/recipes/elmundo.recipe
+++ b/recipes/elmundo.recipe
@ -1,37 +1,43 @@
+# vim:fileencoding=utf-8

 __license__   = 'GPL v3'
-__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2013, Darko Miletic <darko.miletic at gmail.com>'
 '''
 elmundo.es
 '''
-import re
 import time
+from calibre.ptempfile import PersistentTemporaryFile
 from calibre.web.feeds.news import BasicNewsRecipe

 class ElMundo(BasicNewsRecipe):
-    title                 = 'El Mundo'
-    __author__            = 'Darko Miletic'
-    description           = 'Lider de informacion en espaniol'
-    publisher             = 'Unidad Editorial Informacion General S.L.U.'
-    category              = 'news, politics, Spain'
-    oldest_article        = 2
-    max_articles_per_feed = 100
-    no_stylesheets        = True
-    use_embedded_content  = False
-    encoding              = 'iso8859_15'
-    remove_javascript     = True
-    remove_empty_feeds    = True
-    language              = 'es'
-    masthead_url          = 'http://estaticos03.elmundo.es/elmundo/iconos/v4.x/v4.01/bg_h1.png'
-    publication_type      = 'newspaper'
-    extra_css             = """
-                               body{font-family: Arial,Helvetica,sans-serif}
-                               .metadata_noticia{font-size: small}
-                               .pestana_GDP{font-size: small; font-weight:bold}
-                               h1,h2,h3,h4,h5,h6,.subtitulo {color: #3F5974}
-                               .hora{color: red}
-                               .update{color: gray}
-                            """
+    title                     = 'El Mundo'
+    __author__                = 'Darko Miletic'
+    description               = u'Lider de informacion en español'
+    publisher                 = 'Unidad Editorial Informacion General S.L.U.'
+    category                  = 'news, politics, Spain'
+    oldest_article            = 2
+    max_articles_per_feed     = 100
+    no_stylesheets            = True
+    use_embedded_content      = False
+    encoding                  = 'iso8859_15'
+    remove_javascript         = True
+    remove_empty_feeds        = True
+    language                  = 'es'
+    ignore_duplicate_articles = {'url'}
+    masthead_url              = 'http://estaticos03.elmundo.es/assets/desktop/master/img/iconos/elmundo-portada.png'
+    publication_type          = 'newspaper'
+    articles_are_obfuscated   = True
+    temp_files                = []
+    needs_subscription        = 'optional'
+    LOGIN                     = 'https://seguro.elmundo.es/registro/login.html'
+    extra_css                 = """
+                                    body{font-family: Arial,Helvetica,sans-serif}
+                                    .metadata_noticia{font-size: small}
+                                    .pestana_GDP{font-size: small; font-weight:bold}
+                                    h1 {color: #333333; font-family: Georgia,"Times New Roman",Times,serif}
+                                    .hora{color: red}
+                                    .update{color: gray}
+                                    """

    conversion_options = {
                             'comments'  : description
@ -40,86 +46,96 @@ class ElMundo(BasicNewsRecipe):
                            ,'publisher' : publisher
                         }

-    keep_only_tags     = [dict(name='div', attrs={'class':'noticia'})]
-    remove_tags_before = dict(attrs={'class':['titular','antetitulo']     })
+    remove_tags_before = dict(attrs={'class':['titular','antetitulo','entrada']})
    remove_tags_after  = dict(name='div' , attrs={'id':['desarrollo_noticia','tamano']})
+    remove_tags        = [
+                           dict(name='div', attrs={'class':'comentarios'}),
+                           dict(name=['meta', 'link', 'iframe', 'object'])
+                         ]
    remove_attributes  = ['lang','border']
-    remove_tags = [
-                     dict(name='div', attrs={'class':['herramientas','publicidad_google','comenta','col col-2b','apoyos','no-te-pierdas']})
-                    ,dict(name='div', attrs={'class':['publicidad publicidad_cuerpo_noticia','comentarios_nav','mensaje_privado','interact']})
-                    ,dict(name='div', attrs={'class':['num_comentarios estirar']})
-                    ,dict(name='span', attrs={'class':['links_comentar']})
-                    ,dict(name='div', attrs={'id':['comentar']})
-                    ,dict(name='ul', attrs={'class':'herramientas' })
-                    ,dict(name=['object','link','embed','iframe','base','meta'])
-                  ]
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser(self)
+        if self.username is not None and self.password is not None:
+            br.open(self.LOGIN)
+            br.select_form(name='login')
+            br['nick' ] = self.username
+            br['clave'] = self.password
+            br.submit()
+        return br

    feeds = [
-              (u'Portada'         , u'http://estaticos.elmundo.es/elmundo/rss/portada.xml'       )                                      
-             ,(u'Deportes'        , u'http://estaticos.elmundo.es/elmundodeporte/rss/portada.xml')
-             ,(u'Econom\xeda'     , u'http://estaticos.elmundo.es/elmundo/rss/economia.xml'      )
-             ,(u'Espa\xf1a'       , u'http://estaticos.elmundo.es/elmundo/rss/espana.xml'        )
-             ,(u'Internacional'   , u'http://estaticos.elmundo.es/elmundo/rss/internacional.xml' )
-             ,(u'Cultura'         , u'http://estaticos.elmundo.es/elmundo/rss/cultura.xml'       )
-             ,(u'Ciencia/Ecolog\xeda', u'http://estaticos.elmundo.es/elmundo/rss/ciencia.xml'    )
-             ,(u'Comunicaci\xf3n' , u'http://estaticos.elmundo.es/elmundo/rss/comunicacion.xml'  )
-             ,(u'Televisi\xf3n'   , u'http://estaticos.elmundo.es/elmundo/rss/television.xml'    )
+              (u'Portada'         , u'http://elmundo.feedsportal.com/elmundo/rss/portada.xml'       )
+             ,(u'Deportes'        , u'http://elmundo.feedsportal.com/elmundodeporte/rss/portada.xml')
+             ,(u'Econom\xeda'     , u'http://elmundo.feedsportal.com/elmundo/rss/economia.xml'      )
+             ,(u'Espa\xf1a'       , u'http://elmundo.feedsportal.com/elmundo/rss/espana.xml'        )
+             ,(u'Internacional'   , u'http://elmundo.feedsportal.com/elmundo/rss/internacional.xml' )
+             ,(u'Cultura'         , u'http://elmundo.feedsportal.com/elmundo/rss/cultura.xml'       )
+             ,(u'Ciencia/Ecolog\xeda', u'http://elmundo.feedsportal.com/elmundo/rss/ciencia.xml'    )
+             ,(u'Comunicaci\xf3n' , u'http://elmundo.feedsportal.com/elmundo/rss/comunicacion.xml'  )
+             ,(u'Televisi\xf3n'   , u'http://elmundo.feedsportal.com/elmundo/rss/television.xml'    )

-             ,(u'Salud'           , u'http://estaticos.elmundo.es/elmundosalud/rss/portada.xml'  )
-             ,(u'Solidaridad'     , u'http://estaticos.elmundo.es/elmundo/rss/solidaridad.xml'   )
-             ,(u'Su vivienda'     , u'http://estaticos.elmundo.es/elmundo/rss/suvivienda.xml'    )             
-             ,(u'Motor'           , u'http://estaticos.elmundo.es/elmundomotor/rss/portada.xml'  )             
+             ,(u'Salud'           , u'http://elmundo.feedsportal.com/elmundosalud/rss/portada.xml'  )
+             ,(u'Solidaridad'     , u'http://elmundo.feedsportal.com/elmundo/rss/solidaridad.xml'   )
+             ,(u'Su vivienda'     , u'http://elmundo.feedsportal.com/elmundo/rss/suvivienda.xml'    )
+             ,(u'Motor'           , u'http://elmundo.feedsportal.com/elmundodeporte/rss/motor.xml'  )

-             ,(u'Madrid'          , u'http://estaticos.elmundo.es/elmundo/rss/madrid.xml'        )
-             ,(u'Barcelona'       , u'http://estaticos.elmundo.es/elmundo/rss/barcelona.xml'     )
-             ,(u'Pa\xeds Vasco'   , u'http://estaticos.elmundo.es/elmundo/rss/paisvasco.xml'     )	     
-             ,(u'Baleares'        , u'http://estaticos.elmundo.es/elmundo/rss/baleares.xml'      )
-	     ,(u'Castilla y Le\xf3n' , u'http://estaticos.elmundo.es/elmundo/rss/castillayleon.xml' )	     
-	     ,(u'Valladolid'      , u'http://estaticos.elmundo.es/elmundo/rss/valladolid.xml'    )
-	     ,(u'Valencia'        , u'http://estaticos.elmundo.es/elmundo/rss/valencia.xml'      )
-	     ,(u'Alicante'        , u'http://estaticos.elmundo.es/elmundo/rss/alicante.xml'      )
-	     ,(u'Castell\xf3n'    , u'http://estaticos.elmundo.es/elmundo/rss/castellon.xml'     )	
-	     ,(u'Andaluc\xeda'    , u'http://estaticos.elmundo.es/elmundo/rss/andalucia.xml'     )
-	     ,(u'Sevilla'         , u'http://estaticos.elmundo.es/elmundo/rss/andalucia_sevilla.xml'  )
-	     ,(u'M\xe1laga'       , u'http://estaticos.elmundo.es/elmundo/rss/andalucia_malaga.xml'   )
+             ,(u'Madrid'          , u'http://elmundo.feedsportal.com/elmundo/rss/madrid.xml'        )
+             ,(u'Barcelona'       , u'http://elmundo.feedsportal.com/elmundo/rss/barcelona.xml'     )
+             ,(u'Pa\xeds Vasco'   , u'http://elmundo.feedsportal.com/elmundo/rss/paisvasco.xml'     )
+             ,(u'Baleares'        , u'http://elmundo.feedsportal.com/elmundo/rss/baleares.xml'      )
+             ,(u'Castilla y Le\xf3n' , u'http://elmundo.feedsportal.com/elmundo/rss/castillayleon.xml' )
+             ,(u'Valladolid'      , u'http://elmundo.feedsportal.com/elmundo/rss/valladolid.xml'    )
+             ,(u'Valencia'        , u'http://elmundo.feedsportal.com/elmundo/rss/valencia.xml'      )
+             ,(u'Alicante'        , u'http://elmundo.feedsportal.com/elmundo/rss/alicante.xml'      )
+             ,(u'Castell\xf3n'    , u'http://elmundo.feedsportal.com/elmundo/rss/castellon.xml'     )
+             ,(u'Andaluc\xeda'    , u'http://elmundo.feedsportal.com/elmundo/rss/andalucia.xml'     )
+             ,(u'Sevilla'         , u'http://elmundo.feedsportal.com/elmundo/rss/andalucia_sevilla.xml'  )
+             ,(u'M\xe1laga'       , u'http://elmundo.feedsportal.com/elmundo/rss/andalucia_malaga.xml'   )
            ]

-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
-
    def get_article_url(self, article):
-        return article.get('guid',  None)
-
-
-    preprocess_regexps = [     
-                           # Para presentar la imagen de los videos incrustados                           
-
-                           (re.compile(r'var imagen', re.DOTALL|re.IGNORECASE), lambda match: '--></script><img src'),
-                           (re.compile(r'.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
-                           (re.compile(r'var video=', re.DOTALL|re.IGNORECASE), lambda match: '<script language="Javascript" type="text/javascript"><!--'),
-
-                           # Para que no salga la numeración de comentarios: 1, 2, 3 ...
-
-                           (re.compile(r'<ol>\n<li style="z-index:', re.DOTALL|re.IGNORECASE), lambda match: '<ul><li style="z-index:'),
-                           (re.compile(r'</ol>\n<div class="num_comentarios estirar">', re.DOTALL|re.IGNORECASE), lambda match: '</ul><div class="num_comentarios estirar">'),
-                         ]
+        realurl = article.get('guid',  None)
+        if '/album/' in realurl or '/envivos/' in realurl:
+            return None
+        return realurl

    # Obtener la imagen de portada
-
    def get_cover_url(self):
-       cover = None
+       cover = self.masthead_url
       st = time.localtime()
       year = str(st.tm_year)
       month = "%.2d" % st.tm_mon
       day = "%.2d" % st.tm_mday
-		#http://img.kiosko.net/2011/11/19/es/elmundo.750.jpg
       cover='http://img.kiosko.net/'+ year + '/' +  month + '/' + day +'/es/elmundo.750.jpg'
-       br = BasicNewsRecipe.get_browser(self)
       try:
-           br.open(cover)
+           self.browser.open(cover)
       except:
           self.log("\nPortada no disponible")
-           cover ='http://estaticos03.elmundo.es/elmundo/iconos/v4.x/v4.01/bg_h1.png'
       return cover
+
+    def get_obfuscated_article(self, url):
+        count = 0
+        tries = 5
+        html  = None
+        while (count < tries):
+            try:
+                response = self.browser.open(url)
+                html = response.read()
+                count = tries
+            except:
+                print "Retrying download..."
+            count += 1
+        if html is not None:
+            tfile = PersistentTemporaryFile('_fa.html')
+            tfile.write(html)
+            tfile.close()
+            self.temp_files.append(tfile)
+            return tfile.name
+        return None
+
+    def image_url_processor(self, baseurl, url):
+        if url.startswith('//'):
+           return 'http:' + url
+        return url
+
--- a/recipes/spiegelde.recipe
+++ b/recipes/spiegelde.recipe
@ -6,12 +6,16 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 spiegel.de
 '''

+import time, re
+from contextlib import closing
+from calibre import as_unicode
+from calibre.web.feeds import feed_from_xml, Feed
 from calibre.web.feeds.news import BasicNewsRecipe

 class Spiegel_ger(BasicNewsRecipe):
    title                 = 'Spiegel Online - German'
    __author__            = 'Darko Miletic'
-    description           = "Immer die neueste Meldung auf dem Schirm, sekundenaktuell und uebersichtlich: Mit dem RSS-Angebot von SPIEGEL ONLINE entgeht Ihnen keine wichtige Meldung mehr, selbst wenn Sie keinen Internet-Browser geoeffnet haben. Sie koennen unsere Nachrichten-Feeds ganz einfach abonnieren - unkompliziert, kostenlos und nach Ihren persoenlichen Themen-Vorlieben."
+    description           = "Immer die neueste Meldung auf dem Schirm, sekundenaktuell und uebersichtlich: Mit dem RSS-Angebot von SPIEGEL ONLINE entgeht Ihnen keine wichtige Meldung mehr, selbst wenn Sie keinen Internet-Browser geoeffnet haben. Sie koennen unsere Nachrichten-Feeds ganz einfach abonnieren - unkompliziert, kostenlos und nach Ihren persoenlichen Themen-Vorlieben."  # noqa
    publisher             = 'SPIEGEL ONLINE Gmbh'
    category              = 'SPIEGEL ONLINE, DER SPIEGEL, Nachrichten, News,Dienste, RSS, RSS, Feedreader, Newsfeed, iGoogle, Netvibes, Widget'
    oldest_article        = 7
@ -20,9 +24,18 @@ class Spiegel_ger(BasicNewsRecipe):
    lang                  = 'de-DE'
    no_stylesheets        = True
    use_embedded_content  = False
-    auto_cleanup = True
-    auto_cleanup_keep = '//*[@id="spArticleTopAsset"]'
    encoding              = 'cp1252'
+    keep_only_tags        = [
+        dict(name='h2', attrs={'class':'article-title'}),
+        dict(id=['js-article-top-wide-asset', 'js-article-column']),
+    ]
+    remove_tags = [
+        dict(attrs={'class':lambda x: x and 'asset-html-box' in x.split()}),
+        dict(attrs={'class':lambda x: x and 'article-social-bookmark' in x.split()}),
+        dict(attrs={'class':lambda x: x and 'article-newsfeed-box' in x.split()}),
+        dict(attrs={'class':lambda x: x and 'article-comments-box' in x.split()}),
+        dict(attrs={'class':lambda x: x and 'article-functions-bottom' in x.split()}),
+    ]

    conversion_options = {
                          'comment'          : description
@ -31,10 +44,42 @@ class Spiegel_ger(BasicNewsRecipe):
                        , 'language'         : lang
                        }

-
-
-
    feeds          = [(u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/index.rss')]

+    def get_article_url(self, *args):
+        url = BasicNewsRecipe.get_article_url(self, *args).replace('#', '/#')
+        ai = re.search(r'ai=(\d+)', url).group(1)
+        soup = self.index_to_soup(url)
+        a = soup.find('a', href=lambda x: x and ai in x)
+        return 'http://www.spiegel.de' + a['href']
+
+    def parse_feeds(self):
+        title, url = self.feeds[0]
+        self.report_progress(0, _('Fetching feed')+' %s...'%(title if title else url))
+        parsed_feeds = []
+        try:
+            with closing(self.browser.open(url)) as s:
+                raw = s.read()
+            raw = raw.replace(b'<guid>http://www.spiegel.de</guid>', b'')
+
+            parsed_feeds.append(feed_from_xml(raw, title=title, log=self.log, oldest_article=self.oldest_article,
+                                        max_articles_per_feed=self.max_articles_per_feed,
+                                        get_article_url=self.get_article_url))
+            if (self.delay > 0):
+                time.sleep(self.delay)
+        except Exception as err:
+            feed = Feed()
+            msg = 'Failed feed: %s'%(title if title else url)
+            feed.populate_from_preparsed_feed(msg, [])
+            feed.description = as_unicode(err)
+            parsed_feeds.append(feed)
+            self.log.exception(msg)
+
+        remove = [f for f in parsed_feeds if len(f) == 0 and
+                self.remove_empty_feeds]
+        for f in remove:
+            parsed_feeds.remove(f)
+
+        return parsed_feeds


--- a/setup/init.py
+++ b/setup/init.py
@ -13,7 +13,8 @@ iswindows = re.search('win(32|64)', sys.platform)
 isosx = 'darwin' in sys.platform
 isfreebsd = 'freebsd' in sys.platform
 isnetbsd = 'netbsd' in sys.platform
-isbsd = isnetbsd or isfreebsd
+isdragonflybsd = 'dragonfly' in sys.platform
+isbsd = isnetbsd or isfreebsd or isdragonflybsd
 islinux = not isosx and not iswindows and not isbsd
 SRC = os.path.abspath('src')
 sys.path.insert(0, SRC)
--- a/setup/extensions.py
+++ b/setup/extensions.py
@ -30,7 +30,6 @@ class Extension(object):
        return list(set([x if os.path.isabs(x) else os.path.join(SRC, x.replace('/',
            os.sep)) for x in paths]))

-
    def __init__(self, name, sources, **kwargs):
        self.name = name
        self.needs_cxx = bool([1 for x in sources if os.path.splitext(x)[1] in
@ -67,10 +66,15 @@ if iswindows:
    icu_libs = ['icudt', 'icuin', 'icuuc', 'icuio']
 if isosx:
    icu_libs = ['icucore']
-    icu_cflags = ['-DU_DISABLE_RENAMING'] # Needed to use system libicucore.dylib
+    icu_cflags = ['-DU_DISABLE_RENAMING']  # Needed to use system libicucore.dylib

 extensions = [

+    Extension('_regex',
+              ['regex/_regex.c', 'regex/_regex_unicode.c'],
+              headers=['regex/_regex.h']
+              ),
+
    Extension('speedup',
        ['calibre/utils/speedup.c'],
        ),
@ -127,7 +131,7 @@ extensions = [

    Extension('freetype',
        ['calibre/utils/fonts/freetype.cpp'],
-        inc_dirs = ft_inc_dirs,
+        inc_dirs=ft_inc_dirs,
        libraries=ft_libs,
        lib_dirs=ft_lib_dirs),

@ -171,23 +175,23 @@ extensions = [

    Extension('pictureflow',
                ['calibre/gui2/pictureflow/pictureflow.cpp'],
-                inc_dirs = ['calibre/gui2/pictureflow'],
-                headers = ['calibre/gui2/pictureflow/pictureflow.h'],
-                sip_files = ['calibre/gui2/pictureflow/pictureflow.sip']
+                inc_dirs=['calibre/gui2/pictureflow'],
+                headers=['calibre/gui2/pictureflow/pictureflow.h'],
+                sip_files=['calibre/gui2/pictureflow/pictureflow.sip']
                ),

    Extension('progress_indicator',
                ['calibre/gui2/progress_indicator/QProgressIndicator.cpp'],
-                inc_dirs = ['calibre/gui2/progress_indicator'],
-                headers = ['calibre/gui2/progress_indicator/QProgressIndicator.h'],
-                sip_files = ['calibre/gui2/progress_indicator/QProgressIndicator.sip']
+                inc_dirs=['calibre/gui2/progress_indicator'],
+                headers=['calibre/gui2/progress_indicator/QProgressIndicator.h'],
+                sip_files=['calibre/gui2/progress_indicator/QProgressIndicator.sip']
                ),

    Extension('qt_hack',
                ['calibre/ebooks/pdf/render/qt_hack.cpp'],
-                inc_dirs = qt_private_inc + ['calibre/ebooks/pdf/render', 'qt-harfbuzz/src'],
-                headers = ['calibre/ebooks/pdf/render/qt_hack.h'],
-                sip_files = ['calibre/ebooks/pdf/render/qt_hack.sip']
+                inc_dirs=qt_private_inc + ['calibre/ebooks/pdf/render', 'qt-harfbuzz/src'],
+                headers=['calibre/ebooks/pdf/render/qt_hack.h'],
+                sip_files=['calibre/ebooks/pdf/render/qt_hack.sip']
                ),

    Extension('unrar',
@ -200,7 +204,7 @@ extensions = [
               volume.o list.o find.o unpack.o cmddata.o filestr.o scantree.o
               '''.split()] + ['calibre/utils/unrar.cpp'],
              inc_dirs=['unrar'],
-              cflags = [('/' if iswindows else '-') + x for x in (
+              cflags=[('/' if iswindows else '-') + x for x in (
                  'DSILENT', 'DRARDLL', 'DUNRAR')] + (
                  [] if iswindows else ['-D_FILE_OFFSET_BITS=64',
                                        '-D_LARGEFILE_SOURCE']),
@ -436,9 +440,9 @@ class Build(Command):
            if iswindows:
                #manifest = dest+'.manifest'
                #cmd = [MT, '-manifest', manifest, '-outputresource:%s;2'%dest]
-                #self.info(*cmd)
-                #self.check_call(cmd)
-                #os.remove(manifest)
+                # self.info(*cmd)
+                # self.check_call(cmd)
+                # os.remove(manifest)
                for x in ('.exp', '.lib'):
                    x = os.path.splitext(dest)[0]+x
                    if os.path.exists(x):
@ -487,7 +491,7 @@ class Build(Command):
           "style/windowmanager.cpp",
        ]
        if not iswindows and not isosx:
-            headers.append( "style/shadowhelper.h")
+            headers.append("style/shadowhelper.h")
            sources.append('style/shadowhelper.cpp')

        pro = textwrap.dedent('''
@ -586,7 +590,7 @@ class Build(Command):
        sbf = self.j(src_dir, self.b(sipf)+'.sbf')
        if self.newer(sbf, [sipf]+ext.headers):
            exe = '.exe' if iswindows else ''
-            cmd = [pyqt.sip_bin+exe, '-w', '-c', src_dir, '-b', sbf, '-I'+\
+            cmd = [pyqt.sip_bin+exe, '-w', '-c', src_dir, '-b', sbf, '-I'+
                    pyqt.pyqt_sip_dir] + shlex.split(pyqt.pyqt_sip_flags) + [sipf]
            self.info(' '.join(cmd))
            self.check_call(cmd)
--- a/setup/installer/linux/freeze2.py
+++ b/setup/installer/linux/freeze2.py
@ -386,8 +386,13 @@ class LinuxFreeze(Command):
                    mod = __import__(sys.calibre_module, fromlist=[1])
                    func = getattr(mod, sys.calibre_function)
                    return func()
-                except SystemExit:
-                    raise
+                except SystemExit as err:
+                    if err.code is None:
+                        return 0
+                    if isinstance(err.code, int):
+                        return err.code
+                    print (err.code)
+                    return 1
                except:
                    import traceback
                    traceback.print_exc()
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -139,6 +139,7 @@ class Plugins(collections.Mapping):
                'woff',
                'unrar',
                'qt_hack',
+                '_regex'
            ]
        if iswindows:
            plugins.extend(['winutil', 'wpd', 'winfonts'])
--- a/src/calibre/debug.py
+++ b/src/calibre/debug.py
@ -62,6 +62,12 @@ Everything after the -- is passed to the script.
            help='Inspect the MOBI file(s) at the specified path(s)')
    parser.add_option('-t', '--tweak-book', action='store_true',
            help='Launch the calibre Tweak Book tool in debug mode.')
+    parser.add_option('-x', '--explode-book', default=None,
+            help='Explode the book (exports the book as a collection of HTML '
+            'files and metadata, which you can edit using standard HTML '
+            'editing tools, and then rebuilds the file from the edited HTML. '
+            'Makes no additional changes to the HTML, unlike a full calibre '
+            'conversion).')
    parser.add_option('-s', '--shutdown-running-calibre', default=False,
            action='store_true',
            help=_('Cause a running calibre instance, if any, to be'
@ -247,6 +253,9 @@ def main(args=sys.argv):
    elif opts.tweak_book:
        from calibre.gui2.tweak_book.main import main
        main(['ebook-tweak'] + args[1:])
+    elif opts.explode_book:
+        from calibre.ebooks.tweak import tweak
+        tweak(opts.explode_book)
    elif opts.test_build:
        from calibre.test_build import test
        test()
--- a/src/calibre/devices/idevice/libimobiledevice.py
+++ b/src/calibre/devices/idevice/libimobiledevice.py
@ -601,7 +601,7 @@ class libiMobileDevice():
        error = self.lib.afc_remove_path(byref(self.afc), str(path))

        if error:
-            self._log(" ERROR: %s" % self._afc_error(error))
+            self._log_error(" ERROR: %s path:%s" % (self._afc_error(error), repr(path)))

    def stat(self, path):
        '''
@ -650,7 +650,7 @@ class libiMobileDevice():

        error = self.lib.afc_client_free(byref(self.afc)) & 0xFFFF
        if error:
-            self._log(" ERROR: %s" % self._afc_error(error))
+            self._log_error(" ERROR: %s" % self._afc_error(error))

    def _afc_client_new(self):
        '''
@ -810,7 +810,7 @@ class libiMobileDevice():
        error = self.lib.afc_file_close(byref(self.afc),
                                        handle) & 0xFFFF
        if error:
-            self._log(" ERROR: %s" % self._afc_error(error))
+            self._log_error(" ERROR: %s handle:%s" % (self._afc_error(error), handle))

    def _afc_file_open(self, filename, mode='r'):
        '''
@ -850,7 +850,7 @@ class libiMobileDevice():
                                           byref(handle)) & 0xFFFF

        if error:
-            self._log(" ERROR: %s" % self._afc_error(error))
+            self._log_error(" ERROR: %s filename:%s" % (self._afc_error(error), repr(filename)))
            return None
        else:
            return handle
@ -887,13 +887,13 @@ class libiMobileDevice():
                                           size,
                                           byref(bytes_read)) & 0xFFFF
            if error:
-                self._log(" ERROR: %s" % self._afc_error(error))
+                self._log_error(" ERROR: %s handle:%s" % (self._afc_error(error), handle))
            return data
        else:
            data = create_string_buffer(size)
            error = self.lib.afc_file_read(byref(self.afc), handle, byref(data), size, byref(bytes_read))
            if error:
-                self._log(" ERROR: %s" % self._afc_error(error))
+                self._log_error(" ERROR: %s handle:%s" % (self._afc_error(error), handle))
            return data.value

    def _afc_file_write(self, handle, content, mode='w'):
@ -933,7 +933,7 @@ class libiMobileDevice():
                                        len(content),
                                        byref(bytes_written)) & 0xFFFF
        if error:
-            self._log(" ERROR: %s" % self._afc_error(error))
+            self._log_error(" ERROR: %s handle:%s" % (self._afc_error(error), handle))
            return False
        return True

@ -1012,7 +1012,7 @@ class libiMobileDevice():
                                           byref(infolist)) & 0xFFFF
        file_stats = {}
        if error:
-            self._log(" ERROR: %s" % self._afc_error(error))
+            self._log_error(" ERROR: %s path:%s" % (self._afc_error(error), repr(path)))
        else:
            num_items = 0
            item_list = []
@ -1049,7 +1049,7 @@ class libiMobileDevice():
        error = self.lib.afc_make_directory(byref(self.afc),
                                            str(path)) & 0xFFFF
        if error:
-            self._log(" ERROR: %s" % self._afc_error(error))
+            self._log_error(" ERROR: %s path:%s" % (self._afc_error(error), repr(path)))

        return error

@ -1078,7 +1078,7 @@ class libiMobileDevice():
                                            str(directory),
                                            byref(dirs)) & 0xFFFF
        if error:
-            self._log(" ERROR: %s" % self._afc_error(error))
+            self._log_error(" ERROR: %s directory:%s" % (self._afc_error(error), repr(directory)))
        else:
            num_dirs = 0
            dir_list = []
@ -1126,7 +1126,7 @@ class libiMobileDevice():
        error = self.lib.house_arrest_client_free(byref(self.house_arrest)) & 0xFFFF
        if error:
            error = error - 0x10000
-            self._log(" ERROR: %s" % self._house_arrest_error(error))
+            self._log_error(" ERROR: %s" % self._house_arrest_error(error))

    def _house_arrest_client_new(self):
        '''
@ -1302,7 +1302,7 @@ class libiMobileDevice():

        if error:
            error = error - 0x10000
-            self._log(" ERROR: %s" % self._idevice_error(error))
+            self._log_error(" ERROR: %s" % self._idevice_error(error))

    def _idevice_get_device_list(self):
        '''
@ -1326,7 +1326,7 @@ class libiMobileDevice():
                self._log(" no connected devices")
            else:
                device_list = None
-                self._log(" ERROR: %s" % self._idevice_error(error))
+                self._log_error(" ERROR: %s" % self._idevice_error(error))
        else:
            index = 0
            while devices[index]:
@ -1859,6 +1859,20 @@ class libiMobileDevice():
        else:
            debug_print()

+    def _log_error(self, *args):
+        '''
+        Print error message with location regardless of self.verbose
+        '''
+        arg1 = arg2 = ''
+
+        if len(args) > 0:
+            arg1 = args[0]
+        if len(args) > 1:
+            arg2 = args[1]
+
+        debug_print(self.LOCATION_TEMPLATE.format(cls=self.__class__.__name__,
+            func=sys._getframe(1).f_code.co_name, arg1=arg1, arg2=arg2))
+
    def _log_location(self, *args):
        '''
        '''
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@ -803,6 +803,20 @@ def load_builtin_fonts():
                    if u'calibre Symbols' in fam:
                        _rating_font = u'calibre Symbols'

+def setup_gui_option_parser(parser):
+    if islinux:
+        parser.add_option('--detach', default=False, action='store_true',
+                          help='Detach from the controlling terminal, if any (linux only)')
+
+def detach_gui():
+    if islinux and not DEBUG and sys.stdout.isatty():
+        # We are a GUI process running in a terminal so detach from the controlling terminal
+        if os.fork() != 0:
+            raise SystemExit(0)
+        os.setsid()
+        so, se = file(os.devnull, 'a+'), file(os.devnull, 'a+', 0)
+        os.dup2(so.fileno(), sys.__stdout__.fileno())
+        os.dup2(se.fileno(), sys.__stderr__.fileno())

 class Application(QApplication):

@ -824,7 +838,6 @@ class Application(QApplication):
        self._file_open_paths = []
        self._file_open_lock = RLock()
        self.setup_styles(force_calibre_style)
-
    if DEBUG:
        def notify(self, receiver, event):
            if self.redirect_notify:
@ -862,27 +875,28 @@ class Application(QApplication):
        icon_map = {}
        pcache = {}
        for k, v in {
-                'DialogYesButton': u'ok.png',
-                'DialogNoButton': u'window-close.png',
-                'DialogCloseButton': u'window-close.png',
-                'DialogOkButton': u'ok.png',
-                'DialogCancelButton': u'window-close.png',
-                'DialogHelpButton': u'help.png',
-                'DialogOpenButton': u'document_open.png',
-                'DialogSaveButton': u'save.png',
-                'DialogApplyButton': u'ok.png',
-                'DialogDiscardButton': u'trash.png',
-                'MessageBoxInformation': u'dialog_information.png',
-                'MessageBoxWarning': u'dialog_warning.png',
-                'MessageBoxCritical': u'dialog_error.png',
-                'MessageBoxQuestion': u'dialog_question.png',
-                'BrowserReload': u'view-refresh.png',
-                # These two are used to calculate the sizes for the doc widget
-                # title bar buttons, therefore, they have to exist. The actual
-                # icon is not used.
-                'TitleBarCloseButton': u'window-close.png',
-                'TitleBarNormalButton': u'window-close.png',
-                }.iteritems():
+            'DialogYesButton': u'ok.png',
+            'DialogNoButton': u'window-close.png',
+            'DialogCloseButton': u'window-close.png',
+            'DialogOkButton': u'ok.png',
+            'DialogCancelButton': u'window-close.png',
+            'DialogHelpButton': u'help.png',
+            'DialogOpenButton': u'document_open.png',
+            'DialogSaveButton': u'save.png',
+            'DialogApplyButton': u'ok.png',
+            'DialogDiscardButton': u'trash.png',
+            'MessageBoxInformation': u'dialog_information.png',
+            'MessageBoxWarning': u'dialog_warning.png',
+            'MessageBoxCritical': u'dialog_error.png',
+            'MessageBoxQuestion': u'dialog_question.png',
+            'BrowserReload': u'view-refresh.png',
+            # These two are used to calculate the sizes for the doc widget
+            # title bar buttons, therefore, they have to exist. The actual
+            # icon is not used.
+            'TitleBarCloseButton': u'window-close.png',
+            'TitleBarNormalButton': u'window-close.png',
+            'DockWidgetCloseButton': u'window-close.png',
+        }.iteritems():
            if v not in pcache:
                p = I(v)
                if isinstance(p, bytes):
--- a/src/calibre/gui2/actions/view.py
+++ b/src/calibre/gui2/actions/view.py
@ -10,9 +10,9 @@ from functools import partial

 from PyQt4.Qt import Qt, QAction, pyqtSignal

-from calibre.constants import isosx
-from calibre.gui2 import error_dialog, Dispatcher, question_dialog, config, \
-        open_local_file, info_dialog
+from calibre.constants import isosx, iswindows
+from calibre.gui2 import (
+    error_dialog, Dispatcher, question_dialog, config, open_local_file, info_dialog)
 from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
 from calibre.utils.config import prefs, tweaks
 from calibre.ptempfile import PersistentTemporaryFile
@ -119,8 +119,26 @@ class ViewAction(InterfaceAction):
                self.gui.job_manager.launch_gui_app(viewer,
                        kwargs=dict(args=args))
            else:
+                if iswindows:
+                    from calibre.utils.file_associations import file_assoc_windows
+                    ext = name.rpartition('.')[-1]
+                    if ext:
+                        try:
+                            prog = file_assoc_windows(ext)
+                        except Exception:
+                            prog = None
+                        if prog and prog.lower().endswith('calibre.exe'):
+                            name = os.path.basename(name)
+                            return error_dialog(
+                                self.gui, _('No associated program'), _(
+                                    'Windows will try to open %s with calibre itself'
+                                    ' resulting in a duplicate in your calibre library. You'
+                                    ' should install some program capable of viewing this'
+                                    ' file format and tell windows to use that program to open'
+                                    ' files of this type.') % name, show=True)
+
                open_local_file(name)
-                time.sleep(2) # User feedback
+                time.sleep(2)  # User feedback
        finally:
            self.gui.unsetCursor()

@ -145,7 +163,8 @@ class ViewAction(InterfaceAction):
        all_fmts = set([])
        for x in formats:
            if x:
-                for f in x: all_fmts.add(f)
+                for f in x:
+                    all_fmts.add(f)
        if not all_fmts:
            error_dialog(self.gui,  _('Format unavailable'),
                    _('Selected books have no formats'), show=True)
@ -257,7 +276,7 @@ class ViewAction(InterfaceAction):
            self.build_menus(db)

    def view_device_book(self, path):
-        pt = PersistentTemporaryFile('_view_device_book'+\
+        pt = PersistentTemporaryFile('_view_device_book'+
                os.path.splitext(path)[1])
        self.persistent_files.append(pt)
        pt.close()
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@ -12,8 +12,9 @@ from calibre import prints, plugins, force_unicode
 from calibre.constants import (iswindows, __appname__, isosx, DEBUG, islinux,
        filesystem_encoding, get_portable_base)
 from calibre.utils.ipc import gui_socket_address, RC
-from calibre.gui2 import (ORG_NAME, APP_UID, initialize_file_icon_provider,
-    Application, choose_dir, error_dialog, question_dialog, gprefs)
+from calibre.gui2 import (
+    ORG_NAME, APP_UID, initialize_file_icon_provider, Application, choose_dir,
+    error_dialog, question_dialog, gprefs, detach_gui, setup_gui_option_parser)
 from calibre.gui2.main_window import option_parser as _option_parser
 from calibre.utils.config import prefs, dynamic

@ -46,6 +47,7 @@ path_to_ebook to the database.
            help=_('Cause a running calibre instance, if any, to be'
                ' shutdown. Note that if there are running jobs, they '
                'will be silently aborted, so use with care.'))
+    setup_gui_option_parser(parser)
    return parser

 def find_portable_library():
@ -84,6 +86,8 @@ def find_portable_library():
 def init_qt(args):
    parser = option_parser()
    opts, args = parser.parse_args(args)
+    if getattr(opts, 'detach', False):
+        detach_gui()
    find_portable_library()
    if opts.with_library is not None:
        libpath = os.path.expanduser(opts.with_library)
--- a/src/calibre/gui2/tweak_book/boss.py
+++ b/src/calibre/gui2/tweak_book/boss.py
@ -52,6 +52,7 @@ class Boss(QObject):
        fl.edit_file.connect(self.edit_file_requested)
        self.gui.central.current_editor_changed.connect(self.apply_current_editor_state)
        self.gui.central.close_requested.connect(self.editor_close_requested)
+        self.gui.central.search_panel.search_triggered.connect(self.search)

    def mkdtemp(self, prefix=''):
        self.container_count += 1
@ -258,6 +259,84 @@ class Boss(QObject):
            self.update_global_history_actions()
    # }}}

+    def mark_selected_text(self):
+        ' Mark the selected text in the current editor and, if something got marked, make the search panel search in it '
+        editor = self.gui.central.current_editor
+        if editor is None:
+            return
+        editor.mark_selected_text()
+        if editor.has_marked_text:
+            self.gui.central.search_panel.set_where('selected-text')
+
+    def search(self, action, overrides=None):
+        '''
+        Run a search/replace.
+
+        :param action: The action to perform. Only 'find' currently triggers
+            a search, any other value only runs the validation below.
+        :param overrides: Optional dictionary whose entries replace the
+            corresponding entries of the search panel's state for this run.
+        '''
+        sp = self.gui.central.search_panel
+        # Ensure the search panel is visible
+        sp.setVisible(True)
+        ed = self.gui.central.current_editor
+        # Find the name of the file open in the current editor, if any
+        name = None
+        for n, x in editors.iteritems():
+            if x is ed:
+                name = n
+                break
+        state = sp.state
+        if overrides:
+            state.update(overrides)
+        searchable_names = self.gui.file_list.searchable_names
+        where = state['where']
+        err = None
+        if name is None and where in {'current', 'selected-text'}:
+            err = _('No file is being edited.')
+        elif where == 'selected' and not searchable_names['selected']:
+            err = _('No files are selected in the Files Browser')
+        elif where == 'selected-text' and not ed.has_marked_text:
+            err = _('No text is marked. First select some text, and then use'
+                    ' The "Mark selected text" action in the Search menu to mark it.')
+        if not err and not state['find']:
+            err = _('No search query specified')
+        if err:
+            return error_dialog(self.gui, _('Cannot search'), err, show=True)
+        del err
+        # Stays None in the cases that are not implemented yet, so that
+        # do_find() can report them instead of failing on an unbound name
+        editor = None
+        if where == 'current':
+            files = [name]
+            editor = ed
+        elif where in {'styles', 'text', 'selected'}:
+            files = searchable_names[where]
+            if name in files:
+                editor = ed
+            else:
+                # Prefer the first of the files that is already open in an editor
+                common = set(editors).intersection(set(files))
+                if common:
+                    name = next(x for x in files if x in common)
+                    editor = editors[name]
+                    self.gui.central.show_editor(editor)
+                else:
+                    pass  # TODO: Find the first name with a match and open its editor
+        else:
+            files = [name]
+            pass  # marked text TODO: Implement this
+
+        def no_match():
+            return error_dialog(
+                self.gui, _('Not found'), _(
+                'No matches were found for %s') % state['find'], show=True)
+
+        pat = sp.get_regex(state)
+
+        def do_find():
+            if editor is None:
+                # One of the not yet implemented cases above
+                return error_dialog(
+                    self.gui, _('Cannot search'), _(
+                    'Searching in the chosen location is not supported yet'), show=True)
+            found = editor.find(pat)
+            if found:
+                return
+            if len(files) == 1:
+                if not state['wrap']:
+                    return no_match()
+                found = editor.find(pat, wrap=True)
+                if not found:
+                    return no_match()
+            else:
+                pass  # TODO: handle multiple file search
+
+        if action == 'find':
+            return do_find()
+
    def save_book(self):
        c = current_container()
        for name, ed in editors.iteritems():
@ -278,19 +357,26 @@ class Boss(QObject):
                     _('Saving of the book failed. Click "Show Details"'
                       ' for more information.'), det_msg=tb, show=True)

+    def init_editor(self, name, editor, data=None):
+        '''
+        Connect the signals of editor to this object, load data into it (when
+        data is not None) and add it to the central widget under name.
+        '''
+        editor.undo_redo_state_changed.connect(self.editor_undo_redo_state_changed)
+        editor.data_changed.connect(self.editor_data_changed)
+        editor.copy_available_state_changed.connect(self.editor_copy_available_state_changed)
+        if data is not None:
+            editor.data = data
+        # NOTE(review): connected only after the data has been set, presumably
+        # so that the initial load is not reported as a modification -- confirm
+        editor.modification_state_changed.connect(self.editor_modification_state_changed)
+        self.gui.central.add_editor(name, editor)
+
    def edit_file(self, name, syntax):
        editor = editors.get(name, None)
        if editor is None:
            editor = editors[name] = editor_from_syntax(syntax, self.gui.editor_tabs)
-            editor.undo_redo_state_changed.connect(self.editor_undo_redo_state_changed)
-            editor.data_changed.connect(self.editor_data_changed)
-            editor.copy_available_state_changed.connect(self.editor_copy_available_state_changed)
-            c = current_container()
-            with c.open(name) as f:
-                editor.data = c.decode(f.read())
-            editor.modification_state_changed.connect(self.editor_modification_state_changed)
-            self.gui.central.add_editor(name, editor)
-        self.gui.central.show_editor(editor)
+            data = current_container().raw_data(name)
+            self.init_editor(name, editor, data)
+        self.show_editor(name)
+
+    def show_editor(self, name):
+        ' Show the editor for the file name and give it the focus '
+        editor = editors[name]
+        self.gui.central.show_editor(editor)
+        editor.set_focus()

    def edit_file_requested(self, name, syntax, mime):
        if name in editors:
--- a/src/calibre/gui2/tweak_book/editor/init.py
+++ b/src/calibre/gui2/tweak_book/editor/init.py
@ -6,6 +6,8 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

+from PyQt4.Qt import QTextCharFormat
+
 from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
 from calibre.ebooks.oeb.polish.container import guess_type

@ -25,3 +27,11 @@ def editor_from_syntax(syntax, parent=None):
    from calibre.gui2.tweak_book.editor.widget import Editor
    return Editor(syntax, parent=parent)

+SYNTAX_PROPERTY = QTextCharFormat.UserProperty
+
+class SyntaxTextCharFormat(QTextCharFormat):
+
+    ' A QTextCharFormat that has SYNTAX_PROPERTY set to True on creation '
+
+    def __init__(self, *args, **kwargs):
+        super(SyntaxTextCharFormat, self).__init__(*args, **kwargs)
+        self.setProperty(SYNTAX_PROPERTY, True)
+
--- a/src/calibre/gui2/tweak_book/editor/syntax/css.py
+++ b/src/calibre/gui2/tweak_book/editor/syntax/css.py
@ -8,10 +8,9 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

 import re

+from calibre.gui2.tweak_book.editor import SyntaxTextCharFormat
 from calibre.gui2.tweak_book.editor.syntax.base import SyntaxHighlighter

-from PyQt4.Qt import QTextCharFormat
-
 space_pat = re.compile(r'[ \n\t\r\f]+')
 cdo_pat = re.compile(r'/\*')
 sheet_tokens = [(re.compile(k), v, n) for k, v, n in [
@ -242,7 +241,7 @@ def create_formats(highlighter):
        'unknown-normal': _('Invalid text'),
        'unterminated-string': _('Unterminated string'),
    }.iteritems():
-        f = formats[name] = QTextCharFormat(formats['error'])
+        f = formats[name] = SyntaxTextCharFormat(formats['error'])
        f.setToolTip(msg)
    return formats

--- a/src/calibre/gui2/tweak_book/editor/syntax/html.py
+++ b/src/calibre/gui2/tweak_book/editor/syntax/html.py
@ -9,8 +9,9 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 import re
 from functools import partial

-from PyQt4.Qt import (QTextCharFormat, QFont)
+from PyQt4.Qt import QFont

+from calibre.gui2.tweak_book.editor import SyntaxTextCharFormat
 from calibre.gui2.tweak_book.editor.syntax.base import SyntaxHighlighter, run_loop
 from calibre.gui2.tweak_book.editor.syntax.css import create_formats as create_css_formats, state_map as css_state_map, State as CSSState

@ -109,7 +110,7 @@ def mark_nbsp(state, text, nbsp_format):
    ans = []
    fmt = None
    if state.bold or state.italic:
-        fmt = QTextCharFormat()
+        fmt = SyntaxTextCharFormat()
        if state.bold:
            fmt.setFontWeight(QFont.Bold)
        if state.italic:
@ -313,9 +314,9 @@ def create_formats(highlighter):
        'bad-closing': _('A closing tag must contain only the tag name and nothing else'),
        'no-attr-value': _('Expecting an attribute value'),
    }.iteritems():
-        f = formats[name] = QTextCharFormat(formats['error'])
+        f = formats[name] = SyntaxTextCharFormat(formats['error'])
        f.setToolTip(msg)
-    f = formats['title'] = QTextCharFormat()
+    f = formats['title'] = SyntaxTextCharFormat()
    f.setFontWeight(QFont.Bold)
    return formats

--- a/src/calibre/gui2/tweak_book/editor/text.py
+++ b/src/calibre/gui2/tweak_book/editor/text.py
@ -9,16 +9,20 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 import textwrap
 from future_builtins import map

+import regex
 from PyQt4.Qt import (
    QPlainTextEdit, QFontDatabase, QToolTip, QPalette, QFont,
    QTextEdit, QTextFormat, QWidget, QSize, QPainter, Qt, QRect)

 from calibre.gui2.tweak_book import tprefs
+from calibre.gui2.tweak_book.editor import SYNTAX_PROPERTY
 from calibre.gui2.tweak_book.editor.themes import THEMES, default_theme, theme_color
 from calibre.gui2.tweak_book.editor.syntax.base import SyntaxHighlighter
 from calibre.gui2.tweak_book.editor.syntax.html import HTMLHighlighter, XMLHighlighter
 from calibre.gui2.tweak_book.editor.syntax.css import CSSHighlighter

+PARAGRAPH_SEPARATOR = '\u2029'
+
 _dff = None
 def default_font_family():
    global _dff
@ -48,6 +52,8 @@ class TextEdit(QPlainTextEdit):

    def __init__(self, parent=None):
        QPlainTextEdit.__init__(self, parent)
+        self.current_cursor_line = None
+        self.current_search_mark = None
        self.highlighter = SyntaxHighlighter(self)
        self.apply_settings()
        self.setMouseTracking(True)
@ -106,6 +112,7 @@ class TextEdit(QPlainTextEdit):
        w = self.fontMetrics()
        self.number_width = max(map(lambda x:w.width(str(x)), xrange(10)))
        self.size_hint = QSize(100 * w.averageCharWidth(), 50 * w.height())
+        self.highlight_color = theme_color(theme, 'HighlightRegion', 'bg')
    # }}}

    def load_text(self, text, syntax='html'):
@ -126,6 +133,69 @@ class TextEdit(QPlainTextEdit):
        self.setTextCursor(c)
        self.ensureCursorVisible()

+    def update_extra_selections(self):
+        ' Apply the cursor line highlight and the search mark, whichever of them are set, as the extra selections '
+        candidates = (self.current_cursor_line, self.current_search_mark)
+        self.setExtraSelections([x for x in candidates if x is not None])
+
+    def mark_selected_text(self):
+        '''
+        Turn the current selection into the search mark, highlighted with
+        highlight_color, and clear the selection. With no selection, any
+        existing search mark is removed instead.
+        '''
+        mark = QTextEdit.ExtraSelection()
+        mark.format.setBackground(self.highlight_color)
+        mark.cursor = self.textCursor()
+        has_selection = mark.cursor.hasSelection()
+        self.current_search_mark = mark if has_selection else None
+        if has_selection:
+            # Deselect the text using a fresh cursor, the mark keeps its own
+            cursor = self.textCursor()
+            cursor.clearSelection()
+            self.setTextCursor(cursor)
+        self.update_extra_selections()
+
+    def find(self, pat, wrap=False):
+        '''
+        Search for pat, starting from the current cursor position, and select
+        the match if one is found.
+
+        :param pat: A compiled regex pattern. If it has the regex.REVERSE flag
+            set, the text before the cursor is searched instead of the text
+            after it.
+        :param wrap: If True, search the other part of the document instead,
+            i.e. the part that a non-wrapping search from the cursor does not
+            cover.
+        :return: True if a non-empty match was found and selected, False
+            otherwise.
+        '''
+        reverse = pat.flags & regex.REVERSE
+        c = self.textCursor()
+        c.clearSelection()
+        # Extend the selection from the cursor to one end of the document, the
+        # selected text is what gets searched
+        pos = c.Start if reverse else c.End
+        if wrap:
+            pos = c.End if reverse else c.Start
+        c.movePosition(pos, c.KeepAnchor)
+        raw = unicode(c.selectedText()).replace(PARAGRAPH_SEPARATOR, '\n')
+        m = pat.search(raw)
+        if m is None:
+            return False
+        start, end = m.span()
+        if start == end:
+            # Empty matches are treated as no match
+            return False
+        # Convert the offsets into raw to positions in the document. The
+        # anchor is where the cursor was, so it is added whenever raw is the
+        # text after the cursor. When raw is the text before the cursor, the
+        # offsets are used unchanged.
+        # NOTE(review): assumes offsets into the python string and Qt cursor
+        # positions count characters the same way -- confirm for non-BMP text
+        if wrap:
+            if reverse:
+                textpos = c.anchor()
+                start, end = textpos + end, textpos + start
+        else:
+            if reverse:
+                # Put the cursor at the start of the match
+                start, end = end, start
+            else:
+                textpos = c.anchor()
+                start, end = textpos + start, textpos + end
+        c.clearSelection()
+        c.setPosition(start)
+        c.setPosition(end, c.KeepAnchor)
+        self.setTextCursor(c)
+        return True
+
+    def replace(self, pat, template):
+        '''
+        Replace the currently selected text with template expanded against
+        it. Returns False, leaving the text alone, unless pat matches the
+        whole of the selected text.
+        '''
+        cursor = self.textCursor()
+        selected = unicode(cursor.selectedText()).replace(PARAGRAPH_SEPARATOR, '\n')
+        match = pat.fullmatch(selected)
+        if match is not None:
+            cursor.insertText(match.expand(template))
+            return True
+        return False
+
    # Line numbers and cursor line {{{
    def highlight_cursor_line(self):
        sel = QTextEdit.ExtraSelection()
@ -133,7 +203,8 @@ class TextEdit(QPlainTextEdit):
        sel.format.setProperty(QTextFormat.FullWidthSelection, True)
        sel.cursor = self.textCursor()
        sel.cursor.clearSelection()
-        self.setExtraSelections([sel])
+        self.current_cursor_line = sel
+        self.update_extra_selections()
        # Update the cursor line's line number in the line number area
        try:
            self.line_number_area.update(0, self.last_current_lnum[0], self.line_number_area.width(), self.last_current_lnum[1])
@ -211,7 +282,7 @@ class TextEdit(QPlainTextEdit):
            return
        pos = cursor.positionInBlock()
        for r in cursor.block().layout().additionalFormats():
-            if r.start <= pos < r.start + r.length:
+            if r.start <= pos < r.start + r.length and r.format.property(SYNTAX_PROPERTY).toBool():
                return r.format

    def show_tooltip(self, ev):
--- a/src/calibre/gui2/tweak_book/editor/themes.py
+++ b/src/calibre/gui2/tweak_book/editor/themes.py
@ -8,7 +8,9 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

 from collections import namedtuple

-from PyQt4.Qt import (QColor, QTextCharFormat, QBrush, QFont, QApplication, QPalette)
+from PyQt4.Qt import (QColor, QBrush, QFont, QApplication, QPalette)
+
+from calibre.gui2.tweak_book.editor import SyntaxTextCharFormat

 underline_styles = {'single', 'dash', 'dot', 'dash_dot', 'dash_dot_dot', 'wave', 'spell'}

@ -32,6 +34,7 @@ SOLARIZED = \
    CursorLine   bg={base02}
    CursorColumn bg={base02}
    ColorColumn  bg={base02}
+    HighlightRegion bg={base00}
    MatchParen   fg={red} bg={base01} bold
    Pmenu        fg={base0} bg={base02}
    PmenuSel     fg={base01} bg={base2}
@ -66,6 +69,7 @@ THEMES = {
    CursorLine   bg={cursor_loc}
    CursorColumn bg={cursor_loc}
    ColorColumn  bg={cursor_loc}
+    HighlightRegion bg=323232
    MatchParen   fg=f6f3e8 bg=857b6f bold
    Pmenu        fg=f6f3e8 bg=444444
    PmenuSel     fg=yellow bg={identifier}
@ -104,6 +108,7 @@ THEMES = {
    CursorLine   bg={cursor_loc}
    CursorColumn bg={cursor_loc}
    ColorColumn  bg={cursor_loc}
+    HighlightRegion bg=E3F988
    MatchParen   fg=white bg=80a090 bold
    Pmenu        fg=white bg=808080
    PmenuSel     fg=white bg=808080
@ -130,7 +135,7 @@ THEMES = {
    Error        us=wave uc=red

    '''.format(
-        cursor_loc='white',
+        cursor_loc='F8DE7E',
        identifier='7b5694',
        comment='a0b0c0',
        string='4070a0',
@ -198,10 +203,10 @@ def u(x):
    if 'Dot' in x:
        return x + 'Line'
    return x + 'Underline'
-underline_styles = {x:getattr(QTextCharFormat, u(x)) for x in underline_styles}
+underline_styles = {x:getattr(SyntaxTextCharFormat, u(x)) for x in underline_styles}

 def highlight_to_char_format(h):
-    ans = QTextCharFormat()
+    ans = SyntaxTextCharFormat()
    if h.bold:
        ans.setFontWeight(QFont.Bold)
    if h.italic:
--- a/src/calibre/gui2/tweak_book/editor/widget.py
+++ b/src/calibre/gui2/tweak_book/editor/widget.py
@ -58,12 +58,28 @@ class Editor(QMainWindow):
        if current != raw:
            self.editor.replace_text(raw)

+    def set_focus(self):
+        ' Give the focus to the editor widget, with Qt.OtherFocusReason as the reason '
+        self.editor.setFocus(Qt.OtherFocusReason)
+
    def undo(self):
        self.editor.undo()

    def redo(self):
        self.editor.redo()

+    def mark_selected_text(self):
+        ' Delegates to mark_selected_text() of the editor widget '
+        self.editor.mark_selected_text()
+
+    def find(self, *args, **kwargs):
+        ' Delegates to find() of the editor widget, passing all arguments through and returning its result '
+        return self.editor.find(*args, **kwargs)
+
+    def replace(self, *args, **kwargs):
+        ' Delegates to replace() of the editor widget, passing all arguments through and returning its result '
+        return self.editor.replace(*args, **kwargs)
+
+    @property
+    def has_marked_text(self):
+        ' True if the editor widget currently has a search mark '
+        return self.editor.current_search_mark is not None
+
    @dynamic_property
    def is_modified(self):
        def fget(self):
--- a/src/calibre/gui2/tweak_book/file_list.py
+++ b/src/calibre/gui2/tweak_book/file_list.py
@ -338,6 +338,26 @@ class FileList(QTreeWidget):
        syntax = {'text':'html', 'styles':'css'}.get(category, None)
        self.edit_file.emit(name, syntax, mime)

+    @property
+    def all_files(self):
+        ' Iterator over all the children of all the items in self.categories '
+        return (
+            cat.child(idx)
+            for cat in self.categories.itervalues()
+            for idx in xrange(cat.childCount())
+        )
+
+    @property
+    def searchable_names(self):
+        '''
+        The names of the files that can be searched, as a dictionary with
+        three lists:
+
+        text: All files in the text category
+        styles: All files in the styles category
+        selected: All currently selected files that are either in the text or
+            styles categories or are in the misc category with the mimetype
+            of an OPF, NCX, TXT or XML file
+        '''
+        ans = {'text':[], 'styles':[], 'selected':[]}
+        # Does not depend on the item, so it is computed only once
+        misc_mimes = {guess_type('a.'+x) for x in ('opf', 'ncx', 'txt', 'xml')}
+        for item in self.all_files:
+            category = unicode(item.data(0, CATEGORY_ROLE).toString())
+            mime = unicode(item.data(0, MIME_ROLE).toString())
+            name = unicode(item.data(0, NAME_ROLE).toString())
+            ok = category in {'text', 'styles'}
+            if ok:
+                ans[category].append(name)
+            elif category == 'misc':
+                ok = mime in misc_mimes
+            if ok and item.isSelected():
+                ans['selected'].append(name)
+        return ans
+
 class FileListWidget(QWidget):

    delete_requested = pyqtSignal(object, object)
@ -359,4 +379,7 @@ class FileListWidget(QWidget):
    def build(self, container, preserve_state=True):
        self.file_list.build(container, preserve_state=preserve_state)

+    @property
+    def searchable_names(self):
+        ' The searchable_names of the contained file list '
+        return self.file_list.searchable_names

--- a/src/calibre/gui2/tweak_book/main.py
+++ b/src/calibre/gui2/tweak_book/main.py
@ -11,17 +11,20 @@ import sys, os
 from PyQt4.Qt import QIcon

 from calibre.constants import islinux
-from calibre.gui2 import Application, ORG_NAME, APP_UID
+from calibre.gui2 import Application, ORG_NAME, APP_UID, setup_gui_option_parser, detach_gui
 from calibre.ptempfile import reset_base_dir
 from calibre.utils.config import OptionParser
 from calibre.gui2.tweak_book.ui import Main

 def option_parser():
-    return OptionParser('''\
+    parser =  OptionParser('''\
 %prog [opts] [path_to_ebook]

 Launch the calibre tweak book tool.
 ''')
+    setup_gui_option_parser(parser)
+    return parser
+

 def main(args=sys.argv):
    # Ensure we can continue to function if GUI is closed
@ -30,6 +33,8 @@ def main(args=sys.argv):

    parser = option_parser()
    opts, args = parser.parse_args(args)
+    if getattr(opts, 'detach', False):
+        detach_gui()
    override = 'calibre-tweak-book' if islinux else None
    app = Application(args, override_program_name=override)
    app.load_builtin_fonts()
--- a/src/calibre/gui2/tweak_book/search.py
+++ b/src/calibre/gui2/tweak_book/search.py
@ -0,0 +1,287 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+
+from PyQt4.Qt import (
+    QWidget, QToolBar, Qt, QHBoxLayout, QSize, QIcon, QGridLayout, QLabel,
+    QPushButton, pyqtSignal, QComboBox, QCheckBox, QSizePolicy)
+
+import regex
+
+from calibre.gui2.widgets2 import HistoryLineEdit2
+from calibre.gui2.tweak_book import tprefs
+
+REGEX_FLAGS = regex.VERSION1 | regex.WORD | regex.FULLCASE | regex.MULTILINE | regex.UNICODE
+
+# The search panel {{{
+
+class PushButton(QPushButton):
+
+    ' A button that, when clicked, emits the search_triggered signal of its parent with action as the argument '
+
+    def __init__(self, text, action, parent):
+        QPushButton.__init__(self, text, parent)
+
+        def trigger():
+            parent.search_triggered.emit(action)
+
+        self.clicked.connect(trigger)
+
+class SearchWidget(QWidget):
+
+    DEFAULT_STATE = {
+        'mode': 'normal',
+        'where': 'current',
+        'case_sensitive': False,
+        'direction': 'down',
+        'wrap': True,
+        'dot_all': False,
+    }
+
+    search_triggered = pyqtSignal(object)
+
+    def __init__(self, parent=None):
+        QWidget.__init__(self, parent)
+        self.l = l = QGridLayout(self)
+        l.setContentsMargins(0, 0, 0, 0)
+        self.setLayout(l)
+
+        self.fl = fl = QLabel(_('&Find:'))
+        fl.setAlignment(Qt.AlignRight | Qt.AlignCenter)
+        self.find_text = ft = HistoryLineEdit2(self)
+        ft.initialize('tweak_book_find_edit')
+        ft.returnPressed.connect(lambda : self.search_triggered.emit('find'))
+        fl.setBuddy(ft)
+        l.addWidget(fl, 0, 0)
+        l.addWidget(ft, 0, 1)
+
+        self.rl = rl = QLabel(_('&Replace:'))
+        rl.setAlignment(Qt.AlignRight | Qt.AlignCenter)
+        self.replace_text = rt = HistoryLineEdit2(self)
+        rt.initialize('tweak_book_replace_edit')
+        rl.setBuddy(rt)
+        l.addWidget(rl, 1, 0)
+        l.addWidget(rt, 1, 1)
+        l.setColumnStretch(1, 10)
+
+        self.fb = fb = PushButton(_('&Find'), 'find', self)
+        self.rfb = rfb = PushButton(_('Replace a&nd Find'), 'replace-find', self)
+        self.rb = rb = PushButton(_('&Replace'), 'replace', self)
+        self.rab = rab = PushButton(_('Replace &all'), 'replace-all', self)
+        l.addWidget(fb, 0, 2)
+        l.addWidget(rfb, 0, 3)
+        l.addWidget(rb, 1, 2)
+        l.addWidget(rab, 1, 3)
+
+        self.ml = ml = QLabel(_('&Mode:'))
+        self.ol = ol = QHBoxLayout()
+        ml.setAlignment(Qt.AlignRight | Qt.AlignCenter)
+        l.addWidget(ml, 2, 0)
+        l.addLayout(ol, 2, 1, 1, 3)
+        self.mode_box = mb = QComboBox(self)
+        mb.setSizePolicy(QSizePolicy.Maximum, QSizePolicy.Maximum)
+        mb.addItems([_('Normal'), _('Regex')])
+        mb.setToolTip('<style>dd {margin-bottom: 1.5ex}</style>' + _(
+            '''Select how the search expression is interpreted
+            <dl>
+            <dt><b>Normal</b></dt>
+            <dd>The search expression is treated as normal text, calibre will look for the exact text.</dd>
+            <dt><b>Regex</b></dt>
+            <dd>The search expression is interpreted as a regular expression. See the User Manual for more help on using regular expressions.</dd>
+            </dl>'''))
+        ml.setBuddy(mb)
+        ol.addWidget(mb)
+
+        self.where_box = wb = QComboBox(self)
+        wb.setSizePolicy(QSizePolicy.Maximum, QSizePolicy.Maximum)
+        wb.addItems([_('Current file'), _('All text files'), _('All style files'), _('Selected files'), _('Selected text')])
+        wb.setToolTip('<style>dd {margin-bottom: 1.5ex}</style>' + _(
+            '''
+            Where to search/replace:
+            <dl>
+            <dt><b>Current file</b></dt>
+            <dd>Search only inside the currently opened file</dd>
+            <dt><b>All text files</b></dt>
+            <dd>Search in all text (HTML) files</dd>
+            <dt><b>All style files</b></dt>
+            <dd>Search in all style (CSS) files</dd>
+            <dt><b>Selected files</b></dt>
+            <dd>Search in the files currently selected in the Files Browser</dd>
+            <dt><b>Selected text</b></dt>
+            <dd>Search only within the selected text in the currently opened file</dd>
+            </dl>'''))
+        ol.addWidget(wb)
+
+        self.direction_box = db = QComboBox(self)
+        db.setSizePolicy(QSizePolicy.Maximum, QSizePolicy.Maximum)
+        db.addItems([_('Down'), _('Up')])
+        db.setToolTip('<style>dd {margin-bottom: 1.5ex}</style>' + _(
+            '''
+            Direction to search:
+            <dl>
+            <dt><b>Down</b></dt>
+            <dd>Search for the next match from your current position</dd>
+            <dt><b>Up</b></dt>
+            <dd>Search for the previous match from your current position</dd>
+            </dl>'''))
+        ol.addWidget(db)
+
+        self.cs = cs = QCheckBox(_('&Case sensitive'))
+        cs.setSizePolicy(QSizePolicy.Maximum, QSizePolicy.Maximum)
+        ol.addWidget(cs)
+
+        self.wr = wr = QCheckBox(_('&Wrap'))
+        wr.setToolTip('<p>'+_('When searching reaches the end, wrap around to the beginning and continue the search'))
+        wr.setSizePolicy(QSizePolicy.Maximum, QSizePolicy.Maximum)
+        ol.addWidget(wr)
+
+        self.da = da = QCheckBox(_('&Dot all'))
+        da.setToolTip('<p>'+_("Make the '.' special character match any character at all, including a newline"))
+        da.setSizePolicy(QSizePolicy.Maximum, QSizePolicy.Maximum)
+        ol.addWidget(da)
+
+        self.mode_box.currentIndexChanged[int].connect(self.da.setVisible)
+
+        ol.addStretch(10)
+
+    @dynamic_property
+    def mode(self):
+        def fget(self):
+            return 'normal' if self.mode_box.currentIndex() == 0 else 'regex'
+        def fset(self, val):
+            self.mode_box.setCurrentIndex({'regex':1}.get(val, 0))
+            self.da.setVisible(self.mode == 'regex')
+        return property(fget=fget, fset=fset)
+
+    @dynamic_property
+    def find(self):
+        # The current contents of the find_text widget, as a unicode string
+        def getter(self):
+            text = self.find_text.text()
+            return unicode(text)
+        def setter(self, val):
+            self.find_text.setText(val)
+        return property(fget=getter, fset=setter)
+
+    @dynamic_property
+    def replace(self):
+        # The current contents of the replace_text widget, as a unicode string
+        def getter(self):
+            text = self.replace_text.text()
+            return unicode(text)
+        def setter(self, val):
+            self.replace_text.setText(val)
+        return property(fget=getter, fset=setter)
+
+    @dynamic_property
+    def where(self):
+        # Maps the index selected in where_box to the name of the search scope
+        index_to_name = {0:'current', 1:'text', 2:'styles', 3:'selected', 4:'selected-text'}
+        def getter(self):
+            index = self.where_box.currentIndex()
+            return index_to_name[index]
+        def setter(self, val):
+            # Invert the mapping; an unknown scope name raises KeyError
+            name_to_index = {name:index for index, name in index_to_name.iteritems()}
+            self.where_box.setCurrentIndex(name_to_index[val])
+        return property(fget=getter, fset=setter)
+
+    @dynamic_property
+    def case_sensitive(self):
+        # Checked state of the "Case sensitive" checkbox
+        def getter(self):
+            return self.cs.isChecked()
+        def setter(self, val):
+            checked = bool(val)
+            self.cs.setChecked(checked)
+        return property(fget=getter, fset=setter)
+
+    @dynamic_property
+    def direction(self):
+        # Search direction as a string: 'down' when the first entry of
+        # direction_box is selected, 'up' otherwise.
+        def getter(self):
+            if self.direction_box.currentIndex() == 0:
+                return 'down'
+            return 'up'
+        def setter(self, val):
+            # Any value other than 'up' selects the first (down) entry
+            if val == 'up':
+                index = 1
+            else:
+                index = 0
+            self.direction_box.setCurrentIndex(index)
+        return property(fget=getter, fset=setter)
+
+    @dynamic_property
+    def wrap(self):
+        # Checked state of the "Wrap" checkbox
+        def getter(self):
+            return self.wr.isChecked()
+        def setter(self, val):
+            checked = bool(val)
+            self.wr.setChecked(checked)
+        return property(fget=getter, fset=setter)
+
+    @dynamic_property
+    def dot_all(self):
+        # Checked state of the "Dot all" checkbox
+        def getter(self):
+            return self.da.isChecked()
+        def setter(self, val):
+            checked = bool(val)
+            self.da.setChecked(checked)
+        return property(fget=getter, fset=setter)
+
+    @dynamic_property
+    def state(self):
+        # A dict with one entry per key of DEFAULT_STATE, each read from (or
+        # written to) the attribute of the same name on this widget.
+        def getter(self):
+            snapshot = {}
+            for key in self.DEFAULT_STATE:
+                snapshot[key] = getattr(self, key)
+            return snapshot
+        def setter(self, val):
+            for key in self.DEFAULT_STATE:
+                # Keys missing from val leave the current setting untouched
+                if key not in val:
+                    continue
+                setattr(self, key, val[key])
+        return property(fget=getter, fset=setter)
+
+    def restore_state(self):
+        ''' Load the state stored under 'find-widget-state' in tprefs, using
+        DEFAULT_STATE when nothing is stored. A restored scope of
+        'selected-text' is replaced by the default scope. '''
+        saved = tprefs.get('find-widget-state', self.DEFAULT_STATE)
+        self.state = saved
+        if self.where != 'selected-text':
+            return
+        self.where = self.DEFAULT_STATE['where']
+
+    def save_state(self):
+        ''' Store the current state dict under 'find-widget-state' in tprefs '''
+        current = self.state
+        tprefs.set('find-widget-state', current)
+
+# }}}
+
+# Module level cache of compiled patterns, keyed by (flags, raw pattern).
+# SearchPanel.get_regex() looks patterns up here and adds missing ones;
+# entries are never removed.
+regex_cache = {}
+
+class SearchPanel(QWidget):
+
+    ''' A panel that lays out, side by side, a vertical toolbar containing a
+    close button and a SearchWidget. The widget's search_triggered signal is
+    re-emitted as this panel's search_triggered signal. '''
+
+    search_triggered = pyqtSignal(object)
+
+    def __init__(self, parent=None):
+        QWidget.__init__(self, parent)
+        layout = QHBoxLayout()
+        self.l = layout
+        self.setLayout(layout)
+        layout.setContentsMargins(0, 0, 0, 0)
+
+        # Fixed vertical toolbar that only holds the close action
+        toolbar = QToolBar(self)
+        self.t = toolbar
+        layout.addWidget(toolbar)
+        toolbar.setOrientation(Qt.Vertical)
+        toolbar.setIconSize(QSize(12, 12))
+        toolbar.setMovable(False)
+        toolbar.setFloatable(False)
+        close_action = toolbar.addAction(QIcon(I('window-close.png')), _('Close search panel'))
+        toolbar.cl = close_action
+        close_action.triggered.connect(self.hide_panel)
+
+        self.widget = SearchWidget(self)
+        layout.addWidget(self.widget)
+        # State persistence is delegated to the search widget
+        self.restore_state = self.widget.restore_state
+        self.save_state = self.widget.save_state
+        self.widget.search_triggered.connect(self.search_triggered)
+
+    def hide_panel(self):
+        ''' Hide the whole panel '''
+        self.setVisible(False)
+
+    def show_panel(self):
+        ''' Show the panel and give keyboard focus to the find text box '''
+        self.setVisible(True)
+        self.widget.find_text.setFocus(Qt.OtherFocusReason)
+
+    @property
+    def state(self):
+        ''' The state dict of the search widget with the current 'find' and
+        'replace' texts added to it '''
+        snapshot = self.widget.state
+        for key in ('find', 'replace'):
+            snapshot[key] = getattr(self.widget, key)
+        return snapshot
+
+    def set_where(self, val):
+        ''' Set the search scope of the search widget '''
+        self.widget.where = val
+
+    def get_regex(self, state):
+        ''' Return a compiled pattern for the search described by state (a
+        dict with the keys 'find', 'mode', 'case_sensitive', 'direction' and,
+        in regex mode, 'dot_all'). Compiled patterns are kept in regex_cache,
+        keyed by (flags, pattern). '''
+        pattern = state['find']
+        is_regex = state['mode'] == 'regex'
+        if not is_regex:
+            # Normal mode: search for the text literally
+            pattern = regex.escape(pattern, special_only=True)
+        flags = REGEX_FLAGS
+        if not state['case_sensitive']:
+            flags = flags | regex.IGNORECASE
+        if is_regex and state['dot_all']:
+            flags = flags | regex.DOTALL
+        if state['direction'] == 'up':
+            # Searching upwards is done with a reverse pattern
+            flags = flags | regex.REVERSE
+        key = (flags, pattern)
+        try:
+            return regex_cache[key]
+        except KeyError:
+            pass
+        compiled = regex_cache[key] = regex.compile(pattern, flags=flags)
+        return compiled
+
--- a/src/calibre/gui2/tweak_book/ui.py
+++ b/src/calibre/gui2/tweak_book/ui.py
@ -20,6 +20,7 @@ from calibre.gui2.tweak_book.job import BlockingJob
 from calibre.gui2.tweak_book.boss import Boss
 from calibre.gui2.tweak_book.keyboard import KeyboardManager
 from calibre.gui2.tweak_book.preview import Preview
+from calibre.gui2.tweak_book.search import SearchPanel

 class Central(QStackedWidget):

@ -56,6 +57,9 @@ class Central(QStackedWidget):
            self.modified_icon = QIcon(I('modified.png'))
        self.editor_tabs.currentChanged.connect(self.current_editor_changed)
        self.editor_tabs.tabCloseRequested.connect(self._close_requested)
+        self.search_panel = SearchPanel(self)
+        l.addWidget(self.search_panel)
+        self.restore_state()

    def _close_requested(self, index):
        editor = self.editor_tabs.widget(index)
@ -91,6 +95,17 @@ class Central(QStackedWidget):
    def current_editor(self):
        return self.editor_tabs.currentWidget()

+    def save_state(self):
+        ''' Remember whether the search panel is visible (under
+        'search-panel-visible' in tprefs) and let the panel save its own
+        state '''
+        panel = self.search_panel
+        tprefs.set('search-panel-visible', panel.isVisible())
+        panel.save_state()
+
+    def restore_state(self):
+        ''' Restore the visibility of the search panel (hidden when nothing
+        was saved) and let the panel restore its own state '''
+        panel = self.search_panel
+        visible = tprefs.get('search-panel-visible', False)
+        panel.setVisible(visible)
+        panel.restore_state()
+
+    def show_find(self):
+        ''' Show the search panel '''
+        panel = self.search_panel
+        panel.show_panel()
+
 class Main(MainWindow):

    APP_NAME = _('Tweak Book')
@ -108,6 +123,9 @@ class Main(MainWindow):
        self.blocking_job = BlockingJob(self)
        self.keyboard = KeyboardManager()

+        self.central = Central(self)
+        self.setCentralWidget(self.central)
+
        self.create_actions()
        self.create_toolbars()
        self.create_docks()
@ -120,9 +138,6 @@ class Main(MainWindow):
        f.setBold(True)
        self.status_bar.setFont(f)

-        self.central = Central(self)
-        self.setCentralWidget(self.central)
-
        self.boss(self)
        g = QApplication.instance().desktop().availableGeometry(self)
        self.resize(g.width()-50, g.height()-50)
@ -139,7 +154,7 @@ class Main(MainWindow):
        group = _('Global Actions')

        def reg(icon, text, target, sid, keys, description):
-            ac = actions[sid] = QAction(QIcon(I(icon)), text, self)
+            ac = actions[sid] = QAction(QIcon(I(icon)), text, self) if icon else QAction(text, self)
            ac.setObjectName('action-' + sid)
            if target is not None:
                ac.triggered.connect(target)
@ -197,7 +212,28 @@ class Main(MainWindow):
        # Preview actions
        group = _('Preview')
        self.action_auto_reload_preview = reg('auto-reload.png', _('Auto reload preview'), None, 'auto-reload-preview', (), _('Auto reload preview'))
-        self.action_reload_preview = reg('view-refresh.png', _('Refresh preview'), None, 'reload-preview', ('F5', 'Ctrl+R'), _('Refresh preview'))
+        self.action_reload_preview = reg('view-refresh.png', _('Refresh preview'), None, 'reload-preview', ('F5',), _('Refresh preview'))
+
+        # Search actions
+        group = _('Search')
+        self.action_find = reg('search.png', _('&Find/Replace'), self.central.show_find, 'find-replace', ('Ctrl+F',), _('Show the Find/Replace panel'))
+        # sreg() registers an action that calls self.boss.search(action, overrides).
+        # The description defaults to the action text with the '&' markers removed.
+        def sreg(name, text, action, overrides={}, keys=(), description=None, icon=None):
+            return reg(icon, text, partial(self.boss.search, action, overrides), name, keys, description or text.replace('&', ''))
+        # keys is always a tuple of shortcut strings. A single shortcut needs
+        # the trailing comma, since ('Ctrl+R') is just the string 'Ctrl+R'.
+        self.action_find_next = sreg('find-next', _('Find &Next'),
+                                     'find', {'direction':'down'}, ('F3', 'Ctrl+G'), _('Find next match'))
+        self.action_find_previous = sreg('find-previous', _('Find &Previous'),
+                                         'find', {'direction':'up'}, ('Shift+F3', 'Shift+Ctrl+G'), _('Find previous match'))
+        self.action_replace = sreg('replace', _('Replace'),
+                                   'replace', keys=('Ctrl+R',), description=_('Replace current match'))
+        self.action_replace_next = sreg('replace-next', _('&Replace and find next'),
+                                        'replace-find', {'direction':'down'}, ('Ctrl+]',), _('Replace current match and find next'))
+        self.action_replace_previous = sreg('replace-previous', _('R&eplace and find previous'),
+                                        'replace-find', {'direction':'up'}, ('Ctrl+[',), _('Replace current match and find previous'))
+        self.action_replace_all = sreg('replace-all', _('Replace &all'),
+                                   'replace-all', keys=('Ctrl+A',), description=_('Replace all matches'))
+        self.action_count = sreg('count-matches', _('&Count all'),
+                                   'count', keys=('Ctrl+N',), description=_('Count number of matches'))
+        self.action_mark = reg(None, _('&Mark selected text'), self.boss.mark_selected_text, 'mark-selected-text', ('Ctrl+Shift+M',), _('Mark selected text'))

    def create_menubar(self):
        b = self.menuBar()
@ -233,6 +269,22 @@ class Main(MainWindow):
            elif name.endswith('-bar'):
                t.addAction(ac)

+        e = b.addMenu(_('&Search'))
+        a = e.addAction
+        a(self.action_find)
+        e.addSeparator()
+        a(self.action_find_next)
+        a(self.action_find_previous)
+        e.addSeparator()
+        a(self.action_replace)
+        a(self.action_replace_next)
+        a(self.action_replace_previous)
+        a(self.action_replace_all)
+        e.addSeparator()
+        a(self.action_count)
+        e.addSeparator()
+        a(self.action_mark)
+
    def create_toolbars(self):
        def create(text, name):
            name += '-bar'
@ -303,6 +355,7 @@ class Main(MainWindow):
    def save_state(self):
        tprefs.set('main_window_geometry', bytearray(self.saveGeometry()))
        tprefs.set('main_window_state', bytearray(self.saveState(self.STATE_VERSION)))
+        self.central.save_state()

    def restore_state(self):
        geom = tprefs.get('main_window_geometry', None)
@ -311,6 +364,7 @@ class Main(MainWindow):
        state = tprefs.get('main_window_state', None)
        if state is not None:
            self.restoreState(state, self.STATE_VERSION)
+        self.central.restore_state()
        # We never want to start with the inspector showing
        self.inspector_dock.close()

--- a/src/calibre/gui2/viewer/main.py
+++ b/src/calibre/gui2/viewer/main.py
@ -18,7 +18,7 @@ from calibre.gui2.viewer.toc import TOC
 from calibre.gui2.widgets import ProgressIndicator
 from calibre.gui2.main_window import MainWindow
 from calibre.gui2 import (Application, ORG_NAME, APP_UID, choose_files,
-    info_dialog, error_dialog, open_url, available_height)
+    info_dialog, error_dialog, open_url, available_height, setup_gui_option_parser, detach_gui)
 from calibre.ebooks.oeb.iterator.book import EbookIterator
 from calibre.ebooks import DRMError
 from calibre.constants import islinux, filesystem_encoding
@ -1183,11 +1183,13 @@ def config(defaults=None):

 def option_parser():
+    # Build the command line parser for the viewer from its config object,
+    # then pass it through setup_gui_option_parser() before returning it.
+    # NOTE(review): presumably that call adds the shared GUI options (such as
+    # the 'detach' option that main() checks) -- confirm in calibre.gui2
    c = config()
-    return c.option_parser(usage=_('''\
+    parser = c.option_parser(usage=_('''\
 %prog [options] file

 View an ebook.
 '''))
+    setup_gui_option_parser(parser)
+    return parser


 def main(args=sys.argv):
@ -1197,6 +1199,8 @@ def main(args=sys.argv):

    parser = option_parser()
    opts, args = parser.parse_args(args)
+    if getattr(opts, 'detach', False):
+        detach_gui()
    try:
        open_at = float(opts.open_at)
    except:
--- a/src/calibre/test_build.py
+++ b/src/calibre/test_build.py
@ -15,6 +15,12 @@ Test a binary calibre build to ensure that all needed binary images/libraries ha
 import cStringIO
 from calibre.constants import plugins, iswindows

+def test_regex():
+    # Check that the bundled regex module can be imported and that a simple
+    # case insensitive findall() with two groups gives the expected result
+    import regex
+    expected = [('a', 'b'), ('A', 'B')]
+    found = regex.findall(r'(?i)(a)(b)', 'ab cd AB 1a1b')
+    if found != expected:
+        raise ValueError('regex module failed on a simple search')
+    print ('regex OK!')
+
 def test_html5lib():
    import html5lib.html5parser  # noqa
    from html5lib import parse  # noqa
@ -119,6 +125,7 @@ def test():
    test_woff()
    test_qt()
    test_html5lib()
+    test_regex()
    if iswindows:
        test_winutil()
        test_wpd()
--- a/src/calibre/utils/date.py
+++ b/src/calibre/utils/date.py
@ -69,6 +69,7 @@ local_tz = _local_tz = SafeLocalTimeZone()

 UNDEFINED_DATE = datetime(101,1,1, tzinfo=utc_tz)
 DEFAULT_DATE = datetime(2000,1,1, tzinfo=utc_tz)
+EPOCH = datetime(1970, 1, 1, tzinfo=_utc_tz)

 def is_date_undefined(qt_or_dt):
    d = qt_or_dt
@ -210,15 +211,23 @@ def now():
 def utcnow():
    return datetime.utcnow().replace(tzinfo=_utc_tz)

+
 def utcfromtimestamp(stamp):
+    ''' Convert stamp (seconds since the epoch) into a datetime with the UTC
+    timezone attached. If datetime.utcfromtimestamp() rejects the value, the
+    result is computed as EPOCH + timedelta(seconds=stamp) instead. If that
+    also fails, the traceback is printed and the current UTC time is
+    returned. '''
    try:
        return datetime.utcfromtimestamp(stamp).replace(tzinfo=_utc_tz)
    except ValueError:
-        # Raised if stamp if out of range for the platforms gmtime function
-        # We print the error for debugging, but otherwise ignore it
-        import traceback
-        traceback.print_exc()
-        return utcnow()
+        # Raised if stamp is out of range for the platform's gmtime function.
+        # For example, this happens with negative values on Windows
+        try:
+            return EPOCH + timedelta(seconds=stamp)
+        except (ValueError, OverflowError):
+            # datetime can only represent years between 1 and 9999
+            # We print the error for debugging, then fall through to utcnow()
+            import traceback
+            traceback.print_exc()
+    return utcnow()
+
+def timestampfromdt(dt, assume_utc=True):
+    ''' Return the number of seconds (as a float) between EPOCH and dt, after
+    converting dt with as_utc(). assume_utc is passed through to as_utc(). '''
+    since_epoch = as_utc(dt, assume_utc=assume_utc) - EPOCH
+    return since_epoch.total_seconds()

 # Format date functions

--- a/src/calibre/utils/file_associations.py
+++ b/src/calibre/utils/file_associations.py
@ -0,0 +1,15 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+
+def file_assoc_windows(ft):
+    ''' Query the Windows shell for the ASSOCSTR_EXECUTABLE string associated
+    with the file type ft. ft is the extension without the leading dot; it
+    is lower cased before the query. '''
+    # See the IQueryAssociations::GetString method documentation on MSDN
+    from win32com.shell import shell, shellcon
+    assoc = shell.AssocCreate()
+    extension = '.' + ft.lower()
+    assoc.Init(0, extension)
+    return assoc.GetString(0, shellcon.ASSOCSTR_EXECUTABLE)
+
--- a/src/calibre/utils/formatter_functions.py
+++ b/src/calibre/utils/formatter_functions.py
@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# coding: utf-8
+# vim:fileencoding=utf-8

 '''
 Created on 13 Jan 2011
@ -135,6 +135,7 @@ class FormatterFunction(object):
            return unicode(ret)

 class BuiltinFormatterFunction(FormatterFunction):
+
    def __init__(self):
        formatter_functions().register_builtin(self)
        eval_func = inspect.getmembers(self.__class__,
@ -355,7 +356,7 @@ class BuiltinLookup(BuiltinFormatterFunction):
            'variable save paths')

    def evaluate(self, formatter, kwargs, mi, locals, val, *args):
-        if len(args) == 2: # here for backwards compatibility
+        if len(args) == 2:  # here for backwards compatibility
            if val:
                return formatter.vformat('{'+args[0].strip()+'}', [], kwargs)
            else:
@ -744,11 +745,11 @@ class BuiltinFormatNumber(BuiltinFormatterFunction):
            v1 = float(val)
        except:
            return ''
-        try: # Try formatting the value as a float
+        try:  # Try formatting the value as a float
            return template.format(v1)
        except:
            pass
-        try: # Try formatting the value as an int
+        try:  # Try formatting the value as an int
            v2 = trunc(v1)
            if v2 == v1:
                return template.format(v2)
@ -1292,11 +1293,11 @@ class BuiltinTransliterate(BuiltinFormatterFunction):
    name = 'transliterate'
    arg_count = 1
    category = 'String manipulation'
-    __doc__ = doc = _('transliterate(a) -- Returns a string in a latin alphabet '
-                      'formed by approximating the sound of the words in the '
-                      'source string. For example, if the source is "Фёдор '
-                      'Миха́йлович Достоевский" the function returns "Fiodor '
-                      'Mikhailovich Dostoievskii".')
+    __doc__ = doc = _(u'transliterate(a) -- Returns a string in a latin alphabet '
+                      u'formed by approximating the sound of the words in the '
+                      u'source string. For example, if the source is "Фёдор '
+                      u'Миха́йлович Достоевский" the function returns "Fiodor '
+                      u'Mikhailovich Dostoievskii".')

    def evaluate(self, formatter, kwargs, mi, locals, source):
        from calibre.utils.filenames import ascii_text
@ -1329,6 +1330,7 @@ _formatter_builtins = [
 ]

 class FormatterUserFunction(FormatterFunction):
+
    def __init__(self, name, doc, arg_count, program_text):
        self.name = name
        self.doc = doc
@ -1338,9 +1340,9 @@ class FormatterUserFunction(FormatterFunction):
 tabs = re.compile(r'^\t*')
 def compile_user_function(name, doc, arg_count, eval_func):
    def replace_func(mo):
-        return  mo.group().replace('\t', '    ')
+        return mo.group().replace('\t', '    ')

-    func = '    ' + '\n    '.join([tabs.sub(replace_func, line )
+    func = '    ' + '\n    '.join([tabs.sub(replace_func, line)
                                   for line in eval_func.splitlines()])
    prog = '''
 from calibre.utils.formatter_functions import FormatterUserFunction
--- a/src/calibre/web/feeds/init.py
+++ b/src/calibre/web/feeds/init.py
@ -78,7 +78,6 @@ class Article(object):
            self._title = clean_ascii_chars(val)
        return property(fget=fget, fset=fset)

-
    def __repr__(self):
        return \
 (u'''\
@ -97,7 +96,7 @@ Has content : %s
        return repr(self)

    def is_same_as(self, other_article):
-        #if self.title != getattr(other_article, 'title', False):
+        # if self.title != getattr(other_article, 'title', False):
        #    return False
        if self.url:
            return self.url == getattr(other_article, 'url', False)
@ -137,7 +136,6 @@ class Feed(object):
                break
            self.parse_article(item)

-
    def populate_from_preparsed_feed(self, title, articles, oldest_article=7,
                           max_articles_per_feed=100):
        self.title      = unicode(title if title else _('Unknown feed'))
@ -176,7 +174,6 @@ class Feed(object):
            d = item.get('date', '')
            article.formatted_date = d

-
    def parse_article(self, item):
        self.id_counter += 1
        id = item.get('id', None)
@ -219,7 +216,8 @@ class Feed(object):
            self.articles.append(article)
        else:
            try:
-                self.logger.debug('Skipping article %s (%s) from feed %s as it is too old.'%(title, article.localtime.strftime('%a, %d %b, %Y %H:%M'), self.title))
+                self.logger.debug('Skipping article %s (%s) from feed %s as it is too old.'%
+                                  (title, article.localtime.strftime('%a, %d %b, %Y %H:%M'), self.title))
            except UnicodeDecodeError:
                if not isinstance(title, unicode):
                    title = title.decode('utf-8', 'replace')
@ -310,7 +308,7 @@ class FeedCollection(list):
        self.duplicates = duplicates
        print len(duplicates)
        print map(len, self)
-        #raise
+        # raise

    def find_article(self, article):
        for j, f in enumerate(self):
--- a/src/regex/README
+++ b/src/regex/README
@ -0,0 +1,5 @@
+This regex engine is taken, with thanks, from: https://code.google.com/p/mrab-regex-hg/
+
+It is licensed under the Python Software Foundation License
+
+Author: Matthew Barnett
--- a/src/regex/init.py
+++ b/src/regex/init.py
@ -0,0 +1,678 @@
+#
+# Secret Labs' Regular Expression Engine
+#
+# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
+#
+# This version of the SRE library can be redistributed under CNRI's
+# Python 1.6 license.  For any other use, please contact Secret Labs
+# AB (info@pythonware.com).
+#
+# Portions of this engine have been developed in cooperation with
+# CNRI.  Hewlett-Packard provided funding for 1.6 integration and
+# other compatibility work.
+#
+# 2010-01-16 mrab Python front-end re-written and extended
+
+r"""Support for regular expressions (RE).
+
+This module provides regular expression matching operations similar to those
+found in Perl. It supports both 8-bit and Unicode strings; both the pattern and
+the strings being processed can contain null bytes and characters outside the
+US ASCII range.
+
+Regular expressions can contain both special and ordinary characters. Most
+ordinary characters, like "A", "a", or "0", are the simplest regular
+expressions; they simply match themselves. You can concatenate ordinary
+characters, so last matches the string 'last'.
+
+There are a few differences between the old (legacy) behaviour and the new
+(enhanced) behaviour, which are indicated by VERSION0 or VERSION1.
+
+The special characters are:
+    "."                 Matches any character except a newline.
+    "^"                 Matches the start of the string.
+    "$"                 Matches the end of the string or just before the
+                        newline at the end of the string.
+    "*"                 Matches 0 or more (greedy) repetitions of the preceding
+                        RE. Greedy means that it will match as many repetitions
+                        as possible.
+    "+"                 Matches 1 or more (greedy) repetitions of the preceding
+                        RE.
+    "?"                 Matches 0 or 1 (greedy) of the preceding RE.
+    *?,+?,??            Non-greedy versions of the previous three special
+                        characters.
+    *+,++,?+            Possessive versions of the previous three special
+                        characters.
+    {m,n}               Matches from m to n repetitions of the preceding RE.
+    {m,n}?              Non-greedy version of the above.
+    {m,n}+              Possessive version of the above.
+    {...}               Fuzzy matching constraints.
+    "\\"                Either escapes special characters or signals a special
+                        sequence.
+    [...]               Indicates a set of characters. A "^" as the first
+                        character indicates a complementing set.
+    "|"                 A|B, creates an RE that will match either A or B.
+    (...)               Matches the RE inside the parentheses. The contents are
+                        captured and can be retrieved or matched later in the
+                        string.
+    (?flags-flags)      VERSION1: Sets/clears the flags for the remainder of
+                        the group or pattern; VERSION0: Sets the flags for the
+                        entire pattern.
+    (?:...)             Non-capturing version of regular parentheses.
+    (?>...)             Atomic non-capturing version of regular parentheses.
+    (?flags-flags:...)  Non-capturing version of regular parentheses with local
+                        flags.
+    (?P<name>...)       The substring matched by the group is accessible by
+                        name.
+    (?<name>...)        The substring matched by the group is accessible by
+                        name.
+    (?P=name)           Matches the text matched earlier by the group named
+                        name.
+    (?#...)             A comment; ignored.
+    (?=...)             Matches if ... matches next, but doesn't consume the
+                        string.
+    (?!...)             Matches if ... doesn't match next.
+    (?<=...)            Matches if preceded by ....
+    (?<!...)            Matches if not preceded by ....
+    (?(id)yes|no)       Matches yes pattern if group id matched, the (optional)
+                        no pattern otherwise.
+    (?|...|...)         (?|A|B), creates an RE that will match either A or B,
+                        but reuses capture group numbers across the
+                        alternatives.
+
+The fuzzy matching constraints are: "i" to permit insertions, "d" to permit
+deletions, "s" to permit substitutions, "e" to permit any of these. Limits are
+optional with "<=" and "<". If any type of error is provided then any type not
+provided is not permitted.
+
+A cost equation may be provided.
+
+Examples:
+    (?:fuzzy){i<=2}
+    (?:fuzzy){i<=1,s<=2,d<=1,1i+1s+1d<3}
+
+VERSION1: Set operators are supported, and a set can include nested sets. The
+set operators, in order of increasing precedence, are:
+    ||  Set union ("x||y" means "x or y").
+    ~~  (double tilde) Symmetric set difference ("x~~y" means "x or y, but not
+        both").
+    &&  Set intersection ("x&&y" means "x and y").
+    --  (double dash) Set difference ("x--y" means "x but not y").
+
+Implicit union, ie, simple juxtaposition like in [ab], has the highest
+precedence.
+
+VERSION0 and VERSION1:
+The special sequences consist of "\\" and a character from the list below. If
+the ordinary character is not on the list, then the resulting RE will match the
+second character.
+    \number         Matches the contents of the group of the same number if
+                    number is no more than 2 digits, otherwise the character
+                    with the 3-digit octal code.
+    \a              Matches the bell character.
+    \A              Matches only at the start of the string.
+    \b              Matches the empty string, but only at the start or end of a
+                    word.
+    \B              Matches the empty string, but not at the start or end of a
+                    word.
+    \d              Matches any decimal digit; equivalent to the set [0-9] when
+                    matching a bytestring or a Unicode string with the ASCII
+                    flag, or the whole range of Unicode digits when matching a
+                    Unicode string.
+    \D              Matches any non-digit character; equivalent to [^\d].
+    \f              Matches the formfeed character.
+    \g<name>        Matches the text matched by the group named name.
+    \G              Matches the empty string, but only at the position where
+                    the search started.
+    \L<name>        Named list. The list is provided as a keyword argument.
+    \m              Matches the empty string, but only at the start of a word.
+    \M              Matches the empty string, but only at the end of a word.
+    \n              Matches the newline character.
+    \N{name}        Matches the named character.
+    \p{name=value}  Matches the character if its property has the specified
+                    value.
+    \P{name=value}  Matches the character if its property hasn't the specified
+                    value.
+    \r              Matches the carriage-return character.
+    \s              Matches any whitespace character; equivalent to
+                    [ \t\n\r\f\v].
+    \S              Matches any non-whitespace character; equivalent to [^\s].
+    \t              Matches the tab character.
+    \uXXXX          Matches the Unicode codepoint with 4-digit hex code XXXX.
+    \UXXXXXXXX      Matches the Unicode codepoint with 8-digit hex code
+                    XXXXXXXX.
+    \v              Matches the vertical tab character.
+    \w              Matches any alphanumeric character; equivalent to
+                    [a-zA-Z0-9_] when matching a bytestring or a Unicode string
+                    with the ASCII flag, or the whole range of Unicode
+                    alphanumeric characters (letters plus digits plus
+                    underscore) when matching a Unicode string. With LOCALE, it
+                    will match the set [0-9_] plus characters defined as
+                    letters for the current locale.
+    \W              Matches the complement of \w; equivalent to [^\w].
+    \xXX            Matches the character with 2-digit hex code XX.
+    \X              Matches a grapheme.
+    \Z              Matches only at the end of the string.
+    \\              Matches a literal backslash.
+
+This module exports the following functions:
+    match      Match a regular expression pattern at the beginning of a string.
+    fullmatch  Match a regular expression pattern against all of a string.
+    search     Search a string for the presence of a pattern.
+    sub        Substitute occurrences of a pattern found in a string using a
+               template string.
+    subf       Substitute occurrences of a pattern found in a string using a
+               format string.
+    subn       Same as sub, but also return the number of substitutions made.
+    subfn      Same as subf, but also return the number of substitutions made.
+    split      Split a string by the occurrences of a pattern. VERSION1: will
+               split at zero-width match; VERSION0: won't split at zero-width
+               match.
+    splititer  Return an iterator yielding the parts of a split string.
+    findall    Find all occurrences of a pattern in a string.
+    finditer   Return an iterator yielding a match object for each match.
+    compile    Compile a pattern into a Pattern object.
+    purge      Clear the regular expression cache.
+    escape     Backslash all non-alphanumerics or special characters in a
+               string.
+
+Most of the functions support a concurrent parameter: if True, the GIL will be
+released during matching, allowing other Python threads to run concurrently. If
+the string changes during matching, the behaviour is undefined. This parameter
+is not needed when working on the builtin (immutable) string classes.
+
+Some of the functions in this module take flags as optional parameters. Most of
+these flags can also be set within an RE:
+    A   a   ASCII         Make \w, \W, \b, \B, \d, and \D match the
+                          corresponding ASCII character categories. Default
+                          when matching a bytestring.
+    B   b   BESTMATCH     Find the best fuzzy match (default is first).
+    D       DEBUG         Print the parsed pattern.
+    F   f   FULLCASE      Use full case-folding when performing
+                          case-insensitive matching in Unicode.
+    I   i   IGNORECASE    Perform case-insensitive matching.
+    L   L   LOCALE        Make \w, \W, \b, \B, \d, and \D dependent on the
+                          current locale. (One byte per character only.)
+    M   m   MULTILINE     "^" matches the beginning of lines (after a newline)
+                          as well as the string. "$" matches the end of lines
+                          (before a newline) as well as the end of the string.
+    E   e   ENHANCEMATCH  Attempt to improve the fit after finding the first
+                          fuzzy match.
+    R   r   REVERSE       Searches backwards.
+    S   s   DOTALL        "." matches any character at all, including the
+                          newline.
+    U   u   UNICODE       Make \w, \W, \b, \B, \d, and \D dependent on the
+                          Unicode locale. Default when matching a Unicode
+                          string.
+    V0  V0  VERSION0      Turn on the old legacy behaviour.
+    V1  V1  VERSION1      Turn on the new enhanced behaviour. This flag
+                          includes the FULLCASE flag.
+    W   w   WORD          Make \b and \B work with default Unicode word breaks
+                          and make ".", "^" and "$" work with Unicode line
+                          breaks.
+    X   x   VERBOSE       Ignore whitespace and comments for nicer looking REs.
+
+This module also defines an exception 'error'.
+
+"""
+
+# Public symbols.
+# NOTE: when str.format is unavailable, the code at the end of this module
+# removes every name starting with "subf" from this list.
+__all__ = ["compile", "escape", "findall", "finditer", "fullmatch", "match",
+  "purge", "search", "split", "splititer", "sub", "subf", "subfn", "subn",
+  "template", "Scanner", "A", "ASCII", "B", "BESTMATCH", "D", "DEBUG", "E",
+  "ENHANCEMATCH", "S", "DOTALL", "F", "FULLCASE", "I", "IGNORECASE", "L",
+  "LOCALE", "M", "MULTILINE", "R", "REVERSE", "T", "TEMPLATE", "U", "UNICODE",
+  "V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W", "WORD", "error",
+  "Regex"]
+
+__version__ = "2.4.35"
+
+# --------------------------------------------------------------------
+# Public interface.
+
+def match(pattern, string, flags=0, pos=None, endpos=None, concurrent=None,
+  **kwargs):
+    """Anchor the pattern at the start of the string and attempt a match there.
+    Gives back a match object on success, otherwise None."""
+    compiled = _compile(pattern, flags, kwargs)
+    return compiled.match(string, pos, endpos, concurrent)
+
+def fullmatch(pattern, string, flags=0, pos=None, endpos=None, concurrent=None,
+  **kwargs):
+    """Attempt to match the pattern against the string as a whole. Gives back a
+    match object on success, otherwise None."""
+    compiled = _compile(pattern, flags, kwargs)
+    return compiled.fullmatch(string, pos, endpos, concurrent)
+
+def search(pattern, string, flags=0, pos=None, endpos=None, concurrent=None,
+  **kwargs):
+    """Scan the string for a place where the pattern matches. Gives back a
+    match object on success, otherwise None."""
+    compiled = _compile(pattern, flags, kwargs)
+    return compiled.search(string, pos, endpos, concurrent)
+
+def sub(pattern, repl, string, count=0, flags=0, pos=None, endpos=None,
+  concurrent=None, **kwargs):
+    """Replace the leftmost (or, for a reverse pattern, rightmost)
+    non-overlapping occurrences of the pattern in string with repl and return
+    the resulting string. repl is either a string, in which case its backslash
+    escapes are processed, or a callable that is handed the match object and
+    returns the replacement string."""
+    compiled = _compile(pattern, flags, kwargs)
+    return compiled.sub(repl, string, count, pos, endpos, concurrent)
+
+def subf(pattern, format, string, count=0, flags=0, pos=None, endpos=None,
+  concurrent=None, **kwargs):
+    """Replace the leftmost (or, for a reverse pattern, rightmost)
+    non-overlapping occurrences of the pattern in string with format and return
+    the resulting string. format is either a string, in which case it's treated
+    as a format string, or a callable that is handed the match object and
+    returns the replacement string."""
+    compiled = _compile(pattern, flags, kwargs)
+    return compiled.subf(format, string, count, pos, endpos, concurrent)
+
+def subn(pattern, repl, string, count=0, flags=0, pos=None, endpos=None,
+  concurrent=None, **kwargs):
+    """Substitute like sub(), but also report how many replacements were made.
+    The result is the 2-tuple (new_string, number): new_string is the source
+    string with the leftmost (or, for a reverse pattern, rightmost)
+    non-overlapping occurrences of the pattern replaced by repl, and number is
+    the count of substitutions. repl is either a string, whose backslash
+    escapes are processed, or a callable that is handed the match object and
+    returns the replacement string."""
+    compiled = _compile(pattern, flags, kwargs)
+    return compiled.subn(repl, string, count, pos, endpos, concurrent)
+
+def subfn(pattern, format, string, count=0, flags=0, pos=None, endpos=None,
+  concurrent=None, **kwargs):
+    """Substitute like subf(), but also report how many replacements were made.
+    The result is the 2-tuple (new_string, number): new_string is the source
+    string with the leftmost (or, for a reverse pattern, rightmost)
+    non-overlapping occurrences of the pattern replaced by format, and number
+    is the count of substitutions. format is either a string, which is treated
+    as a format string, or a callable that is handed the match object and
+    returns the replacement string."""
+    compiled = _compile(pattern, flags, kwargs)
+    return compiled.subfn(format, string, count, pos, endpos, concurrent)
+
+def split(pattern, string, maxsplit=0, flags=0, concurrent=None, **kwargs):
+    """Split the source string at each occurrence of the pattern and return the
+    substrings as a list.  When the pattern contains capturing parentheses, the
+    text of all its groups is included in the list as well.  A nonzero maxsplit
+    limits the number of splits; the rest of the string is then returned as the
+    last element of the list."""
+    compiled = _compile(pattern, flags, kwargs)
+    return compiled.split(string, maxsplit, concurrent)
+
+def splititer(pattern, string, maxsplit=0, flags=0, concurrent=None, **kwargs):
+    "Iterator form of split(): yields the parts of the split string one by one."
+    compiled = _compile(pattern, flags, kwargs)
+    return compiled.splititer(string, maxsplit, concurrent)
+
+def findall(pattern, string, flags=0, pos=None, endpos=None, overlapped=False,
+  concurrent=None, **kwargs):
+    """Collect every match in the string into a list; the matches may overlap
+    when overlapped is True. If the pattern has one or more groups, the list
+    holds the groups instead (as tuples when there is more than one group).
+    Empty matches are part of the result."""
+    compiled = _compile(pattern, flags, kwargs)
+    return compiled.findall(string, pos, endpos, overlapped, concurrent)
+
+def finditer(pattern, string, flags=0, pos=None, endpos=None, overlapped=False,
+  concurrent=None, **kwargs):
+    """Iterate over every match in the string, yielding a match object for each
+    one; the matches may overlap when overlapped is True. Empty matches are
+    part of the result."""
+    compiled = _compile(pattern, flags, kwargs)
+    return compiled.finditer(string, pos, endpos, overlapped, concurrent)
+
+def compile(pattern, flags=0, **kwargs):
+    "Turn a regular expression pattern into a pattern object and return it."
+    return _compile(pattern, flags=flags, kwargs=kwargs)
+
+def purge():
+    """Clear the regular expression cache.
+
+    Only the compiled-pattern cache (_cache) is emptied here; _named_args and
+    _replacement_cache are not touched by this function.
+    """
+    _cache.clear()
+
+def template(pattern, flags=0):
+    "Compile pattern with the TEMPLATE flag added and return the pattern object."
+    template_flags = flags | TEMPLATE
+    return _compile(pattern, template_flags)
+
+def escape(pattern, special_only=False):
+    """Return pattern with backslashes inserted so that it matches literally.
+
+    If special_only is true, only the characters in _METACHARS are preceded by
+    a backslash; otherwise every character that is not in _ALNUM is. In both
+    modes a NUL character is written as a backslash followed by 000. The
+    pieces are joined with unicode literals for a unicode pattern and with
+    plain string literals for anything else.
+    """
+    # Pick the literals once, according to the type of the pattern.
+    if isinstance(pattern, unicode):
+        backslash, nul, nul_escape, empty = u"\\", u"\x00", u"\\000", u""
+    else:
+        backslash, nul, nul_escape, empty = "\\", "\x00", "\\000", ""
+
+    pieces = []
+    if special_only:
+        # Escape the metacharacters and leave everything else (bar NUL) alone.
+        for c in pattern:
+            if c in _METACHARS:
+                pieces.extend((backslash, c))
+            elif c == nul:
+                pieces.append(nul_escape)
+            else:
+                pieces.append(c)
+    else:
+        # Leave the alphanumerics alone and escape everything else.
+        for c in pattern:
+            if c in _ALNUM:
+                pieces.append(c)
+            elif c == nul:
+                pieces.append(nul_escape)
+            else:
+                pieces.extend((backslash, c))
+
+    return empty.join(pieces)
+
+# --------------------------------------------------------------------
+# Internals.
+
+from . import _regex_core
+from calibre.constants import plugins
+_regex = plugins['_regex'][0]
+from threading import RLock as _RLock
+from ._regex_core import *
+from ._regex_core import (_ALL_VERSIONS, _ALL_ENCODINGS, _FirstSetError,
+  _UnscopedFlagSet, _check_group_features, _compile_firstset,
+  _compile_replacement, _flatten_code, _fold_case, _get_required_string,
+  _parse_pattern, _shrink_cache)
+from ._regex_core import (ALNUM as _ALNUM, Info as _Info, OP as _OP, Source as
+  _Source, Fuzzy as _Fuzzy)
+
+# Version 0 is the old behaviour, compatible with the original 're' module.
+# Version 1 is the new behaviour, which differs slightly.
+
+# The version used when a pattern's flags don't select one. _compile() reads
+# this on every call and copies it into _regex_core again.
+DEFAULT_VERSION = VERSION0
+
+# The characters that escape(..., special_only=True) puts a backslash before.
+_METACHARS = frozenset("()[]{}?*+|^$\\.")
+
+_regex_core.DEFAULT_VERSION = DEFAULT_VERSION
+
+# Caches for the patterns and replacements.
+# _cache maps (pattern, type(pattern), flags, named lists, DEFAULT_VERSION) to
+# a compiled pattern.
+_cache = {}
+# Held by _compile() only while _shrink_cache() is running.
+_cache_lock = _RLock()
+# Maps (pattern, type(pattern), flags) to the frozenset of (name, values) named
+# lists that the pattern was compiled with.
+_named_args = {}
+# Maps (pattern.pattern, pattern.flags, template) to a compiled replacement.
+_replacement_cache = {}
+
+# Maximum size of the cache.
+# _compile() shrinks _cache once it holds _MAXCACHE entries;
+# _compile_replacement_helper() empties _replacement_cache once it holds
+# _MAXREPCACHE entries.
+_MAXCACHE = 500
+_MAXREPCACHE = 500
+
+def _compile(pattern, flags=0, kwargs={}):
+    """Compile a regular expression to a PatternObject, using the cache.
+
+    pattern is a byte string, a unicode string or an already compiled pattern;
+    a compiled pattern is returned unchanged, provided that flags is 0. flags
+    holds the flags requested by the caller and kwargs the named lists that the
+    pattern refers to. The default kwargs dict is shared between calls, but
+    this function itself never modifies it.
+
+    Raises error if the pattern can't be parsed, has trailing characters or
+    (for a pattern that has been seen before) a required named list isn't
+    supplied; ValueError for conflicting version or encoding flags, or for
+    flags given together with a compiled pattern; TypeError if pattern is
+    neither a string nor a compiled pattern.
+    """
+    try:
+        # Do we know what keyword arguments are needed?
+        args_key = pattern, type(pattern), flags
+        args_needed = _named_args[args_key]
+
+        # Are we being provided with its required keyword arguments?
+        args_supplied = set()
+        if args_needed:
+            for k, v in args_needed:
+                try:
+                    args_supplied.add((k, frozenset(kwargs[k])))
+                except KeyError:
+                    raise error("missing named list")
+
+        args_supplied = frozenset(args_supplied)
+
+        # Have we already seen this regular expression and named list?
+        pattern_key = (pattern, type(pattern), flags, args_supplied,
+          DEFAULT_VERSION)
+        return _cache[pattern_key]
+    except KeyError:
+        # It's a new pattern, or new named list for a known pattern.
+        pass
+
+    # Guess the encoding from the class of the pattern string.
+    if isinstance(pattern, unicode):
+        guess_encoding = UNICODE
+    elif isinstance(pattern, str):
+        guess_encoding = ASCII
+    elif isinstance(pattern, _pattern_type):
+        if flags:
+            raise ValueError("can't process flags argument with a compiled pattern")
+
+        return pattern
+    else:
+        raise TypeError("first argument must be a string or compiled pattern")
+
+    # Set the default version in the core code in case it has been changed.
+    _regex_core.DEFAULT_VERSION = DEFAULT_VERSION
+
+    # Flags set inside the pattern for the pattern as a whole are only found
+    # while parsing and force another parse. They are collected in a separate
+    # variable so that 'flags' keeps the caller's value: the cache keys built
+    # for the lookup (above) and for the store (below) must use the same flags,
+    # otherwise such a pattern would never be found in the cache again.
+    global_flags = flags
+
+    caught_exception = None
+
+    while True:
+        try:
+            source = _Source(pattern)
+            info = _Info(global_flags, source.char_type, kwargs)
+            info.guess_encoding = guess_encoding
+            source.ignore_space = bool(info.flags & VERBOSE)
+            parsed = _parse_pattern(source, info)
+            break
+        except _UnscopedFlagSet:
+            # Remember the global flags for the next attempt.
+            global_flags = info.global_flags
+        except error, e:
+            caught_exception = e
+
+        # NOTE(review): the error is raised afresh outside the except clause,
+        # presumably to keep the parser's frames out of the traceback - confirm.
+        if caught_exception:
+            raise error(str(caught_exception))
+
+    if not source.at_end():
+        raise error("trailing characters in pattern at position %d" % source.pos)
+
+    # Check the global flags for conflicts.
+    version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION
+    if version not in (0, VERSION0, VERSION1):
+        raise ValueError("VERSION0 and VERSION1 flags are mutually incompatible")
+
+    if (info.flags & _ALL_ENCODINGS) not in (0, ASCII, LOCALE, UNICODE):
+        raise ValueError("ASCII, LOCALE and UNICODE flags are mutually incompatible")
+
+    # No encoding was requested, so choose one from the type of the pattern.
+    if not (info.flags & _ALL_ENCODINGS):
+        if isinstance(pattern, unicode):
+            info.flags |= UNICODE
+        else:
+            info.flags |= ASCII
+
+    reverse = bool(info.flags & REVERSE)
+    fuzzy = isinstance(parsed, _Fuzzy)
+
+    # Should we print the parsed pattern?
+    if global_flags & DEBUG:
+        parsed.dump(indent=0, reverse=reverse)
+
+    # Fix the group references.
+    parsed.fix_groups(reverse, False)
+
+    # Optimise the parsed pattern.
+    parsed = parsed.optimise(info)
+    parsed = parsed.pack_characters(info)
+
+    # Get the required string.
+    req_offset, req_chars, req_flags = _get_required_string(parsed, info.flags)
+
+    # Build the named lists.
+    named_lists = {}
+    named_list_indexes = [None] * len(info.named_lists_used)
+    args_needed = set()
+    for key, index in info.named_lists_used.items():
+        name, case_flags = key
+        values = frozenset(kwargs[name])
+        if case_flags:
+            # The list is used case-insensitively, so store the folded items.
+            items = frozenset(_fold_case(info, v) for v in values)
+        else:
+            items = values
+        named_lists[name] = values
+        named_list_indexes[index] = items
+        args_needed.add((name, values))
+
+    # Check the features of the groups.
+    _check_group_features(info, parsed)
+
+    # Compile the parsed pattern. The result is a list of tuples.
+    code = parsed.compile(reverse)
+
+    # Is there a group call to the pattern as a whole?
+    key = (0, reverse, fuzzy)
+    ref = info.call_refs.get(key)
+    if ref is not None:
+        code = [(_OP.CALL_REF, ref)] + code + [(_OP.END, )]
+
+    # Add the final 'success' opcode.
+    code += [(_OP.SUCCESS, )]
+
+    # Compile the additional copies of the groups that we need.
+    for group, rev, fuz in info.additional_groups:
+        code += group.compile(rev, fuz)
+
+    # Flatten the code into a list of ints.
+    code = _flatten_code(code)
+
+    if not parsed.has_simple_start():
+        # Get the first set, if possible.
+        try:
+            fs_code = _compile_firstset(info, parsed.get_firstset(reverse))
+            fs_code = _flatten_code(fs_code)
+            code = fs_code + code
+        except _FirstSetError:
+            # No first set is available; the code is used without one.
+            pass
+
+    # The named capture groups.
+    index_group = dict((v, n) for n, v in info.group_index.items())
+
+    # Create the PatternObject.
+    #
+    # Local flags like IGNORECASE affect the code generation, but aren't needed
+    # by the PatternObject itself. Conversely, global flags like LOCALE _don't_
+    # affect the code generation but _are_ needed by the PatternObject.
+    compiled_pattern = _regex.compile(pattern, info.flags | version, code,
+      info.group_index, index_group, named_lists, named_list_indexes,
+      req_offset, req_chars, req_flags, info.group_count)
+
+    # Do we need to reduce the size of the cache?
+    if len(_cache) >= _MAXCACHE:
+        _cache_lock.acquire()
+        try:
+            _shrink_cache(_cache, _named_args, _MAXCACHE)
+        finally:
+            _cache_lock.release()
+
+    args_needed = frozenset(args_needed)
+
+    # Store this regular expression and named list. The key uses the caller's
+    # flags, exactly like the lookup at the top of this function.
+    pattern_key = (pattern, type(pattern), flags, args_needed, DEFAULT_VERSION)
+    _cache[pattern_key] = compiled_pattern
+
+    # Store what keyword arguments are needed.
+    _named_args[args_key] = args_needed
+
+    return compiled_pattern
+
+def _compile_replacement_helper(pattern, template):
+    """Compile a replacement template for pattern into a list of parts.
+
+    This function is called by the _regex module. Runs of literal text become
+    strings of the same type as the template, and group references become
+    whatever items _compile_replacement returned for them. Results are kept in
+    _replacement_cache, which is emptied once it holds _MAXREPCACHE entries.
+    """
+    # Serve the request from the cache if this combination is already known.
+    cache_key = pattern.pattern, pattern.flags, template
+    cached = _replacement_cache.get(cache_key)
+    if cached is not None:
+        return cached
+
+    # The cache is full, so start again from scratch.
+    if len(_replacement_cache) >= _MAXREPCACHE:
+        _replacement_cache.clear()
+
+    is_unicode = isinstance(template, unicode)
+    source = _Source(template)
+
+    # Literal text is collected as character codes; choose what is needed to
+    # turn the codes back into a string of the same type as the template.
+    if is_unicode:
+        joiner, from_code = u"", unichr
+    else:
+        joiner, from_code = "", chr
+
+    def make_string(char_codes):
+        return joiner.join(from_code(c) for c in char_codes)
+
+    parts = []
+    pending = []
+    ch = source.get()
+    while ch:
+        if ch != "\\":
+            pending.append(ord(ch))
+        else:
+            # '_compile_replacement' yields either an int group reference or a
+            # string literal. The items are plural because an invalid escape
+            # sequence comes back as a 2-character literal.
+            is_group, items = _compile_replacement(source, pattern, is_unicode)
+            if not is_group:
+                pending.extend(items)
+            else:
+                # A group reference ends the current run of literal text.
+                if pending:
+                    parts.append(make_string(pending))
+                    pending = []
+                parts.extend(items)
+        ch = source.get()
+
+    # Whatever literal text is left over forms the final part.
+    if pending:
+        parts.append(make_string(pending))
+
+    _replacement_cache[cache_key] = parts
+
+    return parts
+
+# We define _pattern_type here after all the support objects have been defined.
+# The type is taken from the result of compiling an empty pattern. _compile()
+# only consults _pattern_type for arguments that are neither unicode nor str,
+# so this bootstrap call doesn't need the name to exist yet.
+_pattern_type = type(_compile("", 0, {}))
+
+# We'll define an alias for the 'compile' function so that the repr of a
+# pattern object is eval-able.
+Regex = compile
+
+# Register myself for pickling.
+import copy_reg as _copy_reg
+
+def _pickle(p):
+    """Reduce the compiled pattern p for pickling: return the callable and the
+    argument tuple (pattern string, flags) that are used to rebuild it."""
+    rebuild_args = (p.pattern, p.flags)
+    return _compile, rebuild_args
+
+_copy_reg.pickle(_pattern_type, _pickle, _compile)
+
+if not hasattr(str, "format"):
+    # Strings don't have the .format method (below Python 2.6).
+    # subf and subfn treat their replacement as a format string (see their
+    # docstrings), so they are removed from the module: first their entries in
+    # the module docstring, then their names in __all__, then the functions.
+    while True:
+        # Find the next docstring entry that begins with "    subf".
+        _start = __doc__.find("    subf")
+        if _start < 0:
+            break
+
+        # The entry is its own line plus any following lines that start with
+        # at least five spaces.
+        _end = __doc__.find("\n", _start) + 1
+        while __doc__.startswith("     ", _end):
+            _end = __doc__.find("\n", _end) + 1
+
+        # Cut the entry out of the docstring.
+        __doc__ = __doc__[ : _start] + __doc__[_end : ]
+
+    # "subfn" starts with "subf" too, so this drops both names.
+    __all__ = [_name for _name in __all__ if not _name.startswith("subf")]
+
+    # NOTE(review): _end is only bound if the loop found at least one entry;
+    # with a docstring that has no "    subf" entry this del would raise
+    # NameError - confirm that the docstring always has one.
+    del _start, _end
+
+    del subf, subfn
--- a/src/regex/_regex.c
+++ b/src/regex/_regex.c
--- a/src/regex/_regex.h
+++ b/src/regex/_regex.h
@ -0,0 +1,228 @@
+/*
+ * Secret Labs' Regular Expression Engine
+ *
+ * regular expression matching engine
+ *
+ * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
+ *
+ * NOTE: This file is generated by regex.py.  If you need
+ * to change anything in here, edit regex.py and run it.
+ *
+ * 2010-01-16 mrab Re-written
+ */
+
+/* Supports Unicode version 6.3.0. */
+
+#define RE_MAGIC 20100116
+
+#include "_regex_unicode.h"
+
+/* Opcodes. The values double as indexes into re_op_text[] below. */
+#define RE_OP_FAILURE 0
+#define RE_OP_SUCCESS 1
+#define RE_OP_ANY 2
+#define RE_OP_ANY_ALL 3
+#define RE_OP_ANY_ALL_REV 4
+#define RE_OP_ANY_REV 5
+#define RE_OP_ANY_U 6
+#define RE_OP_ANY_U_REV 7
+#define RE_OP_ATOMIC 8
+#define RE_OP_BOUNDARY 9
+#define RE_OP_BRANCH 10
+#define RE_OP_CALL_REF 11
+#define RE_OP_CHARACTER 12
+#define RE_OP_CHARACTER_IGN 13
+#define RE_OP_CHARACTER_IGN_REV 14
+#define RE_OP_CHARACTER_REV 15
+#define RE_OP_DEFAULT_BOUNDARY 16
+#define RE_OP_DEFAULT_END_OF_WORD 17
+#define RE_OP_DEFAULT_START_OF_WORD 18
+#define RE_OP_END 19
+#define RE_OP_END_OF_LINE 20
+#define RE_OP_END_OF_LINE_U 21
+#define RE_OP_END_OF_STRING 22
+#define RE_OP_END_OF_STRING_LINE 23
+#define RE_OP_END_OF_STRING_LINE_U 24
+#define RE_OP_END_OF_WORD 25
+#define RE_OP_FUZZY 26
+#define RE_OP_GRAPHEME_BOUNDARY 27
+#define RE_OP_GREEDY_REPEAT 28
+#define RE_OP_GROUP 29
+#define RE_OP_GROUP_CALL 30
+#define RE_OP_GROUP_EXISTS 31
+#define RE_OP_LAZY_REPEAT 32
+#define RE_OP_LOOKAROUND 33
+#define RE_OP_NEXT 34
+#define RE_OP_PROPERTY 35
+#define RE_OP_PROPERTY_IGN 36
+#define RE_OP_PROPERTY_IGN_REV 37
+#define RE_OP_PROPERTY_REV 38
+#define RE_OP_RANGE 39
+#define RE_OP_RANGE_IGN 40
+#define RE_OP_RANGE_IGN_REV 41
+#define RE_OP_RANGE_REV 42
+#define RE_OP_REF_GROUP 43
+#define RE_OP_REF_GROUP_FLD 44
+#define RE_OP_REF_GROUP_FLD_REV 45
+#define RE_OP_REF_GROUP_IGN 46
+#define RE_OP_REF_GROUP_IGN_REV 47
+#define RE_OP_REF_GROUP_REV 48
+#define RE_OP_SEARCH_ANCHOR 49
+#define RE_OP_SET_DIFF 50
+#define RE_OP_SET_DIFF_IGN 51
+#define RE_OP_SET_DIFF_IGN_REV 52
+#define RE_OP_SET_DIFF_REV 53
+#define RE_OP_SET_INTER 54
+#define RE_OP_SET_INTER_IGN 55
+#define RE_OP_SET_INTER_IGN_REV 56
+#define RE_OP_SET_INTER_REV 57
+#define RE_OP_SET_SYM_DIFF 58
+#define RE_OP_SET_SYM_DIFF_IGN 59
+#define RE_OP_SET_SYM_DIFF_IGN_REV 60
+#define RE_OP_SET_SYM_DIFF_REV 61
+#define RE_OP_SET_UNION 62
+#define RE_OP_SET_UNION_IGN 63
+#define RE_OP_SET_UNION_IGN_REV 64
+#define RE_OP_SET_UNION_REV 65
+#define RE_OP_START_OF_LINE 66
+#define RE_OP_START_OF_LINE_U 67
+#define RE_OP_START_OF_STRING 68
+#define RE_OP_START_OF_WORD 69
+#define RE_OP_STRING 70
+#define RE_OP_STRING_FLD 71
+#define RE_OP_STRING_FLD_REV 72
+#define RE_OP_STRING_IGN 73
+#define RE_OP_STRING_IGN_REV 74
+#define RE_OP_STRING_REV 75
+#define RE_OP_STRING_SET 76
+#define RE_OP_STRING_SET_FLD 77
+#define RE_OP_STRING_SET_FLD_REV 78
+#define RE_OP_STRING_SET_IGN 79
+#define RE_OP_STRING_SET_IGN_REV 80
+#define RE_OP_STRING_SET_REV 81
+#define RE_OP_BODY_END 82
+#define RE_OP_BODY_START 83
+#define RE_OP_END_FUZZY 84
+#define RE_OP_END_GREEDY_REPEAT 85
+#define RE_OP_END_GROUP 86
+#define RE_OP_END_LAZY_REPEAT 87
+#define RE_OP_GREEDY_REPEAT_ONE 88
+#define RE_OP_GROUP_RETURN 89
+#define RE_OP_LAZY_REPEAT_ONE 90
+#define RE_OP_MATCH_BODY 91
+#define RE_OP_MATCH_TAIL 92
+#define RE_OP_START_GROUP 93
+
+char* re_op_text[] = {
+    "RE_OP_FAILURE",
+    "RE_OP_SUCCESS",
+    "RE_OP_ANY",
+    "RE_OP_ANY_ALL",
+    "RE_OP_ANY_ALL_REV",
+    "RE_OP_ANY_REV",
+    "RE_OP_ANY_U",
+    "RE_OP_ANY_U_REV",
+    "RE_OP_ATOMIC",
+    "RE_OP_BOUNDARY",
+    "RE_OP_BRANCH",
+    "RE_OP_CALL_REF",
+    "RE_OP_CHARACTER",
+    "RE_OP_CHARACTER_IGN",
+    "RE_OP_CHARACTER_IGN_REV",
+    "RE_OP_CHARACTER_REV",
+    "RE_OP_DEFAULT_BOUNDARY",
+    "RE_OP_DEFAULT_END_OF_WORD",
+    "RE_OP_DEFAULT_START_OF_WORD",
+    "RE_OP_END",
+    "RE_OP_END_OF_LINE",
+    "RE_OP_END_OF_LINE_U",
+    "RE_OP_END_OF_STRING",
+    "RE_OP_END_OF_STRING_LINE",
+    "RE_OP_END_OF_STRING_LINE_U",
+    "RE_OP_END_OF_WORD",
+    "RE_OP_FUZZY",
+    "RE_OP_GRAPHEME_BOUNDARY",
+    "RE_OP_GREEDY_REPEAT",
+    "RE_OP_GROUP",
+    "RE_OP_GROUP_CALL",
+    "RE_OP_GROUP_EXISTS",
+    "RE_OP_LAZY_REPEAT",
+    "RE_OP_LOOKAROUND",
+    "RE_OP_NEXT",
+    "RE_OP_PROPERTY",
+    "RE_OP_PROPERTY_IGN",
+    "RE_OP_PROPERTY_IGN_REV",
+    "RE_OP_PROPERTY_REV",
+    "RE_OP_RANGE",
+    "RE_OP_RANGE_IGN",
+    "RE_OP_RANGE_IGN_REV",
+    "RE_OP_RANGE_REV",
+    "RE_OP_REF_GROUP",
+    "RE_OP_REF_GROUP_FLD",
+    "RE_OP_REF_GROUP_FLD_REV",
+    "RE_OP_REF_GROUP_IGN",
+    "RE_OP_REF_GROUP_IGN_REV",
+    "RE_OP_REF_GROUP_REV",
+    "RE_OP_SEARCH_ANCHOR",
+    "RE_OP_SET_DIFF",
+    "RE_OP_SET_DIFF_IGN",
+    "RE_OP_SET_DIFF_IGN_REV",
+    "RE_OP_SET_DIFF_REV",
+    "RE_OP_SET_INTER",
+    "RE_OP_SET_INTER_IGN",
+    "RE_OP_SET_INTER_IGN_REV",
+    "RE_OP_SET_INTER_REV",
+    "RE_OP_SET_SYM_DIFF",
+    "RE_OP_SET_SYM_DIFF_IGN",
+    "RE_OP_SET_SYM_DIFF_IGN_REV",
+    "RE_OP_SET_SYM_DIFF_REV",
+    "RE_OP_SET_UNION",
+    "RE_OP_SET_UNION_IGN",
+    "RE_OP_SET_UNION_IGN_REV",
+    "RE_OP_SET_UNION_REV",
+    "RE_OP_START_OF_LINE",
+    "RE_OP_START_OF_LINE_U",
+    "RE_OP_START_OF_STRING",
+    "RE_OP_START_OF_WORD",
+    "RE_OP_STRING",
+    "RE_OP_STRING_FLD",
+    "RE_OP_STRING_FLD_REV",
+    "RE_OP_STRING_IGN",
+    "RE_OP_STRING_IGN_REV",
+    "RE_OP_STRING_REV",
+    "RE_OP_STRING_SET",
+    "RE_OP_STRING_SET_FLD",
+    "RE_OP_STRING_SET_FLD_REV",
+    "RE_OP_STRING_SET_IGN",
+    "RE_OP_STRING_SET_IGN_REV",
+    "RE_OP_STRING_SET_REV",
+    "RE_OP_BODY_END",
+    "RE_OP_BODY_START",
+    "RE_OP_END_FUZZY",
+    "RE_OP_END_GREEDY_REPEAT",
+    "RE_OP_END_GROUP",
+    "RE_OP_END_LAZY_REPEAT",
+    "RE_OP_GREEDY_REPEAT_ONE",
+    "RE_OP_GROUP_RETURN",
+    "RE_OP_LAZY_REPEAT_ONE",
+    "RE_OP_MATCH_BODY",
+    "RE_OP_MATCH_TAIL",
+    "RE_OP_START_GROUP",
+};
+
+#define RE_FLAG_ASCII 0x80
+#define RE_FLAG_BESTMATCH 0x1000
+#define RE_FLAG_DEBUG 0x200
+#define RE_FLAG_DOTALL 0x10
+#define RE_FLAG_ENHANCEMATCH 0x8000
+#define RE_FLAG_FULLCASE 0x4000
+#define RE_FLAG_IGNORECASE 0x2
+#define RE_FLAG_LOCALE 0x4
+#define RE_FLAG_MULTILINE 0x8
+#define RE_FLAG_REVERSE 0x400
+#define RE_FLAG_TEMPLATE 0x1
+#define RE_FLAG_UNICODE 0x20
+#define RE_FLAG_VERBOSE 0x40
+#define RE_FLAG_VERSION0 0x2000
+#define RE_FLAG_VERSION1 0x100
+#define RE_FLAG_WORD 0x800
--- a/src/regex/_regex_core.py
+++ b/src/regex/_regex_core.py
--- a/src/regex/_regex_unicode.c
+++ b/src/regex/_regex_unicode.c
--- a/src/regex/_regex_unicode.h
+++ b/src/regex/_regex_unicode.h
@ -0,0 +1,220 @@
+typedef unsigned char RE_UINT8;
+typedef signed char RE_INT8;
+typedef unsigned short RE_UINT16;
+typedef signed short RE_INT16;
+typedef unsigned int RE_UINT32;
+typedef signed int RE_INT32;
+
+typedef unsigned char BOOL;
+enum {FALSE, TRUE};
+
+#define RE_ASCII_MAX 0x7F
+#define RE_LOCALE_MAX 0xFF
+#define RE_UNICODE_MAX 0x10FFFF
+
+#define RE_MAX_CASES 4
+#define RE_MAX_FOLDED 3
+
+typedef struct RE_Property {
+    RE_UINT16 name; /* NOTE(review): presumably an index into re_strings[] - confirm. */
+    RE_UINT8 id; /* NOTE(review): presumably a property id such as RE_PROP_GC - confirm. */
+    RE_UINT8 value_set; /* NOTE(review): presumably pairs with RE_PropertyValue.value_set - confirm. */
+} RE_Property;
+
+typedef struct RE_PropertyValue {
+    RE_UINT16 name; /* NOTE(review): presumably an index into re_strings[] - confirm. */
+    RE_UINT8 value_set; /* NOTE(review): presumably pairs with RE_Property.value_set - confirm. */
+    RE_UINT8 id; /* NOTE(review): presumably the value's id within its value set - confirm. */
+} RE_PropertyValue;
+
+typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);
+
+#define RE_PROP_GC 0x0
+#define RE_PROP_CASED 0xA
+#define RE_PROP_UPPERCASE 0x9
+#define RE_PROP_LOWERCASE 0x8
+
+#define RE_PROP_C 30
+#define RE_PROP_L 31
+#define RE_PROP_M 32
+#define RE_PROP_N 33
+#define RE_PROP_P 34
+#define RE_PROP_S 35
+#define RE_PROP_Z 36
+
+#define RE_PROP_CN 0
+#define RE_PROP_LU 1
+#define RE_PROP_LL 2
+#define RE_PROP_LT 3
+#define RE_PROP_LM 4
+#define RE_PROP_LO 5
+#define RE_PROP_MN 6
+#define RE_PROP_ME 7
+#define RE_PROP_MC 8
+#define RE_PROP_ND 9
+#define RE_PROP_NL 10
+#define RE_PROP_NO 11
+#define RE_PROP_ZS 12
+#define RE_PROP_ZL 13
+#define RE_PROP_ZP 14
+#define RE_PROP_CC 15
+#define RE_PROP_CF 16
+#define RE_PROP_CO 17
+#define RE_PROP_CS 18
+#define RE_PROP_PD 19
+#define RE_PROP_PS 20
+#define RE_PROP_PE 21
+#define RE_PROP_PC 22
+#define RE_PROP_PO 23
+#define RE_PROP_SM 24
+#define RE_PROP_SC 25
+#define RE_PROP_SK 26
+#define RE_PROP_SO 27
+#define RE_PROP_PI 28
+#define RE_PROP_PF 29
+
+#define RE_PROP_C_MASK 0x00078001
+#define RE_PROP_L_MASK 0x0000003E
+#define RE_PROP_M_MASK 0x000001C0
+#define RE_PROP_N_MASK 0x00000E00
+#define RE_PROP_P_MASK 0x30F80000
+#define RE_PROP_S_MASK 0x0F000000
+#define RE_PROP_Z_MASK 0x00007000
+
+#define RE_PROP_ALNUM 0x460001
+#define RE_PROP_ALPHA 0x070001
+#define RE_PROP_ANY 0x470001
+#define RE_PROP_ASCII 0x480001
+#define RE_PROP_ASSIGNED 0x490001
+#define RE_PROP_BLANK 0x4A0001
+#define RE_PROP_CNTRL 0x00000F
+#define RE_PROP_DIGIT 0x000009
+#define RE_PROP_GRAPH 0x4B0001
+#define RE_PROP_LOWER 0x080001
+#define RE_PROP_PRINT 0x4C0001
+#define RE_PROP_PUNCT 0x000022
+#define RE_PROP_SPACE 0x190001
+#define RE_PROP_UPPER 0x090001
+#define RE_PROP_WORD 0x4D0001
+#define RE_PROP_XDIGIT 0x4E0001
+
+#define RE_BREAK_OTHER 0
+#define RE_BREAK_DOUBLEQUOTE 1
+#define RE_BREAK_SINGLEQUOTE 2
+#define RE_BREAK_HEBREWLETTER 3
+#define RE_BREAK_CR 4
+#define RE_BREAK_LF 5
+#define RE_BREAK_NEWLINE 6
+#define RE_BREAK_EXTEND 7
+#define RE_BREAK_REGIONALINDICATOR 8
+#define RE_BREAK_FORMAT 9
+#define RE_BREAK_KATAKANA 10
+#define RE_BREAK_ALETTER 11
+#define RE_BREAK_MIDLETTER 12
+#define RE_BREAK_MIDNUM 13
+#define RE_BREAK_MIDNUMLET 14
+#define RE_BREAK_NUMERIC 15
+#define RE_BREAK_EXTENDNUMLET 16
+
+#define RE_GBREAK_OTHER 0
+#define RE_GBREAK_CR 1
+#define RE_GBREAK_LF 2
+#define RE_GBREAK_CONTROL 3
+#define RE_GBREAK_EXTEND 4
+#define RE_GBREAK_REGIONALINDICATOR 5
+#define RE_GBREAK_SPACINGMARK 6
+#define RE_GBREAK_L 7
+#define RE_GBREAK_V 8
+#define RE_GBREAK_T 9
+#define RE_GBREAK_LV 10
+#define RE_GBREAK_LVT 11
+#define RE_GBREAK_PREPEND 12
+
+extern char* re_strings[1155];
+extern RE_Property re_properties[145];
+extern RE_PropertyValue re_property_values[1244];
+extern RE_UINT16 re_expand_on_folding[104];
+extern RE_GetPropertyFunc re_get_property[79];
+
+RE_UINT32 re_get_general_category(RE_UINT32 ch);
+RE_UINT32 re_get_block(RE_UINT32 ch);
+RE_UINT32 re_get_script(RE_UINT32 ch);
+RE_UINT32 re_get_word_break(RE_UINT32 ch);
+RE_UINT32 re_get_grapheme_cluster_break(RE_UINT32 ch);
+RE_UINT32 re_get_sentence_break(RE_UINT32 ch);
+RE_UINT32 re_get_math(RE_UINT32 ch);
+RE_UINT32 re_get_alphabetic(RE_UINT32 ch);
+RE_UINT32 re_get_lowercase(RE_UINT32 ch);
+RE_UINT32 re_get_uppercase(RE_UINT32 ch);
+RE_UINT32 re_get_cased(RE_UINT32 ch);
+RE_UINT32 re_get_case_ignorable(RE_UINT32 ch);
+RE_UINT32 re_get_changes_when_lowercased(RE_UINT32 ch);
+RE_UINT32 re_get_changes_when_uppercased(RE_UINT32 ch);
+RE_UINT32 re_get_changes_when_titlecased(RE_UINT32 ch);
+RE_UINT32 re_get_changes_when_casefolded(RE_UINT32 ch);
+RE_UINT32 re_get_changes_when_casemapped(RE_UINT32 ch);
+RE_UINT32 re_get_id_start(RE_UINT32 ch);
+RE_UINT32 re_get_id_continue(RE_UINT32 ch);
+RE_UINT32 re_get_xid_start(RE_UINT32 ch);
+RE_UINT32 re_get_xid_continue(RE_UINT32 ch);
+RE_UINT32 re_get_default_ignorable_code_point(RE_UINT32 ch);
+RE_UINT32 re_get_grapheme_extend(RE_UINT32 ch);
+RE_UINT32 re_get_grapheme_base(RE_UINT32 ch);
+RE_UINT32 re_get_grapheme_link(RE_UINT32 ch);
+RE_UINT32 re_get_white_space(RE_UINT32 ch);
+RE_UINT32 re_get_bidi_control(RE_UINT32 ch);
+RE_UINT32 re_get_join_control(RE_UINT32 ch);
+RE_UINT32 re_get_dash(RE_UINT32 ch);
+RE_UINT32 re_get_hyphen(RE_UINT32 ch);
+RE_UINT32 re_get_quotation_mark(RE_UINT32 ch);
+RE_UINT32 re_get_terminal_punctuation(RE_UINT32 ch);
+RE_UINT32 re_get_other_math(RE_UINT32 ch);
+RE_UINT32 re_get_hex_digit(RE_UINT32 ch);
+RE_UINT32 re_get_ascii_hex_digit(RE_UINT32 ch);
+RE_UINT32 re_get_other_alphabetic(RE_UINT32 ch);
+RE_UINT32 re_get_ideographic(RE_UINT32 ch);
+RE_UINT32 re_get_diacritic(RE_UINT32 ch);
+RE_UINT32 re_get_extender(RE_UINT32 ch);
+RE_UINT32 re_get_other_lowercase(RE_UINT32 ch);
+RE_UINT32 re_get_other_uppercase(RE_UINT32 ch);
+RE_UINT32 re_get_noncharacter_code_point(RE_UINT32 ch);
+RE_UINT32 re_get_other_grapheme_extend(RE_UINT32 ch);
+RE_UINT32 re_get_ids_binary_operator(RE_UINT32 ch);
+RE_UINT32 re_get_ids_trinary_operator(RE_UINT32 ch);
+RE_UINT32 re_get_radical(RE_UINT32 ch);
+RE_UINT32 re_get_unified_ideograph(RE_UINT32 ch);
+RE_UINT32 re_get_other_default_ignorable_code_point(RE_UINT32 ch);
+RE_UINT32 re_get_deprecated(RE_UINT32 ch);
+RE_UINT32 re_get_soft_dotted(RE_UINT32 ch);
+RE_UINT32 re_get_logical_order_exception(RE_UINT32 ch);
+RE_UINT32 re_get_other_id_start(RE_UINT32 ch);
+RE_UINT32 re_get_other_id_continue(RE_UINT32 ch);
+RE_UINT32 re_get_sterm(RE_UINT32 ch);
+RE_UINT32 re_get_variation_selector(RE_UINT32 ch);
+RE_UINT32 re_get_pattern_white_space(RE_UINT32 ch);
+RE_UINT32 re_get_pattern_syntax(RE_UINT32 ch);
+RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 ch);
+RE_UINT32 re_get_bidi_class(RE_UINT32 ch);
+RE_UINT32 re_get_canonical_combining_class(RE_UINT32 ch);
+RE_UINT32 re_get_decomposition_type(RE_UINT32 ch);
+RE_UINT32 re_get_east_asian_width(RE_UINT32 ch);
+RE_UINT32 re_get_joining_group(RE_UINT32 ch);
+RE_UINT32 re_get_joining_type(RE_UINT32 ch);
+RE_UINT32 re_get_line_break(RE_UINT32 ch);
+RE_UINT32 re_get_numeric_type(RE_UINT32 ch);
+RE_UINT32 re_get_numeric_value(RE_UINT32 ch);
+RE_UINT32 re_get_bidi_mirrored(RE_UINT32 ch);
+RE_UINT32 re_get_indic_matra_category(RE_UINT32 ch);
+RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch);
+RE_UINT32 re_get_alphanumeric(RE_UINT32 ch);
+RE_UINT32 re_get_any(RE_UINT32 ch);
+RE_UINT32 re_get_ascii(RE_UINT32 ch);
+RE_UINT32 re_get_assigned(RE_UINT32 ch);
+RE_UINT32 re_get_blank(RE_UINT32 ch);
+RE_UINT32 re_get_graph(RE_UINT32 ch);
+RE_UINT32 re_get_print(RE_UINT32 ch);
+RE_UINT32 re_get_word(RE_UINT32 ch);
+RE_UINT32 re_get_xdigit(RE_UINT32 ch);
+int re_get_all_cases(RE_UINT32 ch, RE_UINT32* codepoints);
+RE_UINT32 re_get_simple_case_folding(RE_UINT32 ch);
+int re_get_full_case_folding(RE_UINT32 ch, RE_UINT32* codepoints);