Merge remote-tracking branch 'kovid/master'

Tomasz Długosz 2014-05-04 20:16:27 +02:00
commit 26b73c4f93
53 changed files with 948 additions and 528 deletions


@ -20,6 +20,56 @@
# new recipes:
# - title:
- version: 1.35.0
date: 2014-05-02
new features:
- title: "Edit Book: Redesign the syntax highlighter to improve performance for large documents and extended editing sessions."
tickets: [1314339]
- title: "Edit book: Make cursor movement smooth by not highlighting matching tags while the cursor is moving. Only match highlighting tags if the cursor stays still for a time."
- title: "Spellcheck dialog: Indicate whether a word is ignored in the Misspelled column"
- title: "Spellcheck dialog: Pressing Ctrl+C on the words list copies only selected words, regardless of current cell"
- title: "Add a copy to clipboard action to the context menu for the spell check dialog"
- title: "Edit book: Fix save button incorrectly disabled after a failed save"
tickets: [1313567]
bug fixes:
- title: "Edit Book: Fix an error when merging CSS stylesheets that contain @charset rules"
- title: "Edit book: Fix extra invalid entries being generated in the manifest when editing an AZW3 file that has no images."
- title: "Edit book: Fix a hang when editing an HTML or XML file with text of the form <abc: (i.e. a tag name with a trailing colon)."
tickets: [1314009]
- title: "Fix regression that prevented the ebook editor from starting on linux systems with locale set to 'C'"
tickets: [1315064]
- title: "DOCX Input: Fix formatting of the generated Index when the index is complex, i.e. with lots of references to the same item, multiple level of sub-items, etc."
- title: "Smarten punctuation: Fix a double quote preceded by a hyphen at the end of a sentence (before the start of the next tag) being converted into an opening quote instead of closing quote."
tickets: [1286477]
- title: "News download: Fix very long URLs for links to pages causing errors on windows because of max path length restrictions."
tickets: [1313982]
- title: "Edit book: Fix saved search dialog causing high CPU usage"
- title: "Edit book: Fix importing of Lithuanian dictionary from OpenOffice, that does not specify a country code."
tickets: [1313315]
improved recipes:
- Ars Technica
- Daily Mirror
- Birmingham Evening Mail
- NRC - Next
- Private Eye
- NZZ
- version: 1.34.0
date: 2014-04-25


@ -78,7 +78,7 @@ sum of the individual file sizes.
Many files have special meaning, in the book. These will typically have
an icon next to their names, indicating the special meaning. For example, in
the picture to the left, you can see that the files :guilabel:`cover_image.jpg`
and :guilabel:`titlepage.xhtml` have the ocon of a cover next to them, this
and :guilabel:`titlepage.xhtml` have the icon of a cover next to them, this
indicates they are the book cover image and titlepage. Similarly, the
:guilabel:`content.opf` file has a metadata icon next to it, indicating the
book metadata is present in it and the :guilabel:`toc.ncx` file has a T
@ -123,7 +123,9 @@ Changing text file order
You can re-arrange the order in which text (HTML) files are opened when reading
the book by simply dragging and dropping them in the Files browser. For the
technically inclined, this is called re-ordering the book spine.
technically inclined, this is called re-ordering the book spine. Note that you
have to drop the items *between* other items, not on top of them; this can be a
little fiddly until you get used to it.
Marking the cover
^^^^^^^^^^^^^^^^^^^^^^^^^^^


@ -48,16 +48,16 @@ class ArsTechnica(BasicNewsRecipe):
]
remove_attributes = ['lang']
feeds = [
(u'Infinite Loop (Apple content)' , u'http://feeds.arstechnica.com/arstechnica/apple/' )
,(u'Opposable Thumbs (Gaming content)' , u'http://feeds.arstechnica.com/arstechnica/gaming/' )
,(u'Gear and Gadgets' , u'http://feeds.arstechnica.com/arstechnica/gadgets/' )
,(u'Uptime (IT content)' , u'http://feeds.arstechnica.com/arstechnica/business/' )
(u'Infinite Loop (Apple content)' , u'http://feeds.arstechnica.com/arstechnica/apple/')
,(u'Opposable Thumbs (Gaming content)' , u'http://feeds.arstechnica.com/arstechnica/gaming/')
,(u'Gear and Gadgets' , u'http://feeds.arstechnica.com/arstechnica/gadgets/')
,(u'Uptime (IT content)' , u'http://feeds.arstechnica.com/arstechnica/business/')
,(u'Open Ended (Open Source content)' , u'http://feeds.arstechnica.com/arstechnica/open-source/')
,(u'One Microsoft Way' , u'http://feeds.arstechnica.com/arstechnica/microsoft/' )
,(u'Scientific method (Science content)' , u'http://feeds.arstechnica.com/arstechnica/science/' )
,(u'One Microsoft Way' , u'http://feeds.arstechnica.com/arstechnica/microsoft/')
,(u'Scientific method (Science content)' , u'http://feeds.arstechnica.com/arstechnica/science/')
,(u'Law & Disorder (Tech policy content)' , u'http://feeds.arstechnica.com/arstechnica/tech-policy/')
,(u'Risk Assessment (Security content)' , u'http://feeds.arstechnica.com/arstechnica/security/')
]
def append_page(self, soup, appendtag, position):
@ -75,7 +75,6 @@ class ArsTechnica(BasicNewsRecipe):
pager.extract()
appendtag.insert(position,texttag)
def preprocess_html(self, soup):
self.append_page(soup, soup.body, 3)
for item in soup.findAll('a'):
@ -91,10 +90,9 @@ class ArsTechnica(BasicNewsRecipe):
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
if not item.has_key('alt'):
if 'alt' not in item:
item['alt'] = 'image'
return soup
def preprocess_raw_html(self, raw, url):
return '<html><head>'+raw[raw.find('</head>'):]


@ -5,18 +5,18 @@ import re
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Birmingham Evening Mail'
description = 'News for Birmingham UK'
#timefmt = ''
# timefmt = ''
__author__ = 'Dave Asbury'
# v1 21/12/13
# 1/5/14
masthead_url = 'http://images.icnetwork.co.uk/upl/icbirmingham/apr2004/6/5/0007417F-982A-107F-969980BFB6FA0000.jpg'
oldest_article = 1
oldest_article = 2
max_articles_per_feed = 10
#linearize_tables = True
# linearize_tables = True
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
remove_attributes = ['style']
#auto_cleanup = True
# auto_cleanup = True
language = 'en_GB'
compress_news_images = True
compress_news_images_max_size = 30
@ -26,8 +26,14 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
dict(attrs={'class' : 'gallery-data'}),
dict(attrs={'class' : 'ir btn-fullscreen'}),
dict(attrs={'class' : 'tools clearfix'}),
dict(attrs={'class' : 'shareButtons'}),
]
keep_only_tags = [
dict(name='h1'),
dict(attrs={'class' : 'lead-text'}),
# dict(attrs={'class' : 'styleGroup article-header'}),
# dict(attrs={'class' : 'body '}),
dict(attrs={'class' : 'tmCol article'}),]
feeds = [
@ -38,11 +44,15 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
(u'Lifestyle',u'http://www.birminghammail.co.uk/lifestyle/rss.xml'),
]
extra_css = '''
h1{font-weight:bold;font-size: 175%;}
h1{font-weight:bold;}
h2{font-weight:normal;font-size:75%;}
figure {font-size:50%;}
#body{font-size:14px;}
#.photo-caption {display: block;margin-left: auto;margin-right: auto;width:100%;font-size:40%;}
#.publish-info {font-size:50%;}
img {display: block;margin-left: auto;margin-right: auto;width:100%;font-size:50%;}
'''
def get_cover_url(self):
soup = self.index_to_soup('http://www.birminghammail.co.uk')
cov = soup.find(attrs={'src' : re.compile('http://images.icnetwork.co.uk/upl/birm')})


@ -7,80 +7,70 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
description = 'News as provided by The Daily Mirror -UK'
__author__ = 'Dave Asbury'
# last updated 27/8/13
# last updated 1/5/14
language = 'en_GB'
#cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
# cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'
#recursions = 10
compress_news_images = True
compress_news_images_max_size = 30
oldest_article = 1.5
max_articles_per_feed = 10
max_articles_per_feed = 12
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
ignore_duplicate_articles = {'url'}
ignore_duplicate_articles = {'title'}
#auto_cleanup = True
#conversion_options = { 'linearize_tables' : True }
keep_only_tags = [dict(name='h1'),
keep_only_tags = [
dict(name='h1'),
dict(name='div',attrs={'class' : 'lead-text'}),
dict(attrs={'class' : 'tools clearfix'}),
dict(name='div',attrs={'class' : 'widget relatedContents pictures widget-editable viziwyg-section-245 inpage-widget-158123'}),
# dict(name='figure',attrs={'class' : 'clearfix'}),
dict(name='figure',attrs={'class' : 'inline-image clearfix '}),
dict(name='div',attrs={'class' : 'styleGroup clearfix'}),
dict(name='div',attrs={
'class' : 'widget relatedContents pictures widget-editable viziwyg-section-70 inpage-widget-2230659'}),
dict(name='div',attrs={'class' :'body '}),
dict(name='div',attrs={'class' :'thumb'}),
dict(attrs={'img alt' : ['Perishers','Horace']}),
#dict(attrs={'class' : 'tmRow span-15-5 col-1 article-page'}),
#dict(attrs={'class' : ['article-attr','byline append-1','published']}),
# dict(name='p'),
]
remove_tags = [
dict(attrs={'class' : ['article sa-teaser type-opinion','last','gallery-caption','gallery-data','ir btn-fullscreen','avatar']}), # ,'image-gallery'
dict(attrs={'class' : ['article sa-teaser type-opinion','image-gallery','gallery-caption']}),
dict(attrs={'class' : 'comment'}),
dict(name='title'),
dict(name='ul'),
dict(name='ul',attrs={'class' : 'clearfix breadcrumbs '}),
dict(name='ul',attrs={'id' : 'login-201109171215'}),
#'widget navigation breadcrumb widget-editable viziwyg-section-198 inpage-widget-80721 span-17','image-credit'
dict(name='div',attrs={'class' : ['inline-ad span-16 last','caption']}),
]
preprocess_regexps = [
(re.compile(r'- mirror.co.uk', re.IGNORECASE | re.DOTALL), lambda match: '')]
(re.compile(r'- mirror.co.uk', re.IGNORECASE | re.DOTALL), lambda match: ''),
(re.compile(r'<span class="image-credit">Getty</span>', re.IGNORECASE | re.DOTALL), lambda match: ''),
]
feeds = [
(u'News',u'http://www.mirror.co.uk/news/rss.xml'),
(u'UK News',u'http://www.mirror.co.uk/news/uk-news/rss.xml'),
(u'world News',u'http://www.mirror.co.uk/news/world-news/rss.xml'),
(u'Sports',u'http://www.mirror.co.uk/sport/rss.xml'),
(u'3AM',u'http://www.mirror.co.uk/3am/rss.xml'),
(u'Lifestyle',u'http://www.mirror.co.uk/lifestyle/rss.xml')
# example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:170%;}
.article figure figcaption {display: block;margin-left: auto;margin-right: auto;
width:100%;font-family:Arial,Helvetica,sans-serif;font-size:40%;}
#h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;}
p{font-family:Arial,Helvetica,sans-serif;}
body{font-family:Helvetica,Arial,sans-serif;}
.article figure{display: block;margin-left: auto;margin-right: auto;width:100%;}
.lead-text p {font-size:150%}
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
def get_cover_url(self):
soup = self.index_to_soup('http://www.politicshome.com/uk/latest_frontpage.html')
# look for the block containing the mirror button and url
# look for the block containing the mirror button and url
cov = soup.find(attrs={'style' : 'background-image: url(http://www.politicshome.com/images/sources/source_frontpage_button_92.gif);'})
cov2 = str(cov)
cov2='http://www.politicshome.com'+cov2[9:-142]
# cov2 now contains url of the page containing pic
# cov2 now contains url of the page containing pic
soup = self.index_to_soup(cov2)
cov = soup.find(attrs={'id' : 'large'})
cov=str(cov)
@ -96,7 +86,4 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
except:
cover_url ='http://profile.ak.fbcdn.net/hprofile-ak-snc4/373019_6149699161_1710984811_n.jpg'
# print '******** string is ', cov2,' ***'
#cover_url = cov2
#cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
return cover_url

Binary file not shown. (new image, 2.8 KiB)


@ -3,15 +3,16 @@
# Based on veezh's original recipe, Kovid Goyal's New York Times recipe and Snaabs nrc Handelsblad recipe
__license__ = 'GPL v3'
__copyright__ = '2013, Niels Giesen'
__copyright__ = '2014, Niels Giesen'
'''
www.nrc.nl
'''
import os, zipfile
import time
import os, zipfile, re
from io import BytesIO
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
from datetime import date, timedelta
class NRCNext(BasicNewsRecipe):
@ -19,8 +20,8 @@ class NRCNext(BasicNewsRecipe):
title = u'nrc•next'
description = u'De ePaper-versie van nrc•next'
language = 'nl'
lang = 'nl-NL'
needs_subscription = True
requires_version = (1, 24, 0)
__author__ = 'Niels Giesen'
@ -28,48 +29,45 @@ class NRCNext(BasicNewsRecipe):
'no_default_epub_cover' : True
}
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
if self.username is not None and self.password is not None:
br.open('http://login.nrc.nl/login')
br.select_form(nr=0)
br['username'] = self.username
br['password'] = self.password
br.submit()
return br
def build_index(self):
today = time.strftime("%Y%m%d")
domain = "http://digitaleeditie.nrc.nl"
url = domain + "/digitaleeditie/helekrant/epub/nn_" + today + ".epub"
#print url
from calibre.web.jsbrowser.browser import Browser, ElementNotFound
br = Browser()
br.visit('http://login.nrc.nl/login', timeout=60)
f = br.select_form('#command')
f['username'] = self.username
f['password'] = self.password
br.submit()
raw = br.html
if '>log out<' not in raw:
raise ValueError('Failed to login, check username and password')
epubraw = None
for today in (date.today(), date.today() - timedelta(days=1),):
url = 'http://digitaleeditie.nrc.nl/digitaleeditie/NN/%s/%d/%s___/downloads.html' % (today.strftime('%Y'), today.month - 1, today.strftime('%Y%m%d'))
self.log('Trying to download epub from:', url)
br.start_load(url, timeout=60)
try:
br = self.get_browser()
f = br.open(url)
except:
self.report_progress(0,_('Kan niet inloggen om editie te downloaden'))
epubraw = br.download_file('#CompleteDownloads .download-list .download-button')
break
except ElementNotFound:
self.log('%r not available yet' % url)
continue
if epubraw is None:
raise ValueError('Krant van vandaag nog niet beschikbaar')
tmp = PersistentTemporaryFile(suffix='.epub')
self.report_progress(0,_('downloading epub'))
tmp.write(f.read())
f.close()
br.close()
if zipfile.is_zipfile(tmp):
try:
zfile = zipfile.ZipFile(tmp.name, 'r')
zfile = zipfile.ZipFile(BytesIO(epubraw), 'r')
zfile.extractall(self.output_dir)
self.report_progress(0,_('extracting epub'))
except zipfile.BadZipfile:
self.report_progress(0,_('BadZip error, continuing'))
tmp.close()
namelist = zfile.namelist()
emre = re.compile("&lt;em(?:.*)&gt;(.*)&lt;/em&gt;")
subst = '\\1'
for name in namelist:
_, ext = os.path.splitext(name);
if (ext == '.html') or (ext == '.ncx'):
fname = os.path.join(self.output_dir, name)
with open(fname) as f:
s = f.read()
s = emre.sub(subst, s)
with open(fname, 'w') as f:
f.write(s)
index = os.path.join(self.output_dir, 'metadata.opf')
self.report_progress(1,_('epub downloaded and extracted'))
return index


@ -47,7 +47,6 @@ class Nzz(BasicNewsRecipe):
# print soup.prettify()
articles = {}
key = None
sections = []
ans = []
issue = soup.find("link",rel="prefetch")


@ -1,31 +1,38 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1359406781(BasicNewsRecipe):
title = u'Private Eye'
oldest_article = 15
publication_type = 'magazine'
description = u'Private Eye is a fortnightly British satirical and current affairs magazine, edited by Ian Hislop'
oldest_article = 13
max_articles_per_feed = 100
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
ignore_duplicate_articles = {'title'}
language = 'en_GB'
__author__ = 'Martyn Pritchard'
encoding = 'iso-8859-1'
compress_news_images = True
compress_news_images_auto_size = 8
scale_news_images_to_device = False
scale_news_images = (220, 300)
__author__ = u'MartynPritchard@yahoo.com'
__copyright__ = '2014, Martyn Pritchard <MartynPritchard@yahoo.com>'
def get_cover_url(self):
soup = self.index_to_soup('http://www.private-eye.co.uk')
cov = soup.find(attrs={'width' : '180', 'border' : '0'})
cover_url = 'http://www.private-eye.co.uk/'+cov['src']
cover_url = None
soup = self.index_to_soup('http://www.private-eye.co.uk/current_issue.php')
for citem in soup.findAll('img'):
if citem['src'].endswith('big.jpg'):
return 'http://www.private-eye.co.uk/' + citem['src']
return cover_url
keep_only_tags = [dict(name='table', attrs={'width':['100%'], 'border':['0'], 'align': ['center'], 'cellspacing':['0'], 'cellpadding':['0']}),
dict(name='table', attrs={'width':['480'], 'cellspacing':['0'], 'cellpadding':['0']}),
dict(name='table', attrs={'width':['490'], 'border':['0'], 'align': ['left'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['500'], 'cellspacing':['0'], 'cellpadding':['0']}),
remove_tags_before = {'class':"sub_dave"}
remove_tags = [dict(name='td', attrs={'class':'sub_dave'})]
preprocess_regexps = [
(re.compile(r'../grfx', re.DOTALL|re.IGNORECASE), lambda match: 'http://www.private-eye.co.uk/grfx'),
(re.compile(r'More From This Issue.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'),
(re.compile(r'More top stories in the latest issue:.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'),
(re.compile(r'Also Available Online.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'),
]
feeds = [(u'Private Eye', u'http://www.private-eye.co.uk/rss/rss.php')]

Binary file not shown. (image changed: 24 KiB before, 23 KiB after)


@ -28,9 +28,8 @@ bit.
(First check if the version of VS 2008 you have is not already SP1)
3) Install The Windows SDK. You need to install a version that is built for VS
2008. Get it from here: http://www.microsoft.com/en-us/download/details.aspx?id=3138
4) If you are building 64bit, edit the properties of the Visual Studio command
prompt shortcut to pass "amd64" instead of "x86" to the vsvars.bat file so that
it uses the 64 bit tools.
4) If you are building 64bit, remember to use the 64bit version of the visual
studio command prompt.
I've read that it is possible to use the 64-bit compiler that comes with the
Windows SDK With VS 2008 Express Edition, but I can't be bothered figuring it


@ -81,7 +81,7 @@ class Manual(Command):
os.makedirs('.build'+os.sep+'html')
os.environ['__appname__'] = __appname__
os.environ['__version__'] = __version__
subprocess.check_call(['sphinx-build', '-b', 'html', '-t', 'online',
subprocess.check_call(['sphinx-build2', '-b', 'html', '-t', 'online',
'-d', '.build/doctrees', '.', '.build/html'])
subprocess.check_call(['sphinx-build', '-b', 'myepub', '-d',
'.build/doctrees', '.', '.build/epub'])


@ -13,10 +13,11 @@ from functools import partial
from setup import Command, __appname__, __version__, require_git_master
def qt_sources():
qtdir = glob.glob('/usr/src/qt-*')[-1]
# QT5XX: Change this
qtdir = '/usr/src/qt4'
j = partial(os.path.join, qtdir)
return list(map(j, [
'src/gui/widgets/qdialogbuttonbox.cpp',
'gui/widgets/qdialogbuttonbox.cpp',
]))
class POT(Command): # {{{
@ -152,7 +153,8 @@ class Translations(POT): # {{{
def run(self, opts):
l = {}
execfile(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'lc_data.py'), l, l)
exec(compile(open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'lc_data.py'))
.read(), os.path.join(os.path.dirname(os.path.abspath(__file__)), 'lc_data.py'), 'exec'), l, l)
lcdata = {k:{k1:v1 for k1, v1 in v} for k, v in l['data']}
self.iso639_errors = []
for f in self.po_files():
@ -308,14 +310,14 @@ class GetTranslations(Translations): # {{{
os.mkdir(errors)
pofilter = ('pofilter', '-i', self.LP_PATH, '-o', errors,
'-t', 'accelerators', '-t', 'escapes', '-t', 'variables',
#'-t', 'xmltags',
#'-t', 'brackets',
#'-t', 'emails',
#'-t', 'doublequoting',
#'-t', 'filepaths',
#'-t', 'numbers',
# '-t', 'xmltags',
# '-t', 'brackets',
# '-t', 'emails',
# '-t', 'doublequoting',
# '-t', 'filepaths',
# '-t', 'numbers',
'-t', 'options',
#'-t', 'urls',
# '-t', 'urls',
'-t', 'printf')
subprocess.check_call(pofilter)
errfiles = glob.glob(errors+os.sep+'*.po')


@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (1, 34, 0)
numeric_version = (1, 35, 0)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"


@ -27,7 +27,8 @@ from calibre.utils.icu import sort_key
from calibre.utils.config import to_json, from_json, prefs, tweaks
from calibre.utils.date import utcfromtimestamp, parse_date
from calibre.utils.filenames import (
is_case_sensitive, samefile, hardlink_file, ascii_filename, WindowsAtomicFolderMove, atomic_rename)
is_case_sensitive, samefile, hardlink_file, ascii_filename,
WindowsAtomicFolderMove, atomic_rename, remove_dir_if_empty)
from calibre.utils.magick.draw import save_cover_data_to
from calibre.utils.formatter_functions import load_user_template_functions
from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable,
@ -1184,8 +1185,7 @@ class DB(object):
Read all data from the db into the python in-memory tables
'''
with self.conn: # Use a single transaction, to ensure nothing modifies
# the db while we are reading
with self.conn: # Use a single transaction, to ensure nothing modifies the db while we are reading
for table in self.tables.itervalues():
try:
table.read(self)
@ -1538,11 +1538,7 @@ class DB(object):
if permanent:
for path in paths:
self.rmtree(path)
try:
os.rmdir(os.path.dirname(path))
except OSError as e:
if e.errno != errno.ENOTEMPTY:
raise
remove_dir_if_empty(os.path.dirname(path), ignore_metadata_caches=True)
else:
delete_service().delete_books(paths, self.library_path)
@ -1667,5 +1663,3 @@ class DB(object):
vals = [(book_id, fmt, size, name) for fmt, size, name in formats]
self.executemany('INSERT INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)', vals)
# }}}


@ -11,6 +11,7 @@ from threading import Thread
from Queue import Queue
from calibre.ptempfile import remove_dir
from calibre.utils.filenames import remove_dir_if_empty
from calibre.utils.recycle_bin import delete_tree, delete_file
class DeleteService(Thread):
@ -93,10 +94,10 @@ class DeleteService(Thread):
time.sleep(1)
shutil.move(path, dest)
if delete_empty_parent:
self.remove_dir_if_empty(os.path.dirname(path))
remove_dir_if_empty(os.path.dirname(path), ignore_metadata_caches=True)
requests.append(dest)
if not requests:
self.remove_dir_if_empty(tdir)
remove_dir_if_empty(tdir)
else:
self.requests.put(tdir)


@ -814,7 +814,7 @@ class Search(object):
def _update_caches(self, sqp, book_ids):
book_ids = sqp.all_book_ids = set(book_ids)
remove = set()
for query, result in self.cache:
for query, result in tuple(self.cache):
try:
matches = sqp.parse(query)
except ParseException:


@ -231,6 +231,8 @@ class AddRemoveTest(BaseTest):
fmtpath = cache.format_abspath(1, 'FMT1')
bookpath = os.path.dirname(fmtpath)
authorpath = os.path.dirname(bookpath)
os.mkdir(os.path.join(authorpath, '.DS_Store'))
open(os.path.join(authorpath, 'Thumbs.db'), 'wb').close()
item_id = {v:k for k, v in cache.fields['#series'].table.id_map.iteritems()}['My Series Two']
cache.remove_books((1,), permanent=True)
for x in (fmtpath, bookpath, authorpath):


@ -233,15 +233,37 @@ class libiMobileDevice():
dst: file to be created on iOS filesystem
'''
self._log_location("src:{0} dst:{1}".format(repr(src), repr(dst)))
mode = 'rb'
with open(src, mode) as f:
content = bytearray(f.read())
BUFFER_SIZE = 10 * 1024 * 1024
mode = 'wb'
handle = self._afc_file_open(str(dst), mode=mode)
handle = self._afc_file_open(str(dst), mode='wb')
if handle is not None:
success = self._afc_file_write(handle, content, mode=mode)
# Get the file size
file_stats = os.stat(src)
file_size = file_stats.st_size
self._log("file_size: {:,} bytes".format(file_size))
if file_size > BUFFER_SIZE:
bytes_remaining = file_size
with open(src, 'rb') as f:
while bytes_remaining:
if bytes_remaining > BUFFER_SIZE:
self._log("copying {:,} byte chunk".format(BUFFER_SIZE))
content = bytearray(f.read(BUFFER_SIZE))
success = self._afc_file_write(handle, content, mode='wb')
bytes_remaining -= BUFFER_SIZE
else:
self._log("copying final {:,} bytes".format(bytes_remaining))
content = bytearray(f.read(bytes_remaining))
success = self._afc_file_write(handle, content, mode='wb')
bytes_remaining = 0
self._log(" success: {0}".format(success))
else:
with open(src, 'rb') as f:
content = bytearray(f.read())
handle = self._afc_file_open(str(dst), mode='wb')
if handle is not None:
success = self._afc_file_write(handle, content, mode='wb')
self._log(" success: {0}".format(success))
self._afc_file_close(handle)
else:
self._log(" could not create copy")


@ -525,10 +525,15 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
# Network functions
def _read_binary_from_net(self, length):
try:
self.device_socket.settimeout(self.MAX_CLIENT_COMM_TIMEOUT)
v = self.device_socket.recv(length)
self.device_socket.settimeout(None)
return v
except:
self._close_device_socket()
raise
def _read_string_from_net(self):
data = bytes(0)
@ -556,23 +561,30 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
def _send_byte_string(self, sock, s):
if not isinstance(s, bytes):
self._debug('given a non-byte string!')
self._close_device_socket()
raise PacketError("Internal error: found a string that isn't bytes")
sent_len = 0
total_len = len(s)
while sent_len < total_len:
try:
sock.settimeout(self.MAX_CLIENT_COMM_TIMEOUT)
if sent_len == 0:
amt_sent = sock.send(s)
else:
amt_sent = sock.send(s[sent_len:])
sock.settimeout(None)
if amt_sent <= 0:
raise IOError('Bad write on socket')
sent_len += amt_sent
except socket.error as e:
self._debug('socket error', e, e.errno)
if e.args[0] != EAGAIN and e.args[0] != EINTR:
self._close_device_socket()
raise
time.sleep(0.1) # lets not hammer the OS too hard
except:
self._close_device_socket()
raise
# This must be protected by a lock because it is called from the GUI thread
# (the sync stuff) and the device manager thread
@ -592,7 +604,6 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
s = self._json_encode(self.opcodes[op], arg)
if print_debug_info and extra_debug:
self._debug('send string', s)
self.device_socket.settimeout(self.MAX_CLIENT_COMM_TIMEOUT)
self._send_byte_string(self.device_socket, (b'%d' % len(s)) + s)
if not wait_for_response:
return None, None
@ -617,7 +628,6 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
extra_debug = self.settings().extra_customization[self.OPT_EXTRA_DEBUG]
try:
v = self._read_string_from_net()
self.device_socket.settimeout(None)
if print_debug_info and extra_debug:
self._debug('received string', v)
if v:
@ -655,10 +665,10 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
'metadata': book_metadata, 'thisBook': this_book,
'totalBooks': total_books,
'willStreamBooks': True,
'willStreamBinary' : True},
'willStreamBinary' : True,
'wantsSendOkToSendbook' : self.can_send_ok_to_sendbook},
print_debug_info=False,
wait_for_response=False)
wait_for_response=self.can_send_ok_to_sendbook)
self._set_known_metadata(book_metadata)
pos = 0
failed = False
@ -1029,6 +1039,8 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
self._debug('Device can use cached metadata', self.client_can_use_metadata_cache)
self.client_cache_uses_lpaths = result.get('cacheUsesLpaths', False)
self._debug('Cache uses lpaths', self.client_cache_uses_lpaths)
self.can_send_ok_to_sendbook = result.get('canSendOkToSendbook', False)
self._debug('Can send OK to sendbook', self.can_send_ok_to_sendbook)
if not self.settings().extra_customization[self.OPT_USE_METADATA_CACHE]:
self.client_can_use_metadata_cache = False


@ -15,6 +15,7 @@ import os, subprocess, time, re, sys, glob
from itertools import repeat
from calibre import prints, as_unicode
from calibre.constants import DEBUG
from calibre.devices.interface import DevicePlugin
from calibre.devices.errors import DeviceError
from calibre.devices.usbms.deviceconfig import DeviceConfig
@ -615,6 +616,8 @@ class Device(DeviceConfig, DevicePlugin):
'the device has already been ejected, or your '
'kernel is exporting a deprecated version of SYSFS.')
%self.__class__.__name__)
if DEBUG:
print '\nFound device nodes:', main, carda, cardb
self._linux_mount_map = {}
mp, ret = mount(main, 'main')
@ -659,6 +662,8 @@ class Device(DeviceConfig, DevicePlugin):
os.remove(path)
except:
pass
if DEBUG and ro:
print '\nThe mountpoint', mp, 'is readonly, ignoring it'
return ro
for mp in ('_main_prefix', '_card_a_prefix', '_card_b_prefix'):
@ -722,7 +727,7 @@ class Device(DeviceConfig, DevicePlugin):
d.serial == objif.GetProperty('usb.serial'):
dpaths = manager.FindDeviceStringMatch('storage.originating_device', path)
for dpath in dpaths:
#devif = dbus.Interface(bus.get_object('org.freedesktop.Hal', dpath), 'org.freedesktop.Hal.Device')
# devif = dbus.Interface(bus.get_object('org.freedesktop.Hal', dpath), 'org.freedesktop.Hal.Device')
try:
vpaths = manager.FindDeviceStringMatch('block.storage_device', dpath)
for vpath in vpaths:
@ -740,13 +745,13 @@ class Device(DeviceConfig, DevicePlugin):
'vol': volif,
'label': vdevif.GetProperty('volume.label')}
vols.append(vol)
except dbus.exceptions.DBusException, e:
except dbus.exceptions.DBusException as e:
print e
continue
except dbus.exceptions.DBusException, e:
except dbus.exceptions.DBusException as e:
print e
continue
except dbus.exceptions.DBusException, e:
except dbus.exceptions.DBusException as e:
continue
def ocmp(x,y):
@ -779,7 +784,7 @@ class Device(DeviceConfig, DevicePlugin):
print "ERROR: Timeout waiting for mount to complete"
continue
mp = vol['dev'].GetProperty('volume.mount_point')
except dbus.exceptions.DBusException, e:
except dbus.exceptions.DBusException as e:
print "Failed to mount ", e
continue
@ -828,7 +833,7 @@ class Device(DeviceConfig, DevicePlugin):
print "FBSD: umount main:", self._main_prefix
try:
self._main_vol.Unmount([])
except dbus.exceptions.DBusException, e:
except dbus.exceptions.DBusException as e:
print 'Unable to eject ', e
if self._card_a_prefix:
@ -836,7 +841,7 @@ class Device(DeviceConfig, DevicePlugin):
print "FBSD: umount card a:", self._card_a_prefix
try:
self._card_a_vol.Unmount([])
except dbus.exceptions.DBusException, e:
except dbus.exceptions.DBusException as e:
print 'Unable to eject ', e
if self._card_b_prefix:
@ -844,7 +849,7 @@ class Device(DeviceConfig, DevicePlugin):
print "FBSD: umount card b:", self._card_b_prefix
try:
self._card_b_vol.Unmount([])
except dbus.exceptions.DBusException, e:
except dbus.exceptions.DBusException as e:
print 'Unable to eject ', e
self._main_prefix = None


@ -31,7 +31,7 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'ht
'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'docm', 'md',
'textile', 'markdown', 'ibook', 'iba', 'azw3', 'ps']
'textile', 'markdown', 'ibook', 'iba', 'azw3', 'ps', 'kepub']
class HTMLRenderer(object):


@ -77,7 +77,7 @@ class Plumber(object):
def __init__(self, input, output, log, report_progress=DummyReporter(),
dummy=False, merge_plugin_recs=True, abort_after_input_dump=False,
override_input_metadata=False, for_regex_wizard=False):
override_input_metadata=False, for_regex_wizard=False, view_kepub=False):
'''
:param input: Path to input file.
:param output: Path to output file/directory
@ -702,6 +702,8 @@ OptionRecommendation(name='search_replace',
if not input_fmt:
raise ValueError('Input file must have an extension')
input_fmt = input_fmt[1:].lower().replace('original_', '')
if view_kepub and input_fmt.lower() == 'kepub':
input_fmt = 'epub'
self.archive_input_tdir = None
if input_fmt in ARCHIVE_FMTS:
self.log('Processing archive...')


@ -119,7 +119,7 @@ def process_index(field, index, xe_fields, log):
return hyperlinks, blocks
def split_up_block(block, a, text, parts):
def split_up_block(block, a, text, parts, ldict):
prefix = parts[:-1]
a.text = parts[-1]
parent = a.getparent()
@ -127,31 +127,100 @@ def split_up_block(block, a, text, parts):
for i, prefix in enumerate(prefix):
m = 1.5 * i
span = parent.makeelement('span', style=style % m)
ldict[span] = i
parent.append(span)
span.text = prefix
span = parent.makeelement('span', style=style % ((i + 1) * 1.5))
parent.append(span)
span.append(a)
ldict[span] = len(prefix)
def merge_blocks(prev_block, next_block, prev_path, next_path):
pa, na = prev_block.xpath('descendant::a'), next_block.xpath('descendant::a[1]')
if not pa or not na:
return
pa, na = pa[-1], na[0]
if prev_path == next_path:
"""
The merge algorithm is a little tricky.
We start with a list of elementary blocks. Each is an HtmlElement, a p node
with a list of child nodes. The last child is a link, and the earlier ones are
just text.
The list is in reverse order from what we want in the index.
There is a dictionary ldict which records the level of each child node.
Now we want to do a reduce-like operation, combining all blocks with the same
top level index entry into a single block representing the structure of all
references, subentries, etc. under that top entry.
Here's the algorithm.
Given a block p and the next block n, and the top level entries p1 and n1 in each
block, which we assume have the same text:
Start with (p, p1) and (n, n1).
Given (p, p1, ..., pk) and (n, n1, ..., nk) which we want to merge:
If there are no more levels in n, then add the link from nk to the links for pk.
This might be the first link for pk, or we might get a list of references.
Otherwise nk+1 is the next level in n. Look for a matching entry in p. It must have
the same text, it must follow pk, it must come before we find any other p entries at
the same level as pk, and it must have the same level as nk+1.
If we find such a matching entry, go back to the start with (p ... pk+1) and (n ... nk+1).
If there is no matching entry, then because of the original reversed order we want
to insert nk+1 and all following entries from n into p immediately following pk.
"""
def find_match(prev_block, pind, nextent, ldict):
curlevel = ldict[prev_block[pind]]
for p in range(pind+1, len(prev_block)):
trylev = ldict[prev_block[p]]
if trylev <= curlevel:
return -1
if trylev > (curlevel+1):
continue
if prev_block[p].text_content() == nextent.text_content():
return p
return -1
def add_link(pent, nent, ldict):
na = nent.xpath('descendant::a[1]')
na = na[0]
pa = pent.xpath('descendant::a')
if pa and len(pa) > 0:
# Put on same line with a comma
pa = pa[-1]
pa.tail = ', '
p = pa.getparent()
p.insert(p.index(pa) + 1, na)
else:
# Add a line to the previous block
ps, ns = pa.getparent(), na.getparent()
p = ps.getparent()
p.insert(p.index(ps) + 1, ns)
# substitute link na for plain text in pent
pent.text = ""
pent.append(na)
def merge_blocks(prev_block, next_block, pind, nind, next_path, ldict):
# First elements match. Any more in next?
if len(next_path) == (nind + 1):
nextent = next_block[nind]
add_link(prev_block[pind], nextent, ldict)
return
nind = nind + 1
nextent = next_block[nind]
prevent = find_match(prev_block, pind, nextent, ldict)
if prevent > 0:
merge_blocks(prev_block, next_block, prevent, nind, next_path, ldict)
return
# Want to insert elements into previous block
while nind < len(next_block):
# insert takes it out of old
pind = pind + 1
prev_block.insert(pind, next_block[nind])
next_block.getparent().remove(next_block)
def polish_index_markup(index, blocks):
# Blocks are in reverse order at this point
path_map = {}
ldict = {}
for block in blocks:
cls = block.get('class', '') or ''
block.set('class', (cls + ' index-entry').lstrip())
@ -162,20 +231,22 @@ def polish_index_markup(index, blocks):
if ':' in text:
path_map[block] = parts = filter(None, (x.strip() for x in text.split(':')))
if len(parts) > 1:
split_up_block(block, a[0], text, parts)
split_up_block(block, a[0], text, parts, ldict)
else:
# try using a span all the time
path_map[block] = [text]
parent = a[0].getparent()
span = parent.makeelement('span', style='display:block; margin-left: 0em')
parent.append(span)
span.append(a[0])
ldict[span] = 0
# We want a single block for each main entry
prev_block = blocks[0]
for block in blocks[1:]:
pp, pn = path_map[prev_block], path_map[block]
if pp == pn:
merge_blocks(prev_block, block, pp, pn)
elif len(pp) > 1 and len(pn) >= len(pp):
if pn[:-1] in (pp[:-1], pp):
merge_blocks(prev_block, block, pp, pn)
# It's possible to have pn starting with pp but having more
# than one extra entry, but until I see that in the wild, I'm not
# going to bother
if pp[0] == pn[0]:
merge_blocks(prev_block, block, 0, 0, pn, ldict)
else:
prev_block = block


@ -61,7 +61,8 @@ def reverse_tag_iter(block):
class Mobi8Reader(object):
def __init__(self, mobi6_reader, log):
def __init__(self, mobi6_reader, log, for_tweak=False):
self.for_tweak = for_tweak
self.mobi6_reader, self.log = mobi6_reader, log
self.header = mobi6_reader.book_header
self.encrypted_fonts = []
@ -459,6 +460,20 @@ class Mobi8Reader(object):
def exclude(path):
return os.path.basename(path) == 'debug-raw.html'
# If there are no images then the azw3 input plugin dumps all
# binary records as .unknown images, remove them
if self.for_tweak and os.path.exists('images') and os.path.isdir('images'):
files = os.listdir('images')
unknown = [x for x in files if x.endswith('.unknown')]
if len(files) == len(unknown):
[os.remove('images/'+f) for f in files]
if self.for_tweak:
try:
os.remove('debug-raw.html')
except:
pass
opf.create_manifest_from_files_in([os.getcwdu()], exclude=exclude)
for entry in opf.manifest:
if entry.mime_type == 'text/html':
@ -539,4 +554,3 @@ class Mobi8Reader(object):
parent.add_item(href, frag, text)
current_depth = depth
return ans


@ -125,8 +125,8 @@ def iterlinks(root, find_links_in_css=True):
if tag == XHTML('object'):
codebase = None
## <object> tags have attributes that are relative to
## codebase
# <object> tags have attributes that are relative to
# codebase
if 'codebase' in attribs:
codebase = el.get('codebase')
yield (el, 'codebase', codebase, 0)
@ -604,8 +604,8 @@ class Metadata(object):
allowed = self.allowed
if allowed is not None and term not in allowed:
raise AttributeError(
'attribute %r not valid for metadata term %r'
% (self.attr(term), barename(obj.term)))
'attribute %r not valid for metadata term %r' % (
self.attr(term), barename(obj.term)))
return self.attr(term)
def __get__(self, obj, cls):


@ -14,7 +14,7 @@ from calibre.customize.ui import available_input_formats
def is_supported(path):
ext = os.path.splitext(path)[1].replace('.', '').lower()
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
return ext in available_input_formats()
return ext in available_input_formats() or ext == 'kepub'
class UnsupportedFormatError(Exception):


@ -75,7 +75,7 @@ class EbookIterator(BookmarksMixin):
return i
def __enter__(self, processed=False, only_input_plugin=False,
run_char_count=True, read_anchor_map=True,
run_char_count=True, read_anchor_map=True, view_kepub=False,
extract_embedded_fonts_for_qt=False):
''' Convert an ebook file into an exploded OEB book suitable for
display in viewers/preprocessing etc. '''
@ -85,7 +85,7 @@ class EbookIterator(BookmarksMixin):
self.delete_on_exit = []
self._tdir = TemporaryDirectory('_ebook_iter')
self.base = self._tdir.__enter__()
plumber = Plumber(self.pathtoebook, self.base, self.log)
plumber = Plumber(self.pathtoebook, self.base, self.log, view_kepub=view_kepub)
plumber.setup_options()
if self.pathtoebook.lower().endswith('.opf'):
plumber.opts.dont_package = True


@ -1022,20 +1022,9 @@ def do_explode(path, dest):
mr = MobiReader(stream, default_log, None, None)
with CurrentDir(dest):
mr = Mobi8Reader(mr, default_log)
mr = Mobi8Reader(mr, default_log, for_tweak=True)
opf = os.path.abspath(mr())
obfuscated_fonts = mr.encrypted_fonts
# If there are no images then the azw3 input plugin dumps all
# binary records as .unknown images, remove them
if os.path.exists('images') and os.path.isdir('images'):
files = os.listdir('images')
unknown = [x for x in files if x.endswith('.unknown')]
if len(files) == len(unknown):
[os.remove('images/'+f) for f in files]
try:
os.remove('debug-raw.html')
except:
pass
return opf, obfuscated_fonts


@ -419,7 +419,7 @@ def merge_css(container, names, master):
# Remove charset rules
cr = [r for r in sheet.cssRules if r.type == r.CHARSET_RULE]
[sheet.remove(r) for r in cr]
[sheet.deleteRule(sheet.cssRules.index(r)) for r in cr]
for rule in sheet.cssRules:
msheet.add(rule)


@ -126,3 +126,27 @@ def link_stylesheets(container, names, sheets, remove=False, mtype='text/css'):
container.dirty(name)
return changed_names
def lead_text(top_elem, num_words=10):
''' Return the leading text contained in top_elem (including descendants)
upto a maximum of num_words words. More efficient than using
etree.tostring(method='text') as it does not have to serialize the entire
sub-tree rooted at top_elem.'''
pat = re.compile(r'\s+', flags=re.UNICODE)
words = []
def get_text(x, attr='text'):
ans = getattr(x, attr)
if ans:
words.extend(filter(None, pat.split(ans)))
stack = [(top_elem, 'text')]
while stack and len(words) < num_words:
elem, attr = stack.pop()
get_text(elem, attr)
if attr == 'text':
if elem is not top_elem:
stack.append((elem, 'tail'))
stack.extend(reversed(list((c, 'text') for c in elem.iterchildren('*'))))
return ' '.join(words[:num_words])
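
As a quick illustration of the lead_text() helper added above, here is a minimal usage sketch. The lxml import and the sample markup are assumptions made for this example only; they are not part of the commit.

from lxml import html

# Hypothetical input fragment, just to show what the helper returns.
root = html.fromstring('<div><p>The <b>quick</b> brown fox jumps over the lazy dog</p></div>')
print(lead_text(root, num_words=5))  # -> 'The quick brown fox jumps'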


@ -158,7 +158,7 @@ def _config(): # {{{
help=_('Options for the LRF ebook viewer'))
c.add_opt('internally_viewed_formats', default=['LRF', 'EPUB', 'LIT',
'MOBI', 'PRC', 'POBI', 'AZW', 'AZW3', 'HTML', 'FB2', 'PDB', 'RB',
'SNB', 'HTMLZ'], help=_(
'SNB', 'HTMLZ', 'KEPUB'], help=_(
'Formats that are viewed using the internal viewer'))
c.add_opt('column_map', default=ALL_COLUMNS,
help=_('Columns to be displayed in the book list'))
@ -854,8 +854,8 @@ def setup_gui_option_parser(parser):
help='Detach from the controlling terminal, if any (linux only)')
def detach_gui():
if islinux and not DEBUG and sys.stdout.isatty():
# We are a GUI process running in a terminal so detach from the controlling terminal
if islinux and not DEBUG:
# Detach from the controlling process.
if os.fork() != 0:
raise SystemExit(0)
os.setsid()


@ -830,6 +830,7 @@ class Boss(QObject):
if self.doing_terminal_save:
prints(tb, file=sys.stderr)
return
self.gui.action_save.setEnabled(True)
error_dialog(self.gui, _('Could not save'),
_('Saving of the book failed. Click "Show Details"'
' for more information. You can try to save a copy'


@ -28,9 +28,9 @@ class QtHighlighter(QTextDocument):
QTextDocument.__init__(self, parent)
self.l = QPlainTextDocumentLayout(self)
self.setDocumentLayout(self.l)
self.highlighter = hlclass(self)
self.highlighter = hlclass()
self.highlighter.apply_theme(get_theme())
self.highlighter.setDocument(self)
self.highlighter.set_document(self)
self.setPlainText(text)
def copy_lines(self, lo, hi, cursor):


@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from PyQt4.Qt import QTextCharFormat
from PyQt4.Qt import QTextCharFormat, QFont
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
from calibre.ebooks.oeb.polish.container import guess_type
@ -41,3 +41,9 @@ class SyntaxTextCharFormat(QTextCharFormat):
QTextCharFormat.__init__(self, *args)
self.setProperty(SYNTAX_PROPERTY, True)
def __repr__(self):
return 'SyntaxFormat(id=%s, color=%s, italic=%s, bold=%s)' % (
id(self), self.foreground().color().name(), self.fontItalic(), self.fontWeight() >= QFont.DemiBold)
__str__ = __repr__


@ -24,8 +24,8 @@ class Tag(object):
def __init__(self, start_block, tag_start, end_block, tag_end, self_closing=False):
self.start_block, self.end_block = start_block, end_block
self.start_offset, self.end_offset = tag_start.offset, tag_end.offset
tag = tag_start.name or tag_start.prefix
if tag_start.name and tag_start.prefix:
tag = tag_start.name
if tag_start.prefix:
tag = tag_start.prefix + ':' + tag
self.name = tag
self.self_closing = self_closing
@ -101,8 +101,8 @@ def find_tag_definition(block, offset):
return None, False
tag_start = boundary
closing = tag_start.closing
tag = tag_start.name or tag_start.prefix
if tag_start.name and tag_start.prefix:
tag = tag_start.name
if tag_start.prefix:
tag = tag_start.prefix + ':' + tag
return tag, closing


@ -6,45 +6,61 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from PyQt4.Qt import (QSyntaxHighlighter, QApplication, QCursor, Qt)
from collections import defaultdict
from PyQt4.Qt import (
QTextCursor, pyqtSlot, QTextBlockUserData, QTextLayout)
from ..themes import highlight_to_char_format
from calibre.gui2.tweak_book.widgets import BusyCursor
def run_loop(user_data, state_map, formats, text):
state = user_data.state
i = 0
seen_states = defaultdict(set)
while i < len(text):
orig_i = i
seen_states[i].add(state.parse)
fmt = state_map[state.parse](state, text, i, formats, user_data)
for num, f in fmt:
if num > 0:
yield i, num, f
i += num
if orig_i == i and state.parse in seen_states[i]:
# Something went wrong in the syntax highlighter
print ('Syntax highlighter returned a zero length format, parse state:', state.parse)
break
class SimpleState(object):
def __init__(self, value):
self.parse = value
__slots__ = ('parse',)
@property
def value(self):
return self.parse
def __init__(self):
self.parse = 0
def run_loop(state, state_map, formats, text):
i = 0
while i < len(text):
fmt = state_map[state.parse](state, text, i, formats)
for num, f in fmt:
yield i, num, f
i += num
def copy(self):
s = SimpleState()
s.parse = self.parse
return s
class SyntaxHighlighter(QSyntaxHighlighter):
class SimpleUserData(QTextBlockUserData):
def __init__(self):
QTextBlockUserData.__init__(self)
self.state = SimpleState()
def clear(self, state=None):
self.state = SimpleState() if state is None else state
class SyntaxHighlighter(object):
state_map = {0:lambda state, text, i, formats:[(len(text), None)]}
create_formats_func = lambda highlighter: {}
spell_attributes = ()
tag_ok_for_spell = lambda x: False
user_data_factory = SimpleUserData
def __init__(self, *args, **kwargs):
QSyntaxHighlighter.__init__(self, *args, **kwargs)
def create_state(self, num):
return SimpleState(max(0, num))
def rehighlight(self):
self.outlineexplorer_data = {}
QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
QSyntaxHighlighter.rehighlight(self)
QApplication.restoreOverrideCursor()
def __init__(self):
self.doc = None
def apply_theme(self, theme):
self.theme = {k:highlight_to_char_format(v) for k, v in theme.iteritems()}
@ -54,20 +70,93 @@ class SyntaxHighlighter(QSyntaxHighlighter):
def create_formats(self):
self.formats = self.create_formats_func()
def highlightBlock(self, text):
try:
state = self.previousBlockState()
self.setCurrentBlockUserData(None) # Ensure that any stale user data is discarded
state = self.create_state(state)
state.get_user_data, state.set_user_data = self.currentBlockUserData, self.setCurrentBlockUserData
for i, num, fmt in run_loop(state, self.state_map, self.formats, unicode(text)):
if fmt is not None:
self.setFormat(i, num, fmt)
self.setCurrentBlockState(state.value)
except:
import traceback
traceback.print_exc()
finally:
# Disabled as it causes crashes
pass # QApplication.processEvents() # Try to keep the editor responsive to user input
def set_document(self, doc):
old_doc = self.doc
if old_doc is not None:
old_doc.contentsChange.disconnect(self.reformat_blocks)
c = QTextCursor(old_doc)
c.beginEditBlock()
blk = old_doc.begin()
while blk.isValid():
blk.layout().clearAdditionalFormats()
blk = blk.next()
c.endEditBlock()
self.doc = None
if doc is not None:
self.doc = doc
doc.contentsChange.connect(self.reformat_blocks)
self.rehighlight()
def rehighlight(self):
doc = self.doc
if doc is None:
return
lb = doc.lastBlock()
with BusyCursor():
self.reformat_blocks(0, 0, lb.position() + lb.length())
def get_user_data(self, block):
ud = block.userData()
new_data = False
if ud is None:
ud = self.user_data_factory()
block.setUserData(ud)
new_data = True
return ud, new_data
@pyqtSlot(int, int, int)
def reformat_blocks(self, position, removed, added):
doc = self.doc
if doc is None:
return
last_block = doc.findBlock(position + added + (1 if removed > 0 else 0))
if not last_block.isValid():
last_block = doc.lastBlock()
end_pos = last_block.position() + last_block.length()
force_next_highlight = False
doc.contentsChange.disconnect(self.reformat_blocks)
try:
block = doc.findBlock(position)
while block.isValid() and (block.position() < end_pos or force_next_highlight):
ud, new_ud = self.get_user_data(block)
orig_state = ud.state
pblock = block.previous()
if pblock.isValid():
start_state = pblock.userData()
if start_state is None:
start_state = self.user_data_factory().state
else:
start_state = start_state.state.copy()
else:
start_state = self.user_data_factory().state
ud.clear(state=start_state) # Ensure no stale user data lingers
formats = []
for i, num, fmt in run_loop(ud, self.state_map, self.formats, unicode(block.text())):
if fmt is not None:
formats.append((i, num, fmt))
self.apply_format_changes(doc, block, formats)
force_next_highlight = new_ud or ud.state != orig_state
block = block.next()
finally:
doc.contentsChange.connect(self.reformat_blocks)
def apply_format_changes(self, doc, block, formats):
layout = block.layout()
preedit_start = layout.preeditAreaPosition()
preedit_length = layout.preeditAreaText().length()
ranges = []
R = QTextLayout.FormatRange
for i, num, fmt in formats:
# Adjust range by pre-edit text, if any
if preedit_start != 0:
if i >= preedit_start:
i += preedit_length
elif i + num >= preedit_start:
num += preedit_length
r = R()
r.start, r.length, r.format = i, num, fmt
ranges.append(r)
layout.setAdditionalFormats(ranges)
doc.markContentsDirty(block.position(), block.length())


@ -8,6 +8,8 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import re
from PyQt4.Qt import QTextBlockUserData
from calibre.gui2.tweak_book.editor import SyntaxTextCharFormat
from calibre.gui2.tweak_book.editor.syntax.base import SyntaxHighlighter
@ -118,41 +120,63 @@ content_tokens = [(re.compile(k), v, n) for k, v, n in [
]]
class State(object):
NORMAL = 0
IN_COMMENT_NORMAL = 1
IN_SQS = 2
IN_DQS = 3
IN_CONTENT = 4
IN_COMMENT_CONTENT = 5
NORMAL = 0
IN_COMMENT_NORMAL = 1
IN_SQS = 2
IN_DQS = 3
IN_CONTENT = 4
IN_COMMENT_CONTENT = 5
class CSSState(object):
def __init__(self, num):
self.parse = num & 0b1111
self.blocks = num >> 4
__slots__ = ('parse', 'blocks')
@property
def value(self):
return ((self.parse & 0b1111) | (max(0, self.blocks) << 4))
def __init__(self):
self.parse = NORMAL
self.blocks = 0
def copy(self):
s = CSSState()
s.parse, s.blocks = self.parse, self.blocks
return s
def normal(state, text, i, formats):
def __eq__(self, other):
return self.parse == getattr(other, 'parse', -1) and \
self.blocks == getattr(other, 'blocks', -1)
def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
return "CSSState(parse=%s, blocks=%s)" % (self.parse, self.blocks)
__str__ = __repr__
class CSSUserData(QTextBlockUserData):
def __init__(self):
QTextBlockUserData.__init__(self)
self.state = CSSState()
def clear(self, state=None):
self.state = CSSState() if state is None else state
def normal(state, text, i, formats, user_data):
' The normal state (outside content blocks {})'
m = space_pat.match(text, i)
if m is not None:
return [(len(m.group()), None)]
cdo = cdo_pat.match(text, i)
if cdo is not None:
state.parse = State.IN_COMMENT_NORMAL
state.parse = IN_COMMENT_NORMAL
return [(len(cdo.group()), formats['comment'])]
if text[i] == '"':
state.parse = State.IN_DQS
state.parse = IN_DQS
return [(1, formats['string'])]
if text[i] == "'":
state.parse = State.IN_SQS
state.parse = IN_SQS
return [(1, formats['string'])]
if text[i] == '{':
state.parse = State.IN_CONTENT
state.parse = IN_CONTENT
state.blocks += 1
return [(1, formats['bracket'])]
for token, fmt, name in sheet_tokens:
@ -162,24 +186,24 @@ def normal(state, text, i, formats):
return [(len(text) - i, formats['unknown-normal'])]
def content(state, text, i, formats):
def content(state, text, i, formats, user_data):
' Inside content blocks '
m = space_pat.match(text, i)
if m is not None:
return [(len(m.group()), None)]
cdo = cdo_pat.match(text, i)
if cdo is not None:
state.parse = State.IN_COMMENT_CONTENT
state.parse = IN_COMMENT_CONTENT
return [(len(cdo.group()), formats['comment'])]
if text[i] == '"':
state.parse = State.IN_DQS
state.parse = IN_DQS
return [(1, formats['string'])]
if text[i] == "'":
state.parse = State.IN_SQS
state.parse = IN_SQS
return [(1, formats['string'])]
if text[i] == '}':
state.blocks -= 1
state.parse = State.NORMAL if state.blocks < 1 else State.IN_CONTENT
state.parse = NORMAL if state.blocks < 1 else IN_CONTENT
return [(1, formats['bracket'])]
if text[i] == '{':
state.blocks += 1
@ -191,34 +215,34 @@ def content(state, text, i, formats):
return [(len(text) - i, formats['unknown-normal'])]
def comment(state, text, i, formats):
def comment(state, text, i, formats, user_data):
' Inside a comment '
pos = text.find('*/', i)
if pos == -1:
return [(len(text), formats['comment'])]
state.parse = State.NORMAL if state.parse == State.IN_COMMENT_NORMAL else State.IN_CONTENT
state.parse = NORMAL if state.parse == IN_COMMENT_NORMAL else IN_CONTENT
return [(pos - i + 2, formats['comment'])]
def in_string(state, text, i, formats):
def in_string(state, text, i, formats, user_data):
'Inside a string'
q = '"' if state.parse == State.IN_DQS else "'"
q = '"' if state.parse == IN_DQS else "'"
pos = text.find(q, i)
if pos == -1:
if text[-1] == '\\':
# Multi-line string
return [(len(text) - i, formats['string'])]
state.parse = (State.NORMAL if state.blocks < 1 else State.IN_CONTENT)
state.parse = (NORMAL if state.blocks < 1 else IN_CONTENT)
return [(len(text) - i, formats['unterminated-string'])]
state.parse = (State.NORMAL if state.blocks < 1 else State.IN_CONTENT)
state.parse = (NORMAL if state.blocks < 1 else IN_CONTENT)
return [(pos - i + len(q), formats['string'])]
state_map = {
State.NORMAL:normal,
State.IN_COMMENT_NORMAL: comment,
State.IN_COMMENT_CONTENT: comment,
State.IN_SQS: in_string,
State.IN_DQS: in_string,
State.IN_CONTENT: content,
NORMAL:normal,
IN_COMMENT_NORMAL: comment,
IN_COMMENT_CONTENT: comment,
IN_SQS: in_string,
IN_DQS: in_string,
IN_CONTENT: content,
}
def create_formats(highlighter):
@ -252,9 +276,8 @@ class CSSHighlighter(SyntaxHighlighter):
state_map = state_map
create_formats_func = create_formats
user_data_factory = CSSUserData
def create_state(self, num):
return State(max(0, num))
if __name__ == '__main__':
from calibre.gui2.tweak_book.editor.widget import launch_editor


@ -15,7 +15,8 @@ from PyQt4.Qt import QFont, QTextBlockUserData
from calibre.ebooks.oeb.polish.spell import html_spell_tags, xml_spell_tags
from calibre.gui2.tweak_book.editor import SyntaxTextCharFormat
from calibre.gui2.tweak_book.editor.syntax.base import SyntaxHighlighter, run_loop
from calibre.gui2.tweak_book.editor.syntax.css import create_formats as create_css_formats, state_map as css_state_map, State as CSSState
from calibre.gui2.tweak_book.editor.syntax.css import (
create_formats as create_css_formats, state_map as css_state_map, CSSState, CSSUserData)
from html5lib.constants import cdataElements, rcdataElements
@ -51,41 +52,33 @@ Attr = namedtuple('Attr', 'offset type data')
class Tag(object):
__slots__ = ('name', 'bold', 'italic', 'lang', 'hash')
__slots__ = ('name', 'bold', 'italic', 'lang')
def __init__(self, name, bold=None, italic=None):
self.name = name
self.bold = name in bold_tags if bold is None else bold
self.italic = name in italic_tags if italic is None else italic
self.lang = None
self.hash = 0
def __hash__(self):
return self.hash
def __eq__(self, other):
return self.name == getattr(other, 'name', None) and self.lang == getattr(other, 'lang', False)
def copy(self):
ans = Tag(self.name, self.bold, self.italic)
ans.lang, ans.hash = self.lang, self.hash
ans.lang = self.lang
return ans
def update_hash(self):
self.hash = hash((self.name, self.lang))
class State(object):
__slots__ = ('tag_being_defined', 'tags', 'is_bold', 'is_italic',
'current_lang', 'parse', 'get_user_data', 'set_user_data',
'css_formats', 'stack', 'sub_parser_state', 'default_lang',
'attribute_name',)
__slots__ = (
'tag_being_defined', 'tags', 'is_bold', 'is_italic', 'current_lang',
'parse', 'css_formats', 'sub_parser_state', 'default_lang', 'attribute_name',)
def __init__(self):
self.tags = []
self.is_bold = self.is_italic = False
self.tag_being_defined = self.current_lang = self.get_user_data = self.set_user_data = \
self.css_formats = self.stack = self.sub_parser_state = self.default_lang = self.attribute_name = None
self.tag_being_defined = self.current_lang = self.css_formats = \
self.sub_parser_state = self.default_lang = self.attribute_name = None
self.parse = NORMAL
def copy(self):
@ -95,17 +88,10 @@ class State(object):
self.tags = [x.copy() for x in self.tags]
if self.tag_being_defined is not None:
self.tag_being_defined = self.tag_being_defined.copy()
if self.sub_parser_state is not None:
ans.sub_parser_state = self.sub_parser_state.copy()
return ans
@property
def value(self):
if self.tag_being_defined is not None:
self.tag_being_defined.update_hash()
return self.stack.index_for(self)
def __hash__(self):
return hash((self.parse, self.sub_parser_state, self.tag_being_defined, self.attribute_name, tuple(self.tags)))
def __eq__(self, other):
return (
self.parse == getattr(other, 'parse', -1) and
@ -115,6 +101,9 @@ class State(object):
self.tags == getattr(other, 'tags', None)
)
def __ne__(self, other):
return not self.__eq__(other)
def open_tag(self, name):
self.tag_being_defined = Tag(name)
@ -128,7 +117,7 @@ class State(object):
return # No matching open tag found, ignore the closing tag
# Remove all tags up to the matching open tag
self.tags = self.tags[:-len(removed_tags)]
self.sub_parser_state = 0
self.sub_parser_state = None
# Check if we should still be bold or italic
if self.is_bold:
self.is_bold = False
@ -154,71 +143,41 @@ class State(object):
if self.tag_being_defined is None:
return
t, self.tag_being_defined = self.tag_being_defined, None
t.update_hash()
self.tags.append(t)
self.is_bold = self.is_bold or t.bold
self.is_italic = self.is_italic or t.italic
self.current_lang = t.lang or self.current_lang
if t.name in cdata_tags:
self.parse = CSS if t.name == 'style' else CDATA
self.sub_parser_state = 0
self.sub_parser_state = None
def __repr__(self):
return '<State %s is_bold=%s is_italic=%s current_lang=%s>' % (
'->'.join(x.name for x in self.tags), self.is_bold, self.is_italic, self.current_lang)
__str__ = __repr__
class Stack(object):
''' Maintain an efficient bi-directional mapping between states and index
numbers. Ensures that if state1 == state2 then their corresponding index
numbers are the same and vice versa. This is needed so that the state number
passed to Qt does not change unless the underlying state has actually
changed. '''
def __init__(self):
self.index_map = []
self.state_map = {}
def index_for(self, state):
ans = self.state_map.get(state, None)
if ans is None:
self.state_map[state] = ans = len(self.index_map)
self.index_map.append(state)
return ans
def state_for(self, index):
try:
return self.index_map[index]
except IndexError:
return None
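
To make the docstring above concrete, here is a small, hypothetical usage of such a bi-directional mapping; the states are written as plain tuples only for brevity (the real highlighter stores State instances):

stack = Stack()
a = ('IN_OPENING_TAG', ('p',))
b = ('IN_OPENING_TAG', ('p',))  # equal to a, so it must reuse a's index
assert stack.index_for(a) == stack.index_for(b) == 0
assert stack.index_for(('NORMAL', ())) == 1   # a new state gets a fresh index
assert stack.state_for(1) == ('NORMAL', ())   # and can be recovered from it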
class HTMLUserData(QTextBlockUserData):
def __init__(self):
QTextBlockUserData.__init__(self)
self.tags = []
self.attributes = []
self.state = State()
self.css_user_data = None
def add_tag_data(state, tag):
ud = q = state.get_user_data()
if ud is None:
ud = HTMLUserData()
ud.tags.append(tag)
if q is None:
state.set_user_data(ud)
def clear(self, state=None):
self.tags, self.attributes = [], []
self.state = State() if state is None else state
def add_tag_data(user_data, tag):
user_data.tags.append(tag)
ATTR_NAME, ATTR_VALUE, ATTR_START, ATTR_END = object(), object(), object(), object()
def add_attr_data(state, data_type, data, offset):
ud = q = state.get_user_data()
if ud is None:
ud = HTMLUserData()
ud.attributes.append(Attr(offset, data_type, data))
if q is None:
state.set_user_data(ud)
def add_attr_data(user_data, data_type, data, offset):
user_data.attributes.append(Attr(offset, data_type, data))
def css(state, text, i, formats):
def css(state, text, i, formats, user_data):
' Inside a <style> tag '
pat = cdata_close_pats['style']
m = pat.search(text, i)
@ -227,18 +186,18 @@ def css(state, text, i, formats):
else:
css_text = text[i:m.start()]
ans = []
css_state = CSSState(state.sub_parser_state)
for j, num, fmt in run_loop(css_state, css_state_map, state.css_formats, css_text):
css_user_data = user_data.css_user_data = user_data.css_user_data or CSSUserData()
state.sub_parser_state = css_user_data.state = state.sub_parser_state or CSSState()
for j, num, fmt in run_loop(css_user_data, css_state_map, formats['css_sub_formats'], css_text):
ans.append((num, fmt))
state.sub_parser_state = css_state.value
if m is not None:
state.sub_parser_state = 0
state.sub_parser_state = None
state.parse = IN_CLOSING_TAG
add_tag_data(state, TagStart(m.start(), 'style', '', True, True))
add_tag_data(user_data, TagStart(m.start(), '', 'style', True, True))
ans.extend([(2, formats['end_tag']), (len(m.group()) - 2, formats['tag_name'])])
return ans
def cdata(state, text, i, formats):
def cdata(state, text, i, formats, user_data):
'CDATA inside tags like <title> or <style>'
name = state.tags[-1].name
pat = cdata_close_pats[name]
@ -248,7 +207,7 @@ def cdata(state, text, i, formats):
return [(len(text) - i, fmt)]
state.parse = IN_CLOSING_TAG
num = m.start() - i
add_tag_data(state, TagStart(m.start(), name, '', True, True))
add_tag_data(user_data, TagStart(m.start(), '', name, True, True))
return [(num, fmt), (2, formats['end_tag']), (len(m.group()) - 2, formats['tag_name'])]
def mark_nbsp(state, text, nbsp_format):
@ -268,7 +227,7 @@ def mark_nbsp(state, text, nbsp_format):
ans = [(len(text), fmt)]
return ans
def normal(state, text, i, formats):
def normal(state, text, i, formats, user_data):
' The normal state in between tags '
ch = text[i]
if ch == '<':
@ -288,18 +247,23 @@ def normal(state, text, i, formats):
if m is None:
return [(1, formats['<'])]
name = m.group()
closing = name.startswith('/')
state.parse = IN_CLOSING_TAG if closing else IN_OPENING_TAG
ans = [(2 if closing else 1, formats['end_tag' if closing else 'tag'])]
tname = m.group()
closing = tname.startswith('/')
if closing:
name = name[1:]
prefix, name = name.partition(':')[0::2]
if prefix and name:
tname = tname[1:]
if ':' in tname:
prefix, name = tname.split(':', 1)
else:
prefix, name = '', tname
if prefix and not name:
return [(len(m.group()) + 1, formats['only-prefix'])]
ans = [(2 if closing else 1, formats['end_tag' if closing else 'tag'])]
if prefix:
ans.append((len(prefix)+1, formats['nsprefix']))
ans.append((len(name or prefix), formats['tag_name']))
add_tag_data(state, TagStart(i, prefix, name, closing, True))
(state.close_tag if closing else state.open_tag)(name or prefix)
ans.append((len(name), formats['tag_name']))
state.parse = IN_CLOSING_TAG if closing else IN_OPENING_TAG
add_tag_data(user_data, TagStart(i, prefix, name, closing, True))
(state.close_tag if closing else state.open_tag)(name)
return ans
if ch == '&':
@ -314,7 +278,7 @@ def normal(state, text, i, formats):
t = normal_pat.search(text, i).group()
return mark_nbsp(state, t, formats['nbsp'])
def opening_tag(cdata_tags, state, text, i, formats):
def opening_tag(cdata_tags, state, text, i, formats, user_data):
'An opening tag, like <a>'
ch = text[i]
if ch in space_chars:
@ -325,24 +289,26 @@ def opening_tag(cdata_tags, state, text, i, formats):
return [(1, formats['/'])]
state.parse = NORMAL
l = len(m.group())
add_tag_data(state, TagEnd(i + l - 1, True, False))
add_tag_data(user_data, TagEnd(i + l - 1, True, False))
return [(l, formats['tag'])]
if ch == '>':
state.finish_opening_tag(cdata_tags)
add_tag_data(state, TagEnd(i, False, False))
add_tag_data(user_data, TagEnd(i, False, False))
return [(1, formats['tag'])]
m = attribute_name_pat.match(text, i)
if m is None:
return [(1, formats['?'])]
state.parse = ATTRIBUTE_NAME
attrname = state.attribute_name = m.group()
add_attr_data(state, ATTR_NAME, attrname, m.start())
add_attr_data(user_data, ATTR_NAME, attrname, m.start())
prefix, name = attrname.partition(':')[0::2]
if not prefix and not name:
return [(len(attrname), formats['?'])]
if prefix and name:
return [(len(prefix) + 1, formats['nsprefix']), (len(name), formats['attr'])]
return [(len(prefix), formats['attr'])]
def attribute_name(state, text, i, formats):
def attribute_name(state, text, i, formats, user_data):
' After attribute name '
ch = text[i]
if ch in space_chars:
@ -354,7 +320,7 @@ def attribute_name(state, text, i, formats):
state.parse = IN_OPENING_TAG
return [(0, None)]
def attribute_value(state, text, i, formats):
def attribute_value(state, text, i, formats, user_data):
' After attribute = '
ch = text[i]
if ch in space_chars:
@ -368,20 +334,20 @@ def attribute_value(state, text, i, formats):
return [(1, formats['no-attr-value'])]
return [(len(m.group()), formats['string'])]
def quoted_val(state, text, i, formats):
def quoted_val(state, text, i, formats, user_data):
' A quoted attribute value '
quote = '"' if state.parse is DQ_VAL else "'"
add_attr_data(state, ATTR_VALUE, ATTR_START, i)
add_attr_data(user_data, ATTR_VALUE, ATTR_START, i)
pos = text.find(quote, i)
if pos == -1:
num = len(text) - i
else:
num = pos - i + 1
state.parse = IN_OPENING_TAG
add_attr_data(state, ATTR_VALUE, ATTR_END, i + num)
add_attr_data(user_data, ATTR_VALUE, ATTR_END, i + num)
return [(num, formats['string'])]
def closing_tag(state, text, i, formats):
def closing_tag(state, text, i, formats, user_data):
' A closing tag like </a> '
ch = text[i]
if ch in space_chars:
@ -394,10 +360,10 @@ def closing_tag(state, text, i, formats):
ans = [(1, formats['end_tag'])]
if num > 1:
ans.insert(0, (num - 1, formats['bad-closing']))
add_tag_data(state, TagEnd(pos, False, False))
add_tag_data(user_data, TagEnd(pos, False, False))
return ans
def in_comment(state, text, i, formats):
def in_comment(state, text, i, formats, user_data):
' Comment, processing instruction or doctype '
end = {IN_COMMENT:'-->', IN_PI:'?>'}.get(state.parse, '>')
pos = text.find(end, i)
@ -428,7 +394,7 @@ for x in (SQ_VAL, DQ_VAL):
xml_state_map = state_map.copy()
xml_state_map[IN_OPENING_TAG] = partial(opening_tag, set())
def create_formats(highlighter):
def create_formats(highlighter, add_css=True):
t = highlighter.theme
formats = {
'tag': t['Function'],
@ -452,11 +418,14 @@ def create_formats(highlighter):
'?': _('Unknown character'),
'bad-closing': _('A closing tag must contain only the tag name and nothing else'),
'no-attr-value': _('Expecting an attribute value'),
'only-prefix': _('A tag name cannot end with a colon'),
}.iteritems():
f = formats[name] = SyntaxTextCharFormat(formats['error'])
f.setToolTip(msg)
f = formats['title'] = SyntaxTextCharFormat()
f.setFontWeight(QFont.Bold)
if add_css:
formats['css_sub_formats'] = create_css_formats(highlighter)
return formats
@ -465,18 +434,7 @@ class HTMLHighlighter(SyntaxHighlighter):
state_map = state_map
create_formats_func = create_formats
spell_attributes = ('alt', 'title')
def create_formats(self):
super(HTMLHighlighter, self).create_formats()
self.default_state = State()
self.default_state.css_formats = create_css_formats(self)
self.default_state.stack = Stack()
def create_state(self, val):
if val < 0:
return self.default_state.copy()
ans = self.default_state.stack.state_for(val) or self.default_state
return ans.copy()
user_data_factory = HTMLUserData
def tag_ok_for_spell(self, name):
return name not in html_spell_tags
@ -486,6 +444,9 @@ class XMLHighlighter(HTMLHighlighter):
state_map = xml_state_map
spell_attributes = ('opf:file-as',)
def create_formats_func(self):
return create_formats(self, add_css=False)
def tag_ok_for_spell(self, name):
return name in xml_spell_tags
@ -507,7 +468,7 @@ if __name__ == '__main__':
</style>
<style type="text/css">p.small { font-size: x-small; color:gray }</style>
</head id="invalid attribute on closing tag">
<body>
<body><p:
<!-- The start of the actual body text -->
<h1>A heading that should appear in bold, with an <i>italic</i> word</h1>
<p>Some text with inline formatting, that is syntax highlighted. A <b>bold</b> word, and an <em>italic</em> word. \

View File

@ -14,7 +14,7 @@ import regex
from PyQt4.Qt import (
QPlainTextEdit, QFontDatabase, QToolTip, QPalette, QFont, QKeySequence,
QTextEdit, QTextFormat, QWidget, QSize, QPainter, Qt, QRect, pyqtSlot,
QApplication, QMimeData, QColor, QColorDialog)
QApplication, QMimeData, QColor, QColorDialog, QTimer)
from calibre import prepare_string_for_xml, xml_entity_to_unicode
from calibre.gui2.tweak_book import tprefs, TOP
@ -135,7 +135,9 @@ class TextEdit(PlainTextEdit):
self.smarts = NullSmarts(self)
self.current_cursor_line = None
self.current_search_mark = None
self.highlighter = SyntaxHighlighter(self)
self.smarts_highlight_timer = t = QTimer()
t.setInterval(750), t.setSingleShot(True), t.timeout.connect(self.update_extra_selections)
self.highlighter = SyntaxHighlighter()
self.line_number_area = LineNumbers(self)
self.apply_settings()
self.setMouseTracking(True)
@ -206,9 +208,9 @@ class TextEdit(PlainTextEdit):
def load_text(self, text, syntax='html', process_template=False):
self.syntax = syntax
self.highlighter = get_highlighter(syntax)(self)
self.highlighter = get_highlighter(syntax)()
self.highlighter.apply_theme(self.theme)
self.highlighter.setDocument(self.document())
self.highlighter.set_document(self.document())
sclass = {'html':HTMLSmarts, 'xml':HTMLSmarts}.get(syntax, None)
if sclass is not None:
self.smarts = sclass(self)
@ -252,13 +254,16 @@ class TextEdit(PlainTextEdit):
self.setTextCursor(c)
self.ensureCursorVisible()
def update_extra_selections(self):
def update_extra_selections(self, instant=True):
sel = []
if self.current_cursor_line is not None:
sel.append(self.current_cursor_line)
if self.current_search_mark is not None:
sel.append(self.current_search_mark)
if instant:
sel.extend(self.smarts.get_extra_selections(self))
else:
self.smarts_highlight_timer.start()
self.setExtraSelections(sel)
# Search and replace {{{
@ -456,7 +461,7 @@ class TextEdit(PlainTextEdit):
sel.cursor = self.textCursor()
sel.cursor.clearSelection()
self.current_cursor_line = sel
self.update_extra_selections()
self.update_extra_selections(instant=False)
# Update the cursor line's line number in the line number area
try:
self.line_number_area.update(0, self.last_current_lnum[0], self.line_number_area.width(), self.last_current_lnum[1])
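
The single-shot timer created in __init__ and the instant flag above act as a debounce: every cursor move merely restarts the 750 ms timer, so the comparatively expensive smarts highlighting only runs once the cursor has been still for that long. A minimal sketch of the same pattern, with made-up names rather than calibre's API:

from PyQt4.Qt import QObject, QTimer

class Debouncer(QObject):
    'Run callback only after trigger() has been quiet for `interval` ms'

    def __init__(self, callback, interval=750, parent=None):
        QObject.__init__(self, parent)
        self.timer = t = QTimer(self)
        t.setInterval(interval), t.setSingleShot(True)
        t.timeout.connect(callback)

    def trigger(self):
        # Restarting a running single-shot timer postpones its timeout
        self.timer.start()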

View File

@ -393,9 +393,6 @@ class FileList(QTreeWidget):
for name, linear in container.spine_names:
processed[name] = create_item(name, linear=linear)
all_files = list(container.manifest_type_map.iteritems())
all_files.append((guess_type('a.opf'), [container.opf_name]))
for name in container.name_path_map:
if name in processed:
continue

View File

@ -84,6 +84,11 @@ class WhereBox(QComboBox):
<dd>Search only within the marked text in the currently opened file. You can mark text using the Search menu.</dd>
</dl>'''))
self.emphasize = emphasize
self.ofont = QFont(self.font())
if emphasize:
f = self.emph_font = QFont(self.ofont)
f.setBold(True), f.setItalic(True)
self.setFont(f)
@dynamic_property
def where(self):
@ -94,16 +99,16 @@ class WhereBox(QComboBox):
self.setCurrentIndex({v:k for k, v in wm.iteritems()}[val])
return property(fget=fget, fset=fset)
def paintEvent(self, ev):
def showPopup(self):
# We do it like this so that the popup uses a normal font
if self.emphasize:
ofont = self.font()
f = QFont(ofont)
f.setBold(True), f.setItalic(True)
self.setFont(f)
QComboBox.paintEvent(self, ev)
self.setFont(self.ofont)
QComboBox.showPopup(self)
def hidePopup(self):
if self.emphasize:
self.setFont(ofont)
self.setFont(self.emph_font)
QComboBox.hidePopup(self)
class DirectionBox(QComboBox):
@ -766,7 +771,7 @@ class SavedSearches(Dialog):
def err():
error_dialog(self, _('Invalid data'), _(
'The file %s does not contain valid saved searches') % path, show=True)
if not isinstance(obj, dict) or not 'version' in obj or not 'searches' in obj or obj['version'] not in (1,):
if not isinstance(obj, dict) or 'version' not in obj or 'searches' not in obj or obj['version'] not in (1,):
return err()
searches = []
for item in obj['searches']:

View File

@ -16,7 +16,7 @@ from PyQt4.Qt import (
QStackedLayout, QLabel, QVBoxLayout, QWidget, QPushButton, QIcon, QMenu,
QDialogButtonBox, QLineEdit, QDialog, QToolButton, QFormLayout, QHBoxLayout,
pyqtSignal, QAbstractTableModel, QModelIndex, QTimer, QTableView, QCheckBox,
QComboBox, QListWidget, QListWidgetItem, QInputDialog, QPlainTextEdit)
QComboBox, QListWidget, QListWidgetItem, QInputDialog, QPlainTextEdit, QKeySequence)
from calibre.constants import __appname__, plugins
from calibre.ebooks.oeb.polish.spell import replace_word, get_all_words, merge_locations
@ -122,7 +122,40 @@ class AddDictionary(QDialog): # {{{
QDialog.accept(self)
# }}}
class ManageUserDictionaries(Dialog): # {{{
# User Dictionaries {{{
class UserWordList(QListWidget):
def __init__(self, parent=None):
QListWidget.__init__(self, parent)
def contextMenuEvent(self, ev):
m = QMenu(self)
m.addAction(_('Copy selected words to clipboard'), self.copy_to_clipboard)
m.addAction(_('Select all words'), self.select_all)
m.exec_(ev.globalPos())
def select_all(self):
for item in (self.item(i) for i in xrange(self.count())):
item.setSelected(True)
def copy_to_clipboard(self):
words = []
for item in (self.item(i) for i in xrange(self.count())):
if item.isSelected():
words.append(item.data(Qt.UserRole).toPyObject()[0])
if words:
QApplication.clipboard().setText('\n'.join(words))
def keyPressEvent(self, ev):
if ev == QKeySequence.Copy:
self.copy_to_clipboard()
ev.accept()
return
return QListWidget.keyPressEvent(self, ev)
class ManageUserDictionaries(Dialog):
def __init__(self, parent=None):
self.dictionaries_changed = False
@ -162,7 +195,7 @@ class ManageUserDictionaries(Dialog): # {{{
l.addWidget(a)
self.la = la = QLabel(_('Words in this dictionary:'))
l.addWidget(la)
self.words = w = QListWidget(self)
self.words = w = UserWordList(self)
w.setSelectionMode(w.ExtendedSelection)
l.addWidget(w)
self.add_word_button = b = QPushButton(_('&Add word'), self)
@ -586,6 +619,11 @@ class WordsModel(QAbstractTableModel):
elif role == Qt.InitialSortOrderRole:
return Qt.DescendingOrder if section == 1 else Qt.AscendingOrder
def misspelled_text(self, w):
if self.spell_map[w]:
return _('Ignored') if dictionaries.is_word_ignored(*w) else ''
return ''
def data(self, index, role=Qt.DisplayRole):
try:
word, locale = self.items[index.row()]
@ -604,7 +642,7 @@ class WordsModel(QAbstractTableModel):
pl = '%s (%s)' % (pl, countrycode)
return pl
if col == 3:
return '' if self.spell_map[(word, locale)] else ''
return self.misspelled_text((word, locale))
if role == Qt.TextAlignmentRole:
return Qt.AlignVCenter | (Qt.AlignLeft if index.column() == 0 else Qt.AlignHCenter)
@ -635,7 +673,7 @@ class WordsModel(QAbstractTableModel):
locale = w[1]
return (calibre_langcode_to_name(locale.langcode), locale.countrycode)
else:
key = self.spell_map.get
key = self.misspelled_text
return key
def do_sort(self):
@ -762,6 +800,10 @@ class WordsView(QTableView):
self.verticalHeader().close()
def keyPressEvent(self, ev):
if ev == QKeySequence.Copy:
self.copy_to_clipboard()
ev.accept()
return
ret = QTableView.keyPressEvent(self, ev)
if ev.key() in (Qt.Key_PageUp, Qt.Key_PageDown, Qt.Key_Up, Qt.Key_Down):
idx = self.currentIndex()
@ -794,9 +836,19 @@ class WordsView(QTableView):
a.setMenu(am)
for dic in sorted(dictionaries.active_user_dictionaries, key=lambda x:sort_key(x.name)):
am.addAction(dic.name, partial(self.add_all.emit, dic.name))
m.addSeparator()
m.addAction(_('Copy selected words to clipboard'), self.copy_to_clipboard)
m.exec_(ev.globalPos())
def copy_to_clipboard(self):
rows = {i.row() for i in self.selectedIndexes()}
words = {self.model().word_for_row(r) for r in rows}
words.discard(None)
words = sorted({w[0] for w in words}, key=sort_key)
if words:
QApplication.clipboard().setText('\n'.join(words))
class SpellCheck(Dialog):
work_finished = pyqtSignal(object, object)
@ -1202,5 +1254,5 @@ def find_next(word, locations, current_editor, current_editor_name,
if __name__ == '__main__':
app = QApplication([])
dictionaries.initialize()
SpellCheck.test()
ManageUserDictionaries.test()
del app

View File

@ -18,9 +18,10 @@ from PyQt4.Qt import (
QListView, QTextDocument, QSize, QComboBox, QFrame, QCursor)
from calibre import prepare_string_for_xml
from calibre.ebooks.oeb.polish.utils import lead_text
from calibre.gui2 import error_dialog, choose_files, choose_save_file, NONE, info_dialog
from calibre.gui2.tweak_book import tprefs
from calibre.utils.icu import primary_sort_key, sort_key
from calibre.utils.icu import primary_sort_key, sort_key, primary_contains
from calibre.utils.matcher import get_char, Matcher
from calibre.gui2.complete2 import EditWithComplete
@ -568,11 +569,12 @@ class NamesModel(QAbstractListModel):
if text == name:
return i
def create_filterable_names_list(names, filter_text=None, parent=None):
def create_filterable_names_list(names, filter_text=None, parent=None, model=NamesModel):
nl = QListView(parent)
nl.m = m = NamesModel(names, parent=nl)
nl.m = m = model(names, parent=nl)
m.filtered.connect(lambda all_items: nl.scrollTo(m.index(0)))
nl.setModel(m)
if model is NamesModel:
nl.d = NamesDelegate(nl)
nl.setItemDelegate(nl.d)
f = QLineEdit(parent)
@ -583,6 +585,39 @@ def create_filterable_names_list(names, filter_text=None, parent=None):
# }}}
# Insert Link {{{
class AnchorsModel(QAbstractListModel):
filtered = pyqtSignal(object)
def __init__(self, names, parent=None):
self.items = []
self.names = []
QAbstractListModel.__init__(self, parent=parent)
def rowCount(self, parent=ROOT):
return len(self.items)
def data(self, index, role):
if role == Qt.UserRole:
return self.items[index.row()]
if role == Qt.DisplayRole:
return '\n'.join(self.items[index.row()])
if role == Qt.ToolTipRole:
text, frag = self.items[index.row()]
return _('Anchor: %s\nLeading text: %s') % (frag, text)
def set_names(self, names):
self.names = names
self.filter('')
def filter(self, query):
query = unicode(query or '')
self.beginResetModel()
self.items = [x for x in self.names if primary_contains(query, x[0]) or primary_contains(query, x[1])]
self.endResetModel()
self.filtered.emit(not bool(query))
class InsertLink(Dialog):
def __init__(self, container, source_name, initial_text=None, parent=None):
@ -612,7 +647,8 @@ class InsertLink(Dialog):
fnl.addWidget(la), fnl.addWidget(f), fnl.addWidget(fn)
h.addLayout(fnl), h.setStretch(0, 2)
fn, f = create_filterable_names_list([], filter_text=_('Filter locations'), parent=self)
fn, f = create_filterable_names_list([], filter_text=_('Filter locations'), parent=self, model=AnchorsModel)
fn.setSpacing(5)
self.anchor_names, self.anchor_names_filter = fn, f
fn.selectionModel().selectionChanged.connect(self.update_target)
fn.doubleClicked.connect(self.accept, type=Qt.QueuedConnection)
@ -648,8 +684,12 @@ class InsertLink(Dialog):
if name not in self.anchor_cache:
from calibre.ebooks.oeb.base import XHTML_NS
root = self.container.parsed(name)
self.anchor_cache[name] = sorted(
(set(root.xpath('//*/@id')) | set(root.xpath('//h:a/@name', namespaces={'h':XHTML_NS}))) - {''}, key=primary_sort_key)
ac = self.anchor_cache[name] = []
for item in set(root.xpath('//*[@id]')) | set(root.xpath('//h:a[@name]', namespaces={'h':XHTML_NS})):
frag = item.get('id', None) or item.get('name')
text = lead_text(item, num_words=4)
ac.append((text, frag))
ac.sort(key=lambda (text, frag): primary_sort_key(text))
self.anchor_names.model().set_names(self.anchor_cache[name])
self.update_target()
@ -665,7 +705,7 @@ class InsertLink(Dialog):
frag = ''
rows = list(self.anchor_names.selectionModel().selectedRows())
if rows:
anchor = self.anchor_names.model().data(rows[0], Qt.UserRole).toPyObject()[0]
anchor = self.anchor_names.model().data(rows[0], Qt.UserRole)[1]
if anchor:
frag = '#' + anchor
href += frag
@ -886,4 +926,4 @@ class InsertSemantics(Dialog):
if __name__ == '__main__':
app = QApplication([])
InsertTag.test()
InsertLink.test()

View File

@ -1017,7 +1017,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.iterator = EbookIterator(pathtoebook)
self.open_progress_indicator(_('Loading ebook...'))
worker = Worker(target=partial(self.iterator.__enter__,
extract_embedded_fonts_for_qt=True))
extract_embedded_fonts_for_qt=True, view_kepub=True))
worker.start()
while worker.isAlive():
worker.join(0.1)

View File

@ -929,7 +929,7 @@ Name=E-book Viewer
GenericName=Viewer for E-books
Comment=Viewer for E-books in all the major formats
TryExec=ebook-viewer
Exec=ebook-viewer %f
Exec=ebook-viewer --detach %f
Icon=calibre-viewer
Categories=Graphics;Viewer;
'''
@ -942,7 +942,7 @@ Name=Edit E-book
GenericName=Edit E-books
Comment=Edit e-books in various formats
TryExec=ebook-edit
Exec=ebook-edit %f
Exec=ebook-edit --detach %f
Icon=calibre-ebook-edit
Categories=Office;
'''
@ -955,7 +955,7 @@ Name=calibre
GenericName=E-book library management
Comment=E-book library management: Convert, view, share, catalogue all your e-books
TryExec=calibre
Exec=calibre %F
Exec=calibre --detach %F
Icon=calibre-gui
Categories=Office;
'''

View File

@ -101,7 +101,10 @@ def custom_dictionaries(reread=False):
return _custom
default_en_locale = 'en-US'
ul = parse_lang_code(get_system_locale() or 'en-US')
try:
ul = parse_lang_code(get_system_locale() or 'en-US')
except ValueError:
ul = None
if ul is not None and ul.langcode == 'eng' and ul.countrycode in 'GB BS BZ GH IE IN JM NZ TT'.split():
default_en_locale = 'en-' + ul.countrycode
default_preferred_locales = {'eng':default_en_locale, 'deu':'de-DE', 'spa':'es-ES', 'fra':'fr-FR'}

View File

@ -68,6 +68,18 @@ def import_from_libreoffice_source_tree(source_path):
if want_locales:
raise Exception('Failed to find dictionaries for some wanted locales: %s' % want_locales)
def fill_country_code(x):
return {'lt':'lt_LT'}.get(x, x)
def uniq(vals, kmap=lambda x:x):
''' Remove all duplicates from vals, while preserving order. kmap must be a
callable that returns a hashable value for every item in vals '''
vals = vals or ()
lvals = (kmap(x) for x in vals)
seen = set()
seen_add = seen.add
return tuple(x for x, k in zip(vals, lvals) if k not in seen and not seen_add(k))
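
For illustration, two hypothetical calls to the helper above (the locale strings are made up):

print(uniq(['lt_LT', 'lt_LT', 'en_GB']))  # -> ('lt_LT', 'en_GB')
print(uniq(['En-US', 'en_us', 'de_DE'], kmap=lambda x: x.lower().replace('-', '_')))
# -> ('En-US', 'de_DE'): the first spelling of each normalised key wins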
def import_from_oxt(source_path, name, dest_dir=None, prefix='dic-'):
from calibre.spell.dictionary import parse_lang_code
dest_dir = dest_dir or os.path.join(config_dir, 'dictionaries')
@ -81,10 +93,10 @@ def import_from_oxt(source_path, name, dest_dir=None, prefix='dic-'):
for (dic, aff), locales in parse_xcu(zf.open(xcu).read(), origin='').iteritems():
dic, aff = dic.lstrip('/'), aff.lstrip('/')
d = tempfile.mkdtemp(prefix=prefix, dir=dest_dir)
locales = [x for x in locales if parse_lang_code(x).countrycode]
locales = uniq([x for x in map(fill_country_code, locales) if parse_lang_code(x).countrycode])
if not locales:
continue
metadata = [name] + locales
metadata = [name] + list(locales)
with open(os.path.join(d, 'locales'), 'wb') as f:
f.write(('\n'.join(metadata)).encode('utf-8'))
with open(os.path.join(d, '%s.dic' % locales[0]), 'wb') as f:

View File

@ -459,3 +459,34 @@ def atomic_rename(oldpath, newpath):
time.sleep(1)
else:
os.rename(oldpath, newpath)
def remove_dir_if_empty(path, ignore_metadata_caches=False):
''' Remove a directory if it is empty or contains only the folder metadata
caches from different OSes. To delete the folder if it contains only
metadata caches, set ignore_metadata_caches to True.'''
try:
os.rmdir(path)
except OSError as e:
if e.errno == errno.ENOTEMPTY or len(os.listdir(path)) > 0:
# Some linux systems appear to raise an EPERM instead of an
# ENOTEMPTY, see https://bugs.launchpad.net/bugs/1240797
if ignore_metadata_caches:
try:
found = False
for x in os.listdir(path):
if x.lower() in {'.ds_store', 'thumbs.db'}:
found = True
x = os.path.join(path, x)
if os.path.isdir(x):
import shutil
shutil.rmtree(x)
else:
os.remove(x)
except Exception: # We could get an error, if, for example, windows has locked Thumbs.db
found = False
if found:
remove_dir_if_empty(path)
return
raise
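
A hypothetical usage sketch for the helper above; the path is made up:

import os

book_dir = '/tmp/library/Author/Some Book (123)'  # hypothetical folder
if os.path.isdir(book_dir):
    # Delete the folder even if the OS left a Thumbs.db or .DS_Store behind
    remove_dir_if_empty(book_dir, ignore_metadata_caches=True)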

View File

@ -116,13 +116,12 @@ class TestICU(unittest.TestCase):
for group in [
('Šaa', 'Smith', 'Solženicyn', 'Štepánek'),
('01', '1'),
('1', '11', '13'),
]:
last = None
for x in group:
order, length = icu.numeric_collator().collation_order(x)
if last is not None:
self.ae(last, order)
self.ae(last, order, 'Order for %s not correct: %s != %s' % (x, last, order))
last = order
self.ae(dict(icu.partition_by_first_letter(['A1', '', 'a1', '\U0001f431', '\U0001f431x'])),

View File

@ -696,6 +696,10 @@ def educateQuotes(str):
""" % (close_class,), re.VERBOSE)
str = closing_double_quotes_regex.sub(r"""\1&#8221;""", str)
if str.endswith('-"'):
# A string that ends with -" is sometimes used for dialogue
str = str[:-1] + '&#8221;'
# Any remaining quotes should be opening ones.
str = re.sub(r'"', r"""&#8220;""", str)
@ -859,8 +863,8 @@ def _tokenize(str):
tokens = []
#depth = 6
#nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth)
# depth = 6
# nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth)
# match = r"""(?: <! ( -- .*? -- \s* )+ > ) | # comments
# (?: <\? .*? \?> ) | # directives
# %s # nested tags """ % (nested_tags,)

View File

@ -17,7 +17,8 @@ class UNRARError(Exception):
pass
class DevNull:
def write(self, x): pass
def write(self, x):
pass
class RARStream(object):
@ -184,15 +185,15 @@ def extract_member(stream, match=re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I),
return h['filename'], et.getvalue()
def extract_first_alphabetically(stream):
names_ = [x for x in names(stream) if os.path.splitext(x)[1][1:].lower() in
{'png', 'jpg', 'jpeg', 'gif'}]
names_.sort()
names_ = sorted([x for x in names(stream) if os.path.splitext(x)[1][1:].lower() in
{'png', 'jpg', 'jpeg', 'gif'}])
return extract_member(stream, name=names_[0], match=None)
# Test normal RAR file {{{
def test_basic():
stream = BytesIO(b"Rar!\x1a\x07\x00\xcf\x90s\x00\x00\r\x00\x00\x00\x00\x00\x00\x00\x14\xe7z\x00\x80#\x00\x17\x00\x00\x00\r\x00\x00\x00\x03\xc2\xb3\x96o\x00\x00\x00\x00\x1d3\x03\x00\x00\x00\x00\x00CMT\x0c\x00\x8b\xec\x8e\xef\x14\xf6\xe6h\x04\x17\xff\xcd\x0f\xffk9b\x11]^\x80\xd3dt \x90+\x00\x14\x00\x00\x00\x08\x00\x00\x00\x03\xf1\x84\x93\\\xb9]yA\x1d3\t\x00\xa4\x81\x00\x001\\sub-one\x00\xc0\x0c\x00\x8f\xec\x89\xfe.JM\x86\x82\x0c_\xfd\xfd\xd7\x11\x1a\xef@\x9eHt \x80'\x00\x0e\x00\x00\x00\x04\x00\x00\x00\x03\x9f\xa8\x17\xf8\xaf]yA\x1d3\x07\x00\xa4\x81\x00\x00one.txt\x00\x08\xbf\x08\xae\xf3\xca\x87\xfeo\xfe\xd2n\x80-Ht \x82:\x00\x18\x00\x00\x00\x10\x00\x00\x00\x03\xa86\x81\xdf\xf9fyA\x1d3\x1a\x00\xa4\x81\x00\x00\xe8\xaf\xb6\xe6\xaf\x94\xe5\xb1\x81.txt\x00\x8bh\xf6\xd4kA\\.\x00txt\x0c\x00\x8b\xec\x8e\xef\x14\xf6\xe2l\x91\x189\xff\xdf\xfe\xc2\xd3:g\x9a\x19F=cYt \x928\x00\x11\x00\x00\x00\x08\x00\x00\x00\x03\x7f\xd6\xb6\x7f\xeafyA\x1d3\x16\x00\xa4\x81\x00\x00F\xc3\xbc\xc3\x9fe.txt\x00\x01\x00F\xfc\xdfe\x00.txt\x00\xc0<D\xfe\xc8\xef\xbc\xd1\x04I?\xfd\xff\xdbF)]\xe8\xb9\xe1t \x90/\x00\x13\x00\x00\x00\x08\x00\x00\x00\x03\x1a$\x932\xc2]yA\x1d3\r\x00\xa4\x81\x00\x002\\sub-two.txt\x00\xc0\x10\x00S\xec\xcb\x7f\x8b\xa5(\x0b\x01\xcb\xef\xdf\xf6t\x89\x97z\x0eft \x90)\x00\r\x00\x00\x00\r\x00\x00\x00\x03c\x89K\xd3\xc8fyA\x140\x07\x00\xff\xa1\x00\x00symlink\x00\xc02/sub-two.txt\xeb\x86t\xe0\x90#\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\xb9]yA\x140\x01\x00\xedA\x00\x001\x00\xc0\xe0Dt\xe0\x90#\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\xc2]yA\x140\x01\x00\xedA\x00\x002\x00\xc0u\xa1t \x80,\x00\r\x00\x00\x00\r\x00\x00\x00\x03T\xea\x04\xca\xe6\x84yA\x140\x0c\x00\xa4\x81\x00\x00uncompresseduncompressed\n\xda\x10t \x900\x00\x0e\x00\x00\x00\x04\x00\x00\x00\x035K.\xa6\x18\x85yA\x1d5\x0e\x00\xa4\x81\x00\x00max-compressed\x00\xc0\x00\x08\xbf\x08\xae\xf2\xcc\x01s\xf8\xff\xec\x96\xe8\xc4={\x00@\x07\x00")
stream = BytesIO(
b"Rar!\x1a\x07\x00\xcf\x90s\x00\x00\r\x00\x00\x00\x00\x00\x00\x00\x14\xe7z\x00\x80#\x00\x17\x00\x00\x00\r\x00\x00\x00\x03\xc2\xb3\x96o\x00\x00\x00\x00\x1d3\x03\x00\x00\x00\x00\x00CMT\x0c\x00\x8b\xec\x8e\xef\x14\xf6\xe6h\x04\x17\xff\xcd\x0f\xffk9b\x11]^\x80\xd3dt \x90+\x00\x14\x00\x00\x00\x08\x00\x00\x00\x03\xf1\x84\x93\\\xb9]yA\x1d3\t\x00\xa4\x81\x00\x001\\sub-one\x00\xc0\x0c\x00\x8f\xec\x89\xfe.JM\x86\x82\x0c_\xfd\xfd\xd7\x11\x1a\xef@\x9eHt \x80'\x00\x0e\x00\x00\x00\x04\x00\x00\x00\x03\x9f\xa8\x17\xf8\xaf]yA\x1d3\x07\x00\xa4\x81\x00\x00one.txt\x00\x08\xbf\x08\xae\xf3\xca\x87\xfeo\xfe\xd2n\x80-Ht \x82:\x00\x18\x00\x00\x00\x10\x00\x00\x00\x03\xa86\x81\xdf\xf9fyA\x1d3\x1a\x00\xa4\x81\x00\x00\xe8\xaf\xb6\xe6\xaf\x94\xe5\xb1\x81.txt\x00\x8bh\xf6\xd4kA\\.\x00txt\x0c\x00\x8b\xec\x8e\xef\x14\xf6\xe2l\x91\x189\xff\xdf\xfe\xc2\xd3:g\x9a\x19F=cYt \x928\x00\x11\x00\x00\x00\x08\x00\x00\x00\x03\x7f\xd6\xb6\x7f\xeafyA\x1d3\x16\x00\xa4\x81\x00\x00F\xc3\xbc\xc3\x9fe.txt\x00\x01\x00F\xfc\xdfe\x00.txt\x00\xc0<D\xfe\xc8\xef\xbc\xd1\x04I?\xfd\xff\xdbF)]\xe8\xb9\xe1t \x90/\x00\x13\x00\x00\x00\x08\x00\x00\x00\x03\x1a$\x932\xc2]yA\x1d3\r\x00\xa4\x81\x00\x002\\sub-two.txt\x00\xc0\x10\x00S\xec\xcb\x7f\x8b\xa5(\x0b\x01\xcb\xef\xdf\xf6t\x89\x97z\x0eft \x90)\x00\r\x00\x00\x00\r\x00\x00\x00\x03c\x89K\xd3\xc8fyA\x140\x07\x00\xff\xa1\x00\x00symlink\x00\xc02/sub-two.txt\xeb\x86t\xe0\x90#\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\xb9]yA\x140\x01\x00\xedA\x00\x001\x00\xc0\xe0Dt\xe0\x90#\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\xc2]yA\x140\x01\x00\xedA\x00\x002\x00\xc0u\xa1t \x80,\x00\r\x00\x00\x00\r\x00\x00\x00\x03T\xea\x04\xca\xe6\x84yA\x140\x0c\x00\xa4\x81\x00\x00uncompresseduncompressed\n\xda\x10t \x900\x00\x0e\x00\x00\x00\x04\x00\x00\x00\x035K.\xa6\x18\x85yA\x1d5\x0e\x00\xa4\x81\x00\x00max-compressed\x00\xc0\x00\x08\xbf\x08\xae\xf2\xcc\x01s\xf8\xff\xec\x96\xe8\xc4={\x00@\x07\x00") # noqa
tdata = {u'1': b'',
u'1/sub-one': b'sub-one\n',
u'2': b'',
@ -234,20 +235,23 @@ def test_basic():
from calibre.utils.mem import memory
import gc
del f
for i in xrange(3): gc.collect()
num = 300
for i in xrange(3):
gc.collect()
def get_mem_use(num):
start = memory()
s = SaveStream(stream)
for i in xrange(num):
with s:
f = RARFile(stream)
f.test()
del f
del s
for i in xrange(3): gc.collect()
used = memory() - start
if used > 1 and not isosx:
raise ValueError('Leaked %s MB for %d calls'%(used, num))
del f, s
for i in xrange(3):
gc.collect()
return memory() - start
(get_mem_use(20))
a, b = get_mem_use(10), get_mem_use(110)
if not isosx and abs(b - a) > 1:
raise ValueError('Leaked %s MB for %d calls'%(b - a, 100))
# }}}
def test_rar(path):

View File

@ -542,7 +542,7 @@ class RecursiveFetcher(object):
_fname.decode('latin1', 'replace')
_fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '')
_fname = ascii_filename(_fname)
_fname = os.path.splitext(_fname)[0]+'.xhtml'
_fname = os.path.splitext(_fname)[0][:120] + '.xhtml'
res = os.path.join(linkdiskpath, _fname)
self.downloaded_paths.append(res)
self.filemap[nurl] = res