From adbda39a3c2633a7806a3f7393b6ab175e70e537 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 12 Jul 2014 11:34:31 +0530
Subject: [PATCH] Fix spurious detection of words starting at hyphen boundaries

---
 src/calibre/utils/icu.c       | 3 ++-
 src/calibre/utils/icu_test.py | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c
index c6b6564ff3..ec9a778b58 100644
--- a/src/calibre/utils/icu.c
+++ b/src/calibre/utils/icu.c
@@ -618,7 +618,8 @@ icu_BreakIterator_index(icu_BreakIterator *self, PyObject *args, PyObject *kwarg
         if (self->type == UBRK_WORD && ubrk_getRuleStatus(self->break_iterator) == UBRK_WORD_NONE) 
             continue;  // We are not at the start of a word
         if (self->text_len >= prev + sz && memcmp(self->text + prev, buf, sz * sizeof(UChar)) == 0) {
-            // Needle is present at text[prev:] we have to check if it is followed by a non-hyphen boundary
+            // Needle is present at text[prev:]; we have to check that it is neither preceded nor followed by a hyphen
+            if (prev > 0 && (self->text[prev-1] == 0x2d || self->text[prev-1] == 0x2010)) continue; // At a hyphen boundary
             if(
                 ubrk_isBoundary(self->break_iterator, prev + sz) &&
                 (self->text_len == prev + sz || (self->text[prev + sz] != 0x2d && self->text[prev + sz] != 0x2010))
diff --git a/src/calibre/utils/icu_test.py b/src/calibre/utils/icu_test.py
index 4f7a474dba..5ddd0bc345 100644
--- a/src/calibre/utils/icu_test.py
+++ b/src/calibre/utils/icu_test.py
@@ -166,6 +166,7 @@ class TestICU(unittest.TestCase):
                 ('one', 'one-two one', 8),
                 ('one-two', 'one-two-three one-two', 14),
                 ('one', 'onet one', 5),
+                ('two', 'one-two two', 8),
                 ('i', 'i', 0),
                 ('i', 'six i', 4),
                 ('i', '', -1), ('', '', -1), ('', 'i', -1),