mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-04 03:27:00 -05:00 
			
		
		
		
	Fix ICU find returning incorrect position and length values when non-BMP characters are present on wide Python builds
This commit is contained in:
		
							parent
							
								
									27327e811b
								
							
						
					
					
						commit
						4eaee89487
					
				@ -191,6 +191,9 @@ end:
 | 
				
			|||||||
// Collator.find {{{
 | 
					// Collator.find {{{
 | 
				
			||||||
static PyObject *
 | 
					static PyObject *
 | 
				
			||||||
icu_Collator_find(icu_Collator *self, PyObject *args, PyObject *kwargs) {
 | 
					icu_Collator_find(icu_Collator *self, PyObject *args, PyObject *kwargs) {
 | 
				
			||||||
 | 
					#if PY_VERSION_HEX >= 0x03030000 
 | 
				
			||||||
 | 
					#error Not implemented for python >= 3.3
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
    PyObject *a_ = NULL, *b_ = NULL;
 | 
					    PyObject *a_ = NULL, *b_ = NULL;
 | 
				
			||||||
    UChar *a = NULL, *b = NULL;
 | 
					    UChar *a = NULL, *b = NULL;
 | 
				
			||||||
    int32_t asz = 0, bsz = 0, pos = -1, length = -1;
 | 
					    int32_t asz = 0, bsz = 0, pos = -1, length = -1;
 | 
				
			||||||
@ -207,10 +210,16 @@ icu_Collator_find(icu_Collator *self, PyObject *args, PyObject *kwargs) {
 | 
				
			|||||||
    search = usearch_openFromCollator(a, asz, b, bsz, self->collator, NULL, &status);
 | 
					    search = usearch_openFromCollator(a, asz, b, bsz, self->collator, NULL, &status);
 | 
				
			||||||
    if (U_SUCCESS(status)) {
 | 
					    if (U_SUCCESS(status)) {
 | 
				
			||||||
        pos = usearch_first(search, &status);
 | 
					        pos = usearch_first(search, &status);
 | 
				
			||||||
        if (pos != USEARCH_DONE) 
 | 
					        if (pos != USEARCH_DONE) {
 | 
				
			||||||
            length = usearch_getMatchedLength(search);
 | 
					            length = usearch_getMatchedLength(search);
 | 
				
			||||||
        else
 | 
					#ifdef Py_UNICODE_WIDE
 | 
				
			||||||
            pos = -1;
 | 
					            // We have to return number of unicode characters since the string
 | 
				
			||||||
 | 
					            // could contain surrogate pairs which are represented as a single
 | 
				
			||||||
 | 
					            // character in python wide builds
 | 
				
			||||||
 | 
					            length = u_countChar32(b + pos, length);
 | 
				
			||||||
 | 
					            pos = u_countChar32(b, pos);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					        } else pos = -1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
end:
 | 
					end:
 | 
				
			||||||
    if (search != NULL) usearch_close(search);
 | 
					    if (search != NULL) usearch_close(search);
 | 
				
			||||||
 | 
				
			|||||||
@ -92,7 +92,8 @@ class TestICU(unittest.TestCase):
 | 
				
			|||||||
    def test_find(self):
 | 
					    def test_find(self):
 | 
				
			||||||
        ' Test searching for substrings '
 | 
					        ' Test searching for substrings '
 | 
				
			||||||
        self.ae((1, 1), icu.find(b'a', b'1ab'))
 | 
					        self.ae((1, 1), icu.find(b'a', b'1ab'))
 | 
				
			||||||
        self.ae((1, 2), icu.find('\U0001f431', 'x\U0001f431x'))
 | 
					        self.ae((1, 1 if sys.maxunicode >= 0x10ffff else 2), icu.find('\U0001f431', 'x\U0001f431x'))
 | 
				
			||||||
 | 
					        self.ae((1 if sys.maxunicode >= 0x10ffff else 2, 1), icu.find('y', '\U0001f431y'))
 | 
				
			||||||
        self.ae((0, 4), icu.primary_find('pena', 'peña'))
 | 
					        self.ae((0, 4), icu.primary_find('pena', 'peña'))
 | 
				
			||||||
        for k, v in {u'pèché': u'peche', u'flüße':u'Flusse', u'Štepánek':u'ŠtepaneK'}.iteritems():
 | 
					        for k, v in {u'pèché': u'peche', u'flüße':u'Flusse', u'Štepánek':u'ŠtepaneK'}.iteritems():
 | 
				
			||||||
            self.ae((1, len(k)), icu.primary_find(v, ' ' + k), 'Failed to find %s in %s' % (v, k))
 | 
					            self.ae((1, len(k)), icu.primary_find(v, ' ' + k), 'Failed to find %s in %s' % (v, k))
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user