From 4f4b64e33b73efca5ad4423f246d8a94f266cc3c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 17 Aug 2014 10:20:18 +0530
Subject: [PATCH] ICU: Small perf improvements by specializing the method
 signatures in the python icu module to avoid unnecessary creation/unwrapping
 of tuples when calling the methods

---
 src/calibre/utils/icu.c | 80 +++++++++++++++++------------------------
 1 file changed, 33 insertions(+), 47 deletions(-)

diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c
index 158e79dbc4..6141fd668d 100644
--- a/src/calibre/utils/icu.c
+++ b/src/calibre/utils/icu.c
@@ -139,13 +139,12 @@ icu_Collator_capsule(icu_Collator *self, void *closure) {
 
 // Collator.sort_key {{{
 static PyObject *
-icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) {
+icu_Collator_sort_key(icu_Collator *self, PyObject *input) {
     int32_t sz = 0, key_size = 0, bsz = 0;
     UChar *buf = NULL;
     uint8_t *buf2 = NULL;
-    PyObject *ans = NULL, *input = NULL;
+    PyObject *ans = NULL;
   
-    if (!PyArg_ParseTuple(args, "O", &input)) return NULL;
     buf = python_to_icu(input, &sz, 1);
     if (buf == NULL) return NULL;
 
@@ -169,7 +168,7 @@ end:
 
 // Collator.strcmp {{{
 static PyObject *
-icu_Collator_strcmp(icu_Collator *self, PyObject *args, PyObject *kwargs) {
+icu_Collator_strcmp(icu_Collator *self, PyObject *args) {
     PyObject *a_ = NULL, *b_ = NULL;
     int32_t asz = 0, bsz = 0;
     UChar *a = NULL, *b = NULL;
@@ -190,7 +189,7 @@ end:
 
 // Collator.find {{{
 static PyObject *
-icu_Collator_find(icu_Collator *self, PyObject *args, PyObject *kwargs) {
+icu_Collator_find(icu_Collator *self, PyObject *args) {
 #if PY_VERSION_HEX >= 0x03030000 
 #error Not implemented for python >= 3.3
 #endif
@@ -231,7 +230,7 @@ end:
 
 // Collator.contains {{{
 static PyObject *
-icu_Collator_contains(icu_Collator *self, PyObject *args, PyObject *kwargs) {
+icu_Collator_contains(icu_Collator *self, PyObject *args) {
     PyObject *a_ = NULL, *b_ = NULL;
     UChar *a = NULL, *b = NULL;
     int32_t asz = 0, bsz = 0, pos = -1;
@@ -264,7 +263,7 @@ end:
 
 // Collator.contractions {{{
 static PyObject *
-icu_Collator_contractions(icu_Collator *self, PyObject *args, PyObject *kwargs) {
+icu_Collator_contractions(icu_Collator *self, PyObject *args) {
     UErrorCode status = U_ZERO_ERROR;
     UChar *str = NULL;
     UChar32 start=0, end=0;
@@ -305,7 +304,7 @@ end:
 
 // Collator.startswith {{{
 static PyObject *
-icu_Collator_startswith(icu_Collator *self, PyObject *args, PyObject *kwargs) {
+icu_Collator_startswith(icu_Collator *self, PyObject *args) {
     PyObject *a_ = NULL, *b_ = NULL;
     int32_t asz = 0, bsz = 0;
     UChar *a = NULL, *b = NULL;
@@ -334,16 +333,13 @@ end:
 
 // Collator.collation_order {{{
 static PyObject *
-icu_Collator_collation_order(icu_Collator *self, PyObject *args, PyObject *kwargs) {
-    PyObject *a_ = NULL;
+icu_Collator_collation_order(icu_Collator *self, PyObject *a_) {
     int32_t asz = 0;
     UChar *a = NULL;
     UErrorCode status = U_ZERO_ERROR;
     UCollationElements *iter = NULL;
     int order = 0, len = -1;
   
-    if (!PyArg_ParseTuple(args, "O", &a_)) return NULL;
-
     a = python_to_icu(a_, &asz, 1);
     if (a == NULL) goto end;
 
@@ -384,10 +380,10 @@ icu_Collator_set_upper_first(icu_Collator *self, PyObject *val, void *closure) {
 // }}}
 
 static PyObject*
-icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs);
+icu_Collator_clone(icu_Collator *self, PyObject *args);
 
 static PyMethodDef icu_Collator_methods[] = {
-    {"sort_key", (PyCFunction)icu_Collator_sort_key, METH_VARARGS,
+    {"sort_key", (PyCFunction)icu_Collator_sort_key, METH_O,
      "sort_key(unicode object) -> Return a sort key for the given object as a bytestring. The idea is that these bytestring will sort using the builtin cmp function, just like the original unicode strings would sort in the current locale with ICU."
     },
 
@@ -403,11 +399,11 @@ static PyMethodDef icu_Collator_methods[] = {
         "contains(pattern, source) -> return True iff the pattern was found in the source."
     },
 
-    {"contractions", (PyCFunction)icu_Collator_contractions, METH_VARARGS,
+    {"contractions", (PyCFunction)icu_Collator_contractions, METH_NOARGS,
         "contractions() -> returns the contractions defined for this collator."
     },
 
-    {"clone", (PyCFunction)icu_Collator_clone, METH_VARARGS,
+    {"clone", (PyCFunction)icu_Collator_clone, METH_NOARGS,
         "clone() -> returns a clone of this collator."
     },
 
@@ -415,7 +411,7 @@ static PyMethodDef icu_Collator_methods[] = {
         "startswith(a, b) -> returns True iff a startswith b, following the current collation rules."
     },
 
-    {"collation_order", (PyCFunction)icu_Collator_collation_order, METH_VARARGS,
+    {"collation_order", (PyCFunction)icu_Collator_collation_order, METH_O,
         "collation_order(string) -> returns (order, length) where order is an integer that gives the position of string in a list. length gives the number of characters used for order."
     },
 
@@ -502,7 +498,7 @@ static PyTypeObject icu_CollatorType = { // {{{
 
 // Collator.clone {{{
 static PyObject*
-icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs)
+icu_Collator_clone(icu_Collator *self, PyObject *args)
 {
     UCollator *collator;
     UErrorCode status = U_ZERO_ERROR;
@@ -576,13 +572,11 @@ icu_BreakIterator_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 
 // BreakIterator.set_text {{{
 static PyObject *
-icu_BreakIterator_set_text(icu_BreakIterator *self, PyObject *args, PyObject *kwargs) {
+icu_BreakIterator_set_text(icu_BreakIterator *self, PyObject *input) {
     int32_t sz = 0;
     UChar *buf = NULL;
     UErrorCode status = U_ZERO_ERROR;
-    PyObject *input = NULL;
   
-    if (!PyArg_ParseTuple(args, "O", &input)) return NULL;
     buf = python_to_icu(input, &sz, 1);
     if (buf == NULL) return NULL;
     ubrk_setText(self->break_iterator, buf, sz, &status);
@@ -597,16 +591,14 @@ icu_BreakIterator_set_text(icu_BreakIterator *self, PyObject *args, PyObject *kw
 
 // BreakIterator.index {{{
 static PyObject *
-icu_BreakIterator_index(icu_BreakIterator *self, PyObject *args, PyObject *kwargs) {
+icu_BreakIterator_index(icu_BreakIterator *self, PyObject *token) {
 #if PY_VERSION_HEX >= 0x03030000 
 #error Not implemented for python >= 3.3
 #endif
 
     UChar *buf = NULL;
     int32_t prev = 0, p = 0, sz = 0, ans = -1;
-    PyObject *token = NULL;
   
-    if (!PyArg_ParseTuple(args, "O", &token)) return NULL;
     buf = python_to_icu(token, &sz, 1);
     if (buf == NULL) return NULL;
     if (sz < 1) goto end;
@@ -643,7 +635,7 @@ end:
 
 // BreakIterator.split2 {{{
 static PyObject *
-icu_BreakIterator_split2(icu_BreakIterator *self, PyObject *args, PyObject *kwargs) {
+icu_BreakIterator_split2(icu_BreakIterator *self, PyObject *args) {
 #if PY_VERSION_HEX >= 0x03030000 
 #error Not implemented for python >= 3.3
 #endif
@@ -703,15 +695,15 @@ icu_BreakIterator_split2(icu_BreakIterator *self, PyObject *args, PyObject *kwar
 } // }}}
 
 static PyMethodDef icu_BreakIterator_methods[] = {
-    {"set_text", (PyCFunction)icu_BreakIterator_set_text, METH_VARARGS,
+    {"set_text", (PyCFunction)icu_BreakIterator_set_text, METH_O,
      "set_text(unicode object) -> Set the text this iterator will operate on"
     },
 
-    {"split2", (PyCFunction)icu_BreakIterator_split2, METH_VARARGS,
+    {"split2", (PyCFunction)icu_BreakIterator_split2, METH_NOARGS,
      "split2() -> Split the current text into tokens, returning a list of 2-tuples of the form (position of token, length of token). The numbers are suitable for indexing python strings regardless of narrow/wide builds."
     },
 
-    {"index", (PyCFunction)icu_BreakIterator_index, METH_VARARGS,
+    {"index", (PyCFunction)icu_BreakIterator_index, METH_O,
      "index(token) -> Find the index of the first match for token. Useful to find, for example, words that could also be a part of a larger word. For example, index('i') in 'string i' will be 7 not 3. Returns -1 if not found."
     },
 
@@ -806,15 +798,13 @@ end:
 
 // swap_case {{{
 
-static PyObject* icu_swap_case(PyObject *self, PyObject *args) {
-    PyObject *input = NULL, *result = NULL;
+static PyObject* icu_swap_case(PyObject *self, PyObject *input) {
+    PyObject *result = NULL;
     UErrorCode status = U_ZERO_ERROR;
     UChar *input_buf = NULL, *output_buf = NULL;
     UChar32 *buf = NULL;
     int32_t sz = 0, sz32 = 0, i = 0;
 
-    if (!PyArg_ParseTuple(args, "O", &input)) return NULL;
-
     input_buf = python_to_icu(input, &sz, 1);
     if (input_buf == NULL) goto end;
     output_buf = (UChar*) calloc(3 * sz, sizeof(UChar));
@@ -1009,12 +999,11 @@ end:
 
 // roundtrip {{{
 static PyObject *
-icu_roundtrip(PyObject *self, PyObject *args) {
+icu_roundtrip(PyObject *self, PyObject *src) {
     int32_t sz = 0;
     UChar *icu = NULL;
-    PyObject *ret = NULL, *src = NULL;
+    PyObject *ret = NULL;
   
-    if (!PyArg_ParseTuple(args, "O", &src)) return NULL;
     icu = python_to_icu(src, &sz, 1);
     if (icu != NULL) {
         ret = icu_to_python(icu, sz);
@@ -1047,12 +1036,10 @@ icu_break_iterator_locales(PyObject *self, PyObject *args) {
 
 // string_length {{{
 static PyObject *
-icu_string_length(PyObject *self, PyObject *args) {
+icu_string_length(PyObject *self, PyObject *src) {
     int32_t sz = 0;
     UChar *icu = NULL;
-    PyObject *src = NULL;
   
-    if (!PyArg_ParseTuple(args, "O", &src)) return NULL;
     icu = python_to_icu(src, &sz, 1);
     if (icu == NULL) return NULL;
     sz = u_countChar32(icu, sz);
@@ -1062,19 +1049,18 @@ icu_string_length(PyObject *self, PyObject *args) {
 
 // utf16_length {{{
 static PyObject *
-icu_utf16_length(PyObject *self, PyObject *args) {
+icu_utf16_length(PyObject *self, PyObject *src) {
 #if PY_VERSION_HEX >= 0x03030000 
 #error Not implemented for python >= 3.3
 #endif
 
     int32_t sz = 0;
-    PyObject *src = NULL;
 #ifdef Py_UNICODE_WIDE
     int32_t i = 0, t = 0;
     Py_UNICODE *data = NULL;
 #endif
   
-    if (!PyArg_ParseTuple(args, "U", &src)) return NULL;
+    if (!PyUnicode_Check(src)) { PyErr_SetString(PyExc_TypeError, "Must be a unicode object"); return NULL; }
     sz = PyUnicode_GET_SIZE(src);
 #ifdef Py_UNICODE_WIDE
     data = PyUnicode_AS_UNICODE(src);
@@ -1092,7 +1078,7 @@ static PyMethodDef icu_methods[] = {
         "change_case(unicode object, which, locale) -> change case to one of UPPER_CASE, LOWER_CASE, TITLE_CASE"
     },
 
-    {"swap_case", icu_swap_case, METH_VARARGS,
+    {"swap_case", icu_swap_case, METH_O,
         "swap_case(unicode object) -> swaps the case using the simple, locale independent unicode algorithm"
     },
 
@@ -1104,7 +1090,7 @@ static PyMethodDef icu_methods[] = {
         "set_filesystem_encoding(encoding) -> Set the filesystem encoding for python."
     },
 
-    {"get_available_transliterators", icu_get_available_transliterators, METH_VARARGS,
+    {"get_available_transliterators", icu_get_available_transliterators, METH_NOARGS,
         "get_available_transliterators() -> Return list of available transliterators. This list is rather limited on OS X."
     },
 
@@ -1124,19 +1110,19 @@ static PyMethodDef icu_methods[] = {
      "normalize(mode, unicode_text) -> Return a python unicode string which is normalized in the specified mode."
     },
 
-    {"roundtrip", icu_roundtrip, METH_VARARGS, 
+    {"roundtrip", icu_roundtrip, METH_O, 
      "roundtrip(string) -> Roundtrip a unicode object from python to ICU back to python (useful for testing)"
     },
 
-    {"available_locales_for_break_iterator", icu_break_iterator_locales, METH_VARARGS, 
+    {"available_locales_for_break_iterator", icu_break_iterator_locales, METH_NOARGS, 
      "available_locales_for_break_iterator() -> Return tuple of all available locales for the BreakIterator"
     },
 
-    {"string_length", icu_string_length, METH_VARARGS, 
+    {"string_length", icu_string_length, METH_O, 
      "string_length(string) -> Return the length of a string (number of unicode code points in the string). Useful on narrow python builds where len() returns an incorrect answer if the string contains surrogate pairs."
     },
 
-    {"utf16_length", icu_utf16_length, METH_VARARGS, 
+    {"utf16_length", icu_utf16_length, METH_O, 
      "utf16_length(string) -> Return the length of a string (number of UTF-16 code points in the string). Useful on wide python builds where len() returns an incorrect answer if the string contains surrogate pairs."
     },