[SciPy-dev] scipy.cluster

David Cournapeau david@ar.media.kyoto-u.ac...
Wed Jun 20 02:09:59 CDT 2007


Nils Wagner wrote:
> Hi all,
>
> The recent changes in scipy.cluster have introduced some MemoryErrors
>
> ======================================================================
> ERROR: Testing that kmeans2 init methods work.
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File
> "/usr/lib64/python2.4/site-packages/scipy/cluster/tests/test_vq.py",
> line 137, in check_kmeans2_init
>     kmeans2(data, 3, minit = 'random')
>   File "/usr/lib64/python2.4/site-packages/scipy/cluster/vq.py", line
> 545, in kmeans2
>     return _kmeans2(data, clusters, iter, nc)
>   File "/usr/lib64/python2.4/site-packages/scipy/cluster/vq.py", line
> 558, in _kmeans2
>     label = vq(data, code)[0]
>   File "/usr/lib64/python2.4/site-packages/scipy/cluster/vq.py", line
> 144, in vq
>     results = _vq.vq(c_obs, c_code_book)
> MemoryError
>
> ======================================================================
> ERROR: Testing simple call to kmeans2 with rank 1 data.
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File
> "/usr/lib64/python2.4/site-packages/scipy/cluster/tests/test_vq.py",
> line 129, in check_kmeans2_rank1
>     code1 = kmeans2(data1, code, iter = 1)[0]
>   File "/usr/lib64/python2.4/site-packages/scipy/cluster/vq.py", line
> 545, in kmeans2
>     return _kmeans2(data, clusters, iter, nc)
>   File "/usr/lib64/python2.4/site-packages/scipy/cluster/vq.py", line
> 558, in _kmeans2
>     label = vq(data, code)[0]
>   File "/usr/lib64/python2.4/site-packages/scipy/cluster/vq.py", line
> 144, in vq
>     results = _vq.vq(c_obs, c_code_book)
> MemoryError
>
> ======================================================================
> ERROR: Testing simple call to kmeans2 and its results.
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File
> "/usr/lib64/python2.4/site-packages/scipy/cluster/tests/test_vq.py",
> line 114, in check_kmeans2_simple
>     code1 = kmeans2(X, code, iter = 1)[0]
>   File "/usr/lib64/python2.4/site-packages/scipy/cluster/vq.py", line
> 545, in kmeans2
>     return _kmeans2(data, clusters, iter, nc)
>   File "/usr/lib64/python2.4/site-packages/scipy/cluster/vq.py", line
> 558, in _kmeans2
>     label = vq(data, code)[0]
>   File "/usr/lib64/python2.4/site-packages/scipy/cluster/vq.py", line
> 144, in vq
>     results = _vq.vq(c_obs, c_code_book)
> MemoryError
>
> ======================================================================
> ERROR: This will cause kmean to have a cluster with no points.
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File
> "/usr/lib64/python2.4/site-packages/scipy/cluster/tests/test_vq.py",
> line 108, in check_kmeans_lost_cluster
>     res = kmeans(data, initk)
>   File "/usr/lib64/python2.4/site-packages/scipy/cluster/vq.py", line
> 400, in kmeans
>     result = _kmeans(obs, guess, thresh = thresh)
>   File "/usr/lib64/python2.4/site-packages/scipy/cluster/vq.py", line
> 320, in _kmeans
>     obs_code, distort = vq(obs, code_book)
>   File "/usr/lib64/python2.4/site-packages/scipy/cluster/vq.py", line
> 144, in vq
>     results = _vq.vq(c_obs, c_code_book)
> MemoryError
>
> ======================================================================
> ERROR: check_kmeans_simple (scipy.cluster.tests.test_vq.test_kmean)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File
> "/usr/lib64/python2.4/site-packages/scipy/cluster/tests/test_vq.py",
> line 96, in check_kmeans_simple
>     code1 = kmeans(X, code, iter = 1)[0]
>   File "/usr/lib64/python2.4/site-packages/scipy/cluster/vq.py", line
> 400, in kmeans
>     result = _kmeans(obs, guess, thresh = thresh)
>   File "/usr/lib64/python2.4/site-packages/scipy/cluster/vq.py", line
> 320, in _kmeans
>     obs_code, distort = vq(obs, code_book)
>   File "/usr/lib64/python2.4/site-packages/scipy/cluster/vq.py", line
> 144, in vq
>     results = _vq.vq(c_obs, c_code_book)
> MemoryError
>
> ======================================================================
> ERROR: check_vq (scipy.cluster.tests.test_vq.test_vq)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File
> "/usr/lib64/python2.4/site-packages/scipy/cluster/tests/test_vq.py",
> line 63, in check_vq
>     label1, dist = _vq.vq(X, initc)
> MemoryError
>
> ======================================================================
> ERROR: Test special rank 1 vq algo, python implementation.
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File
> "/usr/lib64/python2.4/site-packages/scipy/cluster/tests/test_vq.py",
> line 85, in check_vq_1d
>     a, b = _vq.vq(data, initc)
> MemoryError
That regression is my fault :) I see that your machine is 64-bit,
which may have exposed some bugs I didn't see. Unfortunately, I don't
have a 64-bit machine available right now...

There is one obvious error I could spot, though: could you try this
simple patch?

Index: Lib/cluster/src/vq_module.c
===================================================================
--- Lib/cluster/src/vq_module.c (revision 3110)
+++ Lib/cluster/src/vq_module.c (working copy)
@@ -1,5 +1,5 @@
 /*
- * Last Change: Tue Jun 19 11:00 PM 2007 J
+ * Last Change: Wed Jun 20 04:00 PM 2007 J
  *
  */
 #include <Python.h>
@@ -97,24 +97,24 @@
             if (dist_a == NULL) {
                 goto clean_code_a;
             }
-            index_a = (PyArrayObject*)PyArray_EMPTY(1, &n, NPY_INT, 0);
+            index_a = (PyArrayObject*)PyArray_EMPTY(1, &n, PyArray_INTP, 0);
             if (index_a == NULL) {
                 goto clean_dist_a;
             }
             float_tvq((float*)obs_a->data, (float*)code_a->data, n, nc, d,
-                    (int*)index_a->data, (float*)dist_a->data);
+                    (npy_intp*)index_a->data, (float*)dist_a->data);
             break;
         case NPY_DOUBLE:
             dist_a = (PyArrayObject*)PyArray_EMPTY(1, &n, typenum1, 0);
             if (dist_a == NULL) {
                 goto clean_code_a;
             }
-            index_a = (PyArrayObject*)PyArray_EMPTY(1, &n, NPY_INT, 0);
+            index_a = (PyArrayObject*)PyArray_EMPTY(1, &n, PyArray_INTP, 0);
             if (index_a == NULL) {
                 goto clean_dist_a;
             }
             double_tvq((double*)obs_a->data, (double*)code_a->data, n, nc, d,
-                    (int*)index_a->data, (double*)dist_a->data);
+                    (npy_intp*)index_a->data, (double*)dist_a->data);
             break;
         default:
             PyErr_Format(PyExc_ValueError,
@@ -151,4 +151,3 @@
     Py_DECREF(obs_a);
     return NULL;
 }
-

David


More information about the Scipy-dev mailing list