[Numpy-svn] r3258 - in trunk/numpy/core: code_generators include/numpy src

numpy-svn at scipy.org numpy-svn at scipy.org
Thu Oct 5 08:37:23 CDT 2006


Author: oliphant
Date: 2006-10-05 08:37:20 -0500 (Thu, 05 Oct 2006)
New Revision: 3258

Modified:
   trunk/numpy/core/code_generators/array_api_order.txt
   trunk/numpy/core/include/numpy/old_defines.h
   trunk/numpy/core/src/arrayobject.c
   trunk/numpy/core/src/ufuncobject.c
Log:
Speed up broadcasting ufuncs by making sure the loop axis is the one with the smallest strides.

Modified: trunk/numpy/core/code_generators/array_api_order.txt
===================================================================
--- trunk/numpy/core/code_generators/array_api_order.txt	2006-10-05 13:35:06 UTC (rev 3257)
+++ trunk/numpy/core/code_generators/array_api_order.txt	2006-10-05 13:37:20 UTC (rev 3258)
@@ -78,7 +78,7 @@
 PyArray_NewFlagsObject
 PyArray_CanCastScalar
 PyArray_CompareUCS4
-PyArray_RemoveLargest
+PyArray_RemoveSmallest
 PyArray_ElementStrides
 PyArray_Item_INCREF
 PyArray_Item_XDECREF

Modified: trunk/numpy/core/include/numpy/old_defines.h
===================================================================
--- trunk/numpy/core/include/numpy/old_defines.h	2006-10-05 13:35:06 UTC (rev 3257)
+++ trunk/numpy/core/include/numpy/old_defines.h	2006-10-05 13:37:20 UTC (rev 3258)
@@ -165,3 +165,5 @@
 #define PyArray_MAX_ELSIZE NPY_MAX_ELSIZE
 
 #define PyArray_USE_PYMEM NPY_USE_PYMEM
+
+#define PyArray_RemoveLargest PyArray_RemoveSmallest

Modified: trunk/numpy/core/src/arrayobject.c
===================================================================
--- trunk/numpy/core/src/arrayobject.c	2006-10-05 13:35:06 UTC (rev 3257)
+++ trunk/numpy/core/src/arrayobject.c	2006-10-05 13:37:20 UTC (rev 3258)
@@ -950,7 +950,7 @@
                 return -1;
         }
 
-        maxaxis = PyArray_RemoveLargest(multi);
+        maxaxis = PyArray_RemoveSmallest(multi);
         if (maxaxis < 0) { /* copy 1 0-d array to another */
                 PyArray_XDECREF(dest);
                 memcpy(dest->data, src->data, elsize);
@@ -7455,7 +7455,7 @@
 
         icopyfunc = in->descr->f->copyswapn;
         ocopyfunc = out->descr->f->copyswapn;
-        maxaxis = PyArray_RemoveLargest(multi);
+        maxaxis = PyArray_RemoveSmallest(multi);
         if (maxaxis < 0) { /* cast 1 0-d array to another */
                 N = 1;
                 maxdim = 1;
@@ -8765,7 +8765,8 @@
 
 /*OBJECT_API
  Get Iterator that iterates over all but one axis (don't use this with
- PyArray_ITER_GOTO1D).  The axis will be over-written if negative.
+ PyArray_ITER_GOTO1D).  The axis will be over-written if negative
+ with the axis having the smallest stride. 
 */
 static PyObject *
 PyArray_IterAllButAxis(PyObject *obj, int *inaxis)
@@ -8778,15 +8779,21 @@
         if (PyArray_NDIM(obj)==0)
                 return (PyObject *)it;
         if (*inaxis < 0) {
-                int i, maxaxis=0;
-                intp maxdim=PyArray_DIM(obj,0);
+                int i, minaxis=0;
+                intp minstride=0;
+                i = 0;
+                while (minstride==0 && i<PyArray_NDIM(obj)) {
+                        minstride = PyArray_STRIDE(obj,i);
+                        i++;
+                }
                 for (i=1; i<PyArray_NDIM(obj); i++) {
-                        if (PyArray_DIM(obj,i) > maxdim) {
-                                maxaxis = i;
-                                maxdim = PyArray_DIM(obj,i);
+                        if (PyArray_STRIDE(obj,i) > 0 && 
+                            PyArray_STRIDE(obj, i) < minstride) {
+                                minaxis = i;
+                                minstride = PyArray_STRIDE(obj,i);
                         }
                 }
-                *inaxis = maxaxis;
+                *inaxis = minaxis;
         }
         axis = *inaxis;
         /* adjust so that will not iterate over axis */
@@ -8807,27 +8814,36 @@
    adjusted */
 
 /*OBJECT_API
-  Adjusts previously broadcasted iterators so that the largest axis
-  is not iterated over.
-  Returns dimension which is largest in the range [0,multi->nd).
+  Adjusts previously broadcasted iterators so that the axis with 
+  the smallest sum of iterator strides is not iterated over.
+  Returns dimension which is smallest in the range [0,multi->nd).
   A -1 is returned if multi->nd == 0.
  */
 static int
-PyArray_RemoveLargest(PyArrayMultiIterObject *multi)
+PyArray_RemoveSmallest(PyArrayMultiIterObject *multi)
 {
         PyArrayIterObject *it;
-        int i;
-        int axis=0;
-        intp longest;
+        int i, j;
+        int axis;
+        intp smallest;
+        intp sumstrides[NPY_MAXDIMS];
 
         if (multi->nd == 0) return -1;
 
-        longest = multi->dimensions[0];
+
+        for (i=0; i<multi->nd; i++) {
+                sumstrides[i] = 0;
+                for (j=0; j<multi->numiter; j++) {
+                        sumstrides[i] = multi->iters[j]->strides[i];
+                }
+        }
+        axis=0;
+        smallest = sumstrides[0];
         /* Find longest dimension */
         for (i=1; i<multi->nd; i++) {
-                if (multi->dimensions[i] > longest) {
+                if (sumstrides[i] < smallest) {
                         axis = i;
-                        longest = multi->dimensions[i];
+                        smallest = sumstrides[i];
                 }
         }
 

Modified: trunk/numpy/core/src/ufuncobject.c
===================================================================
--- trunk/numpy/core/src/ufuncobject.c	2006-10-05 13:35:06 UTC (rev 3257)
+++ trunk/numpy/core/src/ufuncobject.c	2006-10-05 13:37:20 UTC (rev 3258)
@@ -1144,37 +1144,60 @@
 
         loop->numiter = self->nargs;
 
-        /* Fill in steps */
-        if (loop->meth != ONE_UFUNCLOOP) {
-		int ldim = 0;
-		intp maxdim=-1;
+	/* Fill in steps */
+	if (loop->meth != ONE_UFUNCLOOP) {
+		int ldim;
+		intp minsum;
+		intp maxdim;
 		PyArrayIterObject *it;
+		intp stride_sum[NPY_MAXDIMS];
+		int j;
 
-                /* Fix iterators */
+		/* Fix iterators */
 
-                /* Find the **largest** dimension */
+		/* Optimize axis the iteration takes place over
 
-		maxdim = -1;
-		for (i=loop->nd - 1; i>=0; i--) {
-			if (loop->dimensions[i] > maxdim) {
+		The first thought was to have the loop go 
+		over the largest dimension to minimize the number of loops
+		
+		However, on processors with slow memory bus and cache,
+		the slowest loops occur when the memory access occurs for
+		large strides. 
+		
+		Thus, choose the axis for which strides of the last iterator is 
+		smallest but non-zero.
+		 */
+
+		for (i=0; i<loop->nd-1; i++) {
+			stride_sum[i] = 0;
+			for (j=0; j<loop->numiter; j++) {
+				stride_sum[i] += loop->iters[j]->strides[i];
+			}
+		}
+
+		ldim = loop->nd - 1;
+		minsum = stride_sum[loop->nd-1];
+		for (i=loop->nd - 2; i>=0; i--) {
+			if (stride_sum[i] < minsum ) {
 				ldim = i;
-				maxdim = loop->dimensions[i];
+				minsum = stride_sum[i];
 			}
 		}
 
+		maxdim = loop->dimensions[ldim];
 		loop->size /= maxdim;
-                loop->bufcnt = maxdim;
+		loop->bufcnt = maxdim;
 		loop->lastdim = ldim;
 
-                /* Fix the iterators so the inner loop occurs over the
+		/* Fix the iterators so the inner loop occurs over the
 		   largest dimensions -- This can be done by
 		   setting the size to 1 in that dimension
 		   (just in the iterators)
-                 */
+		 */
 
 		for (i=0; i<loop->numiter; i++) {
 			it = loop->iters[i];
-                        it->contiguous = 0;
+			it->contiguous = 0;
 			it->size /= (it->dims_m1[ldim]+1);
 			it->dims_m1[ldim] = 0;
 			it->backstrides[ldim] = 0;
@@ -1184,7 +1207,7 @@
 			   so don't change them) */
 
 			/* Set the steps to the strides in that dimension */
-                        loop->steps[i] = it->strides[ldim];
+			loop->steps[i] = it->strides[ldim];
 		}
 
 		/* fix up steps where we will be copying data to
@@ -1201,8 +1224,8 @@
 				/* These are changed later if casting is needed */
 			}
 		}
-        }
-        else { /* uniformly-strided case ONE_UFUNCLOOP */
+	}
+	else { /* uniformly-strided case ONE_UFUNCLOOP */
 		for (i=0; i<self->nargs; i++) {
 			if (PyArray_SIZE(mps[i]) == 1)
 				loop->steps[i] = 0;
@@ -1466,19 +1489,21 @@
 			       loop->steps, loop->funcdata);
 		UFUNC_CHECK_ERROR(loop);
 		break;
-	case NOBUFFER_UFUNCLOOP:
+	case NOBUFFER_UFUNCLOOP: 
 		/* Everything is notswapped, aligned and of the
 		   right type but not contiguous. -- Almost as fast.
 		*/
-                /*fprintf(stderr, "NOBUFFER...%d\n", loop->size);*/
+		/*fprintf(stderr, "NOBUFFER...%d\n", loop->size);*/
 		while (loop->index < loop->size) {
 			for (i=0; i<self->nargs; i++)
 				loop->bufptr[i] = loop->iters[i]->dataptr;
-
+			
 			loop->function((char **)loop->bufptr, &(loop->bufcnt),
 				       loop->steps, loop->funcdata);
 			UFUNC_CHECK_ERROR(loop);
 
+			/* Adjust loop pointers */
+			
 			for (i=0; i<self->nargs; i++) {
 				PyArray_ITER_NEXT(loop->iters[i]);
 			}
@@ -1506,7 +1531,7 @@
 		int ninnerloops = loop->ninnerloops;
 		Bool pyobject[NPY_MAXARGS];
 		int datasize[NPY_MAXARGS];
-                int i, j, k, stopcondition;
+                int j, k, stopcondition;
 		char *myptr1, *myptr2;
 
 



More information about the Numpy-svn mailing list