# [Scipy-svn] r3780 - in trunk/scipy/sparse: . sparsetools tests

scipy-svn@scip... scipy-svn@scip...
Fri Jan 4 03:53:33 CST 2008

Author: wnbell
Date: 2008-01-04 03:53:29 -0600 (Fri, 04 Jan 2008)
New Revision: 3780

Modified:
trunk/scipy/sparse/bsr.py
trunk/scipy/sparse/sparsetools/fixed_size.h
trunk/scipy/sparse/sparsetools/sparsetools.h
trunk/scipy/sparse/tests/test_sparse.py
Log:
use recursive template for BSR matrix vector product

Modified: trunk/scipy/sparse/bsr.py
===================================================================
--- trunk/scipy/sparse/bsr.py	2008-01-04 07:00:52 UTC (rev 3779)
+++ trunk/scipy/sparse/bsr.py	2008-01-04 09:53:29 UTC (rev 3780)
@@ -11,7 +11,7 @@
from sparsetools import bsr_matvec, csr_matmat_pass1, bsr_matmat_pass2
from data import _data_matrix
from compressed import _cs_matrix
-from base import isspmatrix
+from base import isspmatrix, _formats
from sputils import isshape, getdtype, to_native, isscalarlike, isdense, \
upcast

@@ -53,8 +53,8 @@
- The Block Compressed Row (BSR) format is very similar to the
Compressed Sparse Row (CSR) format.  BSR is appropriate for
sparse matrices with dense sub matrices like the last example
-          below.  Such matrices often arise, for instance, in finite
-          element discretizations.
+          below.  Such matrices often arise, for instance, in vector-valued
+          finite element discretizations.

Examples
@@ -113,11 +113,11 @@
self.data   = zeros( (0,) + blocksize, getdtype(dtype, default=float) )
self.indices = zeros( 0, dtype=intc )

-                X,Y = blocksize
-                if (M % X) != 0 or (N % Y) != 0:
+                R,C = blocksize
+                if (M % R) != 0 or (N % C) != 0:
raise ValueError, 'shape must be multiple of blocksize'

-                self.indptr  = zeros(M/X + 1, dtype=intc )
+                self.indptr  = zeros(M/R + 1, dtype=intc )

elif len(arg1) == 2:
# (data,(row,col)) format
@@ -279,7 +279,7 @@
"""
if isdense(other):
M,N = self.shape
-            X,Y = self.blocksize
+            R,C = self.blocksize

if other.shape != (N,) and other.shape != (N,1):
raise ValueError, "dimension mismatch"
@@ -299,7 +299,7 @@
y = output

-            bsr_matvec(M/X, N/Y, X, Y, \
+            bsr_matvec(M/R, N/C, R, C, \
self.indptr, self.indices, ravel(self.data), ravel(other), y)

if isinstance(other, matrix):
@@ -389,15 +389,15 @@
"""

M,N = self.shape
-        X,Y = self.blocksize
+        R,C = self.blocksize

-        row  = (X * arange(M/X)).repeat(diff(self.indptr))
-        row  = row.repeat(X*Y).reshape(-1,X,Y)
-        row += tile( arange(X).reshape(-1,1), (1,Y) )
+        row  = (R * arange(M/R)).repeat(diff(self.indptr))
+        row  = row.repeat(R*C).reshape(-1,R,C)
+        row += tile( arange(R).reshape(-1,1), (1,C) )
row  = row.reshape(-1)

-        col  = (Y * self.indices).repeat(X*Y).reshape(-1,X,Y)
-        col += tile( arange(Y), (X,1) )
+        col  = (C * self.indices).repeat(R*C).reshape(-1,R,C)
+        col += tile( arange(C), (R,1) )
col  = col.reshape(-1)

data = self.data.reshape(-1)
@@ -411,16 +411,16 @@

def transpose(self):

-        X,Y = self.blocksize
+        R,C = self.blocksize
M,N = self.shape

if self.nnz == 0:
-            return bsr_matrix((N,M),blocksize=(Y,X))
+            return bsr_matrix((N,M),blocksize=(C,R))

#use CSR.T to determine a permutation for BSR.T
from csr import csr_matrix
data = arange(len(self.indices), dtype=self.indices.dtype)
-        proxy = csr_matrix((data,self.indices,self.indptr),shape=(M/X,N/Y))
+        proxy = csr_matrix((data,self.indices,self.indptr),shape=(M/R,N/C))
proxy = proxy.tocsc()

data    = self.data.swapaxes(1,2)[proxy.data] #permute data

Modified: trunk/scipy/sparse/sparsetools/fixed_size.h
===================================================================
--- trunk/scipy/sparse/sparsetools/fixed_size.h	2008-01-04 07:00:52 UTC (rev 3779)
+++ trunk/scipy/sparse/sparsetools/fixed_size.h	2008-01-04 09:53:29 UTC (rev 3780)
@@ -19,7 +19,7 @@
inline T operator()(const T * V1, const T * V2)
{
_dot<N-1,T> d;
-            return (*V1) * (*V2) + d(++V1, ++V2);
+            return (*V1) * (*V2) + d(V1 + 1, V2 + 1);
}
};
template<class T>
@@ -41,8 +41,40 @@

+/*
+ *  Matrix Vector Product
+ *
+ */
+template<int M, int N, class T>
+class _matvec
+{
+    public:
+        inline void operator()(const T * A, const T * X, T * Y)
+        {
+            *Y += dot<N,T>(A,X);
+            _matvec<M-1,N,T> d;
+            d(A + N, X, Y + 1);
+        }
+};
+template<int N, class T>
+class _matvec<1,N,T>
+{
+    public:
+        inline void operator()(const T * A, const T * X, T * Y)
+        {
+            *Y += dot<N,T>(A,X);
+        }
+};

+template<int M, int N, class T>
+inline void matvec(const T * A, const T * X, T * Y)
+{
+    _matvec<M,N,T> d;
+    d(A,X,Y);
+}

+
+
template<int N, class T, class bin_op>
class _vec_binop_vec
{

Modified: trunk/scipy/sparse/sparsetools/sparsetools.h
===================================================================
--- trunk/scipy/sparse/sparsetools/sparsetools.h	2008-01-04 07:00:52 UTC (rev 3779)
+++ trunk/scipy/sparse/sparsetools/sparsetools.h	2008-01-04 09:53:29 UTC (rev 3780)
@@ -330,7 +330,7 @@
const I Bj[],
I Cp[])
{
-//    // method that uses O(n) temp storage
+//    // method that uses O(1) temp storage
//    const I hash_size = 1 << 5;
//    I vals[hash_size];
@@ -477,7 +477,6 @@
for(I i = 0; i < SIZE; i++){
Cx[i] = 0;
}
-    //std::cout << "n_brow " << n_brow << " Cp[-1] " << Cp[n_brow] << " R " << R << " C " << C << " N " << N << std::endl;

std::vector<I>  next(n_bcol,-1);
std::vector<T*> mats(n_bcol);
@@ -567,18 +566,9 @@
I Cp[],         I Cj[],          T Cx[],
const bin_op& op)
{
-   //Method that works for unsorted indices
-   // assert( csr_has_sorted_indices(n_brow,Ap,Aj) );
-   // assert( csr_has_sorted_indices(n_brow,Bp,Bj) );
-
-    //if (R == 3 && C == 3){
-    //    bsr_binop_bsr_fixed<I,T,3,3>(n_brow,n_bcol,Ap,Aj,Ax,Bp,Bj,Bx,Cp,Cj,Cx,op);
-    //    return;
-    //}
-
const I RC = R*C;
-    T result[8*8];
-    //T zeros[8*8];
+    T * result = Cx;
+
Cp[0] = 0;
I nnz = 0;

@@ -600,9 +590,7 @@

if( is_nonzero_block(result,RC) ){
Cj[nnz] = A_j;
-                    for(I n = 0; n < RC; n++){
-                        Cx[RC*nnz + n] = result[n];
-                    }
+                    result += RC;
nnz++;
}

@@ -616,9 +604,7 @@

if(is_nonzero_block(result,RC)){
Cj[nnz] = A_j;
-                    for(I n = 0; n < RC; n++){
-                        Cx[RC*nnz + n] = result[n];
-                    }
+                    result += RC;
nnz++;
}

@@ -631,9 +617,7 @@
}
if(is_nonzero_block(result,RC)){
Cj[nnz] = B_j;
-                    for(I n = 0; n < RC; n++){
-                        Cx[RC*nnz + n] = result[n];
-                    }
+                    result += RC;
nnz++;
}

@@ -644,15 +628,14 @@

//tail
while(A_pos < A_end){
+
for(I n = 0; n < RC; n++){
result[n] = op(Ax[RC*A_pos + n],0);
}

if(is_nonzero_block(result,RC)){
Cj[nnz] = A_j;
-                for(I n = 0; n < RC; n++){
-                    Cx[RC*nnz + n] = result[n];
-                }
+                result += RC;
nnz++;
}

@@ -663,11 +646,10 @@
for(I n = 0; n < RC; n++){
result[n] = op(0,Bx[RC*B_pos + n]);
}
+
if(is_nonzero_block(result,RC)){
Cj[nnz] = B_j;
-                for(I n = 0; n < RC; n++){
-                    Cx[RC*nnz + n] = result[n];
-                }
+                result += RC;
nnz++;
}

@@ -1159,38 +1141,15 @@
const T Xx[],
T Yx[])
{
-    //TODO make a matvec template
-    for(I i = 0; i < n_brow; i++) {
-        T r0 = 0;
-        T r1 = 0;
-        T r2 = 0;
-        T r3 = 0;
-        T r4 = 0;
-        T r5 = 0;
-        T r6 = 0;
-        T r7 = 0;
+    for(I i = 0; i < R*n_brow; i++){
+        Yx[i] = 0;
+    }

+    for(I i = 0; i < n_brow; i++) {
for(I jj = Ap[i]; jj < Ap[i+1]; jj++) {
I j = Aj[jj];
-            const T * base = Ax + jj*(R*C);
-            if (R > 0) r0 += dot<C>(base + 0*C, Xx + j*C);
-            if (R > 1) r1 += dot<C>(base + 1*C, Xx + j*C);
-            if (R > 2) r2 += dot<C>(base + 2*C, Xx + j*C);
-            if (R > 3) r3 += dot<C>(base + 3*C, Xx + j*C);
-            if (R > 4) r4 += dot<C>(base + 4*C, Xx + j*C);
-            if (R > 5) r5 += dot<C>(base + 5*C, Xx + j*C);
-            if (R > 6) r6 += dot<C>(base + 6*C, Xx + j*C);
-            if (R > 7) r7 += dot<C>(base + 7*C, Xx + j*C);
+            matvec<R,C>(Ax + jj*R*C, Xx + j*C, Yx + i*R);
}
-
-        if (R > 0) Yx[R*i+0] = r0;
-        if (R > 1) Yx[R*i+1] = r1;
-        if (R > 2) Yx[R*i+2] = r2;
-        if (R > 3) Yx[R*i+3] = r3;
-        if (R > 4) Yx[R*i+4] = r4;
-        if (R > 5) Yx[R*i+5] = r5;
-        if (R > 6) Yx[R*i+6] = r6;
-        if (R > 7) Yx[R*i+7] = r7;
}
}

@@ -1241,7 +1200,7 @@

//otherwise use general method
-    for(I i = 0; i < n_brow; i++){
+    for(I i = 0; i < R*n_brow; i++){
Yx[i] = 0;
}

Modified: trunk/scipy/sparse/tests/test_sparse.py
===================================================================
--- trunk/scipy/sparse/tests/test_sparse.py	2008-01-04 07:00:52 UTC (rev 3779)
+++ trunk/scipy/sparse/tests/test_sparse.py	2008-01-04 09:53:29 UTC (rev 3780)
@@ -46,7 +46,7 @@
class TestSparseTools(NumpyTestCase):
"""Simple benchmarks for sparse matrix module"""

-    def test_arithmetic(self,level=4):
+    def test_arithmetic(self,level=5):
matrices = []
#matrices.append( ('A','Identity', spidentity(500**2,format='csr')) )
matrices.append( ('A','Poisson5pt', poisson2d(500,format='csr'))  )
@@ -128,7 +128,7 @@
print fmt % (A.format,name,shape,A.nnz,1e3*(end-start)/float(iter) )

-    def bench_matvec(self,level=5):
+    def bench_matvec(self,level=4):
matrices = []
matrices.append(('Identity',   spidentity(10**4,format='dia')))
matrices.append(('Identity',   spidentity(10**4,format='csr')))