[Scipy-svn] r3784 - trunk/scipy/sparse/sparsetools

scipy-svn@scip... scipy-svn@scip...
Sat Jan 5 00:05:59 CST 2008


Author: wnbell
Date: 2008-01-05 00:05:54 -0600 (Sat, 05 Jan 2008)
New Revision: 3784

Modified:
   trunk/scipy/sparse/sparsetools/fixed_size.h
   trunk/scipy/sparse/sparsetools/sparsetools.h
Log:
added fixed size matrix matrix products for fast BSR * BSR


Modified: trunk/scipy/sparse/sparsetools/fixed_size.h
===================================================================
--- trunk/scipy/sparse/sparsetools/fixed_size.h	2008-01-04 21:33:02 UTC (rev 3783)
+++ trunk/scipy/sparse/sparsetools/fixed_size.h	2008-01-05 06:05:54 UTC (rev 3784)
@@ -2,7 +2,7 @@
 #define FIXED_SIZE_H
 
 /*
- * templates for fixed size array and vector arithmetic
+ * templates for fixed size vector and matrix arithmetic
  * 
  */
 
@@ -12,95 +12,139 @@
  *  Dot Product
  * 
  */
-template<int N, class T>
+template<int N, int SX, int SY, class T>
 class _dot
 {
     public:
-        inline T operator()(const T * V1, const T * V2)
+        inline T operator()(const T * X, const T * Y)
         {
-            _dot<N-1,T> d;
-            return (*V1) * (*V2) + d(V1 + 1, V2 + 1);
+            _dot<N-1,SX,SY,T> d;
+            return (*X) * (*Y) + d(X + SX, Y + SY);
         }
 };
-template<class T>
-class _dot<1,T>
+template<int SX, int SY, class T>
+class _dot<1,SX,SY,T>
 {
     public:
-        inline T operator()(const T * V1, const T * V2)
+        inline T operator()(const T * X, const T * Y)
         {
-            return (*V1) * (*V2);
+            return (*X) * (*Y);
         }
 };
 
-template<int N, class T>
-inline T dot(const T * V1, const T * V2)
+template<int N, int SX, int SY, class T>
+inline T dot(const T * X, const T * Y)
 {
-    _dot<N,T> d;
-    return d(V1, V2);
+    _dot<N,SX,SY,T> d;
+    return d(X, Y);
 }
 
 
 
 /*
- *  Matrix Vector Product
+ *  Matrix Vector Product Y = A*X
  * 
  */
-template<int M, int N, class T>
+template<int M, int N, int SX, int SY, class T>
 class _matvec
 {
     public:
         inline void operator()(const T * A, const T * X, T * Y)
         {
-            *Y += dot<N,T>(A,X);
-            _matvec<M-1,N,T> d;
-            d(A + N, X, Y + 1);
+            *Y += dot<N,1,SX,T>(A,X);
+            _matvec<M-1,N,SX,SY,T> d;
+            d(A + N, X, Y + SY);
         }
 };
-template<int N, class T>
-class _matvec<1,N,T>
+
+template<int N, int SX, int SY, class T>
+class _matvec<1,N,SX,SY,T>
 {
     public:
         inline void operator()(const T * A, const T * X, T * Y)
         {
-            *Y += dot<N,T>(A,X);
+            *Y += dot<N,1,SX,T>(A,X);
         }
 };
 
-template<int M, int N, class T>
+template<int M, int N, int SX, int SY, class T>
 inline void matvec(const T * A, const T * X, T * Y)
 {
-    _matvec<M,N,T> d;
+    _matvec<M,N,SX,SY,T> d;
     d(A,X,Y);
 }
 
 
+/*
+ *  Matrix Matrix Product C = A*B
+ *
+ *  C is L*N
+ *  A is L*M
+ *  B is M*N
+ * 
+ */
+template<int L, int M, int N, int U, class T>
+class _matmat
+{
+    public:
+        inline void operator()(const T * A, const T * B, T * C)
+        {
+            matvec<L,M,N,N>(A,B,C);
+            
+            _matmat<L,M,N,U-1,T> d;
+            d(A, B + 1, C + 1);
+        }
+};
+template<int L, int M, int N, class T>
+class _matmat<L,M,N,0,T>
+{
+    public:
+        inline void operator()(const T * A, const T * B, T * C)
+        {
+            matvec<L,M,N,N>(A,B,C);
+        }
+};
 
+template<int L, int M, int N, class T>
+inline void matmat(const T * A, const T * B, T * C)
+{
+    _matmat<L,M,N,N-1,T> d;
+    d(A,B,C);
+}
+
+
+
+/*
+ * Binary vector operation Z = op(X,Y) 
+ *
+ */
+
 template<int N, class T, class bin_op>
 class _vec_binop_vec
 {
     public:
-        inline void operator()(const T * V1, const T * V2, T * V3, const bin_op& op)
+        inline void operator()(const T * X, const T * Y, T * Z, const bin_op& op)
         {
-            *V3 = op( *V1, *V2 );
+            *Z = op( *X, *Y );
             _vec_binop_vec<N-1,T,bin_op> d;
-            d(V1 + 1, V2 + 1, V3 + 1, op);
+            d(X + 1, Y + 1, Z + 1, op);
         }
 };
 template<class T, class bin_op>
 class _vec_binop_vec<1,T,bin_op>
 {
     public:
-        inline void operator()(const T * V1, const T * V2, T * V3, const bin_op& op)
+        inline void operator()(const T * X, const T * Y, T * Z, const bin_op& op)
         {
-            *V3 = op( *V1, *V2 );
+            *Z = op( *X, *Y );
         }
 };
 
 template<int N, class T, class bin_op>
-inline void vec_binop_vec(const T * V1, const T * V2, T * V3, const bin_op& op)
+inline void vec_binop_vec(const T * X, const T * Y, T * Z, const bin_op& op)
 {
     _vec_binop_vec<N,T,bin_op> d;
-    d(V1,V2,V3,op);
+    d(X,Y,Z,op);
 }
 
 

Modified: trunk/scipy/sparse/sparsetools/sparsetools.h
===================================================================
--- trunk/scipy/sparse/sparsetools/sparsetools.h	2008-01-04 21:33:02 UTC (rev 3783)
+++ trunk/scipy/sparse/sparsetools/sparsetools.h	2008-01-05 06:05:54 UTC (rev 3784)
@@ -462,6 +462,74 @@
         Cp[i+1] = nnz;
     }
 }
+
+
+
+template <class I, class T, int R, int C, int N>
+void bsr_matmat_pass2_fixed(const I n_brow,  const I n_bcol, 
+      	                    const I Ap[],    const I Aj[],    const T Ax[],
+      	                    const I Bp[],    const I Bj[],    const T Bx[],
+      	                          I Cp[],          I Cj[],          T Cx[])
+{
+    const I RC = R*C;
+    const I RN = R*N;
+    const I NC = N*C;
+    const I SIZE = RC*Cp[n_brow];
+
+    for(I i = 0; i < SIZE; i++){
+        Cx[i] = 0;
+    }
+ 
+    std::vector<I>  next(n_bcol,-1);
+    std::vector<T*> mats(n_bcol);
+
+    
+    I nnz = 0;
+
+    Cp[0] = 0;
+
+    for(I i = 0; i < n_brow; i++){
+        I head   = -2;
+        I length =  0;
+
+        I jj_start = Ap[i];
+        I jj_end   = Ap[i+1];
+        for(I jj = jj_start; jj < jj_end; jj++){
+            I j = Aj[jj];
+
+            I kk_start = Bp[j];
+            I kk_end   = Bp[j+1];
+            for(I kk = kk_start; kk < kk_end; kk++){
+                I k = Bj[kk];
+
+                if(next[k] == -1){
+                    next[k] = head;                        
+                    head = k;
+                    Cj[nnz] = k;
+                    mats[k] = Cx + RC*nnz;
+                    nnz++;
+                    length++;
+                }
+
+                const T * A = Ax + jj*RN;
+                const T * B = Bx + kk*NC;
+                T * result = mats[k];
+                matmat<R,C,N>(A,B,result);
+            }
+        }         
+
+        for(I jj = 0; jj < length; jj++){
+            I temp = head;                
+            head = next[head];
+            next[temp] = -1; //clear arrays
+        }
+
+    }
+    
+}
+
+#define F(X,Y,Z) bsr_matmat_pass2_fixed<I,T,X,Y,Z>
+
 template <class I, class T>
 void bsr_matmat_pass2(const I n_brow,  const I n_bcol, 
                       const I R,       const I C,       const I N,
@@ -469,21 +537,55 @@
       	              const I Bp[],    const I Bj[],    const T Bx[],
       	                    I Cp[],          I Cj[],          T Cx[])
 {
+    assert(R > 0 && C > 0 && N > 0);
+
+#ifdef TESTING
+    void (*dispatch[4][4][4])(I,I,const I*,const I*,const T*,
+                                  const I*,const I*,const T*,
+                                        I*,      I*,      T*) = \
+    {
+        { { F(1,1,1), F(1,1,2), F(1,1,3), F(1,1,4) },
+          { F(1,2,1), F(1,2,2), F(1,2,3), F(1,2,4) },
+          { F(1,3,1), F(1,3,2), F(1,3,3), F(1,3,4) },
+          { F(1,4,1), F(1,4,2), F(1,4,3), F(1,4,4) },
+        },
+        { { F(2,1,1), F(2,1,2), F(2,1,3), F(2,1,4) },
+          { F(2,2,1), F(2,2,2), F(2,2,3), F(2,2,4) },
+          { F(2,3,1), F(2,3,2), F(2,3,3), F(2,3,4) },
+          { F(2,4,1), F(2,4,2), F(2,4,3), F(2,4,4) },
+        },
+        { { F(3,1,1), F(3,1,2), F(3,1,3), F(3,1,4) },
+          { F(3,2,1), F(3,2,2), F(3,2,3), F(3,2,4) },
+          { F(3,3,1), F(3,3,2), F(3,3,3), F(3,3,4) },
+          { F(3,4,1), F(3,4,2), F(3,4,3), F(3,4,4) },
+        },
+        { { F(4,1,1), F(4,1,2), F(4,1,3), F(4,1,4) },
+          { F(4,2,1), F(4,2,2), F(4,2,3), F(4,2,4) },
+          { F(4,3,1), F(4,3,2), F(4,3,3), F(4,3,4) },
+          { F(4,4,1), F(4,4,2), F(4,4,3), F(4,4,4) },
+        }
+    };
+    
+    if (R <= 4 && C <= 4 && N <= 4){
+        dispatch[R-1][N-1][C-1](n_brow,n_bcol,Ap,Aj,Ax,Bp,Bj,Bx,Cp,Cj,Cx);
+        return;
+    }
+#endif
+
     const I RC = R*C;
     const I RN = R*N;
     const I NC = N*C;
     const I SIZE = RC*Cp[n_brow];
 
+
     for(I i = 0; i < SIZE; i++){
         Cx[i] = 0;
     }
  
     std::vector<I>  next(n_bcol,-1);
     std::vector<T*> mats(n_bcol);
-
     
     I nnz = 0;
-
     Cp[0] = 0;
 
     for(I i = 0; i < n_brow; i++){
@@ -518,34 +620,24 @@
                             result[C*r + c] += A[N*r + n] * B[C*n + c];
 
                         }
-                        //std::cout << "result[" << r << "," << c << "] = " << result[C*r + c] << std::endl;
                     }
                 }
             }
         }         
 
         for(I jj = 0; jj < length; jj++){
-
-            //if(is_nonzero_block(result,RC)){
-            //    Cj[nnz] = head;
-            //    Cx[nnz] = sums[head];
-            //    nnz++;
-            //}
-
             I temp = head;                
             head = next[head];
-
             next[temp] = -1; //clear arrays
         }
 
-        //Cp[i+1] = nnz;
     }
 }
+#undef F
 
 
 
 
-
 template <class I, class T>
 bool is_nonzero_block(const T block[], const I blocksize){
     for(I i = 0; i < blocksize; i++){
@@ -1148,7 +1240,7 @@
     for(I i = 0; i < n_brow; i++) {
         for(I jj = Ap[i]; jj < Ap[i+1]; jj++) {
             I j = Aj[jj];
-            matvec<R,C>(Ax + jj*R*C, Xx + j*C, Yx + i*R);
+            matvec<R,C,1,1>(Ax + jj*R*C, Xx + j*C, Yx + i*R);
         }
     }
 }



More information about the Scipy-svn mailing list