[Scipy-svn] r3090 - in trunk/Lib/sandbox/pyem: . doc tests

scipy-svn@scip... scipy-svn@scip...
Mon Jun 11 02:01:32 CDT 2007


Author: cdavid
Date: 2007-06-11 02:01:12 -0500 (Mon, 11 Jun 2007)
New Revision: 3090

Modified:
   trunk/Lib/sandbox/pyem/densities.py
   trunk/Lib/sandbox/pyem/doc/tutorial.pdf
   trunk/Lib/sandbox/pyem/gauss_mix.py
   trunk/Lib/sandbox/pyem/gmm_em.py
   trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py
Log:
* Correct bogus GM._get_va which caused bogus isodensity plot + test
* Support for plain matrix in GM.check_state



Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py	2007-06-10 16:28:20 UTC (rev 3089)
+++ trunk/Lib/sandbox/pyem/densities.py	2007-06-11 07:01:12 UTC (rev 3090)
@@ -1,7 +1,7 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Sat Jun 09 10:00 PM 2007 J
+# Last Change: Mon Jun 11 03:00 PM 2007 J
 """This module implements various basic functions related to multivariate
 gaussian, such as pdf estimation, confidence interval/ellipsoids, etc..."""
 
@@ -246,9 +246,9 @@
     circle  = mahal * N.array([N.cos(theta), N.sin(theta)])
 
     # Get the dimension which we are interested in:
-    mu  = mu[dim]
+    mu  = mu[c]
     if mode == 'diag':
-        va      = va[dim]
+        va      = va[c]
         elps    = N.outer(mu, N.ones(npoints))
         elps    += N.dot(N.diag(N.sqrt(va)), circle)
     elif mode == 'full':

Modified: trunk/Lib/sandbox/pyem/doc/tutorial.pdf
===================================================================
(Binary files differ)

Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-10 16:28:20 UTC (rev 3089)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-11 07:01:12 UTC (rev 3090)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Sat Jun 09 10:00 PM 2007 J
+# Last Change: Mon Jun 11 03:00 PM 2007 J
 
 """Module implementing GM, a class which represents Gaussian mixtures.
 
@@ -296,27 +296,26 @@
         to be positive definite.
         """
         if not self.is_valid:
-            raise GmParamError("""Parameters of the model has not been 
-                set yet, please set them using self.set_param()""")
+            raise GmParamError("Parameters of the model has not been"\
+                "set yet, please set them using self.set_param()")
 
-        if self.mode == 'full':
-            raise NotImplementedError, "not implemented for full mode yet"
-        
-        # # How to check w: if one component is negligeable, what shall
-        # # we do ?
-        # M   = N.max(self.w)
-        # m   = N.min(self.w)
-
-        # maxc    = m / M
-
         # Check condition number for cov matrix
-        cond    = N.zeros(self.k)
-        ava     = N.absolute(self.va)
-        for c in range(self.k):
-            cond[c] = N.amax(ava[c, :]) / N.amin(ava[c, :])
+        if self.mode == 'diag':
+            tinfo = N.finfo(self.va.dtype)
+            if N.any(self.va < tinfo.eps):
+                raise GmParamError("variances are singular")
+        elif self.mode == 'full':
+            try:
+                d = self.d
+                for i in range(self.k):
+                    N.linalg.cholesky(self.va[i*d:i*d+d, :])
+            except N.linalg.LinAlgError:
+                raise GmParamError("matrix %d is singular " % i)
 
-        print cond
+        else:
+            raise GmParamError("Unknown mode")
 
+        return True
     @classmethod
     def gen_param(cls, d, nc, varmode = 'diag', spread = 1):
         """Generate random, valid parameters for a gaussian mixture model.
@@ -341,13 +340,14 @@
         -----
         This is a class method.
         """
-        w   = abs(randn(nc))
+        w   = N.abs(randn(nc))
         w   = w / sum(w, 0)
 
-        mu  = spread * randn(nc, d)
+        mu  = spread * N.sqrt(d) * randn(nc, d)
         if varmode == 'diag':
-            va  = abs(randn(nc, d))
+            va  = N.abs(randn(nc, d))
         elif varmode == 'full':
+            # If A is invertible, A'A is positive definite
             va  = randn(nc * d, d)
             for k in range(nc):
                 va[k*d:k*d+d]   = N.dot( va[k*d:k*d+d], 
@@ -509,7 +509,7 @@
         return retval
 
     def density_on_grid(self, dim = misc.DEF_VIS_DIM, nx = 50, ny = 50,
-            maxlevel = 0.95):
+            maxlevel = 0.95, V = None):
         """Do all the necessary computation for contour plot of mixture's
         density.
         
@@ -556,8 +556,10 @@
                 N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny), dim = dim)
         lden = N.log(den)
         # XXX: how to find "good" values for level ?
-        V = [-5, -3, -1, -0.5, ]
-        V.extend(N.linspace(0, N.max(lden), 4).tolist())
+        if V is None:
+            #V = [-5, -3, -1, -0.5, ]
+            #V.extend(list(N.linspace(0, N.max(lden), 20)))
+            V = N.linspace(-5, N.max(lden), 20)
         return X, Y, lden, N.array(V)
 
     def _densityctr(self, rangex, rangey, dim = misc.DEF_VIS_DIM):
@@ -578,7 +580,8 @@
         return X, Y, den
 
     def _get_va(self, dim):
-        """Returns variance limited do dimension in dim."""
+        """Returns variance limited do 2 dimension in tuple dim."""
+        assert len(dim) == 2
         dim = N.array(dim)
         if dim.any() < 0 or dim.any() >= self.d:
             raise ValueError("dim elements should be between 0 and dimension"\
@@ -586,9 +589,16 @@
         if self.mode == 'diag':
             return self.va[:, dim]
         elif self.mode == 'full':
-            tidx = N.array([N.array(dim) + i * self.d for i in range(self.k)])
-            tidx.flatten()
-            return self.va[tidx, dim]
+            ld = dim.size
+            vaselid = N.empty((ld * self.k, ld), N.int)
+            for i in range(self.k):
+                vaselid[ld*i] = dim[0] + i * self.d
+                vaselid[ld*i+1] = dim[1] + i * self.d
+            vadid = N.empty((ld * self.k, ld), N.int)
+            for i in range(self.k):
+                vadid[ld*i] = dim
+                vadid[ld*i+1] = dim
+            return self.va[vaselid, vadid]
         else:
             raise ValueError("Unkown mode")
 

Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-10 16:28:20 UTC (rev 3089)
+++ trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-11 07:01:12 UTC (rev 3090)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Sun Jun 10 06:00 PM 2007 J
+# Last Change: Mon Jun 11 01:00 PM 2007 J
 
 """Module implementing GMM, a class to estimate Gaussian mixture models using
 EM, and EM, a class which use GMM instances to estimate models parameters using
@@ -20,7 +20,7 @@
 import densities
 #from kmean import kmean
 from scipy.cluster.vq import kmeans2 as kmean
-#from gauss_mix import GM
+from gauss_mix import GmParamError
 
 #from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND
 
@@ -91,13 +91,18 @@
         """ Init the model at random."""
         k   = self.gm.k
         d   = self.gm.d
+        w   = N.ones(k) / k
+        mu  = randn(k, d)
         if self.gm.mode == 'diag':
-            w   = N.ones(k) / k
-            mu  = randn(k, d)
             va  = N.fabs(randn(k, d))
         else:
-            raise GmmParamError("init_random not implemented for "
-                    "mode %s yet", self.gm.mode)
+            # If A is invertible, A'A is positive definite
+            va  = randn(k * d, d)
+            for i in range(k):
+                va[i*d:i*d+d]   = N.dot( va[i*d:i*d+d], 
+                    va[i*d:i*d+d].T)
+            #raise GmmParamError("init_random not implemented for "\
+            #        "mode %s yet" % self.gm.mode)
 
         self.gm.set_param(w, mu, va)
         
@@ -106,14 +111,12 @@
     def init_test(self, data):
         """Use values already in the model as initialization.
         
-        Useful for testing purpose when reproducability is necessary."""
+        Useful for testing purpose when reproducability is necessary. This does
+        nothing but checking that the mixture model has valid initial
+        values."""
+        # We have
         try:
-            if self.gm.check_state():
-                self.isinit = True
-            else:
-                raise GmParamError("the mixture is initialized, but the"\
-                        "parameters are not valid")
-
+            self.gm.check_state()
         except GmParamError, e:
             print "Model is not properly initalized, cannot init EM."
             raise "Message was %s" % str(e)

Modified: trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py	2007-06-10 16:28:20 UTC (rev 3089)
+++ trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py	2007-06-11 07:01:12 UTC (rev 3090)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Sat Jun 09 03:00 PM 2007 J
+# Last Change: Mon Jun 11 03:00 PM 2007 J
 
 # For now, just test that all mode/dim execute correctly
 
@@ -41,6 +41,23 @@
         except ValueError, e:
             print "Ok, density_grid failed as expected (with msg: " + str(e) + ")"
 
+    def test_get_va(self):
+        """Test _get_va for diag and full mode."""
+        d = 3
+        k = 2
+        ld = 2
+        dim = [0, 2]
+        w, mu, va = GM.gen_param(d, k, 'full')
+        va = N.arange(d*d*k).reshape(d*k, d)
+        gm = GM.fromvalues(w, mu, va)
 
+        tva = N.empty(ld * ld * k)
+        for i in range(k * ld * ld):
+            tva[i] = dim[i%ld] + (i % 4)/ ld  * dim[1] * d + d*d * (i / (ld*ld))
+        tva = tva.reshape(ld * k, ld)
+        sva = gm._get_va(dim)
+        assert N.all(sva == tva)
+
+
 if __name__ == "__main__":
     NumpyTest().run()



More information about the Scipy-svn mailing list