# [Scipy-svn] r3090 - in trunk/Lib/sandbox/pyem: . doc tests

scipy-svn@scip... scipy-svn@scip...
Mon Jun 11 02:01:32 CDT 2007

```Author: cdavid
Date: 2007-06-11 02:01:12 -0500 (Mon, 11 Jun 2007)
New Revision: 3090

Modified:
trunk/Lib/sandbox/pyem/densities.py
trunk/Lib/sandbox/pyem/doc/tutorial.pdf
trunk/Lib/sandbox/pyem/gauss_mix.py
trunk/Lib/sandbox/pyem/gmm_em.py
trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py
Log:
* Correct bogus GM._get_va which caused bogus isodensity plot + test
* Support for plain matrix in GM.check_state

Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py	2007-06-10 16:28:20 UTC (rev 3089)
+++ trunk/Lib/sandbox/pyem/densities.py	2007-06-11 07:01:12 UTC (rev 3090)
@@ -1,7 +1,7 @@
#! /usr/bin/python
#
-# Last Change: Sat Jun 09 10:00 PM 2007 J
+# Last Change: Mon Jun 11 03:00 PM 2007 J
"""This module implements various basic functions related to multivariate
gaussian, such as pdf estimation, confidence interval/ellipsoids, etc..."""

@@ -246,9 +246,9 @@
circle  = mahal * N.array([N.cos(theta), N.sin(theta)])

# Get the dimension which we are interested in:
-    mu  = mu[dim]
+    mu  = mu[c]
if mode == 'diag':
-        va      = va[dim]
+        va      = va[c]
elps    = N.outer(mu, N.ones(npoints))
elps    += N.dot(N.diag(N.sqrt(va)), circle)
elif mode == 'full':

Modified: trunk/Lib/sandbox/pyem/doc/tutorial.pdf
===================================================================
(Binary files differ)

Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-10 16:28:20 UTC (rev 3089)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-11 07:01:12 UTC (rev 3090)
@@ -1,5 +1,5 @@
# /usr/bin/python
-# Last Change: Sat Jun 09 10:00 PM 2007 J
+# Last Change: Mon Jun 11 03:00 PM 2007 J

"""Module implementing GM, a class which represents Gaussian mixtures.

@@ -296,27 +296,26 @@
to be positive definite.
"""
if not self.is_valid:
-            raise GmParamError("""Parameters of the model has not been
-                set yet, please set them using self.set_param()""")
+            raise GmParamError("Parameters of the model has not been"\
+                "set yet, please set them using self.set_param()")

-        if self.mode == 'full':
-            raise NotImplementedError, "not implemented for full mode yet"
-
-        # # How to check w: if one component is negligeable, what shall
-        # # we do ?
-        # M   = N.max(self.w)
-        # m   = N.min(self.w)
-
-        # maxc    = m / M
-
# Check condition number for cov matrix
-        cond    = N.zeros(self.k)
-        ava     = N.absolute(self.va)
-        for c in range(self.k):
-            cond[c] = N.amax(ava[c, :]) / N.amin(ava[c, :])
+        if self.mode == 'diag':
+            tinfo = N.finfo(self.va.dtype)
+            if N.any(self.va < tinfo.eps):
+                raise GmParamError("variances are singular")
+        elif self.mode == 'full':
+            try:
+                d = self.d
+                for i in range(self.k):
+                    N.linalg.cholesky(self.va[i*d:i*d+d, :])
+            except N.linalg.LinAlgError:
+                raise GmParamError("matrix %d is singular " % i)

-        print cond
+        else:
+            raise GmParamError("Unknown mode")

+        return True
@classmethod
def gen_param(cls, d, nc, varmode = 'diag', spread = 1):
"""Generate random, valid parameters for a gaussian mixture model.
@@ -341,13 +340,14 @@
-----
This is a class method.
"""
-        w   = abs(randn(nc))
+        w   = N.abs(randn(nc))
w   = w / sum(w, 0)

-        mu  = spread * randn(nc, d)
+        mu  = spread * N.sqrt(d) * randn(nc, d)
if varmode == 'diag':
-            va  = abs(randn(nc, d))
+            va  = N.abs(randn(nc, d))
elif varmode == 'full':
+            # If A is invertible, A'A is positive definite
va  = randn(nc * d, d)
for k in range(nc):
va[k*d:k*d+d]   = N.dot( va[k*d:k*d+d],
@@ -509,7 +509,7 @@
return retval

def density_on_grid(self, dim = misc.DEF_VIS_DIM, nx = 50, ny = 50,
-            maxlevel = 0.95):
+            maxlevel = 0.95, V = None):
"""Do all the necessary computation for contour plot of mixture's
density.

@@ -556,8 +556,10 @@
N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny), dim = dim)
lden = N.log(den)
# XXX: how to find "good" values for level ?
-        V = [-5, -3, -1, -0.5, ]
-        V.extend(N.linspace(0, N.max(lden), 4).tolist())
+        if V is None:
+            #V = [-5, -3, -1, -0.5, ]
+            #V.extend(list(N.linspace(0, N.max(lden), 20)))
+            V = N.linspace(-5, N.max(lden), 20)
return X, Y, lden, N.array(V)

def _densityctr(self, rangex, rangey, dim = misc.DEF_VIS_DIM):
@@ -578,7 +580,8 @@
return X, Y, den

def _get_va(self, dim):
-        """Returns variance limited do dimension in dim."""
+        """Returns variance limited do 2 dimension in tuple dim."""
+        assert len(dim) == 2
dim = N.array(dim)
if dim.any() < 0 or dim.any() >= self.d:
raise ValueError("dim elements should be between 0 and dimension"\
@@ -586,9 +589,16 @@
if self.mode == 'diag':
return self.va[:, dim]
elif self.mode == 'full':
-            tidx = N.array([N.array(dim) + i * self.d for i in range(self.k)])
-            tidx.flatten()
-            return self.va[tidx, dim]
+            ld = dim.size
+            vaselid = N.empty((ld * self.k, ld), N.int)
+            for i in range(self.k):
+                vaselid[ld*i] = dim[0] + i * self.d
+                vaselid[ld*i+1] = dim[1] + i * self.d
+            vadid = N.empty((ld * self.k, ld), N.int)
+            for i in range(self.k):
else:
raise ValueError("Unkown mode")

Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-10 16:28:20 UTC (rev 3089)
+++ trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-11 07:01:12 UTC (rev 3090)
@@ -1,5 +1,5 @@
# /usr/bin/python
-# Last Change: Sun Jun 10 06:00 PM 2007 J
+# Last Change: Mon Jun 11 01:00 PM 2007 J

"""Module implementing GMM, a class to estimate Gaussian mixture models using
EM, and EM, a class which use GMM instances to estimate models parameters using
@@ -20,7 +20,7 @@
import densities
#from kmean import kmean
from scipy.cluster.vq import kmeans2 as kmean
-#from gauss_mix import GM
+from gauss_mix import GmParamError

#from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND

@@ -91,13 +91,18 @@
""" Init the model at random."""
k   = self.gm.k
d   = self.gm.d
+        w   = N.ones(k) / k
+        mu  = randn(k, d)
if self.gm.mode == 'diag':
-            w   = N.ones(k) / k
-            mu  = randn(k, d)
va  = N.fabs(randn(k, d))
else:
-            raise GmmParamError("init_random not implemented for "
-                    "mode %s yet", self.gm.mode)
+            # If A is invertible, A'A is positive definite
+            va  = randn(k * d, d)
+            for i in range(k):
+                va[i*d:i*d+d]   = N.dot( va[i*d:i*d+d],
+                    va[i*d:i*d+d].T)
+            #raise GmmParamError("init_random not implemented for "\
+            #        "mode %s yet" % self.gm.mode)

self.gm.set_param(w, mu, va)

@@ -106,14 +111,12 @@
def init_test(self, data):
"""Use values already in the model as initialization.

-        Useful for testing purpose when reproducability is necessary."""
+        Useful for testing purpose when reproducability is necessary. This does
+        nothing but checking that the mixture model has valid initial
+        values."""
+        # We have
try:
-            if self.gm.check_state():
-                self.isinit = True
-            else:
-                raise GmParamError("the mixture is initialized, but the"\
-                        "parameters are not valid")
-
+            self.gm.check_state()
except GmParamError, e:
print "Model is not properly initalized, cannot init EM."
raise "Message was %s" % str(e)

Modified: trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py	2007-06-10 16:28:20 UTC (rev 3089)
+++ trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py	2007-06-11 07:01:12 UTC (rev 3090)
@@ -1,5 +1,5 @@
#! /usr/bin/env python
-# Last Change: Sat Jun 09 03:00 PM 2007 J
+# Last Change: Mon Jun 11 03:00 PM 2007 J

# For now, just test that all mode/dim execute correctly

@@ -41,6 +41,23 @@
except ValueError, e:
print "Ok, density_grid failed as expected (with msg: " + str(e) + ")"

+    def test_get_va(self):
+        """Test _get_va for diag and full mode."""
+        d = 3
+        k = 2
+        ld = 2
+        dim = [0, 2]
+        w, mu, va = GM.gen_param(d, k, 'full')
+        va = N.arange(d*d*k).reshape(d*k, d)
+        gm = GM.fromvalues(w, mu, va)

+        tva = N.empty(ld * ld * k)
+        for i in range(k * ld * ld):
+            tva[i] = dim[i%ld] + (i % 4)/ ld  * dim[1] * d + d*d * (i / (ld*ld))
+        tva = tva.reshape(ld * k, ld)
+        sva = gm._get_va(dim)
+        assert N.all(sva == tva)
+
+
if __name__ == "__main__":
NumpyTest().run()

```