[Scipy-svn] r3090 - in trunk/Lib/sandbox/pyem: . doc tests
scipy-svn@scip...
scipy-svn@scip...
Mon Jun 11 02:01:32 CDT 2007
Author: cdavid
Date: 2007-06-11 02:01:12 -0500 (Mon, 11 Jun 2007)
New Revision: 3090
Modified:
trunk/Lib/sandbox/pyem/densities.py
trunk/Lib/sandbox/pyem/doc/tutorial.pdf
trunk/Lib/sandbox/pyem/gauss_mix.py
trunk/Lib/sandbox/pyem/gmm_em.py
trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py
Log:
* Correct bogus GM._get_va which caused bogus isodensity plot + test
* Support for plain matrix in GM.check_state
Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py 2007-06-10 16:28:20 UTC (rev 3089)
+++ trunk/Lib/sandbox/pyem/densities.py 2007-06-11 07:01:12 UTC (rev 3090)
@@ -1,7 +1,7 @@
#! /usr/bin/python
#
# Copyrighted David Cournapeau
-# Last Change: Sat Jun 09 10:00 PM 2007 J
+# Last Change: Mon Jun 11 03:00 PM 2007 J
"""This module implements various basic functions related to multivariate
gaussian, such as pdf estimation, confidence interval/ellipsoids, etc..."""
@@ -246,9 +246,9 @@
circle = mahal * N.array([N.cos(theta), N.sin(theta)])
# Get the dimension which we are interested in:
- mu = mu[dim]
+ mu = mu[c]
if mode == 'diag':
- va = va[dim]
+ va = va[c]
elps = N.outer(mu, N.ones(npoints))
elps += N.dot(N.diag(N.sqrt(va)), circle)
elif mode == 'full':
Modified: trunk/Lib/sandbox/pyem/doc/tutorial.pdf
===================================================================
(Binary files differ)
Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-10 16:28:20 UTC (rev 3089)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-11 07:01:12 UTC (rev 3090)
@@ -1,5 +1,5 @@
# /usr/bin/python
-# Last Change: Sat Jun 09 10:00 PM 2007 J
+# Last Change: Mon Jun 11 03:00 PM 2007 J
"""Module implementing GM, a class which represents Gaussian mixtures.
@@ -296,27 +296,26 @@
to be positive definite.
"""
if not self.is_valid:
- raise GmParamError("""Parameters of the model has not been
- set yet, please set them using self.set_param()""")
+ raise GmParamError("Parameters of the model has not been"\
+ "set yet, please set them using self.set_param()")
- if self.mode == 'full':
- raise NotImplementedError, "not implemented for full mode yet"
-
- # # How to check w: if one component is negligeable, what shall
- # # we do ?
- # M = N.max(self.w)
- # m = N.min(self.w)
-
- # maxc = m / M
-
# Check condition number for cov matrix
- cond = N.zeros(self.k)
- ava = N.absolute(self.va)
- for c in range(self.k):
- cond[c] = N.amax(ava[c, :]) / N.amin(ava[c, :])
+ if self.mode == 'diag':
+ tinfo = N.finfo(self.va.dtype)
+ if N.any(self.va < tinfo.eps):
+ raise GmParamError("variances are singular")
+ elif self.mode == 'full':
+ try:
+ d = self.d
+ for i in range(self.k):
+ N.linalg.cholesky(self.va[i*d:i*d+d, :])
+ except N.linalg.LinAlgError:
+ raise GmParamError("matrix %d is singular " % i)
- print cond
+ else:
+ raise GmParamError("Unknown mode")
+ return True
@classmethod
def gen_param(cls, d, nc, varmode = 'diag', spread = 1):
"""Generate random, valid parameters for a gaussian mixture model.
@@ -341,13 +340,14 @@
-----
This is a class method.
"""
- w = abs(randn(nc))
+ w = N.abs(randn(nc))
w = w / sum(w, 0)
- mu = spread * randn(nc, d)
+ mu = spread * N.sqrt(d) * randn(nc, d)
if varmode == 'diag':
- va = abs(randn(nc, d))
+ va = N.abs(randn(nc, d))
elif varmode == 'full':
+ # If A is invertible, A'A is positive definite
va = randn(nc * d, d)
for k in range(nc):
va[k*d:k*d+d] = N.dot( va[k*d:k*d+d],
@@ -509,7 +509,7 @@
return retval
def density_on_grid(self, dim = misc.DEF_VIS_DIM, nx = 50, ny = 50,
- maxlevel = 0.95):
+ maxlevel = 0.95, V = None):
"""Do all the necessary computation for contour plot of mixture's
density.
@@ -556,8 +556,10 @@
N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny), dim = dim)
lden = N.log(den)
# XXX: how to find "good" values for level ?
- V = [-5, -3, -1, -0.5, ]
- V.extend(N.linspace(0, N.max(lden), 4).tolist())
+ if V is None:
+ #V = [-5, -3, -1, -0.5, ]
+ #V.extend(list(N.linspace(0, N.max(lden), 20)))
+ V = N.linspace(-5, N.max(lden), 20)
return X, Y, lden, N.array(V)
def _densityctr(self, rangex, rangey, dim = misc.DEF_VIS_DIM):
@@ -578,7 +580,8 @@
return X, Y, den
def _get_va(self, dim):
- """Returns variance limited do dimension in dim."""
+ """Returns variance limited do 2 dimension in tuple dim."""
+ assert len(dim) == 2
dim = N.array(dim)
if dim.any() < 0 or dim.any() >= self.d:
raise ValueError("dim elements should be between 0 and dimension"\
@@ -586,9 +589,16 @@
if self.mode == 'diag':
return self.va[:, dim]
elif self.mode == 'full':
- tidx = N.array([N.array(dim) + i * self.d for i in range(self.k)])
- tidx.flatten()
- return self.va[tidx, dim]
+ ld = dim.size
+ vaselid = N.empty((ld * self.k, ld), N.int)
+ for i in range(self.k):
+ vaselid[ld*i] = dim[0] + i * self.d
+ vaselid[ld*i+1] = dim[1] + i * self.d
+ vadid = N.empty((ld * self.k, ld), N.int)
+ for i in range(self.k):
+ vadid[ld*i] = dim
+ vadid[ld*i+1] = dim
+ return self.va[vaselid, vadid]
else:
raise ValueError("Unkown mode")
Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-10 16:28:20 UTC (rev 3089)
+++ trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-11 07:01:12 UTC (rev 3090)
@@ -1,5 +1,5 @@
# /usr/bin/python
-# Last Change: Sun Jun 10 06:00 PM 2007 J
+# Last Change: Mon Jun 11 01:00 PM 2007 J
"""Module implementing GMM, a class to estimate Gaussian mixture models using
EM, and EM, a class which use GMM instances to estimate models parameters using
@@ -20,7 +20,7 @@
import densities
#from kmean import kmean
from scipy.cluster.vq import kmeans2 as kmean
-#from gauss_mix import GM
+from gauss_mix import GmParamError
#from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND
@@ -91,13 +91,18 @@
""" Init the model at random."""
k = self.gm.k
d = self.gm.d
+ w = N.ones(k) / k
+ mu = randn(k, d)
if self.gm.mode == 'diag':
- w = N.ones(k) / k
- mu = randn(k, d)
va = N.fabs(randn(k, d))
else:
- raise GmmParamError("init_random not implemented for "
- "mode %s yet", self.gm.mode)
+ # If A is invertible, A'A is positive definite
+ va = randn(k * d, d)
+ for i in range(k):
+ va[i*d:i*d+d] = N.dot( va[i*d:i*d+d],
+ va[i*d:i*d+d].T)
+ #raise GmmParamError("init_random not implemented for "\
+ # "mode %s yet" % self.gm.mode)
self.gm.set_param(w, mu, va)
@@ -106,14 +111,12 @@
def init_test(self, data):
"""Use values already in the model as initialization.
- Useful for testing purpose when reproducability is necessary."""
+ Useful for testing purpose when reproducability is necessary. This does
+ nothing but checking that the mixture model has valid initial
+ values."""
+ # We have
try:
- if self.gm.check_state():
- self.isinit = True
- else:
- raise GmParamError("the mixture is initialized, but the"\
- "parameters are not valid")
-
+ self.gm.check_state()
except GmParamError, e:
print "Model is not properly initalized, cannot init EM."
raise "Message was %s" % str(e)
Modified: trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py 2007-06-10 16:28:20 UTC (rev 3089)
+++ trunk/Lib/sandbox/pyem/tests/test_gauss_mix.py 2007-06-11 07:01:12 UTC (rev 3090)
@@ -1,5 +1,5 @@
#! /usr/bin/env python
-# Last Change: Sat Jun 09 03:00 PM 2007 J
+# Last Change: Mon Jun 11 03:00 PM 2007 J
# For now, just test that all mode/dim execute correctly
@@ -41,6 +41,23 @@
except ValueError, e:
print "Ok, density_grid failed as expected (with msg: " + str(e) + ")"
+ def test_get_va(self):
+ """Test _get_va for diag and full mode."""
+ d = 3
+ k = 2
+ ld = 2
+ dim = [0, 2]
+ w, mu, va = GM.gen_param(d, k, 'full')
+ va = N.arange(d*d*k).reshape(d*k, d)
+ gm = GM.fromvalues(w, mu, va)
+ tva = N.empty(ld * ld * k)
+ for i in range(k * ld * ld):
+ tva[i] = dim[i%ld] + (i % 4)/ ld * dim[1] * d + d*d * (i / (ld*ld))
+ tva = tva.reshape(ld * k, ld)
+ sva = gm._get_va(dim)
+ assert N.all(sva == tva)
+
+
if __name__ == "__main__":
NumpyTest().run()
More information about the Scipy-svn
mailing list