[Scipy-svn] r3078 - in trunk/Lib/sandbox/pyem: . data/oldfaithful

scipy-svn@scip... scipy-svn@scip...
Fri Jun 8 06:15:47 CDT 2007


Author: cdavid
Date: 2007-06-08 06:15:39 -0500 (Fri, 08 Jun 2007)
New Revision: 3078

Modified:
   trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py
   trunk/Lib/sandbox/pyem/densities.py
   trunk/Lib/sandbox/pyem/gauss_mix.py
   trunk/Lib/sandbox/pyem/gmm_em.py
   trunk/Lib/sandbox/pyem/online_em.py
Log:
Add function to plot density contours in GM.

Modified: trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py
===================================================================
--- trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py	2007-06-08 04:42:23 UTC (rev 3077)
+++ trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py	2007-06-08 11:15:39 UTC (rev 3078)
@@ -1,6 +1,6 @@
 #! /usr/bin/env python
-# Last Change: Wed Apr 25 06:00 PM 2007 J
-import faith as _faith
+# Last Change: Fri Jun 08 12:00 PM 2007 J
+import data as _faith
 __doc__     = _faith.DESCRSHORT
 copyright   = _faith.COPYRIGHT
 source      = _faith.SOURCE

Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py	2007-06-08 04:42:23 UTC (rev 3077)
+++ trunk/Lib/sandbox/pyem/densities.py	2007-06-08 11:15:39 UTC (rev 3078)
@@ -1,7 +1,7 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Fri Nov 10 10:00 AM 2006 J
+# Last Change: Fri Jun 08 07:00 PM 2007 J
 
 import numpy as N
 import numpy.linalg as lin
@@ -188,6 +188,10 @@
         else:
             raise DenError("mean and variance are not dim conformant")
 
+    # When X is a sample from multivariante N(mu, sigma), (X-mu)Sigma^-1(X-mu)
+    # follows a Chi2(d) law. Here, we only take 2 dimension, so Chi2 with 2
+    # degree of freedom (See Wasserman. This is easy to see with characteristic
+    # functions)
     chi22d  = chi2(2)
     mahal   = N.sqrt(chi22d.ppf(level))
     
@@ -218,6 +222,26 @@
 
     return elps[0, :], elps[1, :]
 
+def multiple_gauss_den(data, mu, va):
+    """Helper function to generate several Gaussian
+    pdf (different parameters) from the same data"""
+    mu  = N.atleast_2d(mu)
+    va  = N.atleast_2d(va)
+
+    K   = mu.shape[0]
+    n   = data.shape[0]
+    d   = mu.shape[1]
+    
+    y   = N.zeros((K, n))
+    if mu.size == va.size:
+        for i in range(K):
+            y[i] = gauss_den(data, mu[i, :], va[i, :])
+        return y.T
+    else:
+        for i in range(K):
+            y[i] = gauss_den(data, mu[i, :], va[d*i:d*i+d, :])
+        return y.T
+
 if __name__ == "__main__":
     import pylab
 

Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-08 04:42:23 UTC (rev 3077)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-08 11:15:39 UTC (rev 3078)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Mon Jun 04 07:00 PM 2007 J
+# Last Change: Fri Jun 08 07:00 PM 2007 J
 
 # Module to implement GaussianMixture class.
 
@@ -344,6 +344,8 @@
     def _get_component_pdf(self, x):
         """Returns a list of pdf, one for each component. Summing them gives
         the pdf of the mixture."""
+        # XXX: have a public function to compute the pdf at given points
+        # instead...
         std = N.sqrt(self.va[:,0])
         retval = N.empty((x.size, self.k))
         for c in range(self.k):
@@ -352,6 +354,47 @@
 
         return retval
 
+    def density_on_grid(self, nx = 50, ny = 50, maxlevel = 0.95):
+        """Do all the necessary computation for contour plot of mixture's density.
+        
+        Returns X, Y, Z and V as expected by mpl contour function."""
+
+        # Ok, it is a bit gory. Basically, we want to compute the size of the
+        # grid. We use conf_ellipse, which will return a couple of points for
+        # each component, and we can find a grid size which then is just big
+        # enough to contain all ellipses. This won't work well if two
+        # ellipsoids are crossing each other a lot (because this assumes that
+        # at a given point, one component is largely dominant for its
+        # contribution to the pdf).
+
+        # XXX: we need log pdf, not the pdf... this can save some computing
+        Xe, Ye = self.conf_ellipses(level = maxlevel)
+        ax = [N.min(Xe), N.max(Xe), N.min(Ye), N.max(Ye)]
+
+        w = ax[1] - ax[0]
+        h = ax[3] - ax[2]
+        X, Y, den = self._densityctr(N.linspace(ax[0]-0.2*w, ax[1]+0.2*w, nx), \
+                N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny))
+        lden = N.log(den)
+        V = [-5, -3, -1, -0.5, ]
+        V.extend(N.linspace(0, N.max(lden), 4).tolist())
+        return X, Y, lden, N.array(V)
+
+    def _densityctr(self, xrange, yrange):
+        """Helper function to compute density contours on a grid."""
+        gr = N.meshgrid(xrange, yrange)
+        X = gr[0].flatten()
+        Y = gr[1].flatten()
+        xdata = N.concatenate((X[:, N.newaxis], Y[:, N.newaxis]), axis = 1)
+        # XXX refactor computing pdf
+        d = densities.multiple_gauss_den(xdata, self.mu, self.va) * self.w
+        d = N.sum(d, 1)
+        d = d.reshape(len(yrange), len(xrange))
+
+        X = gr[0]
+        Y = gr[1]
+        return X, Y, d
+
     # Syntactic sugar
     def __repr__(self):
         repr    = ""

Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-08 04:42:23 UTC (rev 3077)
+++ trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-08 11:15:39 UTC (rev 3078)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Fri Jun 01 05:00 PM 2007 J
+# Last Change: Fri Jun 08 08:00 PM 2007 J
 
 # TODO:
 #   - which methods to avoid va shrinking to 0 ? There are several options, 
@@ -65,7 +65,8 @@
         d       = self.gm.d
         init    = data[0:k, :]
 
-        (code, label)   = kmean(data, init, niter)
+        # XXX: This is bogus: should do better (in kmean or here, do not know yet)
+        (code, label)   = kmean(data, init, niter, minit = 'matrix')
 
         w   = N.ones(k) / k
         mu  = code.copy()
@@ -135,7 +136,7 @@
         n   = data.shape[0]
 
         # compute the gaussian pdf
-        tgd	= multiple_gauss_den(data, self.gm.mu, self.gm.va)
+        tgd	= densities.multiple_gauss_den(data, self.gm.mu, self.gm.va)
         # multiply by the weight
         tgd	*= self.gm.w
         # Normalize to get a pdf
@@ -202,7 +203,7 @@
         the data """
         assert(self.isinit)
         # compute the gaussian pdf
-        tgd	= multiple_gauss_den(data, self.gm.mu, self.gm.va)
+        tgd	= densities.multiple_gauss_den(data, self.gm.mu, self.gm.va)
         # multiply by the weight
         tgd	*= self.gm.w
 
@@ -367,27 +368,6 @@
     else:
         return False
 
-def multiple_gauss_den(data, mu, va):
-    """Helper function to generate several Gaussian
-    pdf (different parameters) from the same data"""
-    mu  = N.atleast_2d(mu)
-    va  = N.atleast_2d(va)
-
-    K   = mu.shape[0]
-    n   = data.shape[0]
-    d   = mu.shape[1]
-    
-    y   = N.zeros((K, n))
-    if mu.size == va.size:
-        for i in range(K):
-            y[i] = densities.gauss_den(data, mu[i, :], va[i, :])
-        return y.T
-    else:
-        for i in range(K):
-            y[i] = densities.gauss_den(data, mu[i, :], 
-                        va[d*i:d*i+d, :])
-        return y.T
-
 if __name__ == "__main__":
     import copy
     #=============================

Modified: trunk/Lib/sandbox/pyem/online_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/online_em.py	2007-06-08 04:42:23 UTC (rev 3077)
+++ trunk/Lib/sandbox/pyem/online_em.py	2007-06-08 11:15:39 UTC (rev 3078)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Fri Jun 01 05:00 PM 2007 J
+# Last Change: Fri Jun 08 08:00 PM 2007 J
 
 #---------------------------------------------
 # This is not meant to be used yet !!!! I am 
@@ -67,7 +67,7 @@
             self.cxx    = N.outer(w, mean(init_data ** 2, 0))
 
             # w, mu and va init is the same that in the standard case
-            (code, label)   = kmean(init_data, init_data[0:k, :], niter)
+            (code, label)   = kmean(init_data, init_data[0:k, :], iter = niter, minit = 'matrix')
             mu          = code.copy()
             va          = N.zeros((k, d))
             for i in range(k):
@@ -102,7 +102,7 @@
             self.cxx    = N.outer(w, mean(init_data ** 2, 0))
 
             # w, mu and va init is the same that in the standard case
-            (code, label)   = kmean(init_data, init_data[0:k, :], niter)
+            (code, label)   = kmean(init_data, init_data[0:k, :], iter = niter, minit = 'matrix')
             mu          = code.copy()
             va          = N.zeros((k, d))
             for i in range(k):
@@ -176,7 +176,7 @@
 
         # w, mu and va init is the same that in the standard case
         (code, label)   = kmean(init_data[:, N.newaxis], \
-                init_data[0:k, N.newaxis], niter)
+                init_data[0:k, N.newaxis], iter = niter)
         mu          = code.copy()
         va          = N.zeros((k, 1))
         for i in range(k):



More information about the Scipy-svn mailing list