[Scipy-svn] r2279 - in trunk/Lib/sandbox/pyem: . pyem

scipy-svn at scipy.org scipy-svn at scipy.org
Thu Oct 12 08:47:55 CDT 2006


Author: cdavid
Date: 2006-10-12 08:47:46 -0500 (Thu, 12 Oct 2006)
New Revision: 2279

Modified:
   trunk/Lib/sandbox/pyem/.bzrignore
   trunk/Lib/sandbox/pyem/Changelog
   trunk/Lib/sandbox/pyem/pyem/densities.py
   trunk/Lib/sandbox/pyem/pyem/gauss_mix.py
   trunk/Lib/sandbox/pyem/pyem/gmm_em.py
   trunk/Lib/sandbox/pyem/pyem/kmean.py
   trunk/Lib/sandbox/pyem/pyem/profile_densities.py
   trunk/Lib/sandbox/pyem/pyem/profile_gmm.py
Log:
[pyem @ david at ar.media.kyoto-u.ac.jp-20060829112035-336d11e78ac00702]
- Add a plot1d method to GM class

- Modify kmean to pick up vq from scipy if available (still needs to
  modify setup.py, though)

- modifiy gmm_em update method (outerproduct -> dot, which is a bit
  faster)
David Cournapeau <david at ar.media.kyoto-u.ac.jp> | 2006-08-29 20:20:35 +0900 (Tue, 29 Aug 2006)

Modified: trunk/Lib/sandbox/pyem/.bzrignore
===================================================================
--- trunk/Lib/sandbox/pyem/.bzrignore	2006-10-12 13:47:30 UTC (rev 2278)
+++ trunk/Lib/sandbox/pyem/.bzrignore	2006-10-12 13:47:46 UTC (rev 2279)
@@ -19,3 +19,6 @@
 pyem/tmp/blop.py
 pyem/tmp/
 pyem/tmp
+matcode
+../MSG
+MSG

Modified: trunk/Lib/sandbox/pyem/Changelog
===================================================================
--- trunk/Lib/sandbox/pyem/Changelog	2006-10-12 13:47:30 UTC (rev 2278)
+++ trunk/Lib/sandbox/pyem/Changelog	2006-10-12 13:47:46 UTC (rev 2279)
@@ -1,3 +1,14 @@
+pyem (0.5.2) Tue, 29 Aug 2006 14:53:49 +0900
+
+	* Add a class method to GM to create a model directly from
+	w, mean and var values (uses decorator: python 2.4)
+	* Add a plot1d method to GM to plot a GM in one dimension (where
+	the confidence ell kind of plot does not make sense). This draws
+	each Gaussian pdf, fill the area on the confidence interval
+	(matplotlib is really cool)
+
+-- David Cournapeau <david at ar.media.kyoto-u.ac.jp> 
+
 pyem (0.5.2) Mon, 28 Aug 2006 13:20:13 +0900
 
 	* Add a plot method to Gm class, so that it is easier

Modified: trunk/Lib/sandbox/pyem/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/densities.py	2006-10-12 13:47:30 UTC (rev 2278)
+++ trunk/Lib/sandbox/pyem/pyem/densities.py	2006-10-12 13:47:46 UTC (rev 2279)
@@ -1,7 +1,7 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Mon Aug 28 12:00 PM 2006 J
+# Last Change: Tue Aug 29 04:00 PM 2006 J
 
 import numpy as N
 import numpy.linalg as lin
@@ -204,8 +204,6 @@
         else:
             raise DenError("mean and variance are not dim conformant")
 
-    # TODO: Get a confidence interval using the Chi2 distribution
-    # of points at a given mahalanobis distance...
     chi22d  = chi2(2)
     mahal   = N.sqrt(chi22d.ppf(level))
     

Modified: trunk/Lib/sandbox/pyem/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/gauss_mix.py	2006-10-12 13:47:30 UTC (rev 2278)
+++ trunk/Lib/sandbox/pyem/pyem/gauss_mix.py	2006-10-12 13:47:46 UTC (rev 2279)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Mon Aug 28 01:00 PM 2006 J
+# Last Change: Tue Aug 29 08:00 PM 2006 J
 
 # Module to implement GaussianMixture class.
 
@@ -8,13 +8,15 @@
 import numpy.linalg as lin
 import densities
 
-MAX_DEV = 1e-10
+MAX_DEV     = 1e-10
+MAX_COND    = 1e10
 
 # Right now, two main usages of a Gaussian Model are possible
 #   - init a Gaussian Model with meta-parameters, and trains it
 #   - set-up a Gaussian Model to sample it, draw ellipsoides 
 #   of confidences. In this case, we would like to init it with
-#   known values of parameters.
+#   known values of parameters. This can be done with the class method 
+#   fromval
 #
 #   For now, we have to init with meta-parameters, and set 
 #   the parameters afterward. There should be a better way ?
@@ -24,6 +26,8 @@
 #   be used as long as w, mu and va are not set
 #   - We have to use scipy now for chisquare pdf, so there may be other
 #   methods to be used, ie for implementing random index.
+#   - there is no check on internal state of the GM, that is does w, mu and va values
+#   make sense (eg singular values)
 class GmParamError:
     """Exception raised for errors in gmm params
 
@@ -70,7 +74,9 @@
         self.is_valid   = False
 
     def set_param(self, weights, mu, sigma):
-        """Set parameters of the model"""
+        """Set parameters of the model. Args should
+        be conformant with metparameters d and k given during
+        initialisation"""
         k, d, mode  = check_gmm_param(weights, mu, sigma)
         if not k == self.k:
             raise GmParamError("Number of given components is %d, expected %d" 
@@ -87,6 +93,26 @@
 
         self.is_valid   = True
 
+    @classmethod
+    def fromvalues(cls, weights, mu, sigma):
+        """This class method can be used to create a GM model
+        directly from its parameters weights, mean and variance
+        
+        w, mu, va   = GM.gen_param(d, k)
+        gm  = GM(d, k)
+        gm.set_param(w, mu, va)
+
+        and
+        
+        w, mu, va   = GM.gen_param(d, k)
+        gm  = GM.fromvalue(w, mu, va)
+
+        Are equivalent """
+        k, d, mode  = check_gmm_param(weights, mu, sigma)
+        res = cls(d, k, mode)
+        res.set_param(weights, mu, sigma)
+        return res
+        
     def sample(self, nframes):
         """ Sample nframes frames from the model """
         if not self.is_valid:
@@ -139,6 +165,10 @@
             Will plot samples X draw from the mixture model, and
             plot the ellipses of equi-probability from the mean with
             fixed level of confidence 0.39.  """
+        if not self.is_valid:
+            raise GmParamError("""Parameters of the model has not been 
+                set yet, please set them using self.set_param()""")
+
         Xe  = []
         Ye  = []   
         if self.mode == 'diag':
@@ -157,6 +187,31 @@
 
         return Xe, Ye
     
+    def check_state(self):
+        """
+        """
+        if not self.is_valid:
+            raise GmParamError("""Parameters of the model has not been 
+                set yet, please set them using self.set_param()""")
+
+        if self.mode == 'full':
+            raise NotImplementedError, "not implemented for full mode yet"
+        
+        # # How to check w: if one component is negligeable, what shall
+        # # we do ?
+        # M   = N.max(self.w)
+        # m   = N.min(self.w)
+
+        # maxc    = m / M
+
+        # Check condition number for cov matrix
+        cond    = N.zeros(self.k)
+        ava     = N.absolute(self.va)
+        for c in range(self.k):
+            cond[c] = N.amax(ava[c,:]) / N.amin(ava[c,:])
+
+        print cond
+
     def gen_param(self, d, nc, varmode = 'diag', spread = 1):
         """Generate valid parameters for a gaussian mixture model.
         d is the dimension, nc the number of components, and varmode
@@ -188,7 +243,13 @@
         """Plot the ellipsoides directly for the model
         
         Returns a list of lines, so that their style can be modified. By default,
-        the style is red color, and nolegend for all of them"""
+        the style is red color, and nolegend for all of them.
+        
+        Does not work for 1d"""
+        if not self.is_valid:
+            raise GmParamError("""Parameters of the model has not been 
+                set yet, please set them using self.set_param()""")
+
         k       = self.k
         Xe, Ye  = self.conf_ellipses(*args, **kargs)
         try:
@@ -199,6 +260,50 @@
         except ImportError:
             raise GmParamError("matplotlib not found, cannot plot...")
 
+    def plot1d(self, level = 0.5):
+        """TODO: this is not documented"""
+        # This is not optimized at all, may be slow. Should not be
+        # difficult to make much faster, but it is late, and I am lazy
+        if not self.d == 1:
+            raise GmParamError("the model is not one dimensional model")
+        from scipy.stats import norm
+        nrm     = norm(0, 1)
+        pval    = N.sqrt(self.va[:,0]) * nrm.ppf((1+level)/2)
+
+        # Compute reasonable min/max for the normal pdf
+        mc  = 4
+        std = N.sqrt(self.va[:,0])
+        m   = N.amin(self.mu[:, 0] - 5 * std)
+        M   = N.amax(self.mu[:, 0] + 5 * std)
+
+        np  = 500
+        x   = N.linspace(m, M, np)
+        Yf  = N.zeros(np)
+        Yt  = N.zeros(np)
+        try:
+            import pylab as P
+            for c in range(self.k):
+                y   = self.w[c]/(N.sqrt(2*N.pi) * std[c]) * \
+                        N.exp(-(x-self.mu[c][0])**2/(2*std[c]**2))
+                Yt  += y
+                P.plot(x, y, 'r:')
+                #P.axvspan(-pval[c] + self.mu[c][0], pval[c] + self.mu[c][0], 
+                #        facecolor = 'b', alpha = 0.2)
+                id1 = -pval[c] + self.mu[c]
+                id2 = pval[c] + self.mu[c]
+                xc  = x[:, N.where(x>id1)[0]]
+                xc  = xc[:, N.where(xc<id2)[0]]
+                Yf  = self.w[c]/(N.sqrt(2*N.pi) * std[c]) * \
+                        N.exp(-(xc-self.mu[c][0])**2/(2*std[c]**2))
+                xc  = N.concatenate(([xc[0]], xc, [xc[-1]]))
+                Yf  = N.concatenate(([0], Yf, [0]))
+                P.fill(xc, Yf, facecolor = 'b', alpha = 0.2)
+                #P.fill([xc[0], xc[0], xc[-1], xc[-1]], 
+                #        [0, Yf[0], Yf[-1], 0], facecolor = 'b', alpha = 0.2)
+            P.plot(x, Yt, 'r')
+        except ImportError:
+            raise GmParamError("matplotlib not found, cannot plot...")
+
 # Function to generate a random index: this is kept outside any class,
 # as the function can be useful for other
 def gen_rand_index(p, n):
@@ -283,13 +388,14 @@
     #   - mode : mode of covariance matrices
     d       = 5
     k       = 4
+
+    # Now, drawing a model
     mode    = 'full'
     nframes = 1e3
 
     # Build a model with random parameters
     w, mu, va   = GM.gen_param(d, k, mode, spread = 3)
-    gm          = GM(d, k, mode)
-    gm.set_param(w, mu, va)
+    gm          = GM.fromvalues(w, mu, va)
 
     # Sample nframes frames  from the model
     X   = gm.sample(nframes)

Modified: trunk/Lib/sandbox/pyem/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/gmm_em.py	2006-10-12 13:47:30 UTC (rev 2278)
+++ trunk/Lib/sandbox/pyem/pyem/gmm_em.py	2006-10-12 13:47:46 UTC (rev 2279)
@@ -1,8 +1,10 @@
 # /usr/bin/python
-# Last Change: Mon Aug 28 05:00 PM 2006 J
+# Last Change: Tue Aug 29 03:00 PM 2006 J
 
 # TODO:
-#   - which methods to avoid va shrinking to 0 ?
+#   - which methods to avoid va shrinking to 0 ? There are several options, not sure which
+#   ones are appropriates
+#   - improve EM trainer
 #   - online EM
 
 import numpy as N
@@ -163,22 +165,27 @@
             w   = invn * mGamma
 
         elif self.gm.mode == 'full':
+            # In full mode, this is the bottleneck: the triple loop
+            # kills performances. This is pretty straightforward
+            # algebra, so computing it in C should not be too difficult
             mu  = N.zeros((k, d))
             va  = N.zeros((k*d, d))
 
+            gamma   = gamma.transpose()
             for c in range(k):
-                x   = N.sum(N.outerproduct(gamma[:, c], 
-                            N.ones((1, d))) * data, axis = 0)
+                #x   = N.sum(N.outer(gamma[:, c], 
+                #            N.ones((1, d))) * data, axis = 0)
+                x   = N.dot(gamma[c:c+1,:], data)[0,:]
                 xx  = N.zeros((d, d))
                 
                 # This should be much faster than recursing on n...
                 for i in range(d):
                     for j in range(d):
-                        xx[i,j] = N.sum(data[:,i] * data[:,j] * gamma[:,c], axis = 0)
+                        xx[i,j] = N.sum(data[:,i] * data[:,j] * gamma[c,:], axis = 0)
 
                 mu[c,:] = x / mGamma[c]
                 va[c*d:c*d+d,:] = xx  / mGamma[c] - \
-                                    N.outerproduct(mu[c,:], mu[c,:])
+                                    N.outer(mu[c,:], mu[c,:])
             w   = invn * mGamma
         else:
             raise GmmParamError("varmode not recognized")
@@ -223,11 +230,6 @@
 
         return like
     
-class OnlineEM:
-    "An online EM trainer. "
-    def __init__(self):
-        raise GmmError("not implemented yet")
-
 # Misc functions
 def multiple_gauss_den(data, mu, va):
     """Helper function to generate several Gaussian
@@ -265,7 +267,7 @@
     #   row of d elements
     k       = 3 
     d       = 2         
-    mode    = 'diag'        
+    mode    = 'full'        
     nframes = 1e3
 
     #+++++++++++++++++++++++++++++++++++++++++++

Modified: trunk/Lib/sandbox/pyem/pyem/kmean.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/kmean.py	2006-10-12 13:47:30 UTC (rev 2278)
+++ trunk/Lib/sandbox/pyem/pyem/kmean.py	2006-10-12 13:47:46 UTC (rev 2279)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Mon Aug 28 09:00 PM 2006 J
+# Last Change: Tue Aug 29 12:00 PM 2006 J
 
 import numpy as N
 
@@ -29,7 +29,7 @@
 #        Kmean will be REALLY slow"""
 #        _vq = _py_vq
 try:
-    from sccipy.cluster.vq import vq
+    from scipy.cluster.vq import vq
     print "using scipy.cluster.vq"
     def _vq(*args, **kw): return vq(*args, **kw)[0]
 except ImportError:
@@ -42,7 +42,7 @@
         _vq = _py_vq
 
 def kmean(data, init, iter = 10):
-    """Simple kmean implementation for EM
+    """Simple kmean implementation for EM. Runs iter iterations.
     
     returns a tuple (code, label), where code are the final
     centroids, and label are the class label indec for each

Modified: trunk/Lib/sandbox/pyem/pyem/profile_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/profile_densities.py	2006-10-12 13:47:30 UTC (rev 2278)
+++ trunk/Lib/sandbox/pyem/pyem/profile_densities.py	2006-10-12 13:47:46 UTC (rev 2279)
@@ -46,6 +46,12 @@
 def benchc():
     bench(DC.gauss_den)
 
+def benchpyfull():
+    bench(D.gauss_den, 'full')
+
+def benchcfull():
+    bench(DC.gauss_den, 'full')
+
 if __name__ == "__main__":
     import profile
     import pstats

Modified: trunk/Lib/sandbox/pyem/pyem/profile_gmm.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/profile_gmm.py	2006-10-12 13:47:30 UTC (rev 2278)
+++ trunk/Lib/sandbox/pyem/pyem/profile_gmm.py	2006-10-12 13:47:46 UTC (rev 2279)
@@ -10,7 +10,7 @@
     k       = 20
     nframes = 1e4
     niter   = 10
-    mode    = 'diag'
+    mode    = 'full'
 
     #+++++++++++++++++++++++++++++++++++++++++++
     # Create an artificial GMM model, samples it
@@ -42,7 +42,7 @@
 
     print "computing..."
     for i in range(niter):
-        print "iter %d" % i
+        print "iteration %d" % i
         g, tgd  = gmm.sufficient_statistics(data)
         like[i] = N.sum(N.log(N.sum(tgd, 1)))
         gmm.update_em(data, g)



More information about the Scipy-svn mailing list