[Scipy-svn] r3087 - in trunk/Lib/sandbox/pyem: . doc examples

scipy-svn@scip... scipy-svn@scip...
Sat Jun 9 09:03:18 CDT 2007


Author: cdavid
Date: 2007-06-09 09:03:01 -0500 (Sat, 09 Jun 2007)
New Revision: 3087

Added:
   trunk/Lib/sandbox/pyem/doc/pdfestimation.png
Modified:
   trunk/Lib/sandbox/pyem/__init__.py
   trunk/Lib/sandbox/pyem/_c_densities.py
   trunk/Lib/sandbox/pyem/densities.py
   trunk/Lib/sandbox/pyem/doc/
   trunk/Lib/sandbox/pyem/doc/Makefile
   trunk/Lib/sandbox/pyem/doc/index.txt
   trunk/Lib/sandbox/pyem/doc/tutorial.pdf
   trunk/Lib/sandbox/pyem/examples/pdfestimation.py
   trunk/Lib/sandbox/pyem/gauss_mix.py
   trunk/Lib/sandbox/pyem/gmm_em.py
   trunk/Lib/sandbox/pyem/info.py
   trunk/Lib/sandbox/pyem/online_em.py
Log:
Heavy face-lift of the code + docstrings.

Modified: trunk/Lib/sandbox/pyem/__init__.py
===================================================================
--- trunk/Lib/sandbox/pyem/__init__.py	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/__init__.py	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Mon May 28 01:00 PM 2007 J
+# Last Change: Sat Jun 09 10:00 PM 2007 J
 
 from info import __doc__
 
@@ -8,7 +8,7 @@
 #from online_em import OnGMM as _OnGMM
 #import examples as _examples
 
-__all__ = filter(lambda s:not s.startswith('_'),dir())
+__all__ = filter(lambda s:not s.startswith('_'), dir())
 
 from numpy.testing import NumpyTest
 test = NumpyTest().test

Modified: trunk/Lib/sandbox/pyem/_c_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/_c_densities.py	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/_c_densities.py	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,28 +1,34 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Thu Nov 09 05:00 PM 2006 J
+# Last Change: Sat Jun 09 10:00 PM 2007 J
 
+"""This module implements some function of densities module in C for efficiency
+reasons.  gaussian, such as pdf estimation, confidence interval/ellipsoids,
+etc..."""
+
+__docformat__ = 'restructuredtext'
+
 # This module uses a C implementation through ctypes, for diagonal cases
 # TODO:
 #   - portable way to find/open the shared library
 #   - full cov matrice
+#   - test before inclusion
 
 import numpy as N
 import numpy.linalg as lin
-from numpy.random import randn
-from scipy.stats import chi2
-import densities as D
+#from numpy.random import randn
+#from scipy.stats import chi2
+#import densities as D
 
 import ctypes
-from ctypes import cdll, c_uint, c_int, c_double, POINTER
+from ctypes import c_uint, c_int
 from numpy.ctypeslib import ndpointer, load_library
 
 ctypes_major    = int(ctypes.__version__.split('.')[0])
 if ctypes_major < 1:
-    msg =  "version of ctypes is %s, expected at least %s" \
-            % (ctypes.__version__, '1.0.0')
-    raise ImportError(msg)
+    raise ImportError("version of ctypes is %s, expected at least %s"
+            % (ctypes.__version__, '1.0.1'))
 
 # Requirements for diag gden
 _gden   = load_library('c_gden.so', __file__)
@@ -75,9 +81,9 @@
     if len(N.shape(x)) != 2:
         raise DenError("x is not rank 2")
         
-    (n, d)      = x.shape
-    (dm0, dm1)  = mu.shape
-    (dv0, dv1)  = va.shape
+    (n, d)      = N.shape(x)
+    (dm0, dm1)  = N.shape(mu)
+    (dv0, dv1)  = N.shape(va)
     
     # Check x and mu same dimension
     if dm0 != 1:
@@ -165,9 +171,9 @@
         #     inva.ctypes.data_as(POINTER(c_double)),
         #     y.ctypes.data_as(POINTER(c_double)))
     else:
-        y   = _scalar_gauss_den(x[:,0], mu[0,0], va[0,0], log)
+        y   = _scalar_gauss_den(x[:, 0], mu[0, 0], va[0, 0], log)
         for i in range(1, d):
-            y    +=  _scalar_gauss_den(x[:,i], mu[0,i], va[0,i], log)
+            y    +=  _scalar_gauss_den(x[:, i], mu[0, i], va[0, i], log)
         return y
 
 def _full_gauss_den(x, mu, va, log):
@@ -199,19 +205,20 @@
     return y
 
 if __name__ == "__main__":
-    #=========================================
-    # Test accuracy between pure and C python
-    #=========================================
-    mu  = N.array([2.0, 3])
-    va  = N.array([5.0, 3])
+    pass
+    ##=========================================
+    ## Test accuracy between pure and C python
+    ##=========================================
+    #mu  = N.array([2.0, 3])
+    #va  = N.array([5.0, 3])
 
-    # Generate a multivariate gaussian of mean mu and covariance va
-    nframes = 1e4
-    X       = randn(nframes, 2)
-    Yc      = N.dot(N.diag(N.sqrt(va)), X.transpose())
-    Yc      = Yc.transpose() + mu
+    ## Generate a multivariate gaussian of mean mu and covariance va
+    #nframes = 1e4
+    #X       = randn(nframes, 2)
+    #Yc      = N.dot(N.diag(N.sqrt(va)), X.transpose())
+    #Yc      = Yc.transpose() + mu
 
-    Y   = D.gauss_den(Yc, mu, va)
-    Yt  = gauss_den(Yc, mu, va)
+    #Y   = D.gauss_den(Yc, mu, va)
+    #Yt  = gauss_den(Yc, mu, va)
 
-    print "Diff is " + str(N.sqrt(N.sum((Y-Yt) ** 2))/nframes/2)
+    #print "Diff is " + str(N.sqrt(N.sum((Y-Yt) ** 2))/nframes/2)

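The ctypes setup above follows a standard numpy pattern: load the shared
library sitting next to the module with numpy.ctypeslib.load_library, then
declare argument types with ndpointer so numpy arrays can be passed straight
through. A minimal sketch of that pattern; the C symbol name gden_diag and its
argument list are hypothetical, only the load_library/ndpointer usage mirrors
the module:

    import numpy as N
    from ctypes import c_uint
    from numpy.ctypeslib import ndpointer, load_library

    # Load c_gden.so from the directory containing this file.
    _gden = load_library('c_gden.so', __file__)
    # Hypothetical C function gden_diag(x, n, d, mu, inva, out).
    _gden.gden_diag.argtypes = [
        ndpointer(dtype=N.float64),     # x: input frames (n x d)
        c_uint,                         # n: number of frames
        c_uint,                         # d: dimension
        ndpointer(dtype=N.float64),     # mu: mean, row vector
        ndpointer(dtype=N.float64),     # inva: inverse diagonal variances
        ndpointer(dtype=N.float64),     # out: n output densities
    ]
    _gden.gden_diag.restype = c_uint
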
Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/densities.py	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,8 +1,8 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Sat Jun 09 08:00 PM 2007 J
-"""This module implements various bsic functions related to multivariate
+# Last Change: Sat Jun 09 10:00 PM 2007 J
+"""This module implements various basic functions related to multivariate
 gaussian, such as pdf estimation, confidence interval/ellipsoids, etc..."""
 
 __docformat__ = 'restructuredtext'
@@ -50,10 +50,10 @@
         pdf : ndarray
             Returns a rank 1 array of the pdf at points x.
 
-    Notes
-    -----
-    Vector are row vectors, except va which can be a matrix
-    (row vector variance for diagonal variance)."""
+    Note
+    ----
+        Vector are row vectors, except va which can be a matrix
+        (row vector variance for diagonal variance)."""
     
     lmu  = N.atleast_2d(mu)
     lva  = N.atleast_2d(va)


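Per the docstring above, gauss_den uses row-vector conventions: x is one frame
per row, mu a row vector, and va a row vector for diagonal covariances. A
minimal usage sketch under those conventions (assuming the package is
importable from scipy.sandbox as below):

    import numpy as N
    from scipy.sandbox.pyem import densities

    x = N.random.randn(100, 2)           # 100 frames in dimension 2
    mu = N.array([[1., -1.]])            # mean as a row vector
    va = N.array([[2., 3.]])             # diagonal variance as a row vector
    y = densities.gauss_den(x, mu, va)   # rank 1 array: pdf at each frame
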
Property changes on: trunk/Lib/sandbox/pyem/doc
___________________________________________________________________
Name: svn:ignore
   - *.pyc
*.swp
*.pyd
*.so
*.prof


   + *.pyc
*.swp
*.pyd
*.so
*.prof
*.out
*.tex



Modified: trunk/Lib/sandbox/pyem/doc/Makefile
===================================================================
--- trunk/Lib/sandbox/pyem/doc/Makefile	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/doc/Makefile	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,4 +1,4 @@
-# Last Change: Mon May 28 10:00 AM 2007 J
+# Last Change: Sat Jun 09 05:00 PM 2007 J
 
 # This makefile is used to build the pdf from the rest file and inlined code
 # from python examples
@@ -7,7 +7,7 @@
 rst2tex	= PYTHONPATH=/home/david/local/lib/python2.4/site-packages rst2newlatex.py \
 		  --stylesheet-path base.tex --user-stylesheet user.tex 
 
-pytexfiles	= pyem.tex basic_example1.tex basic_example2.tex basic_example3.tex
+pytexfiles	= pyem.tex basic_example1.tex basic_example2.tex basic_example3.tex pdfestimation.tex
 
 SOURCEPATH	= $(PWD)
 
@@ -24,15 +24,18 @@
 pyem.tex: index.txt
 	$(rst2tex) $< > $@
 
-basic_example1.tex: examples/basic_example1.py
+basic_example1.tex: ../examples/basic_example1.py
 	$(py2tex) $< > $@
 
-basic_example2.tex: examples/basic_example2.py
+basic_example2.tex: ../examples/basic_example2.py
 	$(py2tex) $< > $@
 
-basic_example3.tex: examples/basic_example3.py
+basic_example3.tex: ../examples/basic_example3.py
 	$(py2tex) $< > $@
 
+pdfestimation.tex: ../examples/pdfestimation.py
+	$(py2tex) $< > $@
+
 clean:
 	for i in $(pytexfiles); do \
 		rm -f `echo $$i`; \

Modified: trunk/Lib/sandbox/pyem/doc/index.txt
===================================================================
--- trunk/Lib/sandbox/pyem/doc/index.txt	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/doc/index.txt	2007-06-09 14:03:01 UTC (rev 3087)
@@ -13,7 +13,7 @@
         file: Bic_example.png
     /restindex
 
-.. Last Change: Mon May 28 10:00 AM 2007 J
+.. Last Change: Sat Jun 09 07:00 PM 2007 J
 
 ===================================================
  PyEM, a python package for Gaussian mixture models
@@ -176,14 +176,36 @@
 Examples 
 =========
 
-TODO.
+Using EM for pdf estimation
+---------------------------
 
+The following example uses the old faithful dataset and is available in the
+examples directory. It models the joint distribution (d(t), w(t+1)), where d(t)
+is the duration time, and w(t+1) the waiting time for the next eruption. It
+selects the best model using the BIC.
+
+.. raw:: latex
+
+    \input{pdfestimation.tex}
+
+.. figure:: pdfestimation.png 
+    :width: 500
+    :height: 400
+
+    isodensity curves for the old faithful data modeled by 1, 2, 3 and 4
+    component models (top to bottom, left to right).
+
+
 Using EM for clustering
 -----------------------
 
+TODO (this is fundamentally the same as pdf estimation, though)
+
 Using PyEM for supervised learning
 ----------------------------------
 
+TODO
+
 Note on performances
 ====================
 

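The selection described above amounts to training one mixture per candidate
number of components and keeping the one with the highest BIC. A minimal
sketch, assuming dt holds the scaled (duration, waiting) data and that GMM
instances expose the bic(data) method used by the example file:

    import numpy as N
    from scipy.sandbox import pyem

    bc = []
    for k in range(1, 5):
        lgm = pyem.GM(dt.shape[1], k, 'full')   # candidate model
        gmm = pyem.GMM(lgm, 'kmean')            # kmean initialization
        pyem.EM().train(dt, gmm, maxiter=30, thresh=1e-10)
        bc.append(gmm.bic(dt))                  # assumed bic(data) method
    print "best model has %d components (BIC)" % (N.argmax(bc) + 1)
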
Added: trunk/Lib/sandbox/pyem/doc/pdfestimation.png
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/sandbox/pyem/doc/pdfestimation.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/sandbox/pyem/doc/tutorial.pdf
===================================================================
(Binary files differ)

Modified: trunk/Lib/sandbox/pyem/examples/pdfestimation.py
===================================================================
--- trunk/Lib/sandbox/pyem/examples/pdfestimation.py	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/examples/pdfestimation.py	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,15 +1,11 @@
 #! /usr/bin/env python
-# Last Change: Sat Jun 09 03:00 PM 2007 J
+# Last Change: Sat Jun 09 07:00 PM 2007 J
 
 # Example of doing pdf estimation with EM algorithm. Requires matplotlib.
 import numpy as N
-from numpy.testing import set_package_path, restore_path
-
 import pylab as P
 
-set_package_path()
-import pyem
-restore_path()
+from scipy.sandbox import pyem
 import utils
 
 oldfaithful = utils.get_faithful()
@@ -45,6 +41,8 @@
     X, Y, Z, V = gm.density_on_grid()
     P.contour(X, Y, Z, V)
     P.plot(dt[:, 0], dt[:, 1], '.')
+    P.xlabel('duration time (scaled)')
+    P.ylabel('waiting time (scaled)')
 
 print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1)
 P.show()

Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,8 +1,14 @@
 # /usr/bin/python
-# Last Change: Sat Jun 09 08:00 PM 2007 J
+# Last Change: Sat Jun 09 10:00 PM 2007 J
 
-# Module to implement GaussianMixture class.
+"""Module implementing GM, a class which represents Gaussian mixtures.
 
+GM instances can be used to create and sample mixtures. They also provide
+different plotting facilities, such as isodensity contours for multi
+dimensional models and confidence ellipses."""
+
+__docformat__ = 'restructuredtext'
+
 import numpy as N
 from numpy.random import randn, rand
 import numpy.linalg as lin
@@ -21,12 +27,12 @@
 #   be used as long as w, mu and va are not set
 #   - We have to use scipy now for chisquare pdf, so there may be other
 #   methods to be used, ie for implementing random index.
-#   - there is no check on internal state of the GM, that is does w, mu and va values
-#   make sense (eg singular values)
-#   - plot1d is still very rhough. There should be a sensible way to 
-#   modify the result plot (maybe returns a dic with global pdf, component pdf and
-#   fill matplotlib handles). Should be coherent with plot
-class GmParamError:
+#   - there is no check on internal state of the GM, that is whether w, mu and
+#   va values make sense (eg singular values)
+#   - plot1d is still very rough. There should be a sensible way to modify the
+#   result plot (maybe return a dic with global pdf, component pdf and fill
+#   matplotlib handles). Should be coherent with plot
+class GmParamError(Exception):
     """Exception raised for errors in gmm params
 
     Attributes:
@@ -34,6 +40,7 @@
         message -- explanation of the error
     """
     def __init__(self, message):
+        Exception.__init__(self)
         self.message    = message
     
     def __str__(self):
@@ -52,11 +59,27 @@
     # Methods to construct a mixture
     #===============================
     def __init__(self, d, k, mode = 'diag'):
-        """Init a Gaussian Mixture of k components, each component being a 
-        d multi-variate Gaussian, with covariance matrix of style mode.
-        
-        If you want to build a Gaussian Mixture with knowns weights, means
-        and variances, you can use GM.fromvalues method directly"""
+        """Init a Gaussian Mixture.
+
+        :Parameters:
+            d : int
+                dimension of the mixture.
+            k : int
+                number of components in the mixture.
+            mode : string
+                mode of covariance
+
+        :Returns:
+            an instance of GM.
+
+        Note
+        ----
+
+        Only full and diag modes are supported for now.
+
+        :SeeAlso:
+            If you want to build a Gaussian Mixture with known weights, means
+            and variances, you can use the GM.fromvalues method directly"""
         if mode not in self._cov_mod:
             raise GmParamError("mode %s not recognized" + str(mode))
 
@@ -80,16 +103,42 @@
             self.is1d = True
 
     def set_param(self, weights, mu, sigma):
-        """Set parameters of the model. Args should
-        be conformant with metparameters d and k given during
-        initialisation"""
+        """Set parameters of the model. 
+        
+        Args should be conformant with the meta parameters d and k given during
+        initialisation.
+        
+        :Parameters:
+            weights : ndarray
+                weights of the mixture (k elements)
+            mu : ndarray
+                means of the mixture. One component's mean per row, k rows for k
+                components.
+            sigma : ndarray
+                variances of the mixture. For diagonal models, one row contains
+                the diagonal elements of the covariance matrix. For full
+                covariance, d rows per covariance matrix.
+
+        Examples
+        --------
+        Create a 3 component, 2 dimension mixture with full covariance matrices
+
+        >>> w = numpy.array([0.2, 0.5, 0.3])
+        >>> mu = numpy.array([[0., 0.], [1., 1.]])
+        >>> va = numpy.array([[1., 0.], [0., 1.], [2., 0.5], [0.5, 1]])
+        >>> gm = GM(2, 3, 'full')
+        >>> gm.set_param(w, mu, va)
+
+        :SeeAlso:
+            If you already know the parameters when creating the model, you can
+            simply use the class method GM.fromvalues."""
         k, d, mode  = check_gmm_param(weights, mu, sigma)
         if not k == self.k:
             raise GmParamError("Number of given components is %d, expected %d" 
                     % (k, self.k))
         if not d == self.d:
-            raise GmParamError("Dimension of the given model is %d, expected %d" 
-                    % (d, self.d))
+            raise GmParamError("Dimension of the given model is %d, "\
+                "expected %d" % (d, self.d))
         if not mode == self.mode and not d == 1:
             raise GmParamError("Given covariance mode is %s, expected %s"
                     % (mode, self.mode))
@@ -104,16 +153,34 @@
         """This class method can be used to create a GM model
         directly from its parameters weights, mean and variance
         
-        w, mu, va   = GM.gen_param(d, k)
-        gm  = GM(d, k)
-        gm.set_param(w, mu, va)
+        :Parameters:
+            weights : ndarray
+                weights of the mixture (k elements)
+            mu : ndarray
+                means of the mixture. One component's mean per row, k row for k
+                components.
+            sigma : ndarray
+                variances of the mixture. For diagonal models, one row contains
+                the diagonal elements of the covariance matrix. For full
+                covariance, d rows for one variance.
 
+        :Returns:
+            gm : GM
+                an instance of GM.
+
+        Examples
+        --------
+
+        >>> w, mu, va   = GM.gen_param(d, k)
+        >>> gm  = GM(d, k)
+        >>> gm.set_param(w, mu, va)
+
         and
         
-        w, mu, va   = GM.gen_param(d, k)
-        gm  = GM.fromvalue(w, mu, va)
+        >>> w, mu, va   = GM.gen_param(d, k)
+        >>> gm  = GM.fromvalue(w, mu, va)
 
-        Are equivalent """
+        are strictly equivalent."""
         k, d, mode  = check_gmm_param(weights, mu, sigma)
         res = cls(d, k, mode)
         res.set_param(weights, mu, sigma)
@@ -123,7 +190,15 @@
     # Fundamental facilities (sampling, confidence, etc..)
     #=====================================================
     def sample(self, nframes):
-        """ Sample nframes frames from the model """
+        """ Sample nframes frames from the model.
+        
+        :Parameters:
+            nframes : int
+                number of samples to draw.
+        
+        :Returns:
+            samples : ndarray
+                samples in the format one sample per row (nframes, d)."""
         if not self.is_valid:
             raise GmParamError("""Parameters of the model has not been 
                 set yet, please set them using self.set_param()""")
@@ -134,47 +209,60 @@
         X   = randn(nframes, self.d)        
 
         if self.mode == 'diag':
-            X   = self.mu[S, :]  + X * N.sqrt(self.va[S,:])
+            X   = self.mu[S, :]  + X * N.sqrt(self.va[S, :])
         elif self.mode == 'full':
             # Faster:
             cho = N.zeros((self.k, self.va.shape[1], self.va.shape[1]))
             for i in range(self.k):
                 # Using cholesky looks more stable than sqrtm; sqrtm is not
                 # available in numpy anyway, only in scipy...
-                cho[i]  = lin.cholesky(self.va[i*self.d:i*self.d+self.d,:])
+                cho[i]  = lin.cholesky(self.va[i*self.d:i*self.d+self.d, :])
 
             for s in range(self.k):
                 tmpind      = N.where(S == s)[0]
                 X[tmpind]   = N.dot(X[tmpind], cho[s].transpose()) + self.mu[s]
         else:
-            raise GmParamError('cov matrix mode not recognized, this is a bug !')
+            raise GmParamError("cov matrix mode not recognized, "\
+                    "this is a bug !")
 
         return X
 
-    def conf_ellipses(self, dim = misc.DEF_VIS_DIM, npoints = misc.DEF_ELL_NP,
+    def conf_ellipses(self, dim = misc.DEF_VIS_DIM, npoints = misc.DEF_ELL_NP, 
             level = misc.DEF_LEVEL):
         """Returns a list of confidence ellipsoids describing the Gmm
         defined by mu and va. Check densities.gauss_ell for details
 
-        Returns:
-            -Xe:    a list of x coordinates for the ellipses (Xe[i] is
-            the array containing x coordinates of the ith Gaussian)
-            -Ye:    a list of y coordinates for the ellipses
+        :Parameters:
+            dim : sequence
+                sequence of two integers giving the dimensions onto which to
+                project the ellipsoids.
+            npoints : int
+                number of points to generate for the ellipse.
+            level : float
+                level of confidence (between 0 and 1).
 
-        Example:
+        :Returns:
+            Xe : sequence
+                a list of x coordinates for the ellipses (Xe[i] is the array
+                containing x coordinates of the ith Gaussian)
+            Ye : sequence
+                a list of y coordinates for the ellipses.
+
+        Examples
+        --------
             Suppose we have w, mu and va as parameters for a mixture, then:
             
-            gm      = GM(d, k)
-            gm.set_param(w, mu, va)
-            X       = gm.sample(1000)
-            Xe, Ye  = gm.conf_ellipsoids()
-            pylab.plot(X[:,0], X[:, 1], '.')
-            for k in len(w):
-                pylab.plot(Xe[k], Ye[k], 'r')
+            >>> gm      = GM(d, k)
+            >>> gm.set_param(w, mu, va)
+            >>> X       = gm.sample(1000)
+            >>> Xe, Ye  = gm.conf_ellipsoids()
+            >>> pylab.plot(X[:,0], X[:, 1], '.')
+            >>> for k in len(w):
+            ...    pylab.plot(Xe[k], Ye[k], 'r')
                 
             Will plot samples X drawn from the mixture model, and
             plot the ellipses of equi-probability from the mean with
-            fixed level of confidence 0.39.  """
+            default level of confidence."""
         if self.is1d:
             raise ValueError("This function does not make sense for 1d "
                 "mixtures.")
@@ -187,14 +275,14 @@
         Ye  = []   
         if self.mode == 'diag':
             for i in range(self.k):
-                xe, ye  = densities.gauss_ell(self.mu[i,:], self.va[i,:], 
+                xe, ye  = densities.gauss_ell(self.mu[i, :], self.va[i, :], 
                         dim, npoints, level)
                 Xe.append(xe)
                 Ye.append(ye)
         elif self.mode == 'full':
             for i in range(self.k):
-                xe, ye  = densities.gauss_ell(self.mu[i,:], 
-                        self.va[i*self.d:i*self.d+self.d,:], 
+                xe, ye  = densities.gauss_ell(self.mu[i, :], 
+                        self.va[i*self.d:i*self.d+self.d, :], 
                         dim, npoints, level)
                 Xe.append(xe)
                 Ye.append(ye)
@@ -202,8 +290,11 @@
         return Xe, Ye
     
     def check_state(self):
+        """Returns true if the parameters of the model are valid. 
+
+        For Gaussian mixtures, this means weights summing to 1 and variances
+        being positive definite.
         """
-        """
         if not self.is_valid:
             raise GmParamError("""Parameters of the model has not been 
                 set yet, please set them using self.set_param()""")
@@ -222,18 +313,33 @@
         cond    = N.zeros(self.k)
         ava     = N.absolute(self.va)
         for c in range(self.k):
-            cond[c] = N.amax(ava[c,:]) / N.amin(ava[c,:])
+            cond[c] = N.amax(ava[c, :]) / N.amin(ava[c, :])
 
         print cond
 
-    def gen_param(self, d, nc, varmode = 'diag', spread = 1):
-        """Generate valid parameters for a gaussian mixture model.
-        d is the dimension, nc the number of components, and varmode
-        the mode for cov matrices.
+    @classmethod
+    def gen_param(cls, d, nc, varmode = 'diag', spread = 1):
+        """Generate random, valid parameters for a gaussian mixture model.
 
+        :Parameters:
+            d : int
+                the dimension
+            nc : int
+                the number of components
+            varmode : string
+                covariance matrix mode ('full' or 'diag').
+
+        :Returns:
+            w : ndarray
+                weights of the mixture
+            mu : ndarray
+                means of the mixture
+            va : ndarray
+                variances of the mixture
+
+        Notes
+        -----
         This is a class method.
-
-        Returns: w, mu, va
         """
         w   = abs(randn(nc))
         w   = w / sum(w, 0)
@@ -251,13 +357,13 @@
 
         return w, mu, va
 
-    gen_param = classmethod(gen_param)
+    #gen_param = classmethod(gen_param)
 
-    #=======================
-    # Regularization methods
-    #=======================
-    def _regularize(self):
-        raise NotImplemented("No regularization")
+    # #=======================
+    # # Regularization methods
+    # #=======================
+    # def _regularize(self):
+    #     raise NotImplemented("No regularization")
 
     #=================
     # Plotting methods
@@ -266,10 +372,29 @@
             level = misc.DEF_LEVEL):
         """Plot the ellipsoides directly for the model
         
-        Returns a list of lines, so that their style can be modified. By default,
-        the style is red color, and nolegend for all of them.
+        Returns a list of line handles, so that their style can be modified. By
+        default, the style is red color, and nolegend for all of them.
         
-        Does not work for 1d"""
+        :Parameters:
+            dim : sequence
+                sequence of two integers, the dimensions of interest.
+            npoints : int
+                Number of points to use for the ellipsoids.
+            level : float
+                level of confidence (to use with fill argument)
+        
+        :Returns:
+            h : sequence
+                Returns a list of line handles so that their properties
+                can be modified (eg color, label, etc...).
+
+        Note
+        ----
+        Does not work for 1d. Requires matplotlib.
+        
+        :SeeAlso:
+            conf_ellipses is used to compute the ellipses. Use it if you want
+            to plot with something other than matplotlib."""
         if self.is1d:
             raise ValueError("This function does not make sense for 1d "
                 "mixtures.")
@@ -282,22 +407,32 @@
         Xe, Ye  = self.conf_ellipses(dim, npoints, level)
         try:
             import pylab as P
-            return [P.plot(Xe[i], Ye[i], 'r', label='_nolegend_')[0] for i in range(k)]
+            return [P.plot(Xe[i], Ye[i], 'r', label='_nolegend_')[0] for i in
+                    range(k)]
             #for i in range(k):
             #    P.plot(Xe[i], Ye[i], 'r')
         except ImportError:
             raise GmParamError("matplotlib not found, cannot plot...")
 
-    def plot1d(self, level = 0.5, fill = 0, gpdf = 0):
-        """This function plots the pdfs of each component of the model. 
-        If gpdf is 1, also plots the global pdf. If fill is 1, fill confidence
-        areas using level argument as a level value
+    def plot1d(self, level = misc.DEF_LEVEL, fill = False, gpdf = False):
+        """Plots the pdf of each component of the 1d mixture.
         
-        Returns a dictionary h of plot handles so that their properties can
-        be modified (eg color, label, etc...):
-            - h['pdf'] is a list of lines, one line per component pdf
-            - h['gpdf'] is the line for the global pdf
-            - h['conf'] is a list of filling area
+        :Parameters:
+            level : float
+                level of confidence (to use with fill argument)
+            fill : bool
+                if True, the area of the pdf corresponding to the given
+                confidence intervals is filled.
+            gpdf : bool
+                if True, the global pdf is plotted.
+        
+        :Returns:
+            h : dict
+                Returns a dictionary h of plot handles so that their properties
+                can be modified (eg color, label, etc...):
+                - h['pdf'] is a list of lines, one line per component pdf
+                - h['gpdf'] is the line for the global pdf
+                - h['conf'] is a list of filling area
         """
         if not self.is1d:
             raise ValueError("This function does not make sense for "
@@ -310,12 +445,12 @@
             raise GmParamError("the model is not one dimensional model")
         from scipy.stats import norm
         nrm     = norm(0, 1)
-        pval    = N.sqrt(self.va[:,0]) * nrm.ppf((1+level)/2)
+        pval    = N.sqrt(self.va[:, 0]) * nrm.ppf((1+level)/2)
 
         # Compute reasonable min/max for the normal pdf: [-mc * std, mc * std]
         # gives the range we are taking in account for each gaussian
         mc  = 3
-        std = N.sqrt(self.va[:,0])
+        std = N.sqrt(self.va[:, 0])
         m   = N.amin(self.mu[:, 0] - mc * std)
         M   = N.amax(self.mu[:, 0] + mc * std)
 
@@ -326,7 +461,7 @@
 
         # Prepare the dic of plot handles to return
         ks  = ['pdf', 'conf', 'gpdf']
-        hp  = dict((i,[]) for i in ks)
+        hp  = dict((i, []) for i in ks)
         try:
             import pylab as P
             for c in range(self.k):
@@ -336,7 +471,8 @@
                 h   = P.plot(x, y, 'r', label ='_nolegend_')
                 hp['pdf'].extend(h)
                 if fill:
-                    #P.axvspan(-pval[c] + self.mu[c][0], pval[c] + self.mu[c][0], 
+                    #P.axvspan(-pval[c] + self.mu[c][0], pval[c] +
+                    #self.mu[c][0], 
                     #        facecolor = 'b', alpha = 0.2)
                     id1 = -pval[c] + self.mu[c]
                     id2 = pval[c] + self.mu[c]
@@ -350,7 +486,8 @@
                             facecolor = 'b', alpha = 0.1, label='_nolegend_')
                     hp['conf'].extend(h)
                     #P.fill([xc[0], xc[0], xc[-1], xc[-1]], 
-                    #        [0, Yf[0], Yf[-1], 0], facecolor = 'b', alpha = 0.2)
+                    #        [0, Yf[0], Yf[-1], 0], facecolor = 'b', alpha =
+                    #        0.2)
             if gpdf:
                 h           = P.plot(x, Yt, 'r:', label='_nolegend_')
                 hp['gpdf']  = h
@@ -363,7 +500,7 @@
         the pdf of the mixture."""
         # XXX: have a public function to compute the pdf at given points
         # instead...
-        std = N.sqrt(self.va[:,0])
+        std = N.sqrt(self.va[:, 0])
         retval = N.empty((x.size, self.k))
         for c in range(self.k):
             retval[:, c] = self.w[c]/(N.sqrt(2*N.pi) * std[c]) * \
@@ -373,9 +510,30 @@
 
     def density_on_grid(self, dim = misc.DEF_VIS_DIM, nx = 50, ny = 50,
             maxlevel = 0.95):
-        """Do all the necessary computation for contour plot of mixture's density.
+        """Do all the necessary computation for contour plot of mixture's
+        density.
         
-        Returns X, Y, Z and V as expected by mpl contour function."""
+        :Parameters:
+            dim : sequence
+                sequence of two integers, the dimensions of interest.
+            nx : int
+                Number of points to use for the x axis of the grid
+            ny : int
+                Number of points to use for the y axis of the grid
+        
+        :Returns:
+            X : ndarray
+                points of the x axis of the grid
+            Y : ndarray
+                points of the y axis of the grid
+            Z : ndarray
+                values of the density on X and Y
+            V : ndarray
+                Contour values to display.
+            
+        Note
+        ----
+        X, Y, Z and V are as expected by matplotlib's contour function."""
         if self.is1d:
             raise ValueError("This function does not make sense for 1d "
                 "mixtures.")
@@ -397,13 +555,14 @@
         X, Y, den = self._densityctr(N.linspace(ax[0]-0.2*w, ax[1]+0.2*w, nx), \
                 N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny), dim = dim)
         lden = N.log(den)
+        # XXX: how to find "good" values for level ?
         V = [-5, -3, -1, -0.5, ]
         V.extend(N.linspace(0, N.max(lden), 4).tolist())
         return X, Y, lden, N.array(V)
 
-    def _densityctr(self, xrange, yrange, dim = misc.DEF_VIS_DIM):
+    def _densityctr(self, rangex, rangey, dim = misc.DEF_VIS_DIM):
         """Helper function to compute density contours on a grid."""
-        gr = N.meshgrid(xrange, yrange)
+        gr = N.meshgrid(rangex, rangey)
         X = gr[0].flatten()
         Y = gr[1].flatten()
         xdata = N.concatenate((X[:, N.newaxis], Y[:, N.newaxis]), axis = 1)
@@ -412,7 +571,7 @@
         dva = self._get_va(dim)
         den = densities.multiple_gauss_den(xdata, dmu, dva) * self.w
         den = N.sum(den, 1)
-        den = den.reshape(len(yrange), len(xrange))
+        den = den.reshape(len(rangey), len(rangex))
 
         X = gr[0]
         Y = gr[1]
@@ -435,16 +594,16 @@
 
     # Syntactic sugar
     def __repr__(self):
-        repr    = ""
-        repr    += "Gaussian Mixture:\n"
-        repr    += " -> %d dimensions\n" % self.d
-        repr    += " -> %d components\n" % self.k
-        repr    += " -> %s covariance \n" % self.mode
+        msg = ""
+        msg += "Gaussian Mixture:\n"
+        msg += " -> %d dimensions\n" % self.d
+        msg += " -> %d components\n" % self.k
+        msg += " -> %s covariance \n" % self.mode
         if self.is_valid:
-            repr    += "Has initial values"""
+            msg += "Has initial values"""
         else:
-            repr    += "Has no initial values yet"""
-        return repr
+            msg += "Has no initial values yet"""
+        return msg
 
     def __str__(self):
         return self.__repr__()
@@ -472,19 +631,26 @@
 
 def check_gmm_param(w, mu, va):
     """Check that w, mu and va are valid parameters for
-    a mixture of gaussian: w should sum to 1, there should
-    be the same number of component in each param, the variances
-    should be positive definite, etc... 
+    a mixture of gaussians.
     
-    Params:
-        w   = vector or list of weigths of the mixture (K elements)
-        mu  = matrix: K * d
-        va  = list of variances (vector K * d or square matrices Kd * d)
+    w should sum to 1, there should be the same number of components in each
+    param, the variances should be positive definite, etc... 
+    
+    :Parameters:
+        w : ndarray
+            vector or list of weights of the mixture (K elements)
+        mu : ndarray
+            matrix: K * d
+        va : ndarray
+            list of variances (vector K * d or square matrices Kd * d)
 
-    returns:
-        K   = number of components
-        d   = dimension
-        mode    = 'diag' if diagonal covariance, 'full' of full matrices
+    :Returns:
+        k : int
+            number of components
+        d : int
+            dimension
+        mode : string
+            'diag' if diagonal covariance, 'full' if full matrices
     """
         
     # Check that w is valid
@@ -527,34 +693,35 @@
     return K, d, mode
         
 if __name__ == '__main__':
-    # Meta parameters:
-    #   - k = number of components
-    #   - d = dimension
-    #   - mode : mode of covariance matrices
-    d       = 5
-    k       = 4
+    pass
+    ## # Meta parameters:
+    ## #   - k = number of components
+    ## #   - d = dimension
+    ## #   - mode : mode of covariance matrices
+    ## d       = 5
+    ## k       = 4
 
-    # Now, drawing a model
-    mode    = 'full'
-    nframes = 1e3
+    ## # Now, drawing a model
+    ## mode    = 'full'
+    ## nframes = 1e3
 
-    # Build a model with random parameters
-    w, mu, va   = GM.gen_param(d, k, mode, spread = 3)
-    gm          = GM.fromvalues(w, mu, va)
+    ## # Build a model with random parameters
+    ## w, mu, va   = GM.gen_param(d, k, mode, spread = 3)
+    ## gm          = GM.fromvalues(w, mu, va)
 
-    # Sample nframes frames  from the model
-    X   = gm.sample(nframes)
+    ## # Sample nframes frames  from the model
+    ## X   = gm.sample(nframes)
 
-    # Plot the data
-    import pylab as P
-    P.plot(X[:, 0], X[:, 1], '.', label = '_nolegend_')
+    ## # Plot the data
+    ## import pylab as P
+    ## P.plot(X[:, 0], X[:, 1], '.', label = '_nolegend_')
 
-    # Real confidence ellipses with confidence level 
-    level       = 0.50
-    h           = gm.plot(level=level)
+    ## # Real confidence ellipses with confidence level 
+    ## level       = 0.50
+    ## h           = gm.plot(level=level)
 
-    # set the first ellipse label, which will appear in the legend
-    h[0].set_label('confidence ell at level ' + str(level))
+    ## # set the first ellipse label, which will appear in the legend
+    ## h[0].set_label('confidence ell at level ' + str(level))
 
-    P.legend(loc = 0)
-    P.show()
+    ## P.legend(loc = 0)
+    ## P.show()

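The 'full' branch of GM.sample above uses the usual Cholesky coloring trick:
draw white gaussian frames, then multiply by a Cholesky factor of the
covariance. A standalone numpy illustration for a single component (the
parameter values here are made up):

    import numpy as N
    from numpy.random import randn

    mu = N.array([0., 1.])                  # component mean
    va = N.array([[2., 0.5], [0.5, 1.]])    # positive definite covariance
    cho = N.linalg.cholesky(va)             # va == N.dot(cho, cho.T)
    x = randn(1000, 2)                      # white gaussian frames
    x = N.dot(x, cho.transpose()) + mu      # frames now drawn from N(mu, va)
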
Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/gmm_em.py	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,6 +1,12 @@
 # /usr/bin/python
-# Last Change: Fri Jun 08 08:00 PM 2007 J
+# Last Change: Sat Jun 09 10:00 PM 2007 J
 
+"""Module implementing GMM, a class to estimate Gaussian mixture models using
+EM, and EM, a class which uses GMM instances to estimate model parameters with
+the Expectation Maximization algorithm."""
+
+__docformat__ = 'restructuredtext'
+
 # TODO:
 #   - which methods to avoid va shrinking to 0 ? There are several options, 
 #   not sure which ones are appropriates
@@ -8,22 +14,23 @@
 #   - online EM
 
 import numpy as N
-import numpy.linalg as lin
+#import numpy.linalg as lin
 from numpy.random import randn
 #import _c_densities as densities
 import densities
 #from kmean import kmean
 from scipy.cluster.vq import kmeans2 as kmean
-from gauss_mix import GM
+#from gauss_mix import GM
 
-from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND
+#from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND
 
 # Error classes
 class GmmError(Exception):
     """Base class for exceptions in this module."""
-    pass
+    def __init__(self):
+        Exception.__init__(self)
 
-class GmmParamError:
+class GmmParamError(GmmError):
     """Exception raised for errors in gmm params
 
     Attributes:
@@ -31,41 +38,33 @@
         message -- explanation of the error
     """
     def __init__(self, message):
+        GmmError.__init__(self)
         self.message    = message
     
     def __str__(self):
         return self.message
 
-# Not sure yet about how to design different mixture models. Most of the code 
-# is different # (pdf, update part of EM, etc...) and I am not sure it makes 
-# sense to use inheritance for # interface specification in python, since its 
-# dynamic type systeme.
-
-# Anyway, a mixture model class should encapsulates all details 
-# concerning getting sufficient statistics (SS), likelihood and bic.
 class MixtureModel(object):
     pass
 
 class ExpMixtureModel(MixtureModel):
-    """Class to model mixture of exponential pdf (eg Gaussian, exponential, Laplace, 
-    etc..). This is a special case because some parts of EM are common for those
-    models..."""
+    """Class to model mixture of exponential pdf (eg Gaussian, exponential,
+    Laplace, etc..). This is a special case because some parts of EM are common
+    for those models..."""
     pass
 
 class GMM(ExpMixtureModel):
-    """ A class to model a Gaussian Mixture Model (GMM). An instance of 
-    this class is created by giving weights, mean and variances in the ctor.
-    An instanciated object can be sampled, trained by EM. 
-    
-    The class method gen_model can be used without instanciation."""
-
+    """ A class to model a Gaussian Mixture Model (GMM). An instance of this
+    class is created by giving weights, means and variances in the ctor.  An
+    instantiated object can be sampled and trained by EM. """
     def init_kmean(self, data, niter = 5):
         """ Init the model with kmean."""
         k       = self.gm.k
         d       = self.gm.d
         init    = data[0:k, :]
 
-        # XXX: This is bogus: should do better (in kmean or here, do not know yet)
+        # XXX: This initialization is bogus; should do better (in kmean or
+        # here, do not know yet).
         (code, label)   = kmean(data, init, niter, minit = 'matrix')
 
         w   = N.ones(k) / k
@@ -74,14 +73,15 @@
             va = N.zeros((k, d))
             for i in range(k):
                 for j in range(d):
-                    va[i,j] = N.cov(data[N.where(label==i), j], rowvar = 0)
+                    va[i, j] = N.cov(data[N.where(label==i), j], rowvar = 0)
         elif self.gm.mode == 'full':
             va  = N.zeros((k*d, d))
             for i in range(k):
-                va[i*d:i*d+d,:] = \
+                va[i*d:i*d+d, :] = \
                     N.cov(data[N.where(label==i)], rowvar = 0)
         else:
-            raise GmmParamError("mode " + str(mode) + " not recognized")
+            raise GmmParamError("mode " + str(self.gm.mode) + \
+                    " not recognized")
 
         self.gm.set_param(w, mu, va)
 
@@ -96,8 +96,8 @@
             mu  = randn(k, d)
             va  = N.fabs(randn(k, d))
         else:
-            raise GmmParamError("""init_random not implemented for
-                    mode %s yet""", mode)
+            raise GmmParamError("init_random not implemented for "
+                    "mode %s yet", self.gm.mode)
 
         self.gm.set_param(w, mu, va)
         
@@ -109,8 +109,18 @@
     #   - To handle the different modes, we could do something "fancy" such as
 #   replacing methods, to avoid checking cases everywhere and inconsistency.
     def __init__(self, gm, init = 'kmean'):
-        """ Initialize a GMM with weight w, mean mu and variances va, and initialization
-        method for training init (kmean by default)"""
+        """Initialize a mixture model.
+        
+        Initialize the model from a GM instance. This class implements all the
+        necessary functionalities for EM.
+
+        :Parameters:
+            gm : GM
+                the mixture model to train.
+            init : string
+                initialization method to use.
+        
+        """
         self.gm = gm
 
         # Possible init methods
@@ -124,17 +134,18 @@
         self.initst = init
 
     def sufficient_statistics(self, data):
-        """ Return normalized and non-normalized sufficient statistics
-        from the model.
+        """Compute responsabilities.
         
-        Computes the latent variable distribution (a 
-        posteriori probability) knowing the explicit data 
-        for the Gaussian model (w, mu, var): gamma(t, i) = 
-            P[state = i | observation = data(t); w, mu, va]
+        Return normalized and non-normalized sufficient statistics from the
+        model.
+        
+        Note
+        ----
+        Computes the latent variable distribution (a posteriori probability)
+        knowing the explicit data for the Gaussian model (w, mu, va):
+        gamma(t, i) = P[state = i | observation = data(t); w, mu, va]
 
         This is basically the E step of EM for GMM."""
-        n   = data.shape[0]
-
         # compute the gaussian pdf
         tgd	= densities.multiple_gauss_den(data, self.gm.mu, self.gm.va)
         # multiply by the weight
@@ -149,22 +160,22 @@
         from the a posteriori pdf, computed by gmm_posterior
         (E step).
         """
-        k       = self.gm.k
-        d       = self.gm.d
-        n       = data.shape[0]
-        invn    = 1.0/n
-        mGamma  = N.sum(gamma, axis = 0)
+        k = self.gm.k
+        d = self.gm.d
+        n = data.shape[0]
+        invn = 1.0/n
+        mGamma = N.sum(gamma, axis = 0)
 
         if self.gm.mode == 'diag':
-            mu      = N.zeros((k, d))
-            va      = N.zeros((k, d))
-            gamma   = gamma.T
+            mu = N.zeros((k, d))
+            va = N.zeros((k, d))
+            gamma = gamma.T
             for c in range(k):
-                x   = N.dot(gamma[c:c+1,:], data)[0,:]
-                xx  = N.dot(gamma[c:c+1,:], data ** 2)[0,:]
+                x = N.dot(gamma[c:c+1, :], data)[0, :]
+                xx = N.dot(gamma[c:c+1, :], data ** 2)[0, :]
 
-                mu[c,:] = x / mGamma[c]
-                va[c,:] = xx  / mGamma[c] - mu[c,:] ** 2
+                mu[c, :] = x / mGamma[c]
+                va[c, :] = xx  / mGamma[c] - mu[c, :] ** 2
             w   = invn * mGamma
 
         elif self.gm.mode == 'full':
@@ -177,21 +188,22 @@
             mu  = N.zeros((k, d))
             va  = N.zeros((k*d, d))
 
-            gamma   = gamma.transpose()
+            gamma = gamma.transpose()
             for c in range(k):
                 #x   = N.sum(N.outer(gamma[:, c], 
                 #            N.ones((1, d))) * data, axis = 0)
-                x   = N.dot(gamma[c:c+1,:], data)[0,:]
-                xx  = N.zeros((d, d))
+                x = N.dot(gamma[c:c+1, :], data)[0, :]
+                xx = N.zeros((d, d))
                 
                 # This should be much faster than recursing on n...
                 for i in range(d):
                     for j in range(d):
-                        xx[i,j] = N.sum(data[:,i] * data[:,j] * gamma[c,:], axis = 0)
+                        xx[i, j] = N.sum(data[:, i] * data[:, j] * gamma[c, :],
+                                axis = 0)
 
-                mu[c,:] = x / mGamma[c]
-                va[c*d:c*d+d,:] = xx  / mGamma[c] - \
-                                    N.outer(mu[c,:], mu[c,:])
+                mu[c, :] = x / mGamma[c]
+                va[c*d:c*d+d, :] = xx  / mGamma[c] \
+                        - N.outer(mu[c, :], mu[c, :])
             w   = invn * mGamma
         else:
             raise GmmParamError("varmode not recognized")
@@ -226,19 +238,17 @@
         of the definition given here. """
 
         if self.gm.mode == 'diag':
-            """ for a diagonal model, we have
-            k - 1 (k weigths, but one constraint of normality)
-            + k * d (means) + k * d (variances) """
+            # for a diagonal model, we have k - 1 (k weights, but one
+            # constraint of normality) + k * d (means) + k * d (variances)
             free_deg    = self.gm.k * (self.gm.d * 2 + 1) - 1
         elif self.gm.mode == 'full':
-            """ for a full model, we have
-            k - 1 (k weigths, but one constraint of normality)
-            + k * d (means) + k * d * d / 2 (each covariance matrice
-            has d **2 params, but with positivity constraint) """
+            # for a full model, we have k - 1 (k weights, but one constraint of
+            # normality) + k * d (means) + k * d * d / 2 (each covariance
+            # matrix has d ** 2 params, but with positivity constraint)
             if self.gm.d == 1:
-                free_deg    = self.gm.k * 3 - 1
+                free_deg = self.gm.k * 3 - 1
             else:
-                free_deg    = self.gm.k * (self.gm.d + 1 + self.gm.d ** 2 / 2) - 1
+                free_deg = self.gm.k * (self.gm.d + 1 + self.gm.d ** 2 / 2) - 1
 
         lk  = self.likelihood(data)
         n   = N.shape(data)[0]
@@ -261,21 +271,32 @@
         pass
     
     def train(self, data, model, maxiter = 10, thresh = 1e-5):
-        """
-        Train a model using data, and stops when the likelihood fails
-        behind a threshold, or when the number of iterations > niter, 
-        whichever comes first
+        """Train a model using EM.
 
-        Args:
-            - data:     contains the observed features, one row is one frame, ie one 
-            observation of dimension d
-            - model:    object of class Mixture
-            - maxiter:  maximum number of iterations
+        Train a model using data, stopping when the likelihood increase
+        between two consecutive iterations falls below a threshold, or when the
+        number of iterations exceeds maxiter, whichever comes first.
 
-        The model is trained, and its parameters updated accordingly.
+        :Parameters:
+            data : ndarray
+                contains the observed features, one row is one frame, ie one
+                observation of dimension d
+            model : GMM
+                GMM instance.
+            maxiter : int
+                maximum number of iterations
+            thresh : float
+                if the slope of the likelihood falls below this value, the
+                algorithm stops.
 
-        Returns:
-            likelihood (one value per iteration).
+        :Returns:
+            likelihood : ndarray
+                one value per iteration.
+
+        Note
+        ----
+        The model is trained, and its parameters updated accordingly, ie the
+        results are put in the GMM instance.
         """
         if not isinstance(model, MixtureModel):
             raise TypeError("expect a MixtureModel as a model")
@@ -296,62 +317,24 @@
             model.update_em(data, g)
             if has_em_converged(like[i], like[i-1], thresh):
                 return like[0:i]
-        # # Em computation, with computation of the likelihood
-        # g, tgd      = model.sufficient_statistics(data)
-        # like[0]     = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
-        # model.update_em(data, g)
-        # for i in range(1, maxiter):
-        #     print "=== Iteration %d ===" % i
-        #     isreg   = False
-        #     for j in range(model.gm.k):
-        #         va  = model.gm.va[j]
-        #         if va.any() < _MIN_INV_COND:
-        #             isreg   = True
-        #             print "\tregularization detected"
-        #             print "\t" + str(va)
-        #             model.gm.va[j]  = regularize_diag(va)
-        #             print "\t" + str(va) + ", " + str(model.gm.va[j])
-        #             print "\t" + str(gauss_den(data, model.gm.mu[j], model.gm.va[j]))
-        #             print "\tend regularization detected"
-        #             var = va
-        #         
-        #     g, tgd      = model.sufficient_statistics(data)
-        #     try:
-        #         assert not( (N.isnan(tgd)).any() )
-        #         if isreg:
-        #             print var
-        #     except AssertionError:
-        #         print "tgd is nan..."
-        #         print model.gm.va[13,:]
-        #         print 1/model.gm.va[13,:]
-        #         print densities.gauss_den(data, model.gm.mu[13], model.gm.va[13])
-        #         print N.isnan((multiple_gauss_den(data, model.gm.mu, model.gm.va))).any()
-        #         print "Exciting"
-        #         import sys
-        #         sys.exit(-1)
-        #     like[i]     = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
-        #     model.update_em(data, g)
-        #     assert not( model.gm.va.any() < 1e-6)
-        #     if has_em_converged(like[i], like[i-1], thresh):
-        #         return like[0:i]
 
         return like
     
-def regularize_diag(variance, alpha = _DEF_ALPHA):
-    delta   = N.sum(variance) / variance.size
-    if delta > _MIN_DBL_DELTA:
-        return variance + alpha * delta
-    else:
-        return variance + alpha * _MIN_DBL_DELTA
+#def regularize_diag(variance, alpha = _DEF_ALPHA):
+#    delta   = N.sum(variance) / variance.size
+#    if delta > _MIN_DBL_DELTA:
+#        return variance + alpha * delta
+#    else:
+#        return variance + alpha * _MIN_DBL_DELTA
+#
+#def regularize_full(variance):
+#    # Trace of a positive definite matrix is always > 0
+#    delta   = N.trace(variance) / variance.shape[0]
+#    if delta > _MIN_DBL_DELTA:
+#        return variance + alpha * delta
+#    else:
+#        return variance + alpha * _MIN_DBL_DELTA
 
-def regularize_full(variance):
-    # Trace of a positive definite matrix is always > 0
-    delta   = N.trace(variance) / variance.shape[0]
-    if delta > _MIN_DBL_DELTA:
-        return variance + alpha * delta
-    else:
-        return variance + alpha * _MIN_DBL_DELTA
-
 # Misc functions
 def bic(lk, deg, n):
     """ Expects lk to be log likelihood """
@@ -369,127 +352,129 @@
         return False
 
 if __name__ == "__main__":
-    import copy
-    #=============================
-    # Simple GMM with 5 components
-    #=============================
+    pass
+    ## import copy
+    ## #=============================
+    ## # Simple GMM with 5 components
+    ## #=============================
 
-    #+++++++++++++++++++++++++++++
-    # Meta parameters of the model
-    #   - k: Number of components
-    #   - d: dimension of each Gaussian
-    #   - mode: Mode of covariance matrix: full or diag
-    #   - nframes: number of frames (frame = one data point = one
-    #   row of d elements
-    k       = 2 
-    d       = 1
-    mode    = 'full'
-    nframes = 1e3
+    ## #+++++++++++++++++++++++++++++
+    ## # Meta parameters of the model
+    ## #   - k: Number of components
+    ## #   - d: dimension of each Gaussian
+    ## #   - mode: Mode of covariance matrix: full or diag
+    ## #   - nframes: number of frames (frame = one data point = one
+    ## #   row of d elements
+    ## k       = 2 
+    ## d       = 1
+    ## mode    = 'full'
+    ## nframes = 1e3
 
-    #+++++++++++++++++++++++++++++++++++++++++++
-    # Create an artificial GMM model, samples it
-    #+++++++++++++++++++++++++++++++++++++++++++
-    print "Generating the mixture"
-    # Generate a model with k components, d dimensions
-    w, mu, va   = GM.gen_param(d, k, mode, spread = 3)
-    gm          = GM(d, k, mode)
-    gm.set_param(w, mu, va)
+    ## #+++++++++++++++++++++++++++++++++++++++++++
+    ## # Create an artificial GMM model, samples it
+    ## #+++++++++++++++++++++++++++++++++++++++++++
+    ## print "Generating the mixture"
+    ## # Generate a model with k components, d dimensions
+    ## w, mu, va   = GM.gen_param(d, k, mode, spread = 3)
+    ## gm          = GM(d, k, mode)
+    ## gm.set_param(w, mu, va)
 
-    # Sample nframes frames  from the model
-    data    = gm.sample(nframes)
+    ## # Sample nframes frames  from the model
+    ## data    = gm.sample(nframes)
 
-    #++++++++++++++++++++++++
-    # Learn the model with EM
-    #++++++++++++++++++++++++
+    ## #++++++++++++++++++++++++
+    ## # Learn the model with EM
+    ## #++++++++++++++++++++++++
 
-    # Init the model
-    print "Init a model for learning, with kmean for initialization"
-    lgm = GM(d, k, mode)
-    gmm = GMM(lgm, 'kmean')
-    gmm.init(data)
+    ## # Init the model
+    ## print "Init a model for learning, with kmean for initialization"
+    ## lgm = GM(d, k, mode)
+    ## gmm = GMM(lgm, 'kmean')
+    ## gmm.init(data)
 
-    # Keep the initialized model for drawing
-    gm0 = copy.copy(lgm)
+    ## # Keep the initialized model for drawing
+    ## gm0 = copy.copy(lgm)
 
-    # The actual EM, with likelihood computation
-    niter   = 10
-    like    = N.zeros(niter)
+    ## # The actual EM, with likelihood computation
+    ## niter   = 10
+    ## like    = N.zeros(niter)
 
-    print "computing..."
-    for i in range(niter):
-        g, tgd  = gmm.sufficient_statistics(data)
-        like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
-        gmm.update_em(data, g)
-    # # Alternative form, by using EM class: as the EM class
-    # # is quite rudimentary now, it is not very useful, just save
-    # # a few lines
-    # em      = EM()
-    # like    = em.train(data, gmm, niter)
+    ## print "computing..."
+    ## for i in range(niter):
+    ##     g, tgd  = gmm.sufficient_statistics(data)
+    ##     like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
+    ##     gmm.update_em(data, g)
+    ## # # Alternative form, by using EM class: as the EM class
+    ## # # is quite rudimentary now, it is not very useful, just save
+    ## # # a few lines
+    ## # em      = EM()
+    ## # like    = em.train(data, gmm, niter)
 
-    #+++++++++++++++
-    # Draw the model
-    #+++++++++++++++
-    print "drawing..."
-    import pylab as P
-    P.subplot(2, 1, 1)
+    ## #+++++++++++++++
+    ## # Draw the model
+    ## #+++++++++++++++
+    ## print "drawing..."
+    ## import pylab as P
+    ## P.subplot(2, 1, 1)
 
-    if not d == 1:
-        # Draw what is happening
-        P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
+    ## if not d == 1:
+    ##     # Draw what is happening
+    ##     P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
 
-        # Real confidence ellipses
-        Xre, Yre  = gm.conf_ellipses()
-        P.plot(Xre[0], Yre[0], 'g', label = 'true confidence ellipsoides')
-        for i in range(1,k):
-            P.plot(Xre[i], Yre[i], 'g', label = '_nolegend_')
+    ##     # Real confidence ellipses
+    ##     Xre, Yre  = gm.conf_ellipses()
+    ##     P.plot(Xre[0], Yre[0], 'g', label = 'true confidence ellipsoides')
+    ##     for i in range(1,k):
+    ##         P.plot(Xre[i], Yre[i], 'g', label = '_nolegend_')
 
-        # Initial confidence ellipses as found by kmean
-        X0e, Y0e  = gm0.conf_ellipses()
-        P.plot(X0e[0], Y0e[0], 'k', label = 'initial confidence ellipsoides')
-        for i in range(1,k):
-            P.plot(X0e[i], Y0e[i], 'k', label = '_nolegend_')
+    ##     # Initial confidence ellipses as found by kmean
+    ##     X0e, Y0e  = gm0.conf_ellipses()
+    ##     P.plot(X0e[0], Y0e[0], 'k', label = 'initial confidence ellipsoids')
+    ##     for i in range(1,k):
+    ##         P.plot(X0e[i], Y0e[i], 'k', label = '_nolegend_')
 
-        # Values found by EM
-        Xe, Ye  = lgm.conf_ellipses()
-        P.plot(Xe[0], Ye[0], 'r', label = 'confidence ellipsoides found by EM')
-        for i in range(1,k):
-            P.plot(Xe[i], Ye[i], 'r', label = '_nolegend_')
-        P.legend(loc = 0)
-    else:
-        # Real confidence ellipses
-        h   = gm.plot1d()
-        [i.set_color('g') for i in h['pdf']]
-        h['pdf'][0].set_label('true pdf')
+    ##     # Values found by EM
+    ##     Xe, Ye  = lgm.conf_ellipses()
+    ##     P.plot(Xe[0], Ye[0], 'r', label = "confidence ellipsoids found by "
+    ##            "EM")
+    ##     for i in range(1,k):
+    ##         P.plot(Xe[i], Ye[i], 'r', label = '_nolegend_')
+    ##     P.legend(loc = 0)
+    ## else:
+    ##     # Real confidence ellipses
+    ##     h   = gm.plot1d()
+    ##     [i.set_color('g') for i in h['pdf']]
+    ##     h['pdf'][0].set_label('true pdf')
 
-        # Initial confidence ellipses as found by kmean
-        h0  = gm0.plot1d()
-        [i.set_color('k') for i in h0['pdf']]
-        h0['pdf'][0].set_label('initial pdf')
+    ##     # Initial confidence ellipses as found by kmean
+    ##     h0  = gm0.plot1d()
+    ##     [i.set_color('k') for i in h0['pdf']]
+    ##     h0['pdf'][0].set_label('initial pdf')
 
-        # Values found by EM
-        hl  = lgm.plot1d(fill = 1, level = 0.66)
-        [i.set_color('r') for i in hl['pdf']]
-        hl['pdf'][0].set_label('pdf found by EM')
+    ##     # Values found by EM
+    ##     hl  = lgm.plot1d(fill = 1, level = 0.66)
+    ##     [i.set_color('r') for i in hl['pdf']]
+    ##     hl['pdf'][0].set_label('pdf found by EM')
 
-        P.legend(loc = 0)
+    ##     P.legend(loc = 0)
 
-    P.subplot(2, 1, 2)
-    P.plot(like)
-    P.title('log likelihood')
+    ## P.subplot(2, 1, 2)
+    ## P.plot(like)
+    ## P.title('log likelihood')
 
-    # #++++++++++++++++++
-    # # Export the figure
-    # #++++++++++++++++++
-    # F   = P.gcf()
-    # DPI = F.get_dpi()
-    # DefaultSize = F.get_size_inches()
-    # # the default is 100dpi for savefig:
-    # F.savefig("example1.png")
+    ## # #++++++++++++++++++
+    ## # # Export the figure
+    ## # #++++++++++++++++++
+    ## # F   = P.gcf()
+    ## # DPI = F.get_dpi()
+    ## # DefaultSize = F.get_size_inches()
+    ## # # the default is 100dpi for savefig:
+    ## # F.savefig("example1.png")
 
-    # # Now make the image twice as big, while keeping the fonts and all the
-    # # same size
-    # F.set_figsize_inches( (DefaultSize[0]*2, DefaultSize[1]*2) )
-    # Size = F.get_size_inches()
-    # print "Size in Inches", Size
-    # F.savefig("example2.png")
-    P.show()
+    ## # # Now make the image twice as big, while keeping the fonts and all the
+    ## # # same size
+    ## # F.set_figsize_inches( (DefaultSize[0]*2, DefaultSize[1]*2) )
+    ## # Size = F.get_size_inches()
+    ## # print "Size in Inches", Size
+    ## # F.savefig("example2.png")
+    ## P.show()
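
For the record, the demo commented out above condenses to the sketch below. It
is assembled from the code in this hunk, with illustrative values substituted
for d, k, mode and nframes (the original definitions are not shown here), and
has not been re-run against this revision:

    import copy
    import numpy as N
    from gauss_mix import GM
    from gmm_em import GMM, EM

    d, k, mode, nframes = 2, 3, 'diag', 1000

    # Build an artificial mixture and sample training data from it
    w, mu, va = GM.gen_param(d, k, mode, spread = 3)
    gm = GM(d, k, mode)
    gm.set_param(w, mu, va)
    data = gm.sample(nframes)

    # Learn a new model with EM, starting from a kmean initialization
    lgm = GM(d, k, mode)
    gmm = GMM(lgm, 'kmean')
    gmm.init(data)
    gm0 = copy.copy(lgm)    # keep the initial model around for plotting

    # EM iterations, tracking the log likelihood at each step
    niter = 10
    like = N.zeros(niter)
    for i in range(niter):
        g, tgd = gmm.sufficient_statistics(data)
        like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
        gmm.update_em(data, g)

As the commented-out alternative notes, the same loop can be delegated to the
EM class with like = EM().train(data, gmm, niter).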

Modified: trunk/Lib/sandbox/pyem/info.py
===================================================================
--- trunk/Lib/sandbox/pyem/info.py	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/info.py	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,61 +1,63 @@
 """
-Routines for Gaussian Mixture Models
-and learning with Expectation Maximization 
-==========================================
+Routines for Gaussian Mixture Models and learning with Expectation Maximization 
+===============================================================================
 
-This module contains classes and function to compute multivariate Gaussian densities
-(diagonal and full covariance matrices), Gaussian mixtures, Gaussian mixtures models
-and an Em trainer.
+This module contains classes and functions to compute multivariate Gaussian
+densities (diagonal and full covariance matrices), Gaussian mixtures, Gaussian
+mixture models and an EM trainer.
 
 More specifically, the module defines the following classes, functions:
 
 - densities.gauss_den: function to compute multivariate Gaussian pdf 
-- gauss_mix.GM: defines the GM (Gaussian Mixture) class. A Gaussian Mixture can be
-created from its parameters weights, mean and variances, or from its meta parameters
-d (dimension of the Gaussian) and k (number of components in the mixture). A Gaussian
-Model can then be sampled or plot (if d>1, plot confidence ellipsoids projected on 
-2 chosen dimensions, if d == 1, plot the pdf of each component and fill the zone
-of confidence for a given level)
-- gmm_em.GMM: defines a class GMM (Gaussian Mixture Model). This class is constructed
-from a GM model gm, and can be used to train gm. The GMM can be initiated by
-kmean or at random, and can compute sufficient statistics, and update its parameters
-from the sufficient statistics.
-- kmean.kmean: implements a kmean algorithm. We cannot use scipy.cluster.vq kmeans, since
-its does not give membership of observations.
+- gauss_mix.GM: defines the GM (Gaussian Mixture) class. A Gaussian mixture can
+  be created from its parameters (weights, means and variances), or from its
+  meta parameters d (dimension of the Gaussians) and k (number of components in
+  the mixture). A Gaussian mixture can then be sampled or plotted (if d > 1,
+  plot confidence ellipsoids projected on 2 chosen dimensions; if d == 1, plot
+  the pdf of each component and fill the confidence zone for a given level)
+- gmm_em.GMM: defines the GMM (Gaussian Mixture Model) class. This class is
+  constructed from a GM model gm, and can be used to train gm. The GMM can be
+  initialized by kmean or at random, can compute sufficient statistics, and can
+  update its parameters from those sufficient statistics.
+- kmean.kmean: implements a k-means algorithm. We cannot use scipy.cluster.vq's
+  kmeans, since it does not return the membership of the observations.
 
 Example of use: 
-    #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-    # Create an artificial 2 dimension, 3 clusters GM model, samples it
-    #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-    w, mu, va   = GM.gen_param(2, 3, 'diag', spread = 1.5)
-    gm          = GM.fromvalues(w, mu, va)
+---------------
 
-    # Sample 1000 frames  from the model
-    data    = gm.sample(1000)
+>>> #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+>>> # Create an artificial 2 dimension, 3 clusters GM model, samples it
+>>> #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+>>> w, mu, va   = GM.gen_param(2, 3, 'diag', spread = 1.5)
+>>> gm          = GM.fromvalues(w, mu, va)
+>>> 
+>>> # Sample 1000 frames from the model
+>>> data    = gm.sample(1000)
+>>> 
+>>> #++++++++++++++++++++++++
+>>> # Learn the model with EM
+>>> #++++++++++++++++++++++++
+>>> # Init the model
+>>> lgm = GM(2, 3, 'diag')
+>>> gmm = GMM(lgm, 'kmean')
+>>> 
+>>> # The actual EM, with likelihood computation. The threshold
+>>> # is compared to the (linearly approximated) derivative of the likelihood
+>>> em      = EM()
+>>> like    = em.train(data, gmm, maxiter = 30, thresh = 1e-8)
 
-    #++++++++++++++++++++++++
-    # Learn the model with EM
-    #++++++++++++++++++++++++
-    # Init the model
-    lgm = GM(d, k, mode)
-    gmm = GMM(lgm, 'kmean')
-
-    # The actual EM, with likelihood computation. The threshold
-    # is compared to the (linearly appromixated) derivative of the likelihood
-    em      = EM()
-    like    = em.train(data, gmm, maxiter = 30, thresh = 1e-8)
-
 Files example.py and example2.py show more capabilities of the toolbox, including
 plotting capabilities (using matplotlib) and model selection using Bayesian 
 Information Criterion (BIC).
 
 Bibliography:
-    * Maximum likelihood from incomplete data via the EM algorithm in Journal of 
-    the Royal Statistical Society, Series B, 39(1):1--38, 1977, by A. P. Dempster, 
-    N. M. Laird, and D. B. Rubin
-    * Bayesian Approaches to Gaussian Mixture Modelling (1998) by 
-    Stephen J. Roberts, Dirk Husmeier, Iead Rezek, William Penny in 
-    IEEE Transactions on Pattern Analysis and Machine Intelligence
+
+- A. P. Dempster, N. M. Laird and D. B. Rubin, "Maximum likelihood from
+  incomplete data via the EM algorithm", Journal of the Royal Statistical
+  Society, Series B, 39(1):1--38, 1977
+- S. J. Roberts, D. Husmeier, I. Rezek and W. Penny, "Bayesian Approaches to
+  Gaussian Mixture Modelling", IEEE Transactions on Pattern Analysis and
+  Machine Intelligence, 1998
      
 Copyright: David Cournapeau 2006
 License: BSD-style (see LICENSE.txt in main source directory)
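
The docstring points at BIC-based model selection (in example.py/example2.py)
without stating the criterion, so here is a minimal, library-agnostic sketch.
The helper below is illustrative only, not part of pyem's API, and uses the
"higher is better" sign convention; free_deg must be supplied by the caller:

    import numpy as N

    def bic(log_like, free_deg, n):
        """Bayesian Information Criterion of a trained model.

        log_like : log likelihood of the training data under the model
        free_deg : number of free parameters; for a diagonal GMM with k
                   components in d dimensions this is k * (2 * d + 1) - 1
                   (k*d means, k*d variances, k-1 independent weights)
        n        : number of training frames
        """
        return 2 * log_like - free_deg * N.log(n)

Candidate models, e.g. with increasing k, are then trained independently and
the one with the largest value is kept.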

Modified: trunk/Lib/sandbox/pyem/online_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/online_em.py	2007-06-09 11:43:51 UTC (rev 3086)
+++ trunk/Lib/sandbox/pyem/online_em.py	2007-06-09 14:03:01 UTC (rev 3087)
@@ -1,28 +1,27 @@
 # /usr/bin/python
-# Last Change: Fri Jun 08 08:00 PM 2007 J
+# Last Change: Sat Jun 09 10:00 PM 2007 J
 
-#---------------------------------------------
-# This is not meant to be used yet !!!! I am 
-# not sure how to integrate this stuff inside
-# the package yet. The cases are:
-#   - we have a set of data, and we want to test online EM 
-#   compared to normal EM 
-#   - we do not have all the data before putting them in online EM:
-#   eg current frame depends on previous frame in some way.
+# This is not meant to be used yet!!! I am not sure how to integrate this
+# stuff into the package yet. The cases are:
+#   - we have a set of data, and we want to compare online EM to normal EM
+#   - we do not have all the data before feeding them to online EM: e.g. the
+#   current frame depends on the previous frame in some way.
 
 # TODO:
 #   - Add biblio
-#   - Look back at articles for discussion for init, regularization and 
+#   - Look back at the articles for discussion of init, regularization and
 #   convergence rates
-#   - the function sufficient_statistics does not really return SS. This is not a
-#   big problem, but it would be better to really return them as the name implied.
+#   - the function sufficient_statistics does not really return SS. This is not
+#   a big problem, but it would be better to really return them, as the name
+#   implies.
 
 import numpy as N
 from numpy import mean
 from numpy.testing import assert_array_almost_equal, assert_array_equal
 
-from gmm_em import ExpMixtureModel, GMM, EM
-from gauss_mix import GM
+from gmm_em import ExpMixtureModel#, GMM, EM
+#from gauss_mix import GM
 from scipy.cluster.vq import kmeans2 as kmean
 import densities2 as D
 
@@ -60,22 +59,24 @@
         k   = self.gm.k
         d   = self.gm.d
         if self.gm.mode == 'diag':
-            w           = N.ones(k) / k
+            w  = N.ones(k) / k
 
             # Init the internal state of EM
-            self.cx     = N.outer(w, mean(init_data, 0))
-            self.cxx    = N.outer(w, mean(init_data ** 2, 0))
+            self.cx = N.outer(w, mean(init_data, 0))
+            self.cxx = N.outer(w, mean(init_data ** 2, 0))
 
             # w, mu and va init is the same that in the standard case
-            (code, label)   = kmean(init_data, init_data[0:k, :], iter = niter, minit = 'matrix')
-            mu          = code.copy()
-            va          = N.zeros((k, d))
+            (code, label) = kmean(init_data, init_data[0:k, :], iter = 10,
+                    minit = 'matrix')
+            mu = code.copy()
+            va = N.zeros((k, d))
             for i in range(k):
                 for j in range(d):
-                    va [i,j] = N.cov(init_data[N.where(label==i), j], rowvar = 0)
+                    va[i, j] = N.cov(init_data[N.where(label==i), j],
+                            rowvar = 0)
         else:
             raise OnGmmParamError("""init_online not implemented for
-                    mode %s yet""", mode)
+                    mode %s yet""", self.gm.mode)
 
         self.gm.set_param(w, mu, va)
         # c* are the parameters which are computed at every step (ie
@@ -95,22 +96,24 @@
         k   = self.gm.k
         d   = self.gm.d
         if self.gm.mode == 'diag':
-            w           = N.ones(k) / k
+            w  = N.ones(k) / k
 
             # Init the internal state of EM
-            self.cx     = N.outer(w, mean(init_data, 0))
-            self.cxx    = N.outer(w, mean(init_data ** 2, 0))
+            self.cx = N.outer(w, mean(init_data, 0))
+            self.cxx = N.outer(w, mean(init_data ** 2, 0))
 
             # w, mu and va init is the same that in the standard case
-            (code, label)   = kmean(init_data, init_data[0:k, :], iter = niter, minit = 'matrix')
-            mu          = code.copy()
-            va          = N.zeros((k, d))
+            (code, label) = kmean(init_data, init_data[0:k, :], 
+                    iter = niter, minit = 'matrix')
+            mu = code.copy()
+            va = N.zeros((k, d))
             for i in range(k):
                 for j in range(d):
-                    va [i,j] = N.cov(init_data[N.where(label==i), j], rowvar = 0)
+                    va[i, j] = N.cov(init_data[N.where(label==i), j], 
+                            rowvar = 0)
         else:
             raise OnGmmParamError("""init_online not implemented for
-                    mode %s yet""", mode)
+                    mode %s yet""", self.gm.mode)
 
         self.gm.set_param(w, mu, va)
         # c* are the parameters which are computed at every step (ie
@@ -278,132 +281,133 @@
         
 
 if __name__ == '__main__':
-    d       = 1
-    k       = 2
-    mode    = 'diag'
-    nframes = int(5e3)
-    emiter  = 4
-    seed(5)
+    pass
+    #d       = 1
+    #k       = 2
+    #mode    = 'diag'
+    #nframes = int(5e3)
+    #emiter  = 4
+    #seed(5)
 
-    #+++++++++++++++++++++++++++++++++++++++++++++++++
-    # Generate a model with k components, d dimensions
-    #+++++++++++++++++++++++++++++++++++++++++++++++++
-    w, mu, va   = GM.gen_param(d, k, mode, spread = 1.5)
-    gm          = GM.fromvalues(w, mu, va)
-    # Sample nframes frames  from the model
-    data        = gm.sample(nframes)
+    ##+++++++++++++++++++++++++++++++++++++++++++++++++
+    ## Generate a model with k components, d dimensions
+    ##+++++++++++++++++++++++++++++++++++++++++++++++++
+    #w, mu, va   = GM.gen_param(d, k, mode, spread = 1.5)
+    #gm          = GM.fromvalues(w, mu, va)
+    ## Sample nframes frames  from the model
+    #data        = gm.sample(nframes)
 
-    #++++++++++++++++++++++++++++++++++++++++++
-    # Approximate the models with classical EM
-    #++++++++++++++++++++++++++++++++++++++++++
-    # Init the model
-    lgm = GM(d, k, mode)
-    gmm = GMM(lgm, 'kmean')
-    gmm.init(data)
+    ##++++++++++++++++++++++++++++++++++++++++++
+    ## Approximate the models with classical EM
+    ##++++++++++++++++++++++++++++++++++++++++++
+    ## Init the model
+    #lgm = GM(d, k, mode)
+    #gmm = GMM(lgm, 'kmean')
+    #gmm.init(data)
 
-    gm0    = copy.copy(gmm.gm)
-    # The actual EM, with likelihood computation
-    like    = N.zeros(emiter)
-    for i in range(emiter):
-        g, tgd  = gmm.sufficient_statistics(data)
-        like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
-        gmm.update_em(data, g)
+    #gm0    = copy.copy(gmm.gm)
+    ## The actual EM, with likelihood computation
+    #like    = N.zeros(emiter)
+    #for i in range(emiter):
+    #    g, tgd  = gmm.sufficient_statistics(data)
+    #    like[i] = N.sum(N.log(N.sum(tgd, 1)), axis = 0)
+    #    gmm.update_em(data, g)
 
-    #++++++++++++++++++++++++++++++++++++++++
-    # Approximate the models with online EM
-    #++++++++++++++++++++++++++++++++++++++++
-    ogm     = GM(d, k, mode)
-    ogmm    = OnGMM(ogm, 'kmean')
-    init_data   = data[0:nframes / 20, :]
-    ogmm.init(init_data)
+    ##++++++++++++++++++++++++++++++++++++++++
+    ## Approximate the models with online EM
+    ##++++++++++++++++++++++++++++++++++++++++
+    #ogm     = GM(d, k, mode)
+    #ogmm    = OnGMM(ogm, 'kmean')
+    #init_data   = data[0:nframes / 20, :]
+    #ogmm.init(init_data)
 
-    # Forgetting param
-    ku		= 0.005
-    t0		= 200
-    lamb	= 1 - 1/(N.arange(-1, nframes-1) * ku + t0)
-    nu0		= 0.2
-    nu		= N.zeros((len(lamb), 1))
-    nu[0]	= nu0
-    for i in range(1, len(lamb)):
-        nu[i]	= 1./(1 + lamb[i] / nu[i-1])
+    ## Forgetting param
+    #ku      = 0.005
+    #t0      = 200
+    #lamb    = 1 - 1/(N.arange(-1, nframes-1) * ku + t0)
+    #nu0     = 0.2
+    #nu      = N.zeros((len(lamb), 1))
+    #nu[0]   = nu0
+    #for i in range(1, len(lamb)):
+    #    nu[i] = 1./(1 + lamb[i] / nu[i-1])
 
-    print "meth1"
-    # object version of online EM
-    for t in range(nframes):
-        ogmm.compute_sufficient_statistics_frame(data[t], nu[t])
-        ogmm.update_em_frame()
+    #print "meth1"
+    ## object version of online EM
+    #for t in range(nframes):
+    #    ogmm.compute_sufficient_statistics_frame(data[t], nu[t])
+    #    ogmm.update_em_frame()
 
-    ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva)
+    #ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva)
 
-    # 1d optimized version
-    ogm2        = GM(d, k, mode)
-    ogmm2       = OnGMM1d(ogm2, 'kmean')
-    ogmm2.init(init_data[:, 0])
+    ## 1d optimized version
+    #ogm2        = GM(d, k, mode)
+    #ogmm2       = OnGMM1d(ogm2, 'kmean')
+    #ogmm2.init(init_data[:, 0])
 
-    print "meth2"
-    # object version of online EM
-    for t in range(nframes):
-        ogmm2.compute_sufficient_statistics_frame(data[t, 0], nu[t])
-        ogmm2.update_em_frame()
+    #print "meth2"
+    ## object version of online EM
+    #for t in range(nframes):
+    #    ogmm2.compute_sufficient_statistics_frame(data[t, 0], nu[t])
+    #    ogmm2.update_em_frame()
 
-    #ogmm2.gm.set_param(ogmm2.cw, ogmm2.cmu, ogmm2.cva)
+    ##ogmm2.gm.set_param(ogmm2.cw, ogmm2.cmu, ogmm2.cva)
 
-    print ogmm.cw
-    print ogmm2.cw
-    #+++++++++++++++
-    # Draw the model
-    #+++++++++++++++
-    print "drawing..."
-    import pylab as P
-    P.subplot(2, 1, 1)
+    #print ogmm.cw
+    #print ogmm2.cw
+    ##+++++++++++++++
+    ## Draw the model
+    ##+++++++++++++++
+    #print "drawing..."
+    #import pylab as P
+    #P.subplot(2, 1, 1)
 
-    if not d == 1:
-        # Draw what is happening
-        P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
+    #if not d == 1:
+    #    # Draw what is happening
+    #    P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
 
-        h   = gm.plot()    
-        [i.set_color('g') for i in h]
-        h[0].set_label('true confidence ellipsoides')
+    #    h   = gm.plot()    
+    #    [i.set_color('g') for i in h]
+    #    h[0].set_label('true confidence ellipsoids')
 
-        h   = gm0.plot()    
-        [i.set_color('k') for i in h]
-        h[0].set_label('initial confidence ellipsoides')
+    #    h   = gm0.plot()    
+    #    [i.set_color('k') for i in h]
+    #    h[0].set_label('initial confidence ellipsoids')
 
-        h   = lgm.plot()    
-        [i.set_color('r') for i in h]
-        h[0].set_label('confidence ellipsoides found by EM')
+    #    h   = lgm.plot()    
+    #    [i.set_color('r') for i in h]
+    #    h[0].set_label('confidence ellipsoids found by EM')
 
-        h   = ogmm.gm.plot()    
-        [i.set_color('m') for i in h]
-        h[0].set_label('confidence ellipsoides found by Online EM')
+    #    h   = ogmm.gm.plot()    
+    #    [i.set_color('m') for i in h]
+    #    h[0].set_label('confidence ellipsoids found by Online EM')
 
-        # P.legend(loc = 0)
-    else:
-        # Real confidence ellipses
-        h   = gm.plot1d()
-        [i.set_color('g') for i in h['pdf']]
-        h['pdf'][0].set_label('true pdf')
+    #    # P.legend(loc = 0)
+    #else:
+    #    # Real confidence ellipses
+    #    h   = gm.plot1d()
+    #    [i.set_color('g') for i in h['pdf']]
+    #    h['pdf'][0].set_label('true pdf')
 
-        # Initial confidence ellipses as found by kmean
-        h0  = gm0.plot1d()
-        [i.set_color('k') for i in h0['pdf']]
-        h0['pdf'][0].set_label('initial pdf')
+    #    # Initial confidence ellipses as found by kmean
+    #    h0  = gm0.plot1d()
+    #    [i.set_color('k') for i in h0['pdf']]
+    #    h0['pdf'][0].set_label('initial pdf')
 
-        # Values found by EM
-        hl  = lgm.plot1d(fill = 1, level = 0.66)
-        [i.set_color('r') for i in hl['pdf']]
-        hl['pdf'][0].set_label('pdf found by EM')
+    #    # Values found by EM
+    #    hl  = lgm.plot1d(fill = 1, level = 0.66)
+    #    [i.set_color('r') for i in hl['pdf']]
+    #    hl['pdf'][0].set_label('pdf found by EM')
 
-        P.legend(loc = 0)
+    #    P.legend(loc = 0)
 
-        # Values found by Online EM
-        hl  = ogmm.gm.plot1d(fill = 1, level = 0.66)
-        [i.set_color('m') for i in hl['pdf']]
-        hl['pdf'][0].set_label('pdf found by Online EM')
+    #    # Values found by Online EM
+    #    hl  = ogmm.gm.plot1d(fill = 1, level = 0.66)
+    #    [i.set_color('m') for i in hl['pdf']]
+    #    hl['pdf'][0].set_label('pdf found by Online EM')
 
-        P.legend(loc = 0)
+    #    P.legend(loc = 0)
 
-    P.subplot(2, 1, 2)
-    P.plot(nu)
-    P.title('Learning rate')
-    P.show()
+    #P.subplot(2, 1, 2)
+    #P.plot(nu)
+    #P.title('Learning rate')
+    #P.show()
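
A note on the forgetting schedule used in the deactivated demo above: the
per-frame step size nu[t] follows the recursion nu[t] = 1/(1 + lamb[t]/nu[t-1])
with lamb[t] = 1 - 1/(ku*(t-1) + t0), which is equivalent to
nu[t] = nu[t-1]/(nu[t-1] + lamb[t]); since lamb[t] tends to 1, nu decays
roughly like the classic 1/t stochastic approximation step. A standalone
sketch with the same constants as the demo, for illustration only:

    import numpy as N

    nframes = int(5e3)
    ku, t0, nu0 = 0.005, 200, 0.2

    # lamb[t] = 1 - 1/(ku*(t-1) + t0) approaches 1 as t grows
    lamb = 1 - 1 / (N.arange(-1, nframes - 1) * ku + t0)

    # nu[t] = nu[t-1] / (nu[t-1] + lamb[t]): a smoothed 1/t decay
    nu = N.zeros(nframes)
    nu[0] = nu0
    for i in range(1, nframes):
        nu[i] = 1. / (1 + lamb[i] / nu[i - 1])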


