[Scipy-svn] r3085 - trunk/Lib/sandbox/pyem

scipy-svn@scip... scipy-svn@scip...
Sat Jun 9 06:39:04 CDT 2007


Author: cdavid
Date: 2007-06-09 06:38:41 -0500 (Sat, 09 Jun 2007)
New Revision: 3085

Modified:
   trunk/Lib/sandbox/pyem/densities.py
   trunk/Lib/sandbox/pyem/gauss_mix.py
   trunk/Lib/sandbox/pyem/misc.py
Log:
Clean up densities.py code, set docstrings to rest

Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py	2007-06-09 08:37:59 UTC (rev 3084)
+++ trunk/Lib/sandbox/pyem/densities.py	2007-06-09 11:38:41 UTC (rev 3085)
@@ -1,11 +1,15 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Sat Jun 09 03:00 PM 2007 J
+# Last Change: Sat Jun 09 08:00 PM 2007 J
+"""This module implements various bsic functions related to multivariate
+gaussian, such as pdf estimation, confidence interval/ellipsoids, etc..."""
 
+__docformat__ = 'restructuredtext'
+
 import numpy as N
 import numpy.linalg as lin
-from numpy.random import randn
+#from numpy.random import randn
 from scipy.stats import chi2
 import misc
 
@@ -18,6 +22,7 @@
         message -- explanation of the error"""
     def __init__(self, message):
         self.message    = message
+        Exception.__init__(self)
     
     def __str__(self):
         return self.message
@@ -25,33 +30,50 @@
 # The following function do all the fancy stuff to check that parameters
 # are Ok, and call the right implementation if args are OK.
 def gauss_den(x, mu, va, log = False):
-    """ Compute multivariate Gaussian density at points x for 
+    """Compute multivariate Gaussian density at points x for 
     mean mu and variance va.
     
+    :Parameters:
+        x : ndarray
+            points where to estimate the pdf.  each row of the array is one
+            point of d dimension
+        mu : ndarray
+            mean of the pdf. Should have same dimension d than points in x.
+        va : ndarray
+            variance of the pdf. If va has d elements, va is interpreted as the
+            diagonal elements of the actual covariance matrix. Otherwise,
+            should be a dxd matrix (and positive definite).
+        log : boolean
+            if True, returns the log-pdf instead of the pdf.
+
+    :Returns:
+        pdf : ndarray
+            Returns a rank 1 array of the pdf at points x.
+
+    Notes
+    -----
     Vector are row vectors, except va which can be a matrix
-    (row vector variance for diagonal variance)
+    (row vector variance for diagonal variance)."""
     
-    If log is True, than the log density is returned 
-    (useful for underflow ?)"""
-    mu  = N.atleast_2d(mu)
-    va  = N.atleast_2d(va)
-    x   = N.atleast_2d(x)
+    lmu  = N.atleast_2d(mu)
+    lva  = N.atleast_2d(va)
+    lx   = N.atleast_2d(x)
     
     #=======================#
     # Checking parameters   #
     #=======================#
-    if len(N.shape(mu)) != 2:
+    if len(N.shape(lmu)) != 2:
         raise DenError("mu is not rank 2")
         
-    if len(N.shape(va)) != 2:
+    if len(N.shape(lva)) != 2:
         raise DenError("va is not rank 2")
         
-    if len(N.shape(x)) != 2:
+    if len(N.shape(lx)) != 2:
         raise DenError("x is not rank 2")
         
-    (n, d)      = x.shape
-    (dm0, dm1)  = mu.shape
-    (dv0, dv1)  = va.shape
+    d = N.shape(lx)[1]
+    (dm0, dm1) = N.shape(lmu)
+    (dv0, dv1) = N.shape(lva)
     
     # Check x and mu same dimension
     if dm0 != 1:
@@ -73,13 +95,13 @@
     #===============#
     if d == 1:
         # scalar case
-        return _scalar_gauss_den(x[:, 0], mu[0, 0], va[0, 0], log)
+        return _scalar_gauss_den(lx[:, 0], lmu[0, 0], lva[0, 0], log)
     elif dv0 == 1:
         # Diagonal matrix case
-        return _diag_gauss_den(x, mu, va, log)
+        return _diag_gauss_den(lx, lmu, lva, log)
     elif dv1 == dv0:
         # full case
-        return  _full_gauss_den(x, mu, va, log)
+        return  _full_gauss_den(lx, lmu, lva, log)
     else:
         raise DenError("variance mode not recognized, this is a bug")
 
@@ -115,20 +137,20 @@
     Call gauss_den instead"""
     # Diagonal matrix case
     d   = mu.size
-    n   = x.shape[0]
+    #n   = x.shape[0]
     if not log:
-        inva    = 1/va[0,0]
-        fac     = (2*N.pi) ** (-d/2.0) * N.sqrt(inva)
-        y       =  (x[:,0] - mu[0,0]) ** 2 * inva * -0.5
+        inva = 1/va[0, 0]
+        fac = (2*N.pi) ** (-d/2.0) * N.sqrt(inva)
+        y =  (x[:, 0] - mu[0, 0]) ** 2 * inva * -0.5
         for i in range(1, d):
-            inva    = 1/va[0,i]
-            fac     *= N.sqrt(inva)
-            y       += (x[:,i] - mu[0,i]) ** 2 * inva * -0.5
-        y   = fac * N.exp(y)
+            inva = 1/va[0, i]
+            fac *= N.sqrt(inva)
+            y += (x[:, i] - mu[0, i]) ** 2 * inva * -0.5
+        y = fac * N.exp(y)
     else:
-        y   = _scalar_gauss_den(x[:,0], mu[0,0], va[0,0], log)
+        y = _scalar_gauss_den(x[:, 0], mu[0, 0], va[0, 0], log)
         for i in range(1, d):
-            y    +=  _scalar_gauss_den(x[:,i], mu[0,i], va[0,i], log)
+            y +=  _scalar_gauss_den(x[:, i], mu[0, i], va[0, i], log)
     return y
 
 def _full_gauss_den(x, mu, va, log):
@@ -166,31 +188,46 @@
     return y
 
 # To get coordinatea of a confidence ellipse from multi-variate gaussian pdf
-def gauss_ell(mu, va, dim = misc._DEF_VIS_DIM, \
-        npoints = misc._DEF_ELL_NP, \
-        level = misc._DEF_LEVEL):
-    """ Given a mean and covariance for multi-variate
-    gaussian, returns npoints points for the ellipse
-    of confidence given by level (all points will be inside
-    the ellipsoides with a probability equal to level)
+def gauss_ell(mu, va, dim = misc.DEF_VIS_DIM, npoints = misc.DEF_ELL_NP, \
+        level = misc.DEF_LEVEL):
+    """Given a mean and covariance for multi-variate
+    gaussian, returns the coordinates of the confidense ellipsoid.
     
-    Returns the coordinate x and y of the ellipse"""
+    Compute npoints coordinates for the ellipse of confidence of given level
+    (all points will be inside the ellipsoides with a probability equal to
+    level).
+    
+    :Parameters:
+        mu : ndarray
+            mean of the pdf
+        va : ndarray
+            variance of the pdf
+        dim : sequence
+            sequences of two integers which represent the dimensions where to
+            project the ellipsoid.
+        npoints: int
+            number of points to generate for the ellipse.
+        level : float
+            level of confidence (between 0 and 1).
+
+    :Returns:
+        Returns the coordinate x and y of the ellipse."""
     if level >= 1 or level <= 0:
         raise ValueError("level should be a scale strictly between 0 and 1.""")
     
-    mu      = N.atleast_1d(mu)
-    va      = N.atleast_1d(va)
-    d       = mu.shape[0]
-    c       = N.array(dim)
+    mu = N.atleast_1d(mu)
+    va = N.atleast_1d(va)
+    d = N.shape(mu)[0]
+    c = N.array(dim)
 
     if N.any(c < 0) or N.any(c >= d):
         raise ValueError("dim elements should be >= 0 and < %d (dimension"\
                 " of the variance)" % d)
-    if mu.size == va.size:
+    if N.size(mu) == N.size(va):
         mode    = 'diag'
     else:
-        if va.ndim == 2:
-            if va.shape[0] == va.shape[1]:
+        if N.ndim(va) == 2:
+            if N.shape(va)[0] == N.shape(va)[1]:
                 mode    = 'full'
             else:
                 raise DenError("variance not square")
@@ -215,7 +252,7 @@
         elps    = N.outer(mu, N.ones(npoints))
         elps    += N.dot(N.diag(N.sqrt(va)), circle)
     elif mode == 'full':
-        va  = va[c,:][:,c]
+        va  = va[c, :][:, c]
         # Method: compute the cholesky decomp of each cov matrix, that is
         # compute cova such as va = cova * cova' 
         # WARN: scipy is different than matlab here, as scipy computes a lower
@@ -227,22 +264,38 @@
         elps    = N.outer(mu, N.ones(npoints))
         elps    += N.dot(cova, circle)
     else:
-        raise DenParam("var mode not recognized")
+        raise ValueError("var mode not recognized")
 
     return elps[0, :], elps[1, :]
 
 def multiple_gauss_den(data, mu, va):
     """Helper function to generate several Gaussian
-    pdf (different parameters) from the same data"""
-    mu  = N.atleast_2d(mu)
-    va  = N.atleast_2d(va)
+    pdf (different parameters) at the same points
 
-    K   = mu.shape[0]
-    n   = data.shape[0]
-    d   = mu.shape[1]
+    :Parameters:
+        data : ndarray
+            points where to estimate the pdfs (n,d).
+        mu : ndarray
+            mean of the pdf, of shape (k,d). One row of dimension d per
+            different component, the number of rows k being the number of
+            component
+        va : ndarray
+            variance of the pdf. One row per different component for diagonal
+            covariance (k, d), or d rows per component for full matrix pdf
+            (k*d,d).
+
+    :Returns:
+        Returns a (n, k) array, each column i being the pdf of the ith mean and
+        ith variance."""
+    mu = N.atleast_2d(mu)
+    va = N.atleast_2d(va)
+
+    K = N.shape(mu)[0]
+    n = N.shape(data)[0]
+    d = N.shape(mu)[1]
     
-    y   = N.zeros((K, n))
-    if mu.size == va.size:
+    y = N.zeros((K, n))
+    if N.size(mu) == N.size(va):
         for i in range(K):
             y[i] = gauss_den(data, mu[i, :], va[i, :])
         return y.T
@@ -252,39 +305,40 @@
         return y.T
 
 if __name__ == "__main__":
-    import pylab
+    pass
+    ## import pylab
 
-    #=========================================
-    # Test plotting a simple diag 2d variance:
-    #=========================================
-    va  = N.array([5, 3])
-    mu  = N.array([2, 3])
+    ## #=========================================
+    ## # Test plotting a simple diag 2d variance:
+    ## #=========================================
+    ## va  = N.array([5, 3])
+    ## mu  = N.array([2, 3])
 
-    # Generate a multivariate gaussian of mean mu and covariance va
-    X       = randn(1e3, 2)
-    Yc      = N.dot(N.diag(N.sqrt(va)), X.transpose())
-    Yc      = Yc.transpose() + mu
+    ## # Generate a multivariate gaussian of mean mu and covariance va
+    ## X       = randn(1e3, 2)
+    ## Yc      = N.dot(N.diag(N.sqrt(va)), X.transpose())
+    ## Yc      = Yc.transpose() + mu
 
-    # Plotting
-    Xe, Ye  = gauss_ell(mu, va, npoints = 100)
-    pylab.figure()
-    pylab.plot(Yc[:, 0], Yc[:, 1], '.')
-    pylab.plot(Xe, Ye, 'r')
+    ## # Plotting
+    ## Xe, Ye  = gauss_ell(mu, va, npoints = 100)
+    ## pylab.figure()
+    ## pylab.plot(Yc[:, 0], Yc[:, 1], '.')
+    ## pylab.plot(Xe, Ye, 'r')
 
-    #=========================================
-    # Test plotting a simple full 2d variance:
-    #=========================================
-    va  = N.array([[0.2, 0.1],[0.1, 0.5]])
-    mu  = N.array([0, 3])
+    ## #=========================================
+    ## # Test plotting a simple full 2d variance:
+    ## #=========================================
+    ## va  = N.array([[0.2, 0.1],[0.1, 0.5]])
+    ## mu  = N.array([0, 3])
 
-    # Generate a multivariate gaussian of mean mu and covariance va
-    X       = randn(1e3, 2)
-    Yc      = N.dot(lin.cholesky(va), X.transpose())
-    Yc      = Yc.transpose() + mu
+    ## # Generate a multivariate gaussian of mean mu and covariance va
+    ## X       = randn(1e3, 2)
+    ## Yc      = N.dot(lin.cholesky(va), X.transpose())
+    ## Yc      = Yc.transpose() + mu
 
-    # Plotting
-    Xe, Ye  = gauss_ell(mu, va, npoints = 100, level=0.95)
-    pylab.figure()
-    pylab.plot(Yc[:, 0], Yc[:, 1], '.')
-    pylab.plot(Xe, Ye, 'r')
-    pylab.show()
+    ## # Plotting
+    ## Xe, Ye  = gauss_ell(mu, va, npoints = 100, level=0.95)
+    ## pylab.figure()
+    ## pylab.plot(Yc[:, 0], Yc[:, 1], '.')
+    ## pylab.plot(Xe, Ye, 'r')
+    ## pylab.show()

Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-09 08:37:59 UTC (rev 3084)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py	2007-06-09 11:38:41 UTC (rev 3085)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Sat Jun 09 03:00 PM 2007 J
+# Last Change: Sat Jun 09 08:00 PM 2007 J
 
 # Module to implement GaussianMixture class.
 
@@ -151,8 +151,8 @@
 
         return X
 
-    def conf_ellipses(self, dim = misc._DEF_VIS_DIM, npoints = misc._DEF_ELL_NP, \
-        level = misc._DEF_LEVEL):
+    def conf_ellipses(self, dim = misc.DEF_VIS_DIM, npoints = misc.DEF_ELL_NP,
+            level = misc.DEF_LEVEL):
         """Returns a list of confidence ellipsoids describing the Gmm
         defined by mu and va. Check densities.gauss_ell for details
 
@@ -262,8 +262,8 @@
     #=================
     # Plotting methods
     #=================
-    def plot(self, dim = misc._DEF_VIS_DIM, npoints = misc._DEF_ELL_NP, 
-            level = misc._DEF_LEVEL):
+    def plot(self, dim = misc.DEF_VIS_DIM, npoints = misc.DEF_ELL_NP, 
+            level = misc.DEF_LEVEL):
         """Plot the ellipsoides directly for the model
         
         Returns a list of lines, so that their style can be modified. By default,
@@ -371,7 +371,7 @@
 
         return retval
 
-    def density_on_grid(self, dim = misc._DEF_VIS_DIM, nx = 50, ny = 50,
+    def density_on_grid(self, dim = misc.DEF_VIS_DIM, nx = 50, ny = 50,
             maxlevel = 0.95):
         """Do all the necessary computation for contour plot of mixture's density.
         
@@ -401,7 +401,7 @@
         V.extend(N.linspace(0, N.max(lden), 4).tolist())
         return X, Y, lden, N.array(V)
 
-    def _densityctr(self, xrange, yrange, dim = misc._DEF_VIS_DIM):
+    def _densityctr(self, xrange, yrange, dim = misc.DEF_VIS_DIM):
         """Helper function to compute density contours on a grid."""
         gr = N.meshgrid(xrange, yrange)
         X = gr[0].flatten()

Modified: trunk/Lib/sandbox/pyem/misc.py
===================================================================
--- trunk/Lib/sandbox/pyem/misc.py	2007-06-09 08:37:59 UTC (rev 3084)
+++ trunk/Lib/sandbox/pyem/misc.py	2007-06-09 11:38:41 UTC (rev 3085)
@@ -1,12 +1,12 @@
-# Last Change: Sat Jun 09 12:00 PM 2007 J
+# Last Change: Sat Jun 09 07:00 PM 2007 J
 
 #========================================================
 # Constants used throughout the module (def args, etc...)
 #========================================================
 # This is the default dimension for representing confidence ellipses
-_DEF_VIS_DIM = [0, 1]
-_DEF_ELL_NP = 100
-_DEF_LEVEL = 0.39
+DEF_VIS_DIM = [0, 1]
+DEF_ELL_NP = 100
+DEF_LEVEL = 0.39
 #=====================================================================
 # "magic number", that is number used to control regularization and co
 # Change them at your risk !



More information about the Scipy-svn mailing list