[Scipy-svn] r2320 - in trunk/Lib/sandbox/pyem: . tests

scipy-svn at scipy.org scipy-svn at scipy.org
Thu Nov 16 06:27:09 CST 2006


Author: cdavid
Date: 2006-11-16 06:26:52 -0600 (Thu, 16 Nov 2006)
New Revision: 2320

Added:
   trunk/Lib/sandbox/pyem/basic_example1.py
   trunk/Lib/sandbox/pyem/basic_example2.py
   trunk/Lib/sandbox/pyem/basic_example3.py
   trunk/Lib/sandbox/pyem/demo1.py
   trunk/Lib/sandbox/pyem/demo2.py
   trunk/Lib/sandbox/pyem/examples.py
   trunk/Lib/sandbox/pyem/tests/test_examples.py
Removed:
   trunk/Lib/sandbox/pyem/example.py
   trunk/Lib/sandbox/pyem/example2.py
Modified:
   trunk/Lib/sandbox/pyem/Changelog
   trunk/Lib/sandbox/pyem/__init__.py
   trunk/Lib/sandbox/pyem/gauss_mix.py
   trunk/Lib/sandbox/pyem/gmm_em.py
   trunk/Lib/sandbox/pyem/tests/test_online_em.py
Log:
Various bug fixes (see Changelog)

Modified: trunk/Lib/sandbox/pyem/Changelog
===================================================================
--- trunk/Lib/sandbox/pyem/Changelog	2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/Changelog	2006-11-16 12:26:52 UTC (rev 2320)
@@ -1,3 +1,11 @@
+pyem (0.5.6) Thu, 16 Nov 2006 21:02:02 +0900
+
+	* correct examples
+	* correct exceptions msg strings in gauss_mix, which 
+	were buggy
+	* add examples from website to the package, so that above errors
+	do not appear again
+
 pyem (0.5.6) Thu, 16 Nov 2006 14:18:19 +0900
 
 	* bump to 0.5.6

Modified: trunk/Lib/sandbox/pyem/__init__.py
===================================================================
--- trunk/Lib/sandbox/pyem/__init__.py	2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/__init__.py	2006-11-16 12:26:52 UTC (rev 2320)
@@ -1,11 +1,12 @@
 #! /usr/bin/env python
-# Last Change: Fri Oct 20 11:00 AM 2006 J
+# Last Change: Thu Nov 16 09:00 PM 2006 J
 
 from info import __doc__
 
 from gauss_mix import GmParamError, GM
 from gmm_em import GmmParamError, GMM, EM
 from online_em import OnGMM as _OnGMM
+import examples as _examples
 
 __all__ = filter(lambda s:not s.startswith('_'),dir())
 

Added: trunk/Lib/sandbox/pyem/basic_example1.py
===================================================================
--- trunk/Lib/sandbox/pyem/basic_example1.py	2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/basic_example1.py	2006-11-16 12:26:52 UTC (rev 2320)
@@ -0,0 +1,48 @@
+import numpy as N
+import pylab as P
+from scipy.sandbox.pyem import GM
+
+#------------------------------
+# Hyper parameters:
+#   - K:    number of clusters
+#   - d:    dimension
+k   = 3
+d   = 2
+
+#-------------------------------------------------------
+# Values for weights, mean and (diagonal) variances
+#   - the weights are an array of rank 1
+#   - mean is expected to be rank 2 with one row for one component
+#   - variances are also expected to be rank 2. For diagonal, one row
+#   is one diagonal, for full, the first d rows are the first variance,
+#   etc... In this case, the variance matrix should be k*d rows and d 
+#   columns
+w   = N.array([0.2, 0.45, 0.35])
+mu  = N.array([[4.1, 3], [1, 5], [-2, -3]])
+va  = N.array([[1, 1.5], [3, 4], [2, 3.5]])
+
+#-----------------------------------------
+# First method: directly from parameters:
+# Both methods are equivalent.
+gm      = GM.fromvalues(w, mu, va)
+
+#-------------------------------------
+# Second method to build a GM instance:
+gm      = GM(d, k, mode = 'diag')
+# The set_params checks that w, mu, and va correspond to k, d and m
+gm.set_param(w, mu, va)
+
+# Once set_params is called, both methods are equivalent. The 2d
+# method is useful when using a GM object for learning (where
+# the learner class will set the params), whereas the first one
+# is useful when there is a need to quickly sample a model
+# from existing values, without a need to give the hyper parameters
+
+# Create a Gaussian Mixture from the parameters, and sample
+# 1000 items from it (one row = one 2 dimension sample)
+data    = gm.sample(1000)
+
+# Plot the samples
+P.plot(data[:, 0], data[:, 1], '.')
+# Plot the ellipsoids of confidence with a level a 75 %
+gm.plot(level = 0.75)

Added: trunk/Lib/sandbox/pyem/basic_example2.py
===================================================================
--- trunk/Lib/sandbox/pyem/basic_example2.py	2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/basic_example2.py	2006-11-16 12:26:52 UTC (rev 2320)
@@ -0,0 +1,45 @@
+from numpy.random import seed
+
+from scipy.sandbox.pyem import GM, GMM, EM
+import copy
+
+# To reproduce results, fix the random seed
+seed(1)
+
+#+++++++++++++++++++++++++++++
+# Meta parameters of the model
+#   - k: Number of components
+#   - d: dimension of each Gaussian
+#   - mode: Mode of covariance matrix: full or diag (string)
+#   - nframes: number of frames (frame = one data point = one
+#   row of d elements)
+k       = 2
+d       = 2
+mode    = 'diag'
+nframes = 1e3
+
+#+++++++++++++++++++++++++++++++++++++++++++
+# Create an artificial GM model, samples it
+#+++++++++++++++++++++++++++++++++++++++++++
+w, mu, va   = GM.gen_param(d, k, mode, spread = 1.5)
+gm          = GM.fromvalues(w, mu, va)
+
+# Sample nframes frames  from the model
+data    = gm.sample(nframes)
+
+#++++++++++++++++++++++++
+# Learn the model with EM
+#++++++++++++++++++++++++
+
+# Create a Model from a Gaussian mixture with kmean initialization
+lgm = GM(d, k, mode)
+gmm = GMM(lgm, 'kmean')
+
+# The actual EM, with likelihood computation. The threshold
+# is compared to the (linearly approximated) derivative of the likelihood
+em      = EM()
+like    = em.train(data, gmm, maxiter = 30, thresh = 1e-8)
+
+# The computed parameters are in gmm.gm, which is the same as lgm
+# (remember, python does not copy most objects by default). You can for example
+# plot lgm against gm to compare

Added: trunk/Lib/sandbox/pyem/basic_example3.py
===================================================================
--- trunk/Lib/sandbox/pyem/basic_example3.py	2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/basic_example3.py	2006-11-16 12:26:52 UTC (rev 2320)
@@ -0,0 +1,64 @@
+import numpy as N
+from numpy.random import seed
+
+from scipy.sandbox.pyem import GM, GMM, EM
+import copy
+
+seed(2)
+
+k       = 4
+d       = 2
+mode    = 'diag'
+nframes = 1e3
+
+#+++++++++++++++++++++++++++++++++++++++++++
+# Create an artificial GMM model, samples it
+#+++++++++++++++++++++++++++++++++++++++++++
+w, mu, va   = GM.gen_param(d, k, mode, spread = 1.0)
+gm          = GM.fromvalues(w, mu, va)
+
+# Sample nframes frames  from the model
+data    = gm.sample(nframes)
+
+#++++++++++++++++++++++++
+# Learn the model with EM
+#++++++++++++++++++++++++
+
+# List of learned mixtures lgm[i] is a mixture with i+1 components
+lgm     = []
+kmax    = 6
+bics    = N.zeros(kmax)
+em      = EM()
+for i in range(kmax):
+    lgm.append(GM(d, i+1, mode))
+
+    gmm = GMM(lgm[i], 'kmean')
+    em.train(data, gmm, maxiter = 30, thresh = 1e-10)
+    bics[i] = gmm.bic(data)
+
+print "Original model has %d clusters, bics says %d" % (k, N.argmax(bics)+1)
+
+#+++++++++++++++
+# Draw the model
+#+++++++++++++++
+import pylab as P
+P.subplot(3, 2, 1)
+
+for k in range(kmax):
+    P.subplot(3, 2, k+1)
+    level   = 0.9
+    P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
+
+    # h keeps the handles of the plot, so that you can modify 
+    # its parameters like label or color
+    h   = lgm[k].plot(level = level)
+    [i.set_color('r') for i in h]
+    h[0].set_label('EM confidence ellipsoides')
+
+    h   = gm.plot(level = level)
+    [i.set_color('g') for i in h]
+    h[0].set_label('Real confidence ellipsoides')
+
+P.legend(loc = 0)
+# depending on your configuration, you may have to call P.show() 
+# to actually display the figure

Copied: trunk/Lib/sandbox/pyem/demo1.py (from rev 2316, trunk/Lib/sandbox/pyem/example.py)

Copied: trunk/Lib/sandbox/pyem/demo2.py (from rev 2316, trunk/Lib/sandbox/pyem/example2.py)

Deleted: trunk/Lib/sandbox/pyem/example.py
===================================================================
--- trunk/Lib/sandbox/pyem/example.py	2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/example.py	2006-11-16 12:26:52 UTC (rev 2320)
@@ -1,109 +0,0 @@
-#! /usr/bin/env python
-
-# Example of use of pyem toolbox. Feel free to change parameters
-# such as dimension, number of components, mode of covariance.
-#
-# You can also try less trivial things such as adding outliers, sampling
-# a mixture with full covariance and estimating it with a mixture with diagonal
-# gaussians (replace the mode of the learned model lgm)
-#
-# Later, I hope to add functions for number of component estimation using eg BIC
-
-import numpy as N
-from numpy.random import seed
-
-from scipy.sandbox.pyem import GM, GMM, EM
-import copy
-
-seed(1)
-#+++++++++++++++++++++++++++++
-# Meta parameters of the model
-#   - k: Number of components
-#   - d: dimension of each Gaussian
-#   - mode: Mode of covariance matrix: full or diag (string)
-#   - nframes: number of frames (frame = one data point = one
-#   row of d elements)
-k       = 2 
-d       = 2
-mode    = 'diag'
-nframes = 1e3
-
-#+++++++++++++++++++++++++++++++++++++++++++
-# Create an artificial GM model, samples it
-#+++++++++++++++++++++++++++++++++++++++++++
-w, mu, va   = GM.gen_param(d, k, mode, spread = 1.5)
-gm          = GM.fromvalues(w, mu, va)
-
-# Sample nframes frames  from the model
-data    = gm.sample(nframes)
-
-#++++++++++++++++++++++++
-# Learn the model with EM
-#++++++++++++++++++++++++
-
-# Init the model
-lgm = GM(d, k, mode)
-gmm = GMM(lgm, 'kmean')
-gmm.init(data)
-
-# Keep a copy for drawing later
-gm0 = copy.copy(lgm)
-
-# The actual EM, with likelihood computation. The threshold
-# is compared to the (linearly appromixated) derivative of the likelihood
-em      = EM()
-like    = em.train(data, gmm, maxiter = 30, thresh = 1e-8)
-
-#+++++++++++++++
-# Draw the model
-#+++++++++++++++
-import pylab as P
-P.subplot(2, 1, 1)
-
-# Level is the confidence level for confidence ellipsoids: 1.0 means that
-# all points will be (almost surely) inside the ellipsoid
-level   = 0.8
-if not d == 1:
-    P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
-
-    # h keeps the handles of the plot, so that you can modify 
-    # its parameters like label or color
-    h   = gm.plot(level = level)
-    [i.set_color('g') for i in h]
-    h[0].set_label('true confidence ellipsoides')
-
-    # Initial confidence ellipses as found by kmean
-    h   = gm0.plot(level = level)
-    [i.set_color('k') for i in h]
-    h[0].set_label('kmean confidence ellipsoides')
-
-    # Values found by EM
-    h   = lgm.plot(level = level)
-    [i.set_color('r') for i in h]
-    h[0].set_label('EM confidence ellipsoides')
-
-    P.legend(loc = 0)
-else:
-    # The 1d plotting function is quite elaborate: the confidence
-    # interval are represented by filled areas, the pdf of the mixture and
-    # the pdf of each component is drawn (optional)
-    h   = gm.plot1d(level = level)
-    [i.set_color('g') for i in h['pdf']]
-    h['pdf'][0].set_label('true pdf')
-
-    h0  = gm0.plot1d(level = level)
-    [i.set_color('k') for i in h0['pdf']]
-    h0['pdf'][0].set_label('initial pdf')
-
-    hl  = lgm.plot1d(fill = 1, level = level)
-    [i.set_color('r') for i in hl['pdf']]
-    hl['pdf'][0].set_label('pdf found by EM')
-
-    P.legend(loc = 0)
-
-P.subplot(2, 1, 2)
-P.plot(like)
-P.title('log likelihood')
-
-P.show()
-# P.save('2d diag.png')

Deleted: trunk/Lib/sandbox/pyem/example2.py
===================================================================
--- trunk/Lib/sandbox/pyem/example2.py	2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/example2.py	2006-11-16 12:26:52 UTC (rev 2320)
@@ -1,104 +0,0 @@
-#! /usr/bin/env python
-
-# Example of use of pyem toolbox. Feel free to change parameters
-# such as dimension, number of components, mode of covariance.
-#
-# You can also try less trivial things such as adding outliers, sampling
-# a mixture with full covariance and estimating it with a mixture with diagonal
-# gaussians (replace the mode of the learned model lgm)
-#
-# Later, I hope to add functions for number of component estimation using eg BIC
-
-import numpy as N
-from numpy.random import seed
-
-from scipy.sandbox.pyem import GM, GMM, EM
-import copy
-
-seed(2)
-#+++++++++++++++++++++++++++++
-# Meta parameters of the model
-#   - k: Number of components
-#   - d: dimension of each Gaussian
-#   - mode: Mode of covariance matrix: full or diag (string)
-#   - nframes: number of frames (frame = one data point = one
-#   row of d elements)
-k       = 4 
-d       = 2
-mode    = 'diag'
-nframes = 1e3
-
-#+++++++++++++++++++++++++++++++++++++++++++
-# Create an artificial GMM model, samples it
-#+++++++++++++++++++++++++++++++++++++++++++
-w, mu, va   = GM.gen_param(d, k, mode, spread = 1.0)
-gm          = GM.fromvalues(w, mu, va)
-
-# Sample nframes frames  from the model
-data    = gm.sample(nframes)
-
-#++++++++++++++++++++++++
-# Learn the model with EM
-#++++++++++++++++++++++++
-
-lgm     = []
-kmax    = 6
-bics    = N.zeros(kmax)
-for i in range(kmax):
-    # Init the model with an empty Gaussian Mixture, and create a Gaussian 
-    # Mixture Model from it
-    lgm.append(GM(d, i+1, mode))
-    gmm = GMM(lgm[i], 'kmean')
-
-    # The actual EM, with likelihood computation. The threshold
-    # is compared to the (linearly appromixated) derivative of the likelihood
-    em      = EM()
-    em.train(data, gmm, maxiter = 30, thresh = 1e-10)
-    bics[i] = gmm.bic(data)
-
-print "Original model has %d clusters, bics says %d" % (k, N.argmax(bics)+1) 
-
-#+++++++++++++++
-# Draw the model
-#+++++++++++++++
-import pylab as P
-P.subplot(3, 2, 1)
-
-for k in range(kmax):
-    P.subplot(3, 2, k+1)
-    # Level is the confidence level for confidence ellipsoids: 1.0 means that
-    # all points will be (almost surely) inside the ellipsoid
-    level   = 0.8
-    if not d == 1:
-        P.plot(data[:, 0], data[:, 1], '.', label = '_nolegend_')
-
-        # h keeps the handles of the plot, so that you can modify 
-        # its parameters like label or color
-        h   = lgm[k].plot(level = level)
-        [i.set_color('r') for i in h]
-        h[0].set_label('EM confidence ellipsoides')
-
-        h   = gm.plot(level = level)
-        [i.set_color('g') for i in h]
-        h[0].set_label('Real confidence ellipsoides')
-    else:
-        # The 1d plotting function is quite elaborate: the confidence
-        # interval are represented by filled areas, the pdf of the mixture and
-        # the pdf of each component is drawn (optional)
-        h   = gm.plot1d(level = level)
-        [i.set_color('g') for i in h['pdf']]
-        h['pdf'][0].set_label('true pdf')
-
-        h0  = gm0.plot1d(level = level)
-        [i.set_color('k') for i in h0['pdf']]
-        h0['pdf'][0].set_label('initial pdf')
-
-        hl  = lgm.plot1d(fill = 1, level = level)
-        [i.set_color('r') for i in hl['pdf']]
-        hl['pdf'][0].set_label('pdf found by EM')
-
-        P.legend(loc = 0)
-
-P.legend(loc = 0)
-P.show()
-# P.save('2d diag.png')

Added: trunk/Lib/sandbox/pyem/examples.py
===================================================================
--- trunk/Lib/sandbox/pyem/examples.py	2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/examples.py	2006-11-16 12:26:52 UTC (rev 2320)
@@ -0,0 +1,14 @@
+def ex1():
+    import basic_example1
+
+def ex2():
+    import basic_example2
+
+def ex3():
+    import basic_example3
+
+if __name__ == '__main__':
+    ex1()
+    ex2()
+    ex3()
+

Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py	2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py	2006-11-16 12:26:52 UTC (rev 2320)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Thu Nov 09 06:00 PM 2006 J
+# Last Change: Thu Nov 16 08:00 PM 2006 J
 
 # Module to implement GaussianMixture class.
 
@@ -82,10 +82,10 @@
         k, d, mode  = check_gmm_param(weights, mu, sigma)
         if not k == self.k:
             raise GmParamError("Number of given components is %d, expected %d" 
-                    % (shape(k), shape(self.k)))
+                    % (k, self.k))
         if not d == self.d:
             raise GmParamError("Dimension of the given model is %d, expected %d" 
-                    % (shape(d), shape(self.d)))
+                    % (d, self.d))
         if not mode == self.mode and not d == 1:
             raise GmParamError("Given covariance mode is %s, expected %s"
                     % (mode, self.mode))

Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py	2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/gmm_em.py	2006-11-16 12:26:52 UTC (rev 2320)
@@ -374,7 +374,7 @@
 
     K   = mu.shape[0]
     n   = data.shape[0]
-    d   = data.shape[1]
+    d   = mu.shape[1]
     
     y   = N.zeros((K, n))
     if mu.size == va.size:

Added: trunk/Lib/sandbox/pyem/tests/test_examples.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_examples.py	2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/tests/test_examples.py	2006-11-16 12:26:52 UTC (rev 2320)
@@ -0,0 +1,26 @@
+#! /usr/bin/env python
+# Last Change: Thu Nov 16 09:00 PM 2006 J
+
+from numpy.testing import *
+
+set_package_path()
+from pyem.examples import ex1, ex2, ex3
+restore_path()
+
+# #Optional:
+# set_local_path()
+# # import modules that are located in the same directory as this file.
+# restore_path()
+
+class test_examples(NumpyTestCase):
+    def check_ex1(self, level = 5):
+        ex1()
+
+    def check_ex2(self, level = 5):
+        ex2()
+
+    def check_ex3(self, level = 5):
+        ex3()
+
+if __name__ == "__main__":
+    NumpyTest().run()

Modified: trunk/Lib/sandbox/pyem/tests/test_online_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_online_em.py	2006-11-16 09:25:03 UTC (rev 2319)
+++ trunk/Lib/sandbox/pyem/tests/test_online_em.py	2006-11-16 12:26:52 UTC (rev 2320)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Mon Oct 23 07:00 PM 2006 J
+# Last Change: Thu Nov 16 09:00 PM 2006 J
 
 import copy
 
@@ -61,7 +61,7 @@
         self._create_model(d, k, mode, nframes, emiter)
         self._check(d, k, mode, nframes, emiter)
 
-    def check_2d(self, level = 2):
+    def check_2d(self, level = 1):
         d       = 2
         k       = 2
         mode    = 'diag'
@@ -72,7 +72,7 @@
         self._create_model(d, k, mode, nframes, emiter)
         self._check(d, k, mode, nframes, emiter)
 
-    def check_5d(self, level = 2):
+    def check_5d(self, level = 5):
         d       = 5
         k       = 2
         mode    = 'diag'



More information about the Scipy-svn mailing list