[Scipy-svn] r3133 - in trunk/Lib/sandbox/pyem: . examples tests

scipy-svn@scip... scipy-svn@scip...
Mon Jul 2 03:06:28 CDT 2007


Author: cdavid
Date: 2007-07-02 03:06:13 -0500 (Mon, 02 Jul 2007)
New Revision: 3133

Modified:
   trunk/Lib/sandbox/pyem/examples/examples.py
   trunk/Lib/sandbox/pyem/examples/pdfestimation.py
   trunk/Lib/sandbox/pyem/examples/pdfestimation1d.py
   trunk/Lib/sandbox/pyem/examples/utils.py
   trunk/Lib/sandbox/pyem/gmm_em.py
   trunk/Lib/sandbox/pyem/tests/test_examples.py
Log:
Clean up some examples, and add an example for regularized EM

Modified: trunk/Lib/sandbox/pyem/examples/examples.py
===================================================================
--- trunk/Lib/sandbox/pyem/examples/examples.py	2007-07-02 08:04:38 UTC (rev 3132)
+++ trunk/Lib/sandbox/pyem/examples/examples.py	2007-07-02 08:06:13 UTC (rev 3133)
@@ -7,6 +7,12 @@
 def ex3():
     import basic_example3
 
+def pdfestim():
+    import pdfestimation
+
+def pdfestim1d():
+    import pdfestimation1d
+
 if __name__ == '__main__':
     ex1()
     ex2()

Modified: trunk/Lib/sandbox/pyem/examples/pdfestimation.py
===================================================================
--- trunk/Lib/sandbox/pyem/examples/pdfestimation.py	2007-07-02 08:04:38 UTC (rev 3132)
+++ trunk/Lib/sandbox/pyem/examples/pdfestimation.py	2007-07-02 08:06:13 UTC (rev 3133)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Sat Jun 09 07:00 PM 2007 J
+# Last Change: Mon Jul 02 03:00 PM 2007 J
 
 # Example of doing pdf estimation with EM algorithm. Requires matplotlib.
 import numpy as N
@@ -32,6 +32,7 @@
 # bc will contain a list of BIC values for each model trained
 bc = []
 mode = 'full'
+P.figure()
 for k in range(1, 5):
     # Train a model of k component, and plots isodensity curve
     P.subplot(2, 2, k)
@@ -45,4 +46,3 @@
     P.ylabel('waiting time (scaled)')
 
 print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1)
-P.show()

Modified: trunk/Lib/sandbox/pyem/examples/pdfestimation1d.py
===================================================================
--- trunk/Lib/sandbox/pyem/examples/pdfestimation1d.py	2007-07-02 08:04:38 UTC (rev 3132)
+++ trunk/Lib/sandbox/pyem/examples/pdfestimation1d.py	2007-07-02 08:06:13 UTC (rev 3133)
@@ -1,6 +1,13 @@
 #! /usr/bin/env python
-# Last Change: Sat Jun 09 04:00 PM 2007 J
+# Last Change: Mon Jul 02 03:00 PM 2007 J
 
+__doc__ = """This example shows how to do pdfestimation for one dimensional
+data. It estimates a Gaussian mixture model for several number of components,
+and it determines the "best" one according to the Bayesian Information
+Criterion.
+
+It uses the Old Faithful waiting time as the one dimension data, and plots the best
+model as well as the BIC as a function of the number of components."""
 # Example of doing pdf estimation with EM algorithm. Requires matplotlib.
 import numpy as N
 from numpy.testing import set_package_path, restore_path
@@ -66,4 +73,3 @@
 P.xlabel("number of components")
 P.ylabel("BIC")
 print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1)
-P.show()

Modified: trunk/Lib/sandbox/pyem/examples/utils.py
===================================================================
--- trunk/Lib/sandbox/pyem/examples/utils.py	2007-07-02 08:04:38 UTC (rev 3132)
+++ trunk/Lib/sandbox/pyem/examples/utils.py	2007-07-02 08:06:13 UTC (rev 3133)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Wed Jun 27 05:00 PM 2007 J
+# Last Change: Mon Jul 02 02:00 PM 2007 J
 
 # Various utilities for examples 
 
@@ -8,7 +8,7 @@
 
 # XXX: Bouah, hackish... Will go away once scipydata found its way
 set_package_path()
-from pyem.data import oldfaithful, pendigits
+from scikits.learn.datasets import oldfaithful, pendigits, iris
 restore_path()
 
 def get_faithful():

Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py	2007-07-02 08:04:38 UTC (rev 3132)
+++ trunk/Lib/sandbox/pyem/gmm_em.py	2007-07-02 08:06:13 UTC (rev 3133)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Sun Jul 01 06:00 PM 2007 J
+# Last Change: Mon Jul 02 04:00 PM 2007 J
 
 """Module implementing GMM, a class to estimate Gaussian mixture models using
 EM, and EM, a class which use GMM instances to estimate models parameters using
@@ -11,15 +11,13 @@
 #   - which methods to avoid va shrinking to 0 ? There are several options, 
 #   not sure which ones are appropriate
 #   - improve EM trainer
-
 import numpy as N
-#import numpy.linalg as lin
 from numpy.random import randn
 #import _c_densities as densities
 import densities
-#from kmean import kmean
 from scipy.cluster.vq import kmeans2 as kmean
 from gauss_mix import GmParamError
+from misc import curry
 
 #from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND
 
@@ -197,7 +195,7 @@
         """Computes update of the Gaussian Mixture Model (M step) from the
         responsibilities gamma and normalized responsibilities ngamma, for
         diagonal models."""
-        #XXX: caching SS may decrease memory consumption
+        #XXX: caching SS may decrease memory consumption, but is this possible ?
         k = self.gm.k
         d = self.gm.d
         n = data.shape[0]
@@ -422,23 +420,38 @@
         self.pval = pval
 
     def train(self, data, model, maxiter = 20, thresh = 1e-5):
+        mode = model.gm.mode
+
+        # Build regularizer
+        if mode == 'diag':
+            regularize = curry(regularize_diag, np = self.pcnt, prior =
+                    self.pval * N.ones(model.gm.d))
+        elif mode == 'full':
+            regularize = curry(regularize_full, np = self.pcnt, prior =
+                    self.pval * N.eye(model.gm.d))
+        else:
+            raise ValueError("unknown variance mode")
+
         model.init(data)
-        regularize_full(model.gm.va, self.pcnt, self.pval * N.eye(model.gm.d))
+        regularize(model.gm.va)
+
         # Likelihood is kept
         like = N.empty(maxiter, N.float)
 
         # Em computation, with computation of the likelihood
         g, tgd  = model.compute_log_responsabilities(data)
         g = N.exp(g)
+        model.update_em(data, g)
+        regularize(model.gm.va)
+
         like[0] = N.sum(densities.logsumexp(tgd), axis = 0)
-        model.update_em(data, g)
-        regularize_full(model.gm.va, self.pcnt, self.pval * N.eye(model.gm.d))
         for i in range(1, maxiter):
             g, tgd  = model.compute_log_responsabilities(data)
             g = N.exp(g)
+            model.update_em(data, g)
+            regularize(model.gm.va)
+
             like[i] = N.sum(densities.logsumexp(tgd), axis = 0)
-            model.update_em(data, g)
-            regularize_full(model.gm.va, self.pcnt, self.pval * N.eye(model.gm.d))
             if has_em_converged(like[i], like[i-1], thresh):
                 return like[0:i]
 
@@ -458,6 +471,18 @@
     else:
         return False
 
+def regularize_diag(va, np, prior):
+    """np * n is the number of prior counts (np is a proportion, and n is the
+    number of points).
+    
+    diagonal variance version"""
+    d = va.shape[1]
+    k = va.shape[0]
+
+    for i in range(k):
+        va[i] *= 1. / (1 + np)
+        va[i] += np / (1. + np) * prior
+
 def regularize_full(va, np, prior):
     """np * n is the number of prior counts (np is a proportion, and n is the
     number of points)."""
@@ -470,19 +495,3 @@
 
 if __name__ == "__main__":
     pass
-    ## # #++++++++++++++++++
-    ## # # Export the figure
-    ## # #++++++++++++++++++
-    ## # F   = P.gcf()
-    ## # DPI = F.get_dpi()
-    ## # DefaultSize = F.get_size_inches()
-    ## # # the default is 100dpi for savefig:
-    ## # F.savefig("example1.png")
-
-    ## # # Now make the image twice as big, while keeping the fonts and all the
-    ## # # same size
-    ## # F.set_figsize_inches( (DefaultSize[0]*2, DefaultSize[1]*2) )
-    ## # Size = F.get_size_inches()
-    ## # print "Size in Inches", Size
-    ## # F.savefig("example2.png")
-    ## P.show()

Modified: trunk/Lib/sandbox/pyem/tests/test_examples.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_examples.py	2007-07-02 08:04:38 UTC (rev 3132)
+++ trunk/Lib/sandbox/pyem/tests/test_examples.py	2007-07-02 08:06:13 UTC (rev 3133)
@@ -1,10 +1,10 @@
 #! /usr/bin/env python
-# Last Change: Mon May 28 04:00 PM 2007 J
+# Last Change: Mon Jul 02 03:00 PM 2007 J
 
 from numpy.testing import *
 
 set_package_path()
-from examples.examples import ex1, ex2, ex3
+from examples.examples import ex1, ex2, ex3, pdfestim, pdfestim1d
 restore_path()
 
 # #Optional:
@@ -13,14 +13,20 @@
 # restore_path()
 
 class test_examples(NumpyTestCase):
-    def check_ex1(self, level = 5):
+    def test_ex1(self, level = 3):
         ex1()
 
-    def check_ex2(self, level = 5):
+    def test_ex2(self, level = 3):
         ex2()
 
-    def check_ex3(self, level = 5):
+    def test_ex3(self, level = 3):
         ex3()
 
+    def test_pdfestim(self, level = 5):
+        pdfestim()
+
+    def test_pdfestim1d(self, level = 5):
+        pdfestim1d()
+
 if __name__ == "__main__":
     NumpyTest().run()



More information about the Scipy-svn mailing list