[Scipy-svn] r2272 - in trunk/Lib/sandbox/pyem: . pyem

scipy-svn at scipy.org scipy-svn at scipy.org
Thu Oct 12 08:46:28 CDT 2006


Author: cdavid
Date: 2006-10-12 08:46:06 -0500 (Thu, 12 Oct 2006)
New Revision: 2272

Added:
   trunk/Lib/sandbox/pyem/MANIFEST.in
   trunk/Lib/sandbox/pyem/pyem/profile_densities.py
   trunk/Lib/sandbox/pyem/pyem/profile_gmm.py
Modified:
   trunk/Lib/sandbox/pyem/Changelog
   trunk/Lib/sandbox/pyem/pyem/__init__.py
   trunk/Lib/sandbox/pyem/pyem/densities.py
   trunk/Lib/sandbox/pyem/pyem/gauss_mix.py
   trunk/Lib/sandbox/pyem/pyem/gmm_em.py
   trunk/Lib/sandbox/pyem/setup.py
Log:
[pyem @ david at ar.media.kyoto-u.ac.jp-20060714102813-3f88f458275af3ca]
Add scripts for benchmarking, minor corrections
David Cournapeau <david at ar.media.kyoto-u.ac.jp> | 2006-07-14 19:28:13 +0900 (Fri, 14 Jul 2006)

Modified: trunk/Lib/sandbox/pyem/Changelog
===================================================================
--- trunk/Lib/sandbox/pyem/Changelog	2006-10-12 13:45:58 UTC (rev 2271)
+++ trunk/Lib/sandbox/pyem/Changelog	2006-10-12 13:46:06 UTC (rev 2272)
@@ -1,3 +1,11 @@
+pyem (0.4) Fri, 14 Jul 2006 17:49:57 +0900
+
+	* put version to 0.4.1
+	* change some import due to recent changes in 
+	numpy
+
+-- David Cournapeau <david at ar.media.kyoto-u.ac.jp> 
+
 pyem (0.4) Fri, 14 Jul 2006 16:24:05 +0900
 
 	* put version to 0.4

Added: trunk/Lib/sandbox/pyem/MANIFEST.in
===================================================================
--- trunk/Lib/sandbox/pyem/MANIFEST.in	2006-10-12 13:45:58 UTC (rev 2271)
+++ trunk/Lib/sandbox/pyem/MANIFEST.in	2006-10-12 13:46:06 UTC (rev 2272)
@@ -0,0 +1,4 @@
+include pyem/src/c_numpy.pxd
+include pyem/src/c_python.pxd
+include Changelog
+include TODO

Modified: trunk/Lib/sandbox/pyem/pyem/__init__.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/__init__.py	2006-10-12 13:45:58 UTC (rev 2271)
+++ trunk/Lib/sandbox/pyem/pyem/__init__.py	2006-10-12 13:46:06 UTC (rev 2272)
@@ -1,4 +1,7 @@
 #! /usr/bin/env python
-# Last Change: Fri Jul 14 04:00 PM 2006 J
+# Last Change: Fri Jul 14 05:00 PM 2006 J
 
-version = '0.4'
+version = '0.4.1'
+
+from gauss_mix import GmParamError, GM
+from gmm_em import GmmParamError, GMM

Modified: trunk/Lib/sandbox/pyem/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/densities.py	2006-10-12 13:45:58 UTC (rev 2271)
+++ trunk/Lib/sandbox/pyem/pyem/densities.py	2006-10-12 13:46:06 UTC (rev 2272)
@@ -1,10 +1,11 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Thu Jul 13 04:00 PM 2006 J
+# Last Change: Fri Jul 14 05:00 PM 2006 J
 
 import numpy as N
 import numpy.linalg as lin
+from numpy.random import randn
 
 # Error classes
 class DenError(Exception):
@@ -219,14 +220,14 @@
     """Generate a set of data of dimension d, with n frames,
     that is input data, mean, var and output of gden, so that
     other implementations can be tested against"""
-    mu  = N.randn(1, d)
+    mu  = randn(1, d)
     if mode == 'diag':
-        va  = abs(N.randn(1, d))
+        va  = abs(randn(1, d))
     elif mode == 'full':
-        va  = N.randn(d, d)
-        va  = N.matrixmultiply(va, va.transpose())
+        va  = randn(d, d)
+        va  = matrixmultiply(va, va.transpose())
 
-    input   = N.randn(n, d)
+    input   = randn(n, d)
     output  = gauss_den(input, mu, va)
 
     import tables
@@ -251,7 +252,7 @@
     # # mu  = 1.0
     # # va  = 2.0
 
-    # # X   = N.randn(1e3, 1)
+    # # X   = randn(1e3, 1)
 
     # # Y   = gauss_den(X, mu, va)
 
@@ -269,7 +270,7 @@
     # # mu  = N.array([1.0, -2.0])
     # # va  = N.array([1.0, 2.0])
 
-    # # X   = N.randn(1e3, 2)
+    # # X   = randn(1e3, 2)
 
     # # Y   = gauss_den(X, mu, va)
 
@@ -285,7 +286,7 @@
     # mu  = N.array([[0.2, -1.0]])
     # va  = N.array([[1.2, 0.1], [0.1, 0.5]])
 
-    # X   = N.randn(1e3, 2)
+    # X   = randn(1e3, 2)
 
     # Y   = gauss_den(X, mu, va)
 
@@ -372,7 +373,7 @@
     mu  = N.array([2, 3])
 
     # Generate a multivariate gaussian of mean mu and covariance va
-    X       = N.randn(1e3, 2)
+    X       = randn(1e3, 2)
     Yc      = N.matrixmultiply(N.diag(N.sqrt(va)), X.transpose())
     Yc      = Yc.transpose() + mu
 
@@ -389,7 +390,7 @@
     mu  = N.array([0, 3])
 
     # Generate a multivariate gaussian of mean mu and covariance va
-    X       = N.randn(1e3, 2)
+    X       = randn(1e3, 2)
     Yc      = N.matrixmultiply(lin.cholesky(va), X.transpose())
     Yc      = Yc.transpose() + mu
 

Modified: trunk/Lib/sandbox/pyem/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/gauss_mix.py	2006-10-12 13:45:58 UTC (rev 2271)
+++ trunk/Lib/sandbox/pyem/pyem/gauss_mix.py	2006-10-12 13:46:06 UTC (rev 2272)
@@ -1,9 +1,10 @@
 # /usr/bin/python
-# Last Change: Fri Jul 14 03:00 PM 2006 J
+# Last Change: Fri Jul 14 05:00 PM 2006 J
 
 # Module to implement GaussianMixture class.
 
 import numpy as N
+from numpy.random import randn, rand
 import numpy.linalg as lin
 import densities
 
@@ -89,7 +90,7 @@
         # State index (ie hidden var)
         S   = gen_rand_index(self.w, nframes)
         # standard gaussian
-        X   = N.randn(nframes, self.d)        
+        X   = randn(nframes, self.d)        
 
         if self.mode == 'diag':
             X   = self.mu[S, :]  + X * N.sqrt(self.va[S,:])
@@ -163,14 +164,14 @@
 
         Returns: w, mu, va
         """
-        w   = abs(N.randn(nc))
+        w   = abs(randn(nc))
         w   = w / sum(w)
 
-        mu  = spread * N.randn(nc, d)
+        mu  = spread * randn(nc, d)
         if varmode == 'diag':
-            va  = abs(N.randn(nc, d))
+            va  = abs(randn(nc, d))
         elif varmode == 'full':
-            va  = N.randn(nc * d, d)
+            va  = randn(nc * d, d)
             for k in range(nc):
                 va[k*d:k*d+d]   = N.dot( va[k*d:k*d+d], 
                     va[k*d:k*d+d].transpose())
@@ -192,7 +193,7 @@
     # TODO: check each value of inverse distribution is
     # different
     invcdf  = N.cumsum(p)
-    uni     = N.rand(n)
+    uni     = rand(n)
     index   = N.zeros(n)
 
     # This one should be a bit faster

Modified: trunk/Lib/sandbox/pyem/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/gmm_em.py	2006-10-12 13:45:58 UTC (rev 2271)
+++ trunk/Lib/sandbox/pyem/pyem/gmm_em.py	2006-10-12 13:46:06 UTC (rev 2272)
@@ -1,8 +1,9 @@
 # /usr/bin/python
-# Last Change: Fri Jul 14 04:00 PM 2006 J
+# Last Change: Fri Jul 14 05:00 PM 2006 J
 
 import numpy as N
 import numpy.linalg as lin
+from numpy.random import randn
 import densities
 from kmean import kmean
 from gauss_mix import GM
@@ -84,8 +85,8 @@
         d   = self.gm.d
         if mode == 'diag':
             w   = N.ones(k, float) / k
-            mu  = N.randn(k, d)
-            va  = N.fabs(N.randn(k, d))
+            mu  = randn(k, d)
+            va  = N.fabs(randn(k, d))
         else:
             raise GmmParamError("""init_random not implemented for
                     mode %s yet""", mode)

Added: trunk/Lib/sandbox/pyem/pyem/profile_densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/profile_densities.py	2006-10-12 13:45:58 UTC (rev 2271)
+++ trunk/Lib/sandbox/pyem/pyem/profile_densities.py	2006-10-12 13:46:06 UTC (rev 2272)
@@ -0,0 +1,47 @@
+import numpy as N
+import densities as D
+import tables
+
+def bench1(mode = 'diag'):
+    #===========================================
+    # Diag Gaussian of dimension 20
+    #===========================================
+    d       = 20
+    n       = 1e5
+    niter   = 1
+    mode    = 'diag'
+
+    # Generate a model with k components, d dimensions
+    mu  = N.randn(1, d)
+    if mode == 'diag':
+        va  = abs(N.randn(1, d))
+    elif mode == 'full':
+        va  = N.randn(d, d)
+        va  = N.matrixmultiply(va, va.transpose())
+
+    X   = N.randn(n, d)
+    print "Compute %d times densities, %d dimension, %d frames" % (niter, d, n)
+    for i in range(niter):
+        Y   = D.gauss_den(X, mu, va)
+    
+    # Check values
+    h5file  = tables.openFile('diag.dat', "r")
+    X   = h5file.root.input.read()
+    mu  = h5file.root.mu.read()
+    va  = h5file.root.va.read()
+    Yt  = h5file.root.output.read()
+    Y   = D.gauss_den(X, mu, va)
+
+    try:
+        N.testing.assert_array_almost_equal(Y, Yt) 
+    except AssertionError:
+        print N.sqrt(N.sum((Y-Yt) **2)) / n
+        raise "Not accurate !!!!"
+
+if __name__ == "__main__":
+    import profile
+    profile.run('bench1()', 'gdenprof')
+    import pstats
+    p = pstats.Stats('gdenprof')
+    print p.sort_stats('cumulative').print_stats(20)
+

Added: trunk/Lib/sandbox/pyem/pyem/profile_gmm.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/profile_gmm.py	2006-10-12 13:45:58 UTC (rev 2271)
+++ trunk/Lib/sandbox/pyem/pyem/profile_gmm.py	2006-10-12 13:46:06 UTC (rev 2272)
@@ -0,0 +1,57 @@
+import numpy as N
+import tables
+from gmm_em import GM, GMM
+import copy
+
+def bench1(mode = 'diag'):
+    #===========================================
+    # GMM of 20 comp, 20 dimension, 1e4 frames
+    #===========================================
+    d       = 20
+    k       = 20
+    nframes = 1e4
+    niter   = 1
+    mode    = 'diag'
+
+    #+++++++++++++++++++++++++++++++++++++++++++
+    # Create an artificial GMM model, samples it
+    #+++++++++++++++++++++++++++++++++++++++++++
+    print "Generating the mixture"
+    # Generate a model with k components, d dimensions
+    w, mu, va   = GM.gen_param(d, k, mode, spread = 3)
+    gm          = GM(d, k, mode)
+    gm.set_param(w, mu, va)
+
+    # Sample nframes frames  from the model
+    data    = gm.sample(nframes)
+
+    #++++++++++++++++++++++++
+    # Learn the model with EM
+    #++++++++++++++++++++++++
+
+    # Init the model
+    print "Init a model for learning, with kmean for initialization"
+    lgm = GM(d, k, mode)
+    gmm = GMM(lgm, 'kmean')
+    
+    gmm.init(data)
+    # Keep the initialized model for drawing
+    gm0 = copy.copy(lgm)
+
+    # The actual EM, with likelihood computation
+    niter   = 10
+    like    = N.zeros(niter, float)
+
+    print "computing..."
+    for i in range(niter):
+        g, tgd  = gmm.sufficient_statistics(data)
+        like[i] = N.sum(N.log(N.sum(tgd, 1)))
+        gmm.update_em(data, g)
+
+if __name__ == "__main__":
+    import profile
+    profile.run('bench1()', 'gmmprof')
+    import pstats
+    p = pstats.Stats('gmmprof')
+    print p.sort_stats('cumulative').print_stats(20)
+

Modified: trunk/Lib/sandbox/pyem/setup.py
===================================================================
--- trunk/Lib/sandbox/pyem/setup.py	2006-10-12 13:45:58 UTC (rev 2271)
+++ trunk/Lib/sandbox/pyem/setup.py	2006-10-12 13:46:06 UTC (rev 2272)
@@ -1,5 +1,5 @@
 #! /usr/bin/env python
-# Last Change: Thu Jul 13 01:00 PM 2006 J
+# Last Change: Fri Jul 14 04:00 PM 2006 J
 """ pyem is a small python package to estimate Gaussian Mixtures Models
 from data, using Expectation Maximization"""
 from distutils.core import setup, Extension
@@ -11,7 +11,6 @@
 
 from numpy.distutils.misc_util import get_numpy_include_dirs
 NUMPYINC    = get_numpy_include_dirs()[0]
-print NUMPYINC
 
 # General variables:
 #   - DISTNAME: name of the distributed package
@@ -34,8 +33,7 @@
         url=URL,
         packages=['pyem'], 
         ext_modules=[Extension('pyem/c_gmm', ['pyem/src/c_gmm.pyx'], 
-            include_dirs=[NUMPYINC])],
-        data_files=['c_numpy.pxd', 'c_python.pxd'],
+            include_dirs=[NUMPYINC])],  
         cmdclass = {'build_ext': build_ext},
     )
 
@@ -53,7 +51,7 @@
 try:
     from Pyrex.Distutils import build_ext
     setup_pyrex()
-except:
+except ImportError:
     print "Pyrex not found, C extension won't be available"
     setup_nopyrex()
 



More information about the Scipy-svn mailing list