[Scipy-svn] r2276 - in trunk/Lib/sandbox/pyem: . pyem pyem/src

scipy-svn at scipy.org scipy-svn at scipy.org
Thu Oct 12 08:47:24 CDT 2006


Author: cdavid
Date: 2006-10-12 08:47:08 -0500 (Thu, 12 Oct 2006)
New Revision: 2276

Modified:
   trunk/Lib/sandbox/pyem/.bzrignore
   trunk/Lib/sandbox/pyem/Changelog
   trunk/Lib/sandbox/pyem/TODO
   trunk/Lib/sandbox/pyem/pyem/__init__.py
   trunk/Lib/sandbox/pyem/pyem/densities.py
   trunk/Lib/sandbox/pyem/pyem/gauss_mix.py
   trunk/Lib/sandbox/pyem/pyem/gmm_em.py
   trunk/Lib/sandbox/pyem/pyem/kmean.py
   trunk/Lib/sandbox/pyem/pyem/profile_gmm.py
   trunk/Lib/sandbox/pyem/pyem/src/Makefile
   trunk/Lib/sandbox/pyem/setup.py
Log:
[pyem @ david at ar.media.kyoto-u.ac.jp-20060824104442-7df14730b05fa277]
correct bug with GMM.init_method
David Cournapeau <david at ar.media.kyoto-u.ac.jp> | 2006-08-24 19:44:42 +0900 (Thu, 24 Aug 2006)

Modified: trunk/Lib/sandbox/pyem/.bzrignore
===================================================================
--- trunk/Lib/sandbox/pyem/.bzrignore	2006-10-12 13:46:56 UTC (rev 2275)
+++ trunk/Lib/sandbox/pyem/.bzrignore	2006-10-12 13:47:08 UTC (rev 2276)
@@ -10,3 +10,5 @@
 profile_gmm_em.py
 data.h5
 gmmprof
+valgrind-python.supp
+valgrind-python.supp

Modified: trunk/Lib/sandbox/pyem/Changelog
===================================================================
--- trunk/Lib/sandbox/pyem/Changelog	2006-10-12 13:46:56 UTC (rev 2275)
+++ trunk/Lib/sandbox/pyem/Changelog	2006-10-12 13:47:08 UTC (rev 2276)
@@ -1,3 +1,12 @@
+pyem (0.5.2) Thu, 24 Aug 2006 19:42:28 +0900
+
+	* put version to 0.5.2
+	* Correct a bug with init method in GMM (change class method
+	to object method).
+	* modify the setup for a more flexible system
+
+-- David Cournapeau <david at ar.media.kyoto-u.ac.jp> 
+
 pyem (0.5.1) Thu, 17 Aug 2006 11:54:41 +0900
 
 	* put version to 0.5.1

Modified: trunk/Lib/sandbox/pyem/TODO
===================================================================
--- trunk/Lib/sandbox/pyem/TODO	2006-10-12 13:46:56 UTC (rev 2275)
+++ trunk/Lib/sandbox/pyem/TODO	2006-10-12 13:47:08 UTC (rev 2276)
@@ -1,8 +1,10 @@
-# Last Change: Fri Aug 04 11:00 PM 2006 J
+# Last Change: Tue Aug 22 10:00 PM 2006 J
 
 Things which must be implemented for a 1.0 version:
     - test for various length and model size.
     - a small help/tutorial
+    - use scipy.cluster.vq instead of custom kmeans algo (find
+    a way to get label from vq.kmeans)
 
 Things which would be nice:
     - C implementation of Gaussian densities at least.

Modified: trunk/Lib/sandbox/pyem/pyem/__init__.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/__init__.py	2006-10-12 13:46:56 UTC (rev 2275)
+++ trunk/Lib/sandbox/pyem/pyem/__init__.py	2006-10-12 13:47:08 UTC (rev 2276)
@@ -1,7 +1,7 @@
 #! /usr/bin/env python
-# Last Change: Thu Aug 17 03:00 PM 2006 J
+# Last Change: Thu Aug 24 07:00 PM 2006 J
 
-version = '0.5.1'
+version = '0.5.2'
 
 from gauss_mix import GmParamError, GM
 from gmm_em import GmmParamError, GMM

Modified: trunk/Lib/sandbox/pyem/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/densities.py	2006-10-12 13:46:56 UTC (rev 2275)
+++ trunk/Lib/sandbox/pyem/pyem/densities.py	2006-10-12 13:47:08 UTC (rev 2276)
@@ -1,14 +1,13 @@
 #! /usr/bin/python
 #
 # Copyrighted David Cournapeau
-# Last Change: Thu Aug 17 03:00 PM 2006 J
+# Last Change: Tue Aug 22 08:00 PM 2006 J
 
 import numpy as N
 import numpy.linalg as lin
 from numpy.random import randn
 from scipy.stats import chi2
 
-
 # Error classes
 class DenError(Exception):
     """Base class for exceptions in this module.
@@ -103,6 +102,12 @@
 
     return y
     
+#from ctypes import cdll, c_uint, c_int, c_double, POINTER
+#_gden   = cdll.LoadLibrary('src/libgden.so')
+#_gden.gden_diag.restype     = c_int
+#_gden.gden_diag.argtypes    = [POINTER(c_double), c_uint, c_uint,
+#        POINTER(c_double), POINTER(c_double), POINTER(c_double)]
+
 def _diag_gauss_den(x, mu, va, log):
     """ This function is the actual implementation
     of gaussian pdf in scalar case. It assumes all args
@@ -119,9 +124,20 @@
         y       =  (x[:,0] - mu[0,0]) ** 2 * inva * -0.5
         for i in range(1, d):
             inva    = 1/va[0,i]
-            fac     *= (2*N.pi) ** (-d/2.0) * N.sqrt(inva)
+            #fac     *= (2*N.pi) ** (-d/2.0) * N.sqrt(inva)
+            fac     *= N.sqrt(inva)
             y       += (x[:,i] - mu[0,i]) ** 2 * inva * -0.5
         y   = fac * N.exp(y)
+    #d   = mu.size
+    #n   = x.shape[0]
+    #if not log:
+    #    y   = N.zeros(n)
+    #    inva= 1/va
+    #    _gden.gden_diag(x.ctypes.data_as(POINTER(c_double)),
+    #        n, d,
+    #        mu.ctypes.data_as(POINTER(c_double)),
+    #        (inva).ctypes.data_as(POINTER(c_double)),
+    #        y.ctypes.data_as(POINTER(c_double)))
     else:
         y   = _scalar_gauss_den(x[:,0], mu[0,0], va[0,0], log)
         for i in range(1, d):

Modified: trunk/Lib/sandbox/pyem/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/gauss_mix.py	2006-10-12 13:46:56 UTC (rev 2275)
+++ trunk/Lib/sandbox/pyem/pyem/gauss_mix.py	2006-10-12 13:47:08 UTC (rev 2276)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Thu Aug 17 03:00 PM 2006 J
+# Last Change: Tue Aug 22 09:00 PM 2006 J
 
 # Module to implement GaussianMixture class.
 
@@ -46,7 +46,7 @@
         """Init a Gaussian model of k components, each component being a 
         d multi-variate Gaussian, with covariance matrix of style mode"""
         if mode not in self._cov_mod:
-            raise GmmParamError("mode %s not recognized" + str(mode))
+            raise GmParamError("mode %s not recognized" + str(mode))
 
         self.d      = d
         self.k      = k
@@ -67,13 +67,13 @@
         """Set parameters of the model"""
         k, d, mode  = check_gmm_param(weights, mu, sigma)
         if not k == self.k:
-            raise GmmParamError("Number of given components is %d, expected %d" 
+            raise GmParamError("Number of given components is %d, expected %d" 
                     % (shape(k), shape(self.k)))
         if not d == self.d:
-            raise GmmParamError("Dimension of the given model is %d, expected %d" 
+            raise GmParamError("Dimension of the given model is %d, expected %d" 
                     % (shape(d), shape(self.d)))
         if not mode == self.mode:
-            raise GmmParamError("Given covariance mode is %s, expected %d"
+            raise GmParamError("Given covariance mode is %s, expected %d"
                     % (mode, self.mode))
         self.w  = weights
         self.mu = mu
@@ -84,7 +84,7 @@
     def sample(self, nframes):
         """ Sample nframes frames from the model """
         if not self.is_valid:
-            raise GmmParamError("""Parameters of the model has not been 
+            raise GmParamError("""Parameters of the model has not been 
                 set yet, please set them using self.set_param()""")
 
         # State index (ie hidden var)
@@ -106,7 +106,7 @@
                 tmpind      = N.where(S == s)[0]
                 X[tmpind]   = N.dot(X[tmpind], cho[s].transpose()) + self.mu[s]
         else:
-            raise GmmParamError('cov matrix mode not recognized, this is a bug !')
+            raise GmParamError('cov matrix mode not recognized, this is a bug !')
 
         return X
 
@@ -172,7 +172,7 @@
                 va[k*d:k*d+d]   = N.dot( va[k*d:k*d+d], 
                     va[k*d:k*d+d].transpose())
         else:
-            raise GmmParamError('cov matrix mode not recognized')
+            raise GmParamError('cov matrix mode not recognized')
 
         return w, mu, va
 
@@ -219,32 +219,32 @@
         
     # Check that w is valid
     if N.fabs(N.sum(w, 0)  - 1) > MAX_DEV:
-        raise GmmParamError('weight does not sum to 1')
+        raise GmParamError('weight does not sum to 1')
     
     if not len(w.shape) == 1:
-        raise GmmParamError('weight is not a vector')
+        raise GmParamError('weight is not a vector')
 
     # Check that mean and va have the same number of components
     K           = len(w)
 
     if N.ndim(mu) < 2:
         msg = "mu should be a K,d matrix, and a row vector if only 1 comp"
-        raise GmmParamError(msg)
+        raise GmParamError(msg)
     if N.ndim(va) < 2:
         msg = """va should be a K,d / K *d, d matrix, and a row vector if
         only 1 diag comp"""
-        raise GmmParamError(msg)
+        raise GmParamError(msg)
 
     (Km, d)     = mu.shape
     (Ka, da)    = va.shape
 
     if not K == Km:
         msg = "not same number of component in mean and weights"
-        raise GmmParamError(msg)
+        raise GmParamError(msg)
 
     if not d == da:
         msg = "not same number of dimensions in mean and variances"
-        raise GmmParamError(msg)
+        raise GmParamError(msg)
 
     if Km == Ka:
         mode = 'diag'
@@ -252,7 +252,7 @@
         mode = 'full'
         if not Ka == Km*d:
             msg = "not same number of dimensions in mean and variances"
-            raise GmmParamError(msg)
+            raise GmParamError(msg)
         
     return K, d, mode
         

Modified: trunk/Lib/sandbox/pyem/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/gmm_em.py	2006-10-12 13:46:56 UTC (rev 2275)
+++ trunk/Lib/sandbox/pyem/pyem/gmm_em.py	2006-10-12 13:47:08 UTC (rev 2276)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Thu Aug 17 03:00 PM 2006 J
+# Last Change: Thu Aug 24 02:00 PM 2006 J
 
 import numpy as N
 import numpy.linalg as lin
@@ -95,8 +95,6 @@
         self.gm.mu  = mu
         self.gm.va  = va
 
-    # Possible init methods
-    _init_methods = {'kmean': init_kmean, 'random' : init_random}
     # TODO: 
     #   - format of parameters ? For variances, list of variances matrix,
     #   keep the current format, have 3d matrices ?
@@ -107,10 +105,13 @@
         method for training init (kmean by default)"""
         self.gm = gm
 
-        if init not in self._init_methods:
+        # Possible init methods
+        init_methods = {'kmean': self.init_kmean, 'random' : self.init_random}
+
+        if init not in init_methods:
             raise GmmParamError('init method %s not recognized' + str(init))
 
-        GMM.init   = self._init_methods[init]
+        self.init   = init_methods[init]
 
     def sufficient_statistics(self, data):
         """ Return normalized and non-normalized sufficient statistics
@@ -285,6 +286,9 @@
     gmm.init(data)
     # Keep the initialized model for drawing
     gm0 = copy.copy(lgm)
+    print gm0.w
+    print gm0.mu
+    print gm0.va
 
     # The actual EM, with likelihood computation
     niter   = 10
@@ -329,6 +333,11 @@
         P.plot(Xe[i], Ye[i], 'r', label = '_nolegend_')
     P.legend(loc = 0)
 
+    #from scipy.cluster.vq import kmeans
+    #code    = kmeans(data, k)[0]
+    #print code
+    #P.plot(code[:,0], code[:, 1], 'oy')
+    #P.plot(gm0.mu[:,0], gm0.mu[:, 1], 'ok')
     P.subplot(2, 1, 2)
     P.plot(like)
     P.title('log likelihood')

Modified: trunk/Lib/sandbox/pyem/pyem/kmean.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/kmean.py	2006-10-12 13:46:56 UTC (rev 2275)
+++ trunk/Lib/sandbox/pyem/pyem/kmean.py	2006-10-12 13:47:08 UTC (rev 2276)
@@ -1,5 +1,5 @@
 # /usr/bin/python
-# Last Change: Thu Aug 17 03:00 PM 2006 J
+# Last Change: Tue Aug 22 10:00 PM 2006 J
 
 import numpy as N
 
@@ -46,12 +46,22 @@
     
 # Try to import pyrex function for vector quantization. If not available,
 # falls back on pure python implementation.
+#%KMEANIMPORT%
 try:
-    from c_gmm import _vq
-except:
-    print """c_gmm._vq not found, using pure python implementation instead.
+    from scipy.cluster.vq import kmeans as kmean
+except ImportError:
+    try:
+        from c_gmm import _vq
+    except:
+        print """c_gmm._vq not found, using pure python implementation instead. 
         Kmean will be REALLY slow"""
-    _vq = _py_vq
+        _vq = _py_vq
+#try:
+#    from c_gmm import _vq
+#except:
+#    print """c_gmm._vq not found, using pure python implementation instead. 
+#    Kmean will be REALLY slow"""
+#    _vq = _py_vq
 
 # Test functions usable for now
 def test_kmean():

Modified: trunk/Lib/sandbox/pyem/pyem/profile_gmm.py
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/profile_gmm.py	2006-10-12 13:46:56 UTC (rev 2275)
+++ trunk/Lib/sandbox/pyem/pyem/profile_gmm.py	2006-10-12 13:47:08 UTC (rev 2276)
@@ -9,7 +9,7 @@
     d       = 20
     k       = 20
     nframes = 1e4
-    niter   = 1
+    niter   = 10
     mode    = 'diag'
 
     #+++++++++++++++++++++++++++++++++++++++++++
@@ -38,7 +38,6 @@
     gm0 = copy.copy(lgm)
 
     # The actual EM, with likelihood computation
-    niter   = 10
     like    = N.zeros(niter)
 
     print "computing..."

Modified: trunk/Lib/sandbox/pyem/pyem/src/Makefile
===================================================================
--- trunk/Lib/sandbox/pyem/pyem/src/Makefile	2006-10-12 13:46:56 UTC (rev 2275)
+++ trunk/Lib/sandbox/pyem/pyem/src/Makefile	2006-10-12 13:47:08 UTC (rev 2276)
@@ -5,11 +5,17 @@
 
 PYTHONINC	= -I/usr/include/python2.4
 NUMPYINC	= -I/usr/lib/python2.4/site-packages/numpy/core/include
-OPTIMS		= -O3 -ffast-math -march=pentium4
+OPTIMS		= -O2 -ffast-math -march=pentium4
 WARN		= -W -Wall
 
 CFLAGS	= $(PYTHONINC) $(NUMPYINC) $(OPTIMS) $(WARN)
 
+libgden.so: c_gden.o
+	$(LD) -shared -o $@ $< -Wl,-soname,$@
+
+c_gden.o: c_gden.c
+	$(CC) -c $(CFLAGS) -fPIC $<
+
 c_gmm.so: c_gmm.o
 	$(LD) -shared -o $@ $< -Wl,-soname,$@
 

Modified: trunk/Lib/sandbox/pyem/setup.py
===================================================================
--- trunk/Lib/sandbox/pyem/setup.py	2006-10-12 13:46:56 UTC (rev 2275)
+++ trunk/Lib/sandbox/pyem/setup.py	2006-10-12 13:47:08 UTC (rev 2276)
@@ -1,5 +1,9 @@
 #! /usr/bin/env python
-# Last Change: Fri Jul 14 04:00 PM 2006 J
+# Last Change: Mon Aug 21 12:00 PM 2006 J
+# TODO:
+#   - check how to handle cmd line build options with distutils and use
+#   it in the building process
+
 """ pyem is a small python package to estimate Gaussian Mixtures Models
 from data, using Expectation Maximization"""
 from distutils.core import setup, Extension
@@ -9,6 +13,8 @@
 import os
 if os.path.exists('MANIFEST'): os.remove('MANIFEST')
 
+import re
+
 from numpy.distutils.misc_util import get_numpy_include_dirs
 NUMPYINC    = get_numpy_include_dirs()[0]
 
@@ -24,34 +30,82 @@
 AUTHOR_EMAIL='david at ar.media.kyoto-u.ac.jp',
 URL         ='http://ar.media.kyoto-u.ac.jp/members/david',
 
-def setup_pyrex():
-    setup(name=DISTNAME,
-        version=VERSION,
-        description=DESCRIPTION,
-        author=AUTHOR,
-        author_email=AUTHOR_EMAIL,
-        url=URL,
-        packages=['pyem'], 
-        ext_modules=[Extension('pyem/c_gmm', ['pyem/src/c_gmm.pyx'], 
-            include_dirs=[NUMPYINC])],  
-        cmdclass = {'build_ext': build_ext},
-    )
+# Source files for extensions
 
-def setup_nopyrex():
-    setup(name=DISTNAME,
-        version=VERSION,
-        description=DESCRIPTION,
-        author=AUTHOR,
-        author_email=AUTHOR_EMAIL,
-        url=URL,
-        packages=['pyem'], 
-        #ext_modules=[Extension('_hello', ['hellomodule.c'])],
-    )
+# Functions used to substitute values in File.
+# Mainly use to replace config.h capabilities
+def do_subst_in_file(sourcefile, targetfile, dict):
+    """Replace all instances of the keys of dict with their values.
+    For example, if dict is {'%VERSION%': '1.2345', '%BASE%': 'MyProg'},
+    then all instances of %VERSION% in the file will be replaced with 1.2345 etc.
+    """
+    try:
+        f = open(sourcefile, 'rb')
+        contents = f.read()
+        f.close()
+    except:
+        raise IOError, "Can't read source file %s"%sourcefile
 
-try:
-    from Pyrex.Distutils import build_ext
-    setup_pyrex()
-except ImportError:
-    print "Pyrex not found, C extension won't be available"
-    setup_nopyrex()
+    for (k,v) in dict.items():
+        contents = re.sub(k, v, contents)
+    try:
+        f = open(targetfile, 'wb')
+        f.write(contents)
+        f.close()
+    except:
+        raise IOError, "Can't read source file %s"%sourcefile
+    return 0 # success
+ 
+class SetupOption:
+    def __init__(self):
+        self.kmean      = 'py'
+        self.ext_modules= []
+        self.cmdclass   = {}
+        self.subsdic     = {'%KMEANIMPORT%': []}
 
+    def _config_kmean(self):
+        # Check in this order:
+        #   - kmean in scipy.cluster,
+        #   - custom vq with pyrex 
+        #   - custom pure python vq
+        #try:
+        #    from scipy.cluster.vq import kmeans
+        #    self.kmean  = 'scipy'
+        #    #self.subsdic['%KMEANIMPORT%']   = scipy_kmean
+        #except ImportError:
+        #    try:
+        #        from Pyrex.Distutils import build_ext
+        #        self.kmean  = 'pyrex'
+        #        self.ext_modules.append(Extension('pyem/c_gmm', 
+        #            ['pyem/src/c_gmm.pyx'], include_dirs=[NUMPYINC]))
+        #        self.cmdclass['build_ext']  = build_ext
+        #        #self.subsdic['%KMEANIMPORT%']   = pyrex_kmean
+        #    except ImportError:
+        #        self.kmean  = 'py'
+        #        #self.subsdic['%KMEANIMPORT%']   = pyrex_kmean
+        try:
+            from Pyrex.Distutils import build_ext
+            self.kmean  = 'pyrex'
+            self.ext_modules.append(Extension('pyem/c_gmm', 
+                ['pyem/src/c_gmm.pyx'], include_dirs=[NUMPYINC]))
+            self.cmdclass['build_ext']  = build_ext
+            #self.subsdic['%KMEANIMPORT%']   = pyrex_kmean
+        except ImportError:
+            self.kmean  = 'py'
+            #self.subsdic['%KMEANIMPORT%']   = pyrex_kmean
+    def setup(self):
+        self._config_kmean()
+        import time
+        #do_subst_in_file('pyem/kmean.py.in', 'pyem/kmean.py', self.subsdic)
+        setup(name      = DISTNAME,
+            version     = VERSION,
+            description = DESCRIPTION,
+            author      = AUTHOR,
+            author_email= AUTHOR_EMAIL,
+            url         = URL,
+            packages    = ['pyem'],
+            ext_modules = self.ext_modules,
+            cmdclass    = self.cmdclass)
+
+stpobj  = SetupOption()
+stpobj.setup()



More information about the Scipy-svn mailing list