[SciPy-user] local nonparametric regression, gauss process

josef.pktd@gmai... josef.pktd@gmai...
Mon Feb 23 23:32:07 CST 2009


Last weekend I was trying out scipy.spatial and scipy.sparse. I wrote some
functions for kernel ridge regression, but I still have parameterization
problems with the sparse version. Here is the dense version, which I tried
out with 1000 training points and 2000 points in total.

It's not finished, but it produces some nice graphs that show the local
regression. If there is interest, I can add this to scipy.


Josef
-------------- next part --------------
'''Kernel Ridge Regression for local non-parametric regression'''


import numpy as np
from scipy import spatial as ssp
import matplotlib.pyplot as plt

def plt_closeall(n=10):
    '''close a number of open matplotlib windows'''
    for i in range(n): plt.close()

def kernel_rbf(x, y, scale=1, **kwds):
    '''Gaussian (rbf) kernel: exp(-||x-y||**2 / (2*scale))'''
    # minkowski_distance_p with p=2 returns the squared euclidean distance
    dist = ssp.minkowski_distance_p(x[:, np.newaxis, :], y[np.newaxis, :, :], 2)
    return np.exp(-0.5/scale*dist)

def kernel_euclid(x, y, p=2, **kwds):
    '''Minkowski distance of order p (euclidean for p=2), used directly as kernel'''
    return ssp.minkowski_distance(x[:, np.newaxis, :], y[np.newaxis, :, :], p)

class GaussProcess(object):
    '''class to perform kernel ridge regression (Gaussian process)

    Warning: this class is memory intensive. It creates an nobs x nobs distance
    matrix and its inverse, where nobs is the number of rows (observations).
    See the sparse version for a larger number of observations.

    Notes
    -----

    Todo:
    * normalize multidimensional x array on demand, either by var or cov
    * add confidence band
    * automatic selection or proposal of smoothing parameters
      (a rough holdout sketch is appended after example2 below)

    References
    ----------

    Rasmussen, C.E. and C.K.I. Williams, 2006, Gaussian Processes for Machine
    Learning, the MIT Press, www.GaussianProcess.org/gpml, chapter 2
    '''
    
    def __init__(self, x, y=None, kernel=kernel_rbf,
                 scale=0.5, ridgecoeff=1e-10, **kwds):
        '''
        Parameters
        ----------
        x : 2d array (N,K)
           data array of explanatory variables, columns represent variables,
           rows represent observations
        y : 2d array (N,1) (optional)
           endogenous variable that should be fitted or predicted;
           can alternatively be specified as parameter to the fit method
        kernel : function, default: kernel_rbf
           function (x1, x2) -> kernel matrix; takes two 2d arrays as
           parameters and returns the kernel or distance matrix
        scale : float (optional)
           smoothing parameter for the rbf kernel
        ridgecoeff : float (optional)
           coefficient that is multiplied with the identity matrix in the
           ridge regression

        Notes
        -----
        After initialization, the kernel matrix is calculated and, if y is
        given as a parameter, the linear regression parameters and the fitted
        or estimated y values, yest, are also calculated. yest is available
        as an attribute in this case.

        Both scale and the ridge coefficient smooth the fitted curve.

        '''
        
        self.x = x
        self.kernel = kernel
        self.scale = scale
        self.ridgecoeff = ridgecoeff
        # kernel (distance) matrix between the training points
        self.distxsample = kernel(x, x, scale=scale)
        # inverse of the regularized kernel matrix, (K + ridgecoeff*I)^{-1}
        self.Kinv = np.linalg.inv(self.distxsample +
                             np.eye(*self.distxsample.shape)*ridgecoeff)
        if y is not None:
            self.y = y
            self.yest = self.fit(y)
            

    def fit(self, y):
        '''fit the training explanatory variables to a sample output variable'''
        # kernel ridge regression coefficients: parest = (K + ridgecoeff*I)^{-1} y
        self.parest = np.dot(self.Kinv, y)
        # fitted values at the training points: yhat = K parest
        yhat = np.dot(self.distxsample, self.parest)
        return yhat
        
    def predict(self, x):
        '''predict new y values for a given array of explanatory variables'''
        self.xpredict = x
        # kernel between the prediction points and the training points
        distxpredict = self.kernel(x, self.x, scale=self.scale)
        self.ypredict = np.dot(distxpredict, self.parest)
        return self.ypredict

    def plot(self, y, plt=plt):
        '''some basic plots'''
        #todo: return proper graph handles
        plt.figure()
        plt.plot(self.x, self.y, 'bo-', self.x, self.yest, 'r.-')
        plt.title('sample (training) points')
        plt.figure()
        plt.plot(self.xpredict, y, 'bo-', self.xpredict, self.ypredict, 'r.-')
        plt.title('all points')
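
# A minimal sketch (not part of the original post) that illustrates what fit and
# predict compute: kernel ridge regression predicts
#     yhat = K(x_new, x_train) (K(x_train, x_train) + ridgecoeff*I)^{-1} y_train
# The helper name `_check_closed_form` and the toy data are made up for
# illustration; it recomputes the prediction with np.linalg.solve and compares.
def _check_closed_form():
    xt = np.linspace(0, 1, 20)[:, np.newaxis]            # training points
    yt = np.sin(3*xt) + 0.1*np.random.randn(20, 1)       # noisy training output
    xp = np.linspace(0, 1, 50)[:, np.newaxis]            # prediction points
    gp = GaussProcess(xt, yt, kernel=kernel_rbf, scale=0.1, ridgecoeff=1e-6)
    yp = gp.predict(xp)
    # same prediction directly from the closed form
    K = kernel_rbf(xt, xt, scale=0.1)
    Kstar = kernel_rbf(xp, xt, scale=0.1)
    yp2 = np.dot(Kstar, np.linalg.solve(K + 1e-6*np.eye(20), yt))
    print('max abs difference:', np.max(np.abs(yp - yp2)))
#_check_closed_form()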
        
        

def example1():
    m,k = 500,4
    upper = 6
    scale=10
    xs1a = np.linspace(1,upper,m)[:,np.newaxis]
    xs1 = xs1a*np.ones((1,k)) + 1/(1.0+np.exp(np.random.randn(m,k)))
    xs1 /= np.std(xs1[::k,:],0)   # normalize scale, could use cov to normalize
    y1true = np.sum(np.sin(xs1)+np.sqrt(xs1),1)[:,np.newaxis]
    y1 = y1true + 0.250 * np.random.randn(m,1)

    stride = 2 #use only some points as training points, e.g. 2 means every 2nd
    gp1 = GaussProcess(xs1[::stride,:],y1[::stride,:], kernel=kernel_euclid,
                       ridgecoeff=1e-10)
    yhatr1 = gp1.predict(xs1)
    plt.figure()
    plt.plot(y1true, y1,'bo',y1true, yhatr1,'r.')
    plt.title('euclid kernel: true y versus noisy y and estimated y')
    plt.figure()
    plt.plot(y1,'bo-',y1true,'go-',yhatr1,'r.-')
    plt.title('euclid kernel: true (green), noisy (blue) and estimated (red) '+
              'observations')

    gp2 = GaussProcess(xs1[::stride,:],y1[::stride,:], kernel=kernel_rbf,
                       scale=scale, ridgecoeff=1e-1)
    yhatr2 = gp2.predict(xs1)
    plt.figure()
    plt.plot(y1true, y1,'bo',y1true, yhatr2,'r.')
    plt.title('rbf kernel: true versus noisy (blue) and estimated (red) observations')
    plt.figure()
    plt.plot(y1,'bo-',y1true,'go-',yhatr2,'r.-')
    plt.title('rbf kernel: true (green), noisy (blue) and estimated (red) '+
              'observations')
    #gp2.plot(y1)


def example2(m=100, scale=0.01, stride=2):
    #m,k = 100,1
    upper = 6
    xs1 = np.linspace(1,upper,m)[:,np.newaxis]
    y1true = np.sum(np.sin(xs1**2),1)[:,np.newaxis]/xs1
    y1 = y1true + 0.05*np.random.randn(m,1)
    
    ridgecoeff = 1e-10
    #stride = 2 #use only some points as training points, e.g. 2 means every 2nd
    gp1 = GaussProcess(xs1[::stride,:],y1[::stride,:], kernel=kernel_euclid,
                       ridgecoeff=ridgecoeff)
    yhatr1 = gp1.predict(xs1)
    plt.figure()
    plt.plot(y1true, y1,'bo',y1true, yhatr1,'r.')
    plt.title('euclid kernel: true versus noisy (blue) and estimated (red) observations')
    plt.figure()
    plt.plot(y1,'bo-',y1true,'go-',yhatr1,'r.-')
    plt.title('euclid kernel: true (green), noisy (blue) and estimated (red) '+
              'observations')

    gp2 = GaussProcess(xs1[::stride,:],y1[::stride,:], kernel=kernel_rbf,
                       scale=scale, ridgecoeff=1e-2)
    yhatr2 = gp2.predict(xs1)
    plt.figure()
    plt.plot(y1true, y1,'bo',y1true, yhatr2,'r.')
    plt.title('rbf kernel: true versus noisy (blue) and estimated (red) observations')
    plt.figure()
    plt.plot(y1,'bo-',y1true,'go-',yhatr2,'r.-')
    plt.title('rbf kernel: true (green), noisy (blue) and estimated (red) '+
              'observations')
    #gp2.plot(y1)
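
# A rough sketch (not in the original post) related to the Todo item "automatic
# selection or proposal of smoothing parameters": fit on every second point,
# score on the held-out points, and keep the scale with the smallest mean
# squared error. The name `select_scale_holdout` and the candidate grid are
# made up for illustration; one could call e.g. select_scale_holdout(xs1, y1)
# inside example2 to get a proposed scale.
def select_scale_holdout(x, y, scales=(0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0),
                         ridgecoeff=1e-2):
    best_scale, best_mse = None, np.inf
    for s in scales:
        gp = GaussProcess(x[::2, :], y[::2, :], kernel=kernel_rbf,
                          scale=s, ridgecoeff=ridgecoeff)
        yhat = gp.predict(x[1::2, :])        # predict the held-out points
        mse = np.mean((y[1::2, :] - yhat)**2)
        if mse < best_mse:
            best_scale, best_mse = s, mse
    return best_scale, best_mse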

example2()
#example2(m=1000, scale=0.01)
#example2(m=100, scale=0.5)   # oversmoothing
#example2(m=2000, scale=0.005) # this looks good for rbf, zoom in
#example2(m=200, scale=0.01,stride=4)  
example1()
plt.show()
#plt_closeall()   # use this to close the open figure windows

