[SciPy-User] Logistic regression using SciPy

Fg Nu fgnu32@yahoo....
Sun Dec 9 22:01:30 CST 2012

```
I am trying to code up logistic regression in Python using the SciPy "fmin_bfgs" function, but am running into some issues. I wrote functions for the logistic (sigmoid) transformation  function, and the cost function, and those work fine (I have used the optimized values of the parameter vector found via canned software to test the functions, and those match up). I am not that sure of my implementation of the gradient function, but it looks reasonable.

Here is the code:

#==================================================

# purpose: logistic regression
import numpy as np
import scipy as sp
import scipy.optimize

import matplotlib as mpl
import os

# prepare the data
vY = data[:, 0]
mX = data[:, 1:]
intercept = np.ones(mX.shape[0]).reshape(mX.shape[0], 1)
mX = np.concatenate((intercept, mX), axis = 1)
iK = mX.shape[1]
iN = mX.shape[0]

# logistic transformation
def logit(mX, vBeta):
return((1/(1.0 + np.exp(-np.dot(mX, vBeta)))))

# test function call
vBeta0 = np.array([-.10296645, -.0332327, -.01209484, .44626211, .92554137, .53973828,
1.7993371, .7148045  ])
logit(mX, vBeta0)

# cost function
def logLikelihoodLogit(vBeta, mX, vY):
return(-(np.sum(vY*np.log(logit(mX, vBeta)) + (1-vY)*(np.log(1-logit(mX, vBeta))))))
logLikelihoodLogit(vBeta0, mX, vY) # test function call

def likelihoodScore(vBeta, mX, vY):
return(np.dot(mX.T,
((np.dot(mX, vBeta) - vY)/
np.dot(mX, vBeta)).reshape(iN, 1)).reshape(iK, 1))

likelihoodScore(vBeta0, mX, vY).shape # test function call

# optimize the function (without gradient)
optimLogit = scipy.optimize.fmin_bfgs(logLikelihoodLogit,
x0 = np.array([-.1, -.03, -.01, .44, .92, .53,
1.8, .71]),
args = (mX, vY), gtol = 1e-3)

# optimize the function (with gradient)
optimLogit = scipy.optimize.fmin_bfgs(logLikelihoodLogit,
x0 = np.array([-.1, -.03, -.01, .44, .92, .53,
1.8, .71]), fprime = likelihoodScore,
args = (mX, vY), gtol = 1e-3)
#=====================================================

* The first optimization (without gradient)  ends with a whole lot of stuff about division by zero.

* The second optimization (with gradient) ends with a matrices not aligned error, which probably means I have got the way the gradient is to be returned wrong.

Any help with this is appreciated. If anyone wants to try this, the data is included below.

low,age,lwt,race,smoke,ptl,ht,ui
0,19,182,2,0,0,0,1
0,33,155,3,0,0,0,0
0,20,105,1,1,0,0,0
0,21,108,1,1,0,0,1
0,18,107,1,1,0,0,1
0,21,124,3,0,0,0,0
0,22,118,1,0,0,0,0
0,17,103,3,0,0,0,0
0,29,123,1,1,0,0,0
0,26,113,1,1,0,0,0
0,19,95,3,0,0,0,0
0,19,150,3,0,0,0,0
0,22,95,3,0,0,1,0
0,30,107,3,0,1,0,1
0,18,100,1,1,0,0,0
0,18,100,1,1,0,0,0
0,15,98,2,0,0,0,0
0,25,118,1,1,0,0,0
0,20,120,3,0,0,0,1
0,28,120,1,1,0,0,0
0,32,121,3,0,0,0,0
0,31,100,1,0,0,0,1
0,36,202,1,0,0,0,0
0,28,120,3,0,0,0,0
0,25,120,3,0,0,0,1
0,28,167,1,0,0,0,0
0,17,122,1,1,0,0,0
0,29,150,1,0,0,0,0
0,26,168,2,1,0,0,0
0,17,113,2,0,0,0,0
0,17,113,2,0,0,0,0
0,24,90,1,1,1,0,0
0,35,121,2,1,1,0,0
0,25,155,1,0,0,0,0
0,25,125,2,0,0,0,0
0,29,140,1,1,0,0,0
0,19,138,1,1,0,0,0
0,27,124,1,1,0,0,0
0,31,215,1,1,0,0,0
0,33,109,1,1,0,0,0
0,21,185,2,1,0,0,0
0,19,189,1,0,0,0,0
0,23,130,2,0,0,0,0
0,21,160,1,0,0,0,0
0,18,90,1,1,0,0,1
0,18,90,1,1,0,0,1
0,32,132,1,0,0,0,0
0,19,132,3,0,0,0,0
0,24,115,1,0,0,0,0
0,22,85,3,1,0,0,0
0,22,120,1,0,0,1,0
0,23,128,3,0,0,0,0
0,22,130,1,1,0,0,0
0,30,95,1,1,0,0,0
0,19,115,3,0,0,0,0
0,16,110,3,0,0,0,0
0,21,110,3,1,0,0,1
0,30,153,3,0,0,0,0
0,20,103,3,0,0,0,0
0,17,119,3,0,0,0,0
0,17,119,3,0,0,0,0
0,23,119,3,0,0,0,0
0,24,110,3,0,0,0,0
0,28,140,1,0,0,0,0
0,26,133,3,1,2,0,0
0,20,169,3,0,1,0,1
0,24,115,3,0,0,0,0
0,28,250,3,1,0,0,0
0,20,141,1,0,2,0,1
0,22,158,2,0,1,0,0
0,22,112,1,1,2,0,0
0,31,150,3,1,0,0,0
0,23,115,3,1,0,0,0
0,16,112,2,0,0,0,0
0,16,135,1,1,0,0,0
0,18,229,2,0,0,0,0
0,25,140,1,0,0,0,0
0,32,134,1,1,1,0,0
0,20,121,2,1,0,0,0
0,23,190,1,0,0,0,0
0,22,131,1,0,0,0,0
0,32,170,1,0,0,0,0
0,30,110,3,0,0,0,0
0,20,127,3,0,0,0,0
0,23,123,3,0,0,0,0
0,17,120,3,1,0,0,0
0,19,105,3,0,0,0,0
0,23,130,1,0,0,0,0
0,36,175,1,0,0,0,0
0,22,125,1,0,0,0,0
0,24,133,1,0,0,0,0
0,21,134,3,0,0,0,0
0,19,235,1,1,0,1,0
0,25,95,1,1,3,0,1
0,16,135,1,1,0,0,0
0,29,135,1,0,0,0,0
0,29,154,1,0,0,0,0
0,19,147,1,1,0,0,0
0,19,147,1,1,0,0,0
0,30,137,1,0,0,0,0
0,24,110,1,0,0,0,0
0,19,184,1,1,0,1,0
0,24,110,3,0,1,0,0
0,23,110,1,0,0,0,0
0,20,120,3,0,0,0,0
0,25,241,2,0,0,1,0
0,30,112,1,0,0,0,0
0,22,169,1,0,0,0,0
0,18,120,1,1,0,0,0
0,16,170,2,0,0,0,0
0,32,186,1,0,0,0,0
0,18,120,3,0,0,0,0
0,29,130,1,1,0,0,0
0,33,117,1,0,0,0,1
0,20,170,1,1,0,0,0
0,28,134,3,0,0,0,0
0,14,135,1,0,0,0,0
0,28,130,3,0,0,0,0
0,25,120,1,0,0,0,0
0,16,95,3,0,0,0,0
0,20,158,1,0,0,0,0
0,26,160,3,0,0,0,0
0,21,115,1,0,0,0,0
0,22,129,1,0,0,0,0
0,25,130,1,0,0,0,0
0,31,120,1,0,0,0,0
0,35,170,1,0,1,0,0
0,19,120,1,1,0,0,0
0,24,116,1,0,0,0,0
0,45,123,1,0,0,0,0
1,28,120,3,1,1,0,1
1,29,130,1,0,0,0,1
1,34,187,2,1,0,1,0
1,25,105,3,0,1,1,0
1,25,85,3,0,0,0,1
1,27,150,3,0,0,0,0
1,23,97,3,0,0,0,1
1,24,128,2,0,1,0,0
1,24,132,3,0,0,1,0
1,21,165,1,1,0,1,0
1,32,105,1,1,0,0,0
1,19,91,1,1,2,0,1
1,25,115,3,0,0,0,0
1,16,130,3,0,0,0,0
1,25,92,1,1,0,0,0
1,20,150,1,1,0,0,0
1,21,200,2,0,0,0,1
1,24,155,1,1,1,0,0
1,21,103,3,0,0,0,0
1,20,125,3,0,0,0,1
1,25,89,3,0,2,0,0
1,19,102,1,0,0,0,0
1,19,112,1,1,0,0,1
1,26,117,1,1,1,0,0
1,24,138,1,0,0,0,0
1,17,130,3,1,1,0,1
1,20,120,2,1,0,0,0
1,22,130,1,1,1,0,1
1,27,130,2,0,0,0,1
1,20,80,3,1,0,0,1
1,17,110,1,1,0,0,0
1,25,105,3,0,1,0,0
1,20,109,3,0,0,0,0
1,18,148,3,0,0,0,0
1,18,110,2,1,1,0,0
1,20,121,1,1,1,0,1
1,21,100,3,0,1,0,0
1,26,96,3,0,0,0,0
1,31,102,1,1,1,0,0
1,15,110,1,0,0,0,0
1,23,187,2,1,0,0,0
1,20,122,2,1,0,0,0
1,24,105,2,1,0,0,0
1,15,115,3,0,0,0,1
1,23,120,3,0,0,0,0
1,30,142,1,1,1,0,0
1,22,130,1,1,0,0,0
1,17,120,1,1,0,0,0
1,23,110,1,1,1,0,0
1,17,120,2,0,0,0,0
1,26,154,3,0,1,1,0
1,20,106,3,0,0,0,0
1,26,190,1,1,0,0,0
1,14,101,3,1,1,0,0
1,28,95,1,1,0,0,0
1,14,100,3,0,0,0,0
1,23,94,3,1,0,0,0
1,17,142,2,0,0,1,0
1,21,130,1,1,0,1,0

Thanks.

```