[IPython-User] optimize parallel optimizations code?

Skipper Seabold jsseabold@gmail....
Sun Nov 14 23:07:18 CST 2010


I have a class that does some optimizations multiple times using
different starting values in order to check for convergence problems
due to some "bad" data.  My code was taking about an hour, then I
added the multiple calls to scipy.optimize, and it went up to about 4
hours.  I am new to this, but I tried to parallelize this code,
thinking it could speed things up.  It looks like it's much actually
slower.  Is there anything I can do to optimize this or remove some
overhead?  A test case is inlined and attached.


In [3]: timeit mod.fit()
10 loops, best of 3: 39.2 ms per loop

In [4]: timeit pmod.fit()
1 loops, best of 3: 255 ms per loop


import numpy as np
from numpy import log, dot, array
from scipy import optimize
from IPython.kernel import client


def pobj(params, Y,X):
    """
    Normal log-likelihood
    """
    nobs2 = len(X)/2.
    resid = Y - dot(X,params[:,None])
    return -(- nobs2*np.log(2*np.pi)-nobs2*np.log(1/(2*nobs2) *\
        dot(resid.T, resid)) - nobs2)


class PModel(object):
    def __init__(self, Y,X):
        self.X = X
        self.Y = Y

    def fit(self, start_params=[0,0], eta=1e-8):
        start_params = np.asarray(start_params)
        Y = self.Y
        X = self.X

        start_params1 = start_params.copy()
        start_params2 = start_params + 5
        start_params3 = start_params - 5
        start_params4 = np.random.randint(-10,10,size=2)
        mec.push(dict(start_params=start_params1), targets=0)
        mec.push(dict(start_params=start_params2), targets=1)
        mec.push(dict(start_params=start_params3), targets=2)
        mec.push(dict(start_params=start_params4), targets=3)
        mec.execute("""
params = start_params
ret = optimize.fmin_tnc(obj, params, approx_grad=True,
                             eta=eta, maxfun=3000, args=(Y,X), messages=0)""")
        ret1 = mec.pull('ret', targets=0)[0]
        ret2 = mec.pull('ret', targets=1)[0]
        ret3 = mec.pull('ret', targets=2)[0]
        ret4 = mec.pull('ret', targets=3)[0]
        self.results = ret1
        # check results
        try:
            np.testing.assert_almost_equal(ret1[0], ret2[0], 4)
            np.testing.assert_almost_equal(ret1[0], ret3[0], 4)
            np.testing.assert_almost_equal(ret1[0], ret4[0], 4)
            self.converged = str(ret1[-1])+": "+optimize.tnc.RCSTRINGS[ret1[-1]]
        except:
            self.converged = "9: Results sensitive to starting values"

class Model(object):
    def __init__(self, Y,X):
        self.X = X
        self.Y = Y

    def obj(self, params, Y, X):
        """
        Normal log-likelihood
        """
        X = self.X
        Y = self.Y
        nobs2 = len(X)/2.
        resid = Y - np.dot(X,params[:,None])
        return - nobs2*np.log(2*np.pi)-nobs2*np.log(1/(2*nobs2) *\
                np.dot(resid.T,resid)) - nobs2

    def fit(self, start_params=[0,0], eta=1e-8):
        start_params = np.asarray(start_params)
        obj = self.obj
        Y = self.Y
        X = self.X

        start_params1 = start_params.copy()
        start_params2 = start_params + 5
        start_params3 = start_params - 5
        start_params4 = np.random.randint(-10,10,size=2)
        ret1 = optimize.fmin_tnc(obj, start_params1, approx_grad=True,
                             eta=eta, maxfun=3000, args=(Y,X), messages=0)
        ret2 = optimize.fmin_tnc(obj, start_params2, approx_grad=True,
                             eta=eta, maxfun=3000, args=(Y,X), messages=0)
        ret3 = optimize.fmin_tnc(obj, start_params3, approx_grad=True,
                             eta=eta, maxfun=3000, args=(Y,X), messages=0)
        ret4 = optimize.fmin_tnc(obj, start_params4, approx_grad=True,
                             eta=eta, maxfun=3000, args=(Y,X), messages=0)

        self.results = ret1
        # check results
        try:
            np.testing.assert_almost_equal(ret1[0], ret2[0], 4)
            np.testing.assert_almost_equal(ret1[0], ret3[0], 4)
            np.testing.assert_almost_equal(ret1[0], ret4[0], 4)
            self.converged = str(ret1[-1])+": "+optimize.tnc.RCSTRINGS[ret1[-1]]
        except:
            self.converged = "9: Results sensitive to starting values"


if __name__ == "__main__":
    np.random.seed(12345)
    X = np.random.uniform(0,10,size=(10000,2))
    beta = np.array([3.5,-3.5])
    Y = np.dot(X,beta[:,None]) + np.random.normal(size=(10000,1))

    eta = 1e-8

    mec = client.MultiEngineClient()
    mec.push_function(dict(obj=pobj))
    mec.push(dict(np=np, optimize=optimize, Y=Y,X=X,eta=eta,dot=dot))
    pmod = PModel(Y,X)
    pmod.fit()

    mod = Model(Y,X)
    mod.fit()
-------------- next part --------------
A non-text attachment was scrubbed...
Name: parallel_model.py
Type: text/x-python
Size: 4329 bytes
Desc: not available
Url : http://mail.scipy.org/pipermail/ipython-user/attachments/20101115/d6c6aab6/attachment.py 


More information about the IPython-User mailing list