[IPython-User] optimize parallel optimizations code?

Brian Granger ellisonbg@gmail....
Tue Nov 16 16:16:50 CST 2010


Skipper,

I looked through your code and have a few questions and thoughts:

* What data are you sending back and forth between the client and the
engines.  It looks like that is going on here:

       mec.push(dict(start_params=start_params1), targets=0)
       mec.push(dict(start_params=start_params2), targets=1)
       mec.push(dict(start_params=start_params3), targets=2)
       mec.push(dict(start_params=start_params4), targets=3)
       mec.execute("""
params = start_params
ret = optimize.fmin_tnc(obj, params, approx_grad=True,
                            eta=eta, maxfun=3000, args=(Y,X),
messages=0)""")
       ret1 = mec.pull('ret', targets=0)[0]
       ret2 = mec.pull('ret', targets=1)[0]
       ret3 = mec.pull('ret', targets=2)[0]
       ret4 = mec.pull('ret', targets=3)[0]

and:

   mec.push(dict(np=np, optimize=optimize, Y=Y,X=X,eta=eta,dot=dot))

One way of getting a sense of the data movement overhead is to run the code
with just the data movement and no computation.

* Can you time the parallel and serial code to get a better sense of where
it is taking a long time?

* Can you estimate the amount of time spent in the part of the code you are
parallelizing?  Or, how much time is spent in the part of the code you
didn't parallelize?

Let's start there.

Cheers,

Brian




On Sun, Nov 14, 2010 at 9:07 PM, Skipper Seabold <jsseabold@gmail.com>wrote:

> I have a class that does some optimizations multiple times using
> different starting values in order to check for convergence problems
> due to some "bad" data.  My code was taking about an hour, then I
> added the multiple calls to scipy.optimize, and it went up to about 4
> hours.  I am new to this, but I tried to parallelize this code,
> thinking it could speed things up.  It looks like it's actually much
> slower.  Is there anything I can do to optimize this or remove some
> overhead?  A test case is inlined and attached.
>
>
> In [3]: timeit mod.fit()
> 10 loops, best of 3: 39.2 ms per loop
>
> In [4]: timeit pmod.fit()
> 1 loops, best of 3: 255 ms per loop
>
>
> import numpy as np
> from numpy import log, dot, array
> from scipy import optimize
> from IPython.kernel import client
>
>
> def pobj(params, Y,X):
>    """
>    Normal log-likelihood
>    """
>    nobs2 = len(X)/2.
>    resid = Y - dot(X,params[:,None])
>    return -(- nobs2*np.log(2*np.pi)-nobs2*np.log(1/(2*nobs2) *\
>        dot(resid.T, resid)) - nobs2)
>
>
> class PModel(object):
>    def __init__(self, Y,X):
>        self.X = X
>        self.Y = Y
>
>    def fit(self, start_params=[0,0], eta=1e-8):
>        start_params = np.asarray(start_params)
>        Y = self.Y
>        X = self.X
>
>        start_params1 = start_params.copy()
>        start_params2 = start_params + 5
>        start_params3 = start_params - 5
>        start_params4 = np.random.randint(-10,10,size=2)
>        mec.push(dict(start_params=start_params1), targets=0)
>        mec.push(dict(start_params=start_params2), targets=1)
>        mec.push(dict(start_params=start_params3), targets=2)
>        mec.push(dict(start_params=start_params4), targets=3)
>        mec.execute("""
> params = start_params
> ret = optimize.fmin_tnc(obj, params, approx_grad=True,
>                             eta=eta, maxfun=3000, args=(Y,X),
> messages=0)""")
>        ret1 = mec.pull('ret', targets=0)[0]
>        ret2 = mec.pull('ret', targets=1)[0]
>        ret3 = mec.pull('ret', targets=2)[0]
>        ret4 = mec.pull('ret', targets=3)[0]
>        self.results = ret1
>        # check results
>        try:
>            np.testing.assert_almost_equal(ret1[0], ret2[0], 4)
>            np.testing.assert_almost_equal(ret1[0], ret3[0], 4)
>            np.testing.assert_almost_equal(ret1[0], ret4[0], 4)
>            self.converged = str(ret1[-1])+":
> "+optimize.tnc.RCSTRINGS[ret1[-1]]
>        except:
>            self.converged = "9: Results sensitive to starting values"
>
> class Model(object):
>    def __init__(self, Y,X):
>        self.X = X
>        self.Y = Y
>
>    def obj(self, params, Y, X):
>        """
>        Normal log-likelihood
>        """
>        X = self.X
>        Y = self.Y
>        nobs2 = len(X)/2.
>        resid = Y - np.dot(X,params[:,None])
>        return - nobs2*np.log(2*np.pi)-nobs2*np.log(1/(2*nobs2) *\
>                np.dot(resid.T,resid)) - nobs2
>
>    def fit(self, start_params=[0,0], eta=1e-8):
>        start_params = np.asarray(start_params)
>        obj = self.obj
>        Y = self.Y
>        X = self.X
>
>        start_params1 = start_params.copy()
>        start_params2 = start_params + 5
>        start_params3 = start_params - 5
>        start_params4 = np.random.randint(-10,10,size=2)
>        ret1 = optimize.fmin_tnc(obj, start_params1, approx_grad=True,
>                             eta=eta, maxfun=3000, args=(Y,X), messages=0)
>        ret2 = optimize.fmin_tnc(obj, start_params2, approx_grad=True,
>                             eta=eta, maxfun=3000, args=(Y,X), messages=0)
>        ret3 = optimize.fmin_tnc(obj, start_params3, approx_grad=True,
>                             eta=eta, maxfun=3000, args=(Y,X), messages=0)
>        ret4 = optimize.fmin_tnc(obj, start_params4, approx_grad=True,
>                             eta=eta, maxfun=3000, args=(Y,X), messages=0)
>
>        self.results = ret1
>        # check results
>        try:
>            np.testing.assert_almost_equal(ret1[0], ret2[0], 4)
>            np.testing.assert_almost_equal(ret1[0], ret3[0], 4)
>            np.testing.assert_almost_equal(ret1[0], ret4[0], 4)
>            self.converged = str(ret1[-1])+":
> "+optimize.tnc.RCSTRINGS[ret1[-1]]
>        except:
>            self.converged = "9: Results sensitive to starting values"
>
>
> if __name__ == "__main__":
>    np.random.seed(12345)
>    X = np.random.uniform(0,10,size=(10000,2))
>    beta = np.array([3.5,-3.5])
>    Y = np.dot(X,beta[:,None]) + np.random.normal(size=(10000,1))
>
>    eta = 1e-8
>
>    mec = client.MultiEngineClient()
>    mec.push_function(dict(obj=pobj))
>    mec.push(dict(np=np, optimize=optimize, Y=Y,X=X,eta=eta,dot=dot))
>    pmod = PModel(Y,X)
>    pmod.fit()
>
>    mod = Model(Y,X)
>    mod.fit()
>
> _______________________________________________
> IPython-User mailing list
> IPython-User@scipy.org
> http://mail.scipy.org/mailman/listinfo/ipython-user
>
>


-- 
Brian E. Granger, Ph.D.
Assistant Professor of Physics
Cal Poly State University, San Luis Obispo
bgranger@calpoly.edu
ellisonbg@gmail.com
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mail.scipy.org/pipermail/ipython-user/attachments/20101116/5012c739/attachment.html 


More information about the IPython-User mailing list