[Numpy-discussion] Objected-oriented SIMD API for Numpy

Pauli Virtanen pav+sp@iki...
Wed Oct 21 03:24:09 CDT 2009


Wed, 21 Oct 2009 16:48:22 +0900, Mathieu Blondel wrote:
[clip]
> My original idea was to write the code in C with Intel/Alvitec/Neon
> intrinsics and have this code binded to be able to call it from Python.
> So the SIMD code would be compiled already, ready to be called from
> Python. Like you said, there's a risk that the overhead of calling
> Python is bigger than the benefit of using SIMD instructions. If it's
> worth trying out, an experiment can be made with Vector4f to see if it's
> even worth continuing with other types.

The overhead is quickly checked for multiplication with numpy arrays of 
varying size, without SSE:

Overhead per iteration (ms): 1.6264549101
Time per array element (ms): 0.000936947636565
Cross-over point:            1735.90801303

#----------------------------------------------
import numpy as np
from scipy import optimize
import time
import matplotlib.pyplot as plt

def main():
    data = []

    for n in np.unique(np.logspace(0, 5, 20).astype(int)):
        print n
        m = 100
        reps = 5
        times = []
        for rep in xrange(reps):
            x = np.zeros((n,), dtype=np.float_)
            start = time.time()
            #------------------
            for k in xrange(m):
                x *= 1.1
            #------------------
            end = time.time()
            times.append(end - start)
        t = min(times)
        data.append((n, t))

    data = np.array(data)

    def model(z):
        n, t = data.T
        overhead, per_elem = z
        return np.log10(t) - np.log10(overhead + per_elem * n)

    z, ier = optimize.leastsq(model, [1., 1.])
    overhead, per_elem = z

    print ""
    print "Overhead per iteration (ms):", overhead*1e3
    print "Time per array element (ms):", per_elem*1e3
    print "Cross-over point:           ", overhead/per_elem

    n = np.logspace(0, 5, 500)
    plt.loglog(data[:,0], data[:,0]/data[:,1], 'x',
               label=r'measured')
    plt.loglog(n, n/(overhead + per_elem*n), 'k-',
               label=r'fit to $t = a + b n$')
    plt.xlabel(r'$n$')
    plt.ylabel(r'ops/second')
    plt.grid(1)
    plt.legend()
    plt.show()

if __name__ == "__main__":
    main()



More information about the NumPy-Discussion mailing list