Thank you so much for the suggestion, Paulo! Selecting 2D points in a list
by creating an array 'mask' of booleans and then using arr[mask, :] is indeed
really fast compared to using numpy.apply_along_axis(), in my case (simple
"larger than" tests on individual coordinates).
I had not realized that you could do "arr[mask, :]": this works great!
EOL
PS: here are the speed tests I've done on the selection of 2D points from a
list, with the following results:
filter0: 107.2 s
filter1: 0.3 s
filter2: 9.7 s
filter3: 0.6 s
(each figure is the total time for 3 calls on 1,000,000 random points) obtained with the following script:
#!/usr/bin/env python
import numpy
def filter0(points):
    """
    Returns only those points that match the filter.

    Baseline implementation: a Python-level predicate is applied to each
    row through numpy.apply_along_axis(), and the resulting boolean array
    is used to select rows of 'points'.

    A point p matches when p[0] > 0.5 and p[1] < 0.5 (both strict).

    points -- NumPy array indexed as points[row, coordinate].
    """
    # Named 'matches' rather than 'filter' so that the builtin filter()
    # is not shadowed inside this function.
    def matches(p):
        return (p[0] > 0.5) and (p[1] < 0.5)

    # numpy.apply_along_axis() refuses to iterate over zero rows, so an
    # empty input is answered directly with an empty copy of the same
    # shape and dtype (boolean-mask selection also returns a copy).
    if len(points) == 0:
        return points[:0].copy()

    return points[numpy.apply_along_axis(matches, axis=1, arr=points)]
def filter1(points):
    """
    Returns only those points that match the filter.

    Vectorized implementation: one boolean mask is built per coordinate
    test, the two masks are combined element-wise, and the combined mask
    selects the matching rows in a single indexing operation.

    A point p matches when p[0] > 0.5 and p[1] < 0.5 (both strict).
    """
    first_ok = points[:, 0] > 0.5
    second_ok = points[:, 1] < 0.5
    # '&' is the element-wise AND of the two boolean arrays.
    return points[first_ok & second_ok, :]
def filter2(points):
    """
    Returns only those points that match the filter.

    Pure-Python implementation: the rows are tested one by one in a list
    comprehension and the survivors are packed back into a NumPy array.

    A point p matches when p[0] > 0.5 and p[1] < 0.5 (both strict).
    """
    selected = [p for p in points if (p[0] > 0.5) and (p[1] < 0.5)]

    if not selected:
        # numpy.array([]) would be 1-dimensional (shape (0,)); return an
        # empty array that keeps the per-point shape and dtype of the
        # input instead, so that the result has the same layout as the
        # one given by the mask-based filters.
        return numpy.asarray(points)[:0].copy()

    return numpy.array(selected)
def filter3(points):
    """
    Returns only those points that match the filter.

    Two-pass implementation: rows are first narrowed down with the test
    on the first coordinate (p[0] > 0.5), and the test on the second
    coordinate (p[1] < 0.5) is then evaluated only on the rows that
    survived the first pass.
    """
    first_pass = points[points[:, 0] > 0.5, :]
    return first_pass[first_pass[:, 1] < 0.5, :]
if __name__ == '__main__':
    import timeit

    # We generate many random points:
    NUM_PTS = 1000000
    points = numpy.random.random((NUM_PTS, 2))

    # We make sure that all the filters give the same result.  The check
    # runs on a small slice only, so that the slow filters do not add a
    # noticeable delay before the timings start.
    sample = points[:1000]
    expected = filter1(sample)
    for other in (filter0, filter2, filter3):
        if not numpy.array_equal(other(sample), expected):
            raise RuntimeError("%s and filter1 disagree" % other.__name__)

    # Each filter is timed on the full set of points; the figure printed
    # is the total time for the 3 calls made by timeit.
    for filter_num in range(4):
        func_name = "filter%d" % filter_num
        t = timeit.Timer("%s(points)" % func_name,
                         "from __main__ import %s, points" % func_name)
        # print() with a single parenthesized argument gives the same
        # output under the Python 2 print statement and the Python 3
        # print function.
        print("%s: %.1f s" % (func_name, t.timeit(number=3)))
