[Numpy-svn] r3316 - trunk/numpy/lib

numpy-svn at scipy.org numpy-svn at scipy.org
Thu Oct 12 06:14:58 CDT 2006


Author: rc
Date: 2006-10-12 06:14:55 -0500 (Thu, 12 Oct 2006)
New Revision: 3316

Modified:
   trunk/numpy/lib/arraysetops.py
Log:
modernized to use new numpy features, speed-up of unique1d, doc update

Modified: trunk/numpy/lib/arraysetops.py
===================================================================
--- trunk/numpy/lib/arraysetops.py	2006-10-12 06:18:04 UTC (rev 3315)
+++ trunk/numpy/lib/arraysetops.py	2006-10-12 11:14:55 UTC (rev 3316)
@@ -11,140 +11,160 @@
   union1d,
   setdiff1d
 
-Concerning the speed, test_unique1d_speed() reveals that up to 10000000
-elements unique1d() is about 10 times faster than the standard dictionary-based
-numpy.unique().
+All functions work best with integer numerical arrays on input. For floating
+point arrays, innacurate results may appear due to usual round-off and
+floating point comparison issues.
 
-Limitations: Except unique1d, union1d and intersect1d_nu, all functions expect
-inputs with unique elements. Speed could be gained in some operations by an
-implementaion of sort(), that can provide directly the permutation vectors,
-avoiding thus calls to argsort().
+Except unique1d, union1d and intersect1d_nu, all functions expect inputs with
+unique elements. Speed could be gained in some operations by an implementaion
+of sort(), that can provide directly the permutation vectors, avoiding thus
+calls to argsort().
 
+Run test_unique1d_speed() to compare numpy.unique1d() with numpy.unique() - it
+should be the same.
+
 To do: Optionally return indices analogously to unique1d for all functions.
 
 Author: Robert Cimrman
+
+created:       01.11.2005
+last revision: 12.10.2006
 """
-__all__ = ['unique1d', 'intersect1d', 'intersect1d_nu', 'setxor1d',
+__all__ = ['ediff1d', 'unique1d', 'intersect1d', 'intersect1d_nu', 'setxor1d',
            'setmember1d', 'union1d', 'setdiff1d']
 
 
-# 02.11.2005, c
 import time
-import numpy
+import numpy as nm
 
-##
-# 03.11.2005, c
 def ediff1d(ary, to_end = None, to_begin = None):
-    """Array difference with prefixed and/or appended value."""
-    ary = numpy.asarray(ary)
+    """Array difference with prefixed and/or appended value.
+
+    See also: unique1d, intersect1d, intersect1d_nu, setxor1d,
+    setmember1d, union1d, setdiff1d
+    """
+    ary = nm.asarray(ary).flat
     ed = ary[1:] - ary[:-1]
     if to_begin is not None:
-        ed = numpy.insert(ed, 0, to_begin)
-    if to_end is not None:
-        ed = numpy.append(ed, to_end)
+        if to_end is not None:
+            ed = nm.r_[to_begin, ed, to_end]
+        else:
+            ed = nm.insert(ed, 0, to_begin)
+    elif to_end is not None:
+        ed = nm.append(ed, to_end)
         
     return ed
 
-##
-# 01.11.2005, c
-# 02.11.2005
 def unique1d(ar1, return_index=False):
     """Unique elements of 1D array. When return_index is True, return
     also the indices indx such that ar1.flat[indx] is the resulting
     array of unique elements.
     
+    See also: ediff1d, intersect1d, intersect1d_nu, setxor1d,
+    setmember1d, union1d, setdiff1d
     """
-    ar = numpy.asarray(ar1).ravel()
+    ar = nm.asarray(ar1).flatten()
     if ar.size == 0:
-        if return_index: return numpy.empty(0, numpy.bool), ar
+        if return_index: return nm.empty(0, nm.bool), ar
         else: return ar
     
     if return_index:
         perm = ar.argsort()
-        aux = ar.take(perm)
-        flag = ediff1d(aux, 1) != 0
-        return perm.compress(flag), aux.compress(flag)
+        aux = ar[perm]
+        flag = nm.concatenate( ([True], aux[1:] != aux[:-1]) )
+        return perm[flag], aux[flag]
     
     else:
         ar.sort()
-        return ar.compress(ediff1d(ar, 1) != 0)
+        flag = nm.concatenate( ([True], ar[1:] != ar[:-1]) )
+        return ar[flag]
 
-##
-# 01.11.2005, c
 def intersect1d( ar1, ar2 ):
-    """Intersection of 1D arrays with unique elements."""
-    aux = numpy.concatenate((ar1,ar2))
+    """Intersection of 1D arrays with unique elements.
+
+    See also: ediff1d, unique1d, intersect1d_nu, setxor1d,
+    setmember1d, union1d, setdiff1d
+    """
+    aux = nm.concatenate((ar1,ar2))
     aux.sort()
-    return aux.compress( (aux[1:] - aux[:-1]) == 0)
+    return aux[aux[1:] == aux[:-1]]
 
-##
-# 01.11.2005, c
 def intersect1d_nu( ar1, ar2 ):
-    """Intersection of 1D arrays with any elements."""
+    """Intersection of 1D arrays with any elements.
+
+    See also: ediff1d, unique1d, intersect1d, setxor1d,
+    setmember1d, union1d, setdiff1d
+    """
     # Might be faster then unique1d( intersect1d( ar1, ar2 ) )?
-    aux = numpy.concatenate((unique1d(ar1), unique1d(ar2)))
+    aux = nm.concatenate((unique1d(ar1), unique1d(ar2)))
     aux.sort()
-    return aux.compress( (aux[1:] - aux[:-1]) == 0)
+    return aux[aux[1:] == aux[:-1]]
 
-##
-# 01.11.2005, c
 def setxor1d( ar1, ar2 ):
-    """Set exclusive-or of 1D arrays with unique elements."""
-    aux = numpy.concatenate((ar1, ar2))
+    """Set exclusive-or of 1D arrays with unique elements.
+
+    See also: ediff1d, unique1d, intersect1d, intersect1d_nu,
+    setmember1d, union1d, setdiff1d
+    """
+    aux = nm.concatenate((ar1, ar2))
     if aux.size == 0:
         return aux
     
     aux.sort()
-    flag = ediff1d(aux, to_end = 1, to_begin = 1) == 0
-    flag2 = ediff1d(flag) == 0
-    return aux.compress(flag2)
+#    flag = ediff1d( aux, to_end = 1, to_begin = 1 ) == 0
+    flag = nm.concatenate( ([True], aux[1:] != aux[:-1], [True] ) )
+#    flag2 = ediff1d( flag ) == 0
+    flag2 = flag[1:] == flag[:-1]
+    return aux[flag2]
 
-##
-# 03.11.2005, c
-# 05.01.2006
 def setmember1d( ar1, ar2 ):
     """Return an array of shape of ar1 containing 1 where the elements of
-    ar1 are in ar2 and 0 otherwise."""
-    concat = numpy.concatenate
-    zlike = numpy.zeros_like
-    ar = concat( (ar1, ar2 ) )
-    tt = concat( (zlike( ar1 ),
-                  zlike( ar2 ) + 1) )
+    ar1 are in ar2 and 0 otherwise.
+
+    See also: ediff1d, unique1d, intersect1d, intersect1d_nu, setxor1d,
+    union1d, setdiff1d
+    """
+    zlike = nm.zeros_like
+    ar = nm.concatenate( (ar1, ar2 ) )
+    tt = nm.concatenate( (zlike( ar1 ), zlike( ar2 ) + 1) )
     perm = ar.argsort()
-    aux = ar.take(perm)
-    aux2 = tt.take(perm)
-    flag = ediff1d( aux, 1 ) == 0
+    aux = ar[perm]
+    aux2 = tt[perm]
+#    flag = ediff1d( aux, 1 ) == 0
+    flag = nm.concatenate( (aux[1:] == aux[:-1], [False] ) )
 
-    ii = numpy.where( flag * aux2 )[0]
+    ii = nm.where( flag * aux2 )[0]
     aux = perm[ii+1]
     perm[ii+1] = perm[ii]
     perm[ii] = aux
 
     indx = perm.argsort()[:len( ar1 )]
 
-    return flag.take( indx )
+    return flag[indx]
 
-##
-# 03.11.2005, c
 def union1d( ar1, ar2 ):
-    """Union of 1D arrays with unique elements."""
-    return unique1d( numpy.concatenate( (ar1, ar2) ) )
+    """Union of 1D arrays with unique elements.
 
-##
-# 03.11.2005, c
+    See also: ediff1d, unique1d, intersect1d, intersect1d_nu, setxor1d,
+    setmember1d, setdiff1d
+    """
+    return unique1d( nm.concatenate( (ar1, ar2) ) )
+
 def setdiff1d( ar1, ar2 ):
-    """Set difference of 1D arrays with unique elements."""
+    """Set difference of 1D arrays with unique elements.
+
+    See also: ediff1d, unique1d, intersect1d, intersect1d_nu, setxor1d,
+    setmember1d, union1d
+    """
     aux = setmember1d(ar1,ar2)
     if aux.size == 0:
         return aux
     else:
-        return numpy.asarray(ar1).compress(aux == 0)
+        return nm.asarray(ar1)[aux == 0]
 
-##
-# 02.11.2005, c
 def test_unique1d_speed( plot_results = False ):
-#    exponents = numpy.linspace( 2, 7, 9 )
-    exponents = numpy.linspace( 2, 6, 9 )
+#    exponents = nm.linspace( 2, 7, 9 )
+    exponents = nm.linspace( 2, 7, 9 )
     ratios = []
     nItems = []
     dt1s = []
@@ -153,15 +173,15 @@
 
         nItem = 10 ** ii
         print 'using %d items:' % nItem
-        a = numpy.fix( nItem / 10 * numpy.random.random( nItem ) )
+        a = nm.fix( nItem / 10 * nm.random.random( nItem ) )
 
-        print 'dictionary:'
+        print 'unique:'
         tt = time.clock()
-        b = numpy.unique( a )
+        b = nm.unique( a )
         dt1 = time.clock() - tt
         print dt1
 
-        print 'array:'
+        print 'unique1d:'
         tt = time.clock()
         c = unique1d( a )
         dt2 = time.clock() - tt
@@ -180,7 +200,7 @@
         dt1s.append( dt1 )
         dt2s.append( dt2 )
 
-        assert numpy.alltrue( b == c )
+        assert nm.alltrue( b == c )
 
 
     print nItems
@@ -195,7 +215,7 @@
             pylab.figure( fig )
             fun( nItems, dt1s, 'g-o', linewidth = 2, markersize = 8 )
             fun( nItems, dt2s, 'b-x', linewidth = 2, markersize = 8 )
-            pylab.legend( ('dictionary', 'array' ) )
+            pylab.legend( ('unique', 'unique1d' ) )
             pylab.xlabel( 'nItem' )
             pylab.ylabel( 'time [s]' )
 



More information about the Numpy-svn mailing list