[Numpy-svn] r3498 - trunk/numpy/lib

numpy-svn at scipy.org numpy-svn at scipy.org
Sun Jan 7 23:19:18 CST 2007


Author: rkern
Date: 2007-01-07 23:19:16 -0600 (Sun, 07 Jan 2007)
New Revision: 3498

Modified:
   trunk/numpy/lib/arraysetops.py
Log:
* Fix #410 by using the stable mergesort instead of the unstable default sort in setmember1d().
* Add some more information to the function docstrings.
* Reduced the "See also" sections of the docstrings to point to the module instead of the full list of functions (some of which were not entirely relevant).


Modified: trunk/numpy/lib/arraysetops.py
===================================================================
--- trunk/numpy/lib/arraysetops.py	2007-01-08 04:01:52 UTC (rev 3497)
+++ trunk/numpy/lib/arraysetops.py	2007-01-08 05:19:16 UTC (rev 3498)
@@ -1,5 +1,5 @@
 """
-Set operations for 1D numeric arrays based on sort() function.
+Set operations for 1D numeric arrays based on sorting.
 
 Contains:
   ediff1d,
@@ -11,16 +11,16 @@
   union1d,
   setdiff1d
 
-All functions work best with integer numerical arrays on input
-(e.g. indices). For floating point arrays, innacurate results may appear due to
-usual round-off and floating point comparison issues.
+All functions work best with integer numerical arrays on input (e.g. indices).
+For floating point arrays, inaccurate results may appear due to usual round-off
+and floating point comparison issues.
 
 Except unique1d, union1d and intersect1d_nu, all functions expect inputs with
-unique elements. Speed could be gained in some operations by an implementaion
-of sort(), that can provide directly the permutation vectors, avoiding thus
-calls to argsort().
+unique elements. Speed could be gained in some operations by an implementation
+of sort() that can directly provide the permutation vectors, thus avoiding
+calls to argsort().
 
-Run test_unique1d_speed() to compare performance of numpy.unique1d() and
+Run _test_unique1d_speed() to compare performance of numpy.unique1d() and
 numpy.unique() - it should be the same.
 
 To do: Optionally return indices analogously to unique1d for all functions.
@@ -28,7 +28,7 @@
 Author: Robert Cimrman
 
 created:       01.11.2005
-last revision: 12.10.2006
+last revision: 07.01.2007
 """
 __all__ = ['ediff1d', 'unique1d', 'intersect1d', 'intersect1d_nu', 'setxor1d',
            'setmember1d', 'union1d', 'setdiff1d']
@@ -37,30 +37,60 @@
 import numpy as nm
 
 def ediff1d(ary, to_end = None, to_begin = None):
-    """Array difference with prefixed and/or appended value.
+    """The differences between consecutive elements of an array, possibly with
+    prefixed and/or appended values.
 
-    See also: unique1d, intersect1d, intersect1d_nu, setxor1d,
-    setmember1d, union1d, setdiff1d
+    :Parameters:
+      - `ary` : array
+        This array will be flattened before the difference is taken.
+      - `to_end` : number, optional
+        If provided, this number will be tacked onto the end of the returned
+        differences.
+      - `to_begin` : number, optional
+        If provided, this number will be tacked onto the beginning of the
+        returned differences.
+
+    :Returns:
+      - `ed` : array
+        The differences. Loosely, this will be (ary[1:] - ary[:-1]).
     """
     ary = nm.asarray(ary).flat
     ed = ary[1:] - ary[:-1]
+    arrays = [ed]
     if to_begin is not None:
-        if to_end is not None:
-            ed = nm.r_[to_begin, ed, to_end]
-        else:
-            ed = nm.insert(ed, 0, to_begin)
-    elif to_end is not None:
-        ed = nm.append(ed, to_end)
-        
+        arrays.insert(0, to_begin)
+    if to_end is not None:
+        arrays.append(to_end)
+
+    if len(arrays) != 1:
+        # We'll save ourselves a copy of a potentially large array in the common
+        # case where neither to_begin nor to_end was given.
+        ed = nm.hstack(arrays)
+       
     return ed
 
 def unique1d(ar1, return_index=False):
-    """Unique elements of 1D array. When return_index is True, return
-    also the indices indx such that ar1.flat[indx] is the resulting
-    array of unique elements.
-    
-    See also: ediff1d, intersect1d, intersect1d_nu, setxor1d,
-    setmember1d, union1d, setdiff1d
+    """Find the unique elements of a 1D array.
+
+    Most of the other array set operations operate on the unique arrays
+    generated by this function.
+
+    :Parameters:
+      - `ar1` : array
+        This array will be flattened if it is not already 1D.
+      - `return_index` : bool, optional
+        If True, also return the indices into the flattened ar1 that give the
+        unique array.
+
+    :Returns:
+      - `unique` : array
+        The unique values.
+      - `unique_indices` : int array, optional
+        The indices of the unique values. Only provided if return_index is True.
+
+    :See also:
+      numpy.lib.arraysetops has a number of other functions for performing set
+      operations on arrays.
     """
     ar = nm.asarray(ar1).flatten()
     if ar.size == 0:
@@ -81,8 +111,20 @@
 def intersect1d( ar1, ar2 ):
     """Intersection of 1D arrays with unique elements.
 
-    See also: ediff1d, unique1d, intersect1d_nu, setxor1d,
-    setmember1d, union1d, setdiff1d
+    Use unique1d() to generate arrays with only unique elements to use as inputs
+    to this function. Alternatively, use intersect1d_nu() which will find the
+    unique values for you.
+
+    :Parameters:
+      - `ar1` : array
+      - `ar2` : array
+
+    :Returns:
+      - `intersection` : array
+
+    :See also:
+      numpy.lib.arraysetops has a number of other functions for performing set
+      operations on arrays.
     """
     aux = nm.concatenate((ar1,ar2))
     aux.sort()
@@ -91,10 +133,20 @@
 def intersect1d_nu( ar1, ar2 ):
     """Intersection of 1D arrays with any elements.
 
-    See also: ediff1d, unique1d, intersect1d, setxor1d,
-    setmember1d, union1d, setdiff1d
+    Unlike intersect1d(), the input arrays need not have unique elements.
+
+    :Parameters:
+      - `ar1` : array
+      - `ar2` : array
+    
+    :Returns:
+      - `intersection` : array
+
+    :See also:
+      numpy.lib.arraysetops has a number of other functions for performing set
+      operations on arrays.
     """
-    # Might be faster then unique1d( intersect1d( ar1, ar2 ) )?
+    # Might be faster than unique1d( intersect1d( ar1, ar2 ) )?
     aux = nm.concatenate((unique1d(ar1), unique1d(ar2)))
     aux.sort()
     return aux[aux[1:] == aux[:-1]]
@@ -102,8 +154,20 @@
 def setxor1d( ar1, ar2 ):
     """Set exclusive-or of 1D arrays with unique elements.
 
-    See also: ediff1d, unique1d, intersect1d, intersect1d_nu,
-    setmember1d, union1d, setdiff1d
+    Use unique1d() to generate arrays with only unique elements to use as inputs
+    to this function.
+
+    :Parameters:
+      - `ar1` : array
+      - `ar2` : array
+
+    :Returns:
+      - `xor` : array
+        The values that are only in one, but not both, of the input arrays.
+
+    :See also:
+      numpy.lib.arraysetops has a number of other functions for performing set
+      operations on arrays.
     """
     aux = nm.concatenate((ar1, ar2))
     if aux.size == 0:
@@ -117,16 +181,31 @@
     return aux[flag2]
 
 def setmember1d( ar1, ar2 ):
-    """Return an array of shape of ar1 containing 1 where the elements of
-    ar1 are in ar2 and 0 otherwise.
+    """Return a boolean array of shape of ar1 containing True where the elements
+    of ar1 are in ar2 and False otherwise.
 
-    See also: ediff1d, unique1d, intersect1d, intersect1d_nu, setxor1d,
-    union1d, setdiff1d
+    Use unique1d() to generate arrays with only unique elements to use as inputs
+    to this function.
+
+    :Parameters:
+      - `ar1` : array
+      - `ar2` : array
+
+    :Returns:
+      - `mask` : bool array
+        The values ar1[mask] are in ar2.
+
+    :See also:
+      numpy.lib.arraysetops has a number of other functions for performing set
+      operations on arrays.
     """
     zlike = nm.zeros_like
     ar = nm.concatenate( (ar1, ar2 ) )
     tt = nm.concatenate( (zlike( ar1 ), zlike( ar2 ) + 1) )
-    perm = ar.argsort()
+    # We need this to be a stable sort, so always use 'mergesort' here. The
+    # values from the first array should always come before the values from the
+    # second array.
+    perm = ar.argsort(kind='mergesort')
     aux = ar[perm]
     aux2 = tt[perm]
 #    flag = ediff1d( aux, 1 ) == 0
@@ -137,23 +216,46 @@
     perm[ii+1] = perm[ii]
     perm[ii] = aux
 
-    indx = perm.argsort()[:len( ar1 )]
+    indx = perm.argsort(kind='mergesort')[:len( ar1 )]
 
     return flag[indx]
 
 def union1d( ar1, ar2 ):
     """Union of 1D arrays with unique elements.
 
-    See also: ediff1d, unique1d, intersect1d, intersect1d_nu, setxor1d,
-    setmember1d, setdiff1d
+    Use unique1d() to generate arrays with only unique elements to use as inputs
+    to this function.
+
+    :Parameters:
+      - `ar1` : array
+      - `ar2` : array
+
+    :Returns:
+      - `union` : array
+
+    :See also:
+      numpy.lib.arraysetops has a number of other functions for performing set
+      operations on arrays.
     """
     return unique1d( nm.concatenate( (ar1, ar2) ) )
 
 def setdiff1d( ar1, ar2 ):
     """Set difference of 1D arrays with unique elements.
 
-    See also: ediff1d, unique1d, intersect1d, intersect1d_nu, setxor1d,
-    setmember1d, union1d
+    Use unique1d() to generate arrays with only unique elements to use as inputs
+    to this function.
+
+    :Parameters:
+      - `ar1` : array
+      - `ar2` : array
+
+    :Returns:
+      - `difference` : array
+        The values in ar1 that are not in ar2.
+
+    :See also:
+      numpy.lib.arraysetops has a number of other functions for performing set
+      operations on arrays.
     """
     aux = setmember1d(ar1,ar2)
     if aux.size == 0:
@@ -161,7 +263,7 @@
     else:
         return nm.asarray(ar1)[aux == 0]
 
-def test_unique1d_speed( plot_results = False ):
+def _test_unique1d_speed( plot_results = False ):
 #    exponents = nm.linspace( 2, 7, 9 )
     exponents = nm.linspace( 2, 7, 9 )
     ratios = []
@@ -222,4 +324,4 @@
         pylab.show()
 
 if (__name__ == '__main__'):
-    test_unique1d_speed( plot_results = True )
+    _test_unique1d_speed( plot_results = True )



More information about the Numpy-svn mailing list