[Scipy-svn] r6878 - trunk/scipy/stats

scipy-svn@scip... scipy-svn@scip...
Sat Nov 13 23:38:11 CST 2010


Author: rgommers
Date: 2010-11-13 23:38:10 -0600 (Sat, 13 Nov 2010)
New Revision: 6878

Modified:
   trunk/scipy/stats/stats.py
Log:
DOC: merge wiki edits for stats.

Modified: trunk/scipy/stats/stats.py
===================================================================
--- trunk/scipy/stats/stats.py	2010-11-13 21:15:39 UTC (rev 6877)
+++ trunk/scipy/stats/stats.py	2010-11-14 05:38:10 UTC (rev 6878)
@@ -244,17 +244,37 @@
 ########
 
 def nanmean(x, axis=0):
-    """Compute the mean over the given axis ignoring nans.
+    """
+    Compute the mean over the given axis ignoring nans.
 
-    :Parameters:
-        x : ndarray
-            input array
-        axis : int
-            axis along which the mean is computed.
+    Parameters
+    ----------
+    x : ndarray
+        Input array.
+    axis : int, optional
+        Axis along which the mean is computed. Default is 0, i.e. the
+        first axis.
 
-    :Results:
-        m : float
-            the mean."""
+    Returns
+    -------
+    m : float
+        The mean of `x`, ignoring nans.
+
+    See Also
+    --------
+    nanstd, nanmedian
+
+    Examples
+    --------
+    >>> from scipy import stats
+    >>> a = np.linspace(0, 4, 3)
+    >>> a
+    array([ 0.,  2.,  4.])
+    >>> a[-1] = np.nan
+    >>> stats.nanmean(a)
+    1.0
+
+    """
     x, axis = _chk_asarray(x,axis)
     x = x.copy()
     Norig = x.shape[axis]
@@ -264,20 +284,44 @@
     return np.mean(x,axis)/factor
 
 def nanstd(x, axis=0, bias=False):
-    """Compute the standard deviation over the given axis ignoring nans
+    """
+    Compute the standard deviation over the given axis, ignoring nans.
 
-    :Parameters:
-        x : ndarray
-            input array
-        axis : int
-            axis along which the standard deviation is computed.
-        bias : boolean
-            If true, the biased (normalized by N) definition is used. If false,
-            the unbiased is used (the default).
+    Parameters
+    ----------
+    x : array_like
+        Input array.
+    axis : int or None, optional
+        Axis along which the standard deviation is computed. Default is 0.
+        If None, compute over the whole array `x`.
+    bias : bool, optional
+        If True, the biased (normalized by N) definition is used. If False
+        (default), the unbiased definition is used.
 
-    :Results:
-        s : float
-            the standard deviation."""
+    Returns
+    -------
+    s : float
+        The standard deviation.
+
+    See Also
+    --------
+    nanmean, nanmedian
+
+    Examples
+    --------
+    >>> from scipy import stats
+    >>> a = np.arange(10, dtype=float)
+    >>> a[1:3] = np.nan
+    >>> np.std(a)
+    nan
+    >>> stats.nanstd(a)
+    2.9154759474226504
+    >>> stats.nanstd(a.reshape(2, 5), axis=1)
+    array([ 2.0817,  1.5811])
+    >>> stats.nanstd(a.reshape(2, 5), axis=None)
+    2.9154759474226504
+
+    """
     x, axis = _chk_asarray(x,axis)
     x = x.copy()
     Norig = x.shape[axis]
@@ -317,17 +361,52 @@
     return np.median(x)
 
 def nanmedian(x, axis=0):
-    """ Compute the median along the given axis ignoring nan values
+    """
+    Compute the median along the given axis ignoring nan values.
 
-    :Parameters:
-        x : ndarray
-            input array
-        axis : int
-            axis along which the median is computed.
+    Parameters
+    ----------
+    x : array_like
+        Input array.
+    axis : int, optional
+        Axis along which the median is computed. Default is 0, i.e. the
+        first axis.
 
-    :Results:
-        m : float
-            the median."""
+    Returns
+    -------
+    m : float
+        The median of `x` along `axis`.
+
+    See Also
+    --------
+    nanstd, nanmean
+
+    Examples
+    --------
+    >>> from scipy import stats
+    >>> a = np.array([0, 3, 1, 5, 5, np.nan])
+    >>> stats.nanmedian(a)
+    array(3.0)
+
+    >>> b = np.array([0, 3, 1, 5, 5, np.nan, 5])
+    >>> stats.nanmedian(b)
+    array(4.0)
+
+    Example with axis:
+
+    >>> c = np.arange(30.).reshape(5,6)
+    >>> idx = np.array([False, False, False, True, False] * 6).reshape(5,6)
+    >>> c[idx] = np.nan
+    >>> c
+    array([[  0.,   1.,   2.,  nan,   4.,   5.],
+           [  6.,   7.,  nan,   9.,  10.,  11.],
+           [ 12.,  nan,  14.,  15.,  16.,  17.],
+           [ nan,  19.,  20.,  21.,  22.,  nan],
+           [ 24.,  25.,  26.,  27.,  nan,  29.]])
+    >>> stats.nanmedian(c, axis=1)
+    array([  2. ,   9. ,  15. ,  20.5,  26. ])
+
+    """
     x, axis = _chk_asarray(x,axis)
     x = x.copy()
     return np.apply_along_axis(_nanmedian,axis,x)
@@ -405,15 +484,15 @@
         Axis along which the harmonic mean is computed.
     dtype : dtype, optional
         Type of the returned array and of the accumulator in which the
-        elements are summed. If dtype is not specified, it defaults to the
-        dtype of a, unless a has an integer dtype with a precision less than
-        that of the default platform integer. In that case, the default
+        elements are summed. If `dtype` is not specified, it defaults to the
+        dtype of `a`, unless `a` has an integer `dtype` with a precision less
+        than that of the default platform integer. In that case, the default
         platform integer is used.
 
     Returns
     -------
     hmean : ndarray,
-        see dtype parameter above
+        see `dtype` parameter above
 
     See Also
     --------
@@ -571,19 +650,42 @@
 axis=0, ddof=1).""")
 
 def mode(a, axis=0):
-    """Returns an array of the modal (most common) value in the passed array.
+    """
+    Returns an array of the modal (most common) value in the passed array.
 
     If there is more than one such value, only the first is returned.
     The bin-count for the modal bins is also returned.
 
     Parameters
     ----------
-    a : array
-    axis=0 : int
+    a : array_like
+        n-dimensional array of which to find mode(s).
+    axis : int, optional
+        Axis along which to operate. Default is 0, i.e. the first axis.
 
     Returns
     -------
-    (array of modal values, array of counts for each mode)
+    vals : ndarray
+        Array of modal values.
+    counts : ndarray
+        Array of counts for each mode.
+
+    Examples
+    --------
+    >>> a = np.array([[6, 8, 3, 0],
+    ...               [3, 2, 1, 7],
+    ...               [8, 1, 8, 4],
+    ...               [5, 3, 0, 5],
+    ...               [4, 7, 5, 9]])
+    >>> from scipy import stats
+    >>> stats.mode(a)
+    (array([[ 3.,  1.,  0.,  0.]]), array([[ 1.,  1.,  1.,  1.]]))
+
+    To get mode of whole array, specify axis=None:
+
+    >>> stats.mode(a, axis=None)
+    (array([ 3.]), array([ 3.]))
+
     """
     a, axis = _chk_asarray(a, axis)
     scores = np.unique(np.ravel(a))       # get ALL unique values
@@ -1248,16 +1350,40 @@
     return a + (b - a)*fraction;
 
 def scoreatpercentile(a, per, limit=()):
-    """Calculate the score at the given 'per' percentile of the
-    sequence a.  For example, the score at per=50 is the median.
+    """
+    Calculate the score at the given `per` percentile of the sequence `a`.
 
-    If the desired quantile lies between two data points, we
-    interpolate between them.
+    For example, the score at per=50 is the median. If the desired quantile
+    lies between two data points, we interpolate between them. If the parameter
+    `limit` is provided, it should be a tuple (lower, upper) of two values.
+    Values of `a` outside this (closed) interval will be ignored.
 
-    If the parameter 'limit' is provided, it should be a tuple (lower,
-    upper) of two values.  Values of 'a' outside this (closed)
-    interval will be ignored.
+    Parameters
+    ----------
+    a : ndarray
+        Values from which to extract score.
+    per : int or float
+        Percentile at which to extract score.
+    limit : tuple, optional
+        Tuple of two scalars, the lower and upper limits within which to
+        compute the percentile.
 
+    Returns
+    -------
+    score : float
+        Score at percentile.
+
+    See Also
+    --------
+    percentileofscore
+
+    Examples
+    --------
+    >>> from scipy import stats
+    >>> a = np.arange(100)
+    >>> stats.scoreatpercentile(a, 50)
+    49.5
+
     """
     # TODO: this should be a simple wrapper around a well-written quantile
     # function.  GNU R provides 9 quantile algorithms (!), with differing
@@ -1631,24 +1757,46 @@
 def sem(a, axis=0, ddof=1):
     """
     Calculates the standard error of the mean (or standard error of
-    measurement) of the values in the passed array.
+    measurement) of the values in the input array.
 
     Parameters
     ----------
-    a: array like
-        An array containing the values for which
-    axis: int or None, optional.
-        if equal None, ravel array first. If equal to an integer, this will be
+    a : array_like
+        An array containing the values for which the standard error is
+        returned.
+    axis : int or None, optional
+        If axis is None, ravel `a` first. If axis is an integer, this will be
         the axis over which to operate. Defaults to 0.
-    ddof: int
-        Delta degrees-of-freedom. How many degrees of freedom to adjust for
-        bias in limited samples relative to the population estimate of variance
+    ddof : int, optional
+        Delta degrees-of-freedom. How many degrees of freedom to adjust
+        for bias in limited samples relative to the population estimate
+        of variance. Defaults to 1.
 
     Returns
     -------
-    The standard error of the mean in the sample(s), along the input axis
+    s : ndarray or float
+        The standard error of the mean in the sample(s), along the input axis.
 
-"""
+    Notes
+    -----
+    The default value for `ddof` is different to the default (0) used by other
+    ddof containing routines, such as np.std and stats.nanstd.
+
+    Examples
+    --------
+    Find standard error along the first axis:
+
+    >>> from scipy import stats
+    >>> a = np.arange(20).reshape(5,4)
+    >>> stats.sem(a)
+    array([ 2.8284,  2.8284,  2.8284,  2.8284])
+
+    Find standard error across the whole array, using n degrees of freedom:
+
+    >>> stats.sem(a, axis=None, ddof=0)
+    1.2893796958227628
+
+    """
     a, axis = _chk_asarray(a, axis)
     n = a.shape[axis]
     #s = samplestd(a,axis) / np.sqrt(n-1)
@@ -1691,23 +1839,51 @@
 
     Parameters
     ----------
-    a: array_like
-       An array like object containing the sample data
-    axis: int or None, optional
-         If axis is equal to None, the array is first ravel'd. If axis is an
-         integer, this is the axis over which to operate. Defaults to 0.
+    a : array_like
+        An array like object containing the sample data.
+    axis : int or None, optional
+        If `axis` is equal to None, the array is first raveled. If `axis` is
+        an integer, this is the axis over which to operate. Default is 0.
+    ddof : int, optional
+        Degrees of freedom correction in the calculation of the
+        standard deviation. Default is 0.
 
     Returns
     -------
-    zscore: array_like
-        the z-scores, standardized by mean and standard deviation of input
-        array
+    zscore : array_like
+        The z-scores, standardized by mean and standard deviation of input
+        array `a`.
 
     Notes
     -----
-    This function does not convert array classes, and works also with
-    matrices and masked arrays.
+    This function preserves ndarray subclasses, and works also with
+    matrices and masked arrays (it uses `asanyarray` instead of `asarray`
+    for parameters).
 
+    Examples
+    --------
+    >>> a = np.array([ 0.7972,  0.0767,  0.4383,  0.7866,  0.8091,  0.1954,
+    ...                0.6307, 0.6599,  0.1065,  0.0508])
+    >>> from scipy import stats
+    >>> stats.zscore(a)
+    array([ 1.1273, -1.247 , -0.0552,  1.0923,  1.1664, -0.8559,  0.5786,
+            0.6748, -1.1488, -1.3324])
+
+    Computing along a specified axis, using n-1 degrees of freedom (``ddof=1``)
+    to calculate the standard deviation:
+
+    >>> b = np.array([[ 0.3148,  0.0478,  0.6243,  0.4608],
+    ...               [ 0.7149,  0.0775,  0.6072,  0.9656],
+    ...               [ 0.6341,  0.1403,  0.9759,  0.4064],
+    ...               [ 0.5918,  0.6948,  0.904 ,  0.3721],
+    ...               [ 0.0921,  0.2481,  0.1188,  0.1366]])
+    >>> stats.zscore(b, axis=1, ddof=1)
+    array([[-0.1926, -1.2842,  1.0726,  0.4042],
+           [ 0.3305, -1.3738,  0.0425,  1.0008],
+           [ 0.268 , -1.126 ,  1.2328, -0.3748],
+           [-0.221 ,  0.2447,  1.1904, -1.2142],
+           [-0.8278,  1.4457, -0.4387, -0.1793]])
+
     """
     a = np.asanyarray(a)
     mns = a.mean(axis=axis)
@@ -1722,32 +1898,37 @@
 
 def zmap(scores, compare, axis=0, ddof=0):
     """
-    Calculates the zscores relative to the mean and standard deviation
-    of second input.
+    Calculates the relative z-scores.
 
-    Returns an array of z-scores, i.e. scores that are standardized to zero
+    Returns an array of z-scores, i.e., scores that are standardized to zero
     mean and unit variance, where mean and variance are calculated from the
     comparison array.
 
     Parameters
     ----------
-    scores : array-like
-       The input for which z scores are calculated
-    compare : array-like
-       The input from which the mean and standard deviation of the
-       normalization are taken, assumed to have same dimension as scores
-    axis : integer or None, {optional, default 0)
-        axis over which mean and std of compare array are calculated
+    scores : array_like
+        The input for which z-scores are calculated.
+    compare : array_like
+        The input from which the mean and standard deviation of the
+        normalization are taken; assumed to have the same dimension as
+        `scores`.
+    axis : int or None, optional
+        Axis over which mean and variance of `compare` are calculated.
+        Default is 0.
+    ddof : int, optional
+        Degrees of freedom correction in the calculation of the
+        standard deviation. Default is 0.
 
     Returns
     -------
     zscore : array_like
-       zscore in the same shape as scores
+        Z-scores, in the same shape as `scores`.
 
     Notes
     -----
-    This function does not convert array classes, and works also with
-    matrices and masked arrays.
+    This function preserves ndarray subclasses, and works also with
+    matrices and masked arrays (it uses `asanyarray` instead of `asarray`
+    for parameters).
 
     """
     scores, compare = map(np.asanyarray, [scores, compare])
@@ -1767,16 +1948,39 @@
 #####################################
 
 def threshold(a, threshmin=None, threshmax=None, newval=0):
-    """Clip array to a given value.
+    """
+    Clip array to a given value.
 
-Similar to numpy.clip(), except that values less than threshmin or
-greater than threshmax are replaced by newval, instead of by
-threshmin and threshmax respectively.
+    Similar to numpy.clip(), except that values less than `threshmin` or
+    greater than `threshmax` are replaced by `newval`, instead of by
+    `threshmin` and `threshmax` respectively.
 
-Returns: a, with values less than threshmin or greater than threshmax
-         replaced with newval
+    Parameters
+    ----------
+    a : array_like
+        Data to threshold.
+    threshmin : float, int or None, optional
+        Minimum threshold, defaults to None.
+    threshmax : float, int or None, optional
+        Maximum threshold, defaults to None.
+    newval : float or int, optional
+        Value to put in place of values in `a` outside of bounds.
+        Defaults to 0.
 
-"""
+    Returns
+    -------
+    out : ndarray
+        The clipped input array, with values less than `threshmin` or
+        greater than `threshmax` replaced with `newval`.
+
+    Examples
+    --------
+    >>> a = np.array([9, 9, 6, 3, 1, 6, 1, 0, 0, 8])
+    >>> from scipy import stats
+    >>> stats.threshold(a, threshmin=2, threshmax=8, newval=-1)
+    array([-1, -1,  6,  3, -1,  6, -1, -1, -1,  8])
+
+    """
     a = asarray(a).copy()
     mask = zeros(a.shape, dtype=bool)
     if threshmin is not None:
@@ -1789,35 +1993,39 @@
 
 
 def sigmaclip(a, low=4., high=4.):
-    """Iterative sigma-clipping of array elements.
+    """
+    Iterative sigma-clipping of array elements.
 
     The output array contains only those elements of the input array `c`
     that satisfy the conditions ::
 
         mean(c) - std(c)*low < c < mean(c) + std(c)*high
 
+    Starting from the full sample, all elements outside the critical range are
+    removed. The iteration continues with a new critical range until no
+    elements are outside the range.
+
     Parameters
     ----------
     a : array_like
-        data array, will be raveled if not 1d
-    low : float
-        lower bound factor of sigma clipping
-    high : float
-        upper bound factor of sigma clipping
+        Data array, will be raveled if not 1-D.
+    low : float, optional
+        Lower bound factor of sigma clipping. Default is 4.
+    high : float, optional
+        Upper bound factor of sigma clipping. Default is 4.
 
     Returns
     -------
-    c : array
-        input array with clipped elements removed
+    c : ndarray
+        Input array with clipped elements removed.
     critlower : float
-        lower threshold value use for clipping
+        Lower threshold value used for clipping.
     critupper : float
-        upper threshold value use for clipping
+        Upper threshold value used for clipping.
 
-
     Examples
     --------
-    >>> a = np.concatenate((np.linspace(9.5,10.5,31),np.linspace(0,20,5)))
+    >>> a = np.concatenate((np.linspace(9.5,10.5,31), np.linspace(0,20,5)))
     >>> fact = 1.5
     >>> c, low, upp = sigmaclip(a, fact, fact)
     >>> c
@@ -1851,15 +2059,36 @@
 
 def trimboth(a, proportiontocut):
     """
-Slices off the passed proportion of items from BOTH ends of the passed
-array (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND
-'rightmost' 10% of scores.  You must pre-sort the array if you want
-"proper" trimming.  Slices off LESS if proportion results in a
-non-integer slice index (i.e., conservatively slices off
-proportiontocut).
+    Slices off a proportion of items from both ends of an array.
 
-Returns: trimmed version of array a
-"""
+    Slices off the passed proportion of items from both ends of the passed
+    array (i.e., with `proportiontocut` = 0.1, slices leftmost 10% **and**
+    rightmost 10% of scores).  You must pre-sort the array if you want
+    'proper' trimming.  Slices off less if proportion results in a
+    non-integer slice index (i.e., conservatively slices off
+    `proportiontocut`).
+
+    Parameters
+    ----------
+    a : array_like
+        Data to trim.
+    proportiontocut : float or int
+        Proportion of total data set to trim off each end.
+
+    Returns
+    -------
+    out : ndarray
+        Trimmed version of array `a`.
+
+    Examples
+    --------
+    >>> from scipy import stats
+    >>> a = np.arange(20)
+    >>> b = stats.trimboth(a, 0.1)
+    >>> b.shape
+    (16,)
+
+    """
     a = asarray(a)
     lowercut = int(proportiontocut*len(a))
     uppercut = len(a) - lowercut
@@ -1973,35 +2202,35 @@
     """
     Performs a 1-way ANOVA.
 
-    The on-way ANOVA tests the null hypothesis that 2 or more groups have
+    The one-way ANOVA tests the null hypothesis that two or more groups have
     the same population mean.  The test is applied to samples from two or
     more groups, possibly with differing sizes.
 
     Parameters
     ----------
     sample1, sample2, ... : array_like
-        The sample measurements should be given as arguments.
+        The sample measurements for each group.
 
     Returns
     -------
     F-value : float
-        The computed F-value of the test
+        The computed F-value of the test.
     p-value : float
-        The associated p-value from the F-distribution
+        The associated p-value from the F-distribution.
 
     Notes
     -----
     The ANOVA test has important assumptions that must be satisfied in order
     for the associated p-value to be valid.
 
-    1. The samples are independent
-    2. Each sample is from a normally distributed population
+    1. The samples are independent.
+    2. Each sample is from a normally distributed population.
     3. The population standard deviations of the groups are all equal.  This
-       property is known as homocedasticity.
+       property is known as homoscedasticity.
 
     If these assumptions are not true for a given set of data, it may still be
     possible to use the Kruskal-Wallis H-test (`stats.kruskal`_) although with
-    some loss of power
+    some loss of power.
 
     The algorithm is from Heiman[2], pp.394-7.
 
@@ -2095,12 +2324,12 @@
     Calculates a Spearman rank-order correlation coefficient and the p-value
     to test for non-correlation.
 
-    The Spearman correlation is a nonparametric measure of the linear
-    relationship between two datasets. Unlike the Pearson correlation, the
-    Spearman correlation does not assume that both datasets are normally
+    The Spearman correlation is a nonparametric measure of the monotonicity
+    of the relationship between two datasets. Unlike the Pearson correlation,
+    the Spearman correlation does not assume that both datasets are normally
     distributed. Like other correlation coefficients, this one varies
     between -1 and +1 with 0 implying no correlation. Correlations of -1 or
-    +1 imply an exact linear relationship. Positive correlations imply that
+    +1 imply an exact monotonic relationship. Positive correlations imply that
     as x increases, so does y. Negative correlations imply that as x
     increases, y decreases.
 
@@ -2109,37 +2338,34 @@
     as the one computed from these datasets. The p-values are not entirely
     reliable but are probably reasonable for datasets larger than 500 or so.
 
-    spearmanr currently does not do any tie correction, and is only correct
-    if there are no ties in the data.
-
     Parameters
     ----------
     a, b : 1D or 2D array_like, b is optional
         One or two 1-D or 2-D arrays containing multiple variables and
-        observations. Each column of m represents a variable, and each row
-        entry a single observation of those variables. Also see axis below.
-        Both arrays need to have the same length in the `axis` dimension.
-
+        observations. Each column of `a` and `b` represents a variable, and
+        each row entry a single observation of those variables. See also
+        `axis`. Both arrays need to have the same length in the `axis`
+        dimension.
     axis : int or None, optional
         If axis=0 (default), then each column represents a variable, with
         observations in the rows. If axis=1, the relationship is transposed:
         each row represents a variable, while the columns contain observations.
-        If axis=None, then both arrays will be raveled
+        If axis=None, then both arrays will be raveled.
 
     Returns
     -------
-    rho: float or array (2D square)
-        Spearman correlation matrix or correlation coefficient (if only 2 variables
-        are given as parameters. Correlation matrix is square with length
-        equal to total number of variables (columns or rows) in a and b
-        combined
+    rho: float or ndarray (2-D square)
+        Spearman correlation matrix or correlation coefficient (if only 2
+        variables are given as parameters). Correlation matrix is square with
+        length equal to total number of variables (columns or rows) in a and b
+        combined.
     p-value : float
         The two-sided p-value for a hypothesis test whose null hypothesis is
-        that two sets of data are uncorrelated, has same dimension as rho
+        that two sets of data are uncorrelated, has same dimension as rho.
 
     Notes
     -----
-    changes in scipy 0.8: rewrite to add tie-handling,  and axis
+    Changes in scipy 0.8.0: rewrite to add tie-handling, and axis.
 
     References
     ----------
@@ -2151,7 +2377,6 @@
 
     Examples
     --------
-
     >>> spearmanr([1,2,3,4,5],[5,6,7,8,7])
     (0.82078268166812329, 0.088587005313543798)
     >>> np.random.seed(1234321)
@@ -2221,19 +2446,39 @@
     implying no correlation. Correlations of -1 or +1 imply a determinative
     relationship.
 
+    This function uses a shortcut formula but produces the same result as
+    `pearsonr`.
+
     Parameters
     ----------
-    x : array of bools
-    y : array of floats
+    x : array_like of bools
+        Input array.
+    y : array_like
+        Input array.
 
     Returns
     -------
-    (point-biserial r,
-     2-tailed p-value)
+    r : float
+        R value
+    p-value : float
+        2-tailed p-value
 
     References
     ----------
     http://www.childrens-mercy.org/stats/definitions/biserial.htm
+
+    Examples
+    --------
+    >>> from scipy import stats
+    >>> a = np.array([0, 0, 0, 1, 1, 1, 1])
+    >>> b = np.arange(7)
+    >>> stats.pointbiserialr(a, b)
+    (0.8660254037844386, 0.011724811003954652)
+    >>> stats.pearsonr(a, b)
+    (0.86602540378443871, 0.011724811003954626)
+    >>> np.corrcoef(a, b)
+    array([[ 1.       ,  0.8660254],
+           [ 0.8660254,  1.       ]])
     """
 
     ## Test data: http://support.sas.com/ctx/samples/index.jsp?sid=490&tab=output
@@ -3372,28 +3617,75 @@
 #####################################
 
 def ss(a, axis=0):
-    """Squares each value in the passed array, adds these squares, and
-    returns the result.
+    """
+    Squares each element of the input array, and returns the sum(s) of that.
 
     Parameters
     ----------
-    a : array
-    axis : int or None
+    a : array_like
+        Input array.
+    axis : int or None, optional
+        The axis along which to calculate. If None, use whole array.
+        Default is 0, i.e. along the first axis.
 
     Returns
     -------
-    The sum along the given axis for (a*a).
+    ss : ndarray
+        The sum along the given axis for (a**2).
+
+    See Also
+    --------
+    square_of_sums : The square(s) of the sum(s) (the opposite of `ss`).
+
+    Examples
+    --------
+    >>> from scipy import stats
+    >>> a = np.array([1., 2., 5.])
+    >>> stats.ss(a)
+    30.0
+
+    And calculating along an axis:
+
+    >>> b = np.array([[1., 2., 5.], [2., 5., 6.]])
+    >>> stats.ss(b, axis=1)
+    array([ 30.,  65.])
+
     """
     a, axis = _chk_asarray(a, axis)
     return np.sum(a*a, axis)
 
 
 def square_of_sums(a, axis=0):
-    """Adds the values in the passed array, squares that sum, and returns the
-result.
+    """
+    Sums elements of the input array, and returns the square(s) of that sum.
 
-Returns: the square of the sum over axis.
-"""
+    Parameters
+    ----------
+    a : array_like
+        Input array.
+    axis : int or None, optional
+        If axis is None, ravel `a` first. If `axis` is an integer, this will
+        be the axis over which to operate. Defaults to 0.
+
+    Returns
+    -------
+    ss : float or ndarray
+        The square of the sum over `axis`.
+
+    See Also
+    --------
+    ss : The sum of squares (the opposite of `square_of_sums`).
+
+    Examples
+    --------
+    >>> from scipy import stats
+    >>> a = np.arange(20).reshape(5,4)
+    >>> stats.square_of_sums(a)
+    array([ 1600.,  2025.,  2500.,  3025.])
+    >>> stats.square_of_sums(a, axis=None)
+    36100.0
+
+    """
     a, axis = _chk_asarray(a, axis)
     s = np.sum(a,axis)
     if not np.isscalar(s):
@@ -3421,25 +3713,28 @@
     return as_, it
 
 def rankdata(a):
-    """Ranks the data in a, dealing with ties appropriately.
+    """
+    Ranks the data, dealing with ties appropriately.
 
     Equal values are assigned a rank that is the average of the ranks that
     would have been otherwise assigned to all of the values within that set.
     Ranks begin at 1, not 0.
 
-    Example
-    -------
-    In [15]: stats.rankdata([0, 2, 2, 3])
-    Out[15]: array([ 1. ,  2.5,  2.5,  4. ])
-
     Parameters
     ----------
-    a : array
+    a : array_like
         This array is first flattened.
 
     Returns
     -------
-    An array of length equal to the size of a, containing rank scores.
+    rankdata : ndarray
+         An array of length equal to the size of `a`, containing rank scores.
+
+    Examples
+    --------
+    >>> stats.rankdata([0, 2, 2, 3])
+    array([ 1. ,  2.5,  2.5,  4. ])
+
     """
     a = np.ravel(a)
     n = len(a)



More information about the Scipy-svn mailing list