[Scipysvn] r6878  trunk/scipy/stats
scipysvn@scip...
scipysvn@scip...
Sat Nov 13 23:38:11 CST 2010
Author: rgommers
Date: 20101113 23:38:10 0600 (Sat, 13 Nov 2010)
New Revision: 6878
Modified:
trunk/scipy/stats/stats.py
Log:
DOC: merge wiki edits for stats.
Modified: trunk/scipy/stats/stats.py
===================================================================
 trunk/scipy/stats/stats.py 20101113 21:15:39 UTC (rev 6877)
+++ trunk/scipy/stats/stats.py 20101114 05:38:10 UTC (rev 6878)
@@ 244,17 +244,37 @@
########
def nanmean(x, axis=0):
 """Compute the mean over the given axis ignoring nans.
+ """
+ Compute the mean over the given axis ignoring nans.
 :Parameters:
 x : ndarray
 input array
 axis : int
 axis along which the mean is computed.
+ Parameters
+ 
+ x : ndarray
+ Input array.
+ axis : int, optional
+ Axis along which the mean is computed. Default is 0, i.e. the
+ first axis.
 :Results:
 m : float
 the mean."""
+ Returns
+ 
+ m : float
+ The mean of `x`, ignoring nans.
+
+ See Also
+ 
+ nanstd, nanmedian
+
+ Examples
+ 
+ >>> from scipy import stats
+ >>> a = np.linspace(0, 4, 3)
+ >>> a
+ array([ 0., 2., 4.])
+ >>> a[1] = np.nan
+ >>> stats.nanmean(a)
+ 1.0
+
+ """
x, axis = _chk_asarray(x,axis)
x = x.copy()
Norig = x.shape[axis]
@@ 264,20 +284,44 @@
return np.mean(x,axis)/factor
def nanstd(x, axis=0, bias=False):
 """Compute the standard deviation over the given axis ignoring nans
+ """
+ Compute the standard deviation over the given axis, ignoring nans.
 :Parameters:
 x : ndarray
 input array
 axis : int
 axis along which the standard deviation is computed.
 bias : boolean
 If true, the biased (normalized by N) definition is used. If false,
 the unbiased is used (the default).
+ Parameters
+ 
+ x : array_like
+ Input array.
+ axis : int or None, optional
+ Axis along which the standard deviation is computed. Default is 0.
+ If None, compute over the whole array `x`.
+ bias : bool, optional
+ If True, the biased (normalized by N) definition is used. If False
+ (default), the unbiased definition is used.
 :Results:
 s : float
 the standard deviation."""
+ Returns
+ 
+ s : float
+ The standard deviation.
+
+ See Also
+ 
+ nanmean, nanmedian
+
+ Examples
+ 
+ >>> from scipy import stats
+ >>> a = np.arange(10, dtype=float)
+ >>> a[1:3] = np.nan
+ >>> np.std(a)
+ nan
+ >>> stats.nanstd(a)
+ 2.9154759474226504
+ >>> stats.nanstd(a.reshape(2, 5), axis=1)
+ array([ 2.0817, 1.5811])
+ >>> stats.nanstd(a.reshape(2, 5), axis=None)
+ 2.9154759474226504
+
+ """
x, axis = _chk_asarray(x,axis)
x = x.copy()
Norig = x.shape[axis]
@@ 317,17 +361,52 @@
return np.median(x)
def nanmedian(x, axis=0):
 """ Compute the median along the given axis ignoring nan values
+ """
+ Compute the median along the given axis ignoring nan values.
 :Parameters:
 x : ndarray
 input array
 axis : int
 axis along which the median is computed.
+ Parameters
+ 
+ x : array_like
+ Input array.
+ axis : int, optional
+ Axis along which the median is computed. Default is 0, i.e. the
+ first axis.
 :Results:
 m : float
 the median."""
+ Returns
+ 
+ m : float
+ The median of `x` along `axis`.
+
+ See Also
+ 
+ nanstd, nanmean
+
+ Examples
+ 
+ >>> from scipy import stats
+ >>> a = np.array([0, 3, 1, 5, 5, np.nan])
+ >>> stats.nanmedian(a)
+ array(3.0)
+
+ >>> b = np.array([0, 3, 1, 5, 5, np.nan, 5])
+ >>> stats.nanmedian(b)
+ array(4.0)
+
+ Example with axis:
+
+ >>> c = np.arange(30.).reshape(5,6)
+ >>> idx = np.array([False, False, False, True, False] * 6).reshape(5,6)
+ >>> c[idx] = np.nan
+ >>> c
+ array([[ 0., 1., 2., nan, 4., 5.],
+ [ 6., 7., nan, 9., 10., 11.],
+ [ 12., nan, 14., 15., 16., 17.],
+ [ nan, 19., 20., 21., 22., nan],
+ [ 24., 25., 26., 27., nan, 29.]])
+ >>> stats.nanmedian(c, axis=1)
+ array([ 2. , 9. , 15. , 20.5, 26. ])
+
+ """
x, axis = _chk_asarray(x,axis)
x = x.copy()
return np.apply_along_axis(_nanmedian,axis,x)
@@ 405,15 +484,15 @@
Axis along which the harmonic mean is computed.
dtype : dtype, optional
Type of the returned array and of the accumulator in which the
 elements are summed. If dtype is not specified, it defaults to the
 dtype of a, unless a has an integer dtype with a precision less than
 that of the default platform integer. In that case, the default
+ elements are summed. If `dtype` is not specified, it defaults to the
+ dtype of `a`, unless `a` has an integer `dtype` with a precision less
+ than that of the default platform integer. In that case, the default
platform integer is used.
Returns

hmean : ndarray,
 see dtype parameter above
+ see `dtype` parameter above
See Also

@@ 571,19 +650,42 @@
axis=0, ddof=1).""")
def mode(a, axis=0):
 """Returns an array of the modal (most common) value in the passed array.
+ """
+ Returns an array of the modal (most common) value in the passed array.
If there is more than one such value, only the first is returned.
The bincount for the modal bins is also returned.
Parameters

 a : array
 axis=0 : int
+ a : array_like
+ ndimensional array of which to find mode(s).
+ axis : int, optional
+ Axis along which to operate. Default is 0, i.e. the first axis.
Returns

 (array of modal values, array of counts for each mode)
+ vals : ndarray
+ Array of modal values.
+ counts : ndarray
+ Array of counts for each mode.
+
+ Examples
+ 
+ >>> a = np.array([[6, 8, 3, 0],
+ [3, 2, 1, 7],
+ [8, 1, 8, 4],
+ [5, 3, 0, 5],
+ [4, 7, 5, 9]])
+ >>> from scipy import stats
+ >>> stats.mode(a)
+ (array([[ 3., 1., 0., 0.]]), array([[ 1., 1., 1., 1.]]))
+
+ To get mode of whole array, specify axis=None:
+
+ >>> stats.mode(a, axis=None)
+ (array([ 3.]), array([ 3.]))
+
"""
a, axis = _chk_asarray(a, axis)
scores = np.unique(np.ravel(a)) # get ALL unique values
@@ 1248,16 +1350,40 @@
return a + (b  a)*fraction;
def scoreatpercentile(a, per, limit=()):
 """Calculate the score at the given 'per' percentile of the
 sequence a. For example, the score at per=50 is the median.
+ """
+ Calculate the score at the given `per` percentile of the sequence `a`.
 If the desired quantile lies between two data points, we
 interpolate between them.
+ For example, the score at per=50 is the median. If the desired quantile
+ lies between two data points, we interpolate between them. If the parameter
+ `limit` is provided, it should be a tuple (lower, upper) of two values.
+ Values of `a` outside this (closed) interval will be ignored.
 If the parameter 'limit' is provided, it should be a tuple (lower,
 upper) of two values. Values of 'a' outside this (closed)
 interval will be ignored.
+ Parameters
+ 
+ a : ndarray
+ Values from which to extract score.
+ per : int or float
+ Percentile at which to extract score.
+ limit : tuple, optional
+ Tuple of two scalars, the lower and upper limits within which to
+ compute the percentile.
+ Returns
+ 
+ score : float
+ Score at percentile.
+
+ See Also
+ 
+ percentileofscore
+
+ Examples
+ 
+ >>> from scipy import stats
+ >>> a = np.arange(100)
+ >>> stats.scoreatpercentile(a, 50)
+ 49.5
+
"""
# TODO: this should be a simple wrapper around a wellwritten quantile
# function. GNU R provides 9 quantile algorithms (!), with differing
@@ 1631,24 +1757,46 @@
def sem(a, axis=0, ddof=1):
"""
Calculates the standard error of the mean (or standard error of
 measurement) of the values in the passed array.
+ measurement) of the values in the input array.
Parameters

 a: array like
 An array containing the values for which
 axis: int or None, optional.
 if equal None, ravel array first. If equal to an integer, this will be
+ a : array_like
+ An array containing the values for which the standard error is
+ returned.
+ axis : int or None, optional.
+ If axis is None, ravel `a` first. If axis is an integer, this will be
the axis over which to operate. Defaults to 0.
 ddof: int
 Delta degreesoffreedom. How many degrees of freedom to adjust for
 bias in limited samples relative to the population estimate of variance
+ ddof : int, optional
+ Delta degreesoffreedom. How many degrees of freedom to adjust
+ for bias in limited samples relative to the population estimate
+ of variance. Defaults to 1.
Returns

 The standard error of the mean in the sample(s), along the input axis
+ s : ndarray or float
+ The standard error of the mean in the sample(s), along the input axis.
"""
+ Notes
+ 
+ The default value for `ddof` is different to the default (0) used by other
+ ddof containing routines, such as np.std and stats.nanstd.
+
+ Examples
+ 
+ Find standard error along the first axis:
+
+ >>> from scipy import stats
+ >>> a = np.arange(20).reshape(5,4)
+ >>> stats.sem(a)
+ array([ 2.8284, 2.8284, 2.8284, 2.8284])
+
+ Find standard error across the whole array, using n degrees of freedom:
+
+ >>> stats.sem(a, axis=None, ddof=0)
+ 1.2893796958227628
+
+ """
a, axis = _chk_asarray(a, axis)
n = a.shape[axis]
#s = samplestd(a,axis) / np.sqrt(n1)
@@ 1691,23 +1839,51 @@
Parameters

 a: array_like
 An array like object containing the sample data
 axis: int or None, optional
 If axis is equal to None, the array is first ravel'd. If axis is an
 integer, this is the axis over which to operate. Defaults to 0.
+ a : array_like
+ An array like object containing the sample data.
+ axis : int or None, optional
+ If `axis` is equal to None, the array is first raveled. If `axis` is
+ an integer, this is the axis over which to operate. Default is 0.
+ ddof : int, optional
+ Degrees of freedom correction in the calculation of the
+ standard deviation. Default is 0.
Returns

 zscore: array_like
 the zscores, standardized by mean and standard deviation of input
 array
+ zscore : array_like
+ The zscores, standardized by mean and standard deviation of input
+ array `a`.
Notes

 This function does not convert array classes, and works also with
 matrices and masked arrays.
+ This function preserves ndarray subclasses, and works also with
+ matrices and masked arrays (it uses `asanyarray` instead of `asarray`
+ for parameters).
+ Examples
+ 
+ >>> a = np.array([ 0.7972, 0.0767, 0.4383, 0.7866, 0.8091, 0.1954,
+ 0.6307, 0.6599, 0.1065, 0.0508])
+ >>> from scipy import stats
+ >>> stats.zscore(a)
+ array([ 1.1273, 1.247 , 0.0552, 1.0923, 1.1664, 0.8559, 0.5786,
+ 0.6748, 1.1488, 1.3324])
+
+ Computing along a specified axis, using n1 degrees of freedom (``ddof=1``)
+ to calculate the standard deviation:
+
+ >>> b = np.array([[ 0.3148, 0.0478, 0.6243, 0.4608],
+ [ 0.7149, 0.0775, 0.6072, 0.9656],
+ [ 0.6341, 0.1403, 0.9759, 0.4064],
+ [ 0.5918, 0.6948, 0.904 , 0.3721],
+ [ 0.0921, 0.2481, 0.1188, 0.1366]])
+ >>> stats.zscore(b, axis=1, ddof=1)
+ array([[1.1649, 1.4319, 0.8554, 1.0189],
+ [0.8661, 1.5035, 0.9737, 0.6154],
+ [0.888 , 1.3817, 0.5461, 1.1156],
+ [2.3043, 2.2014, 1.9921, 2.5241],
+ [2.0773, 1.9212, 2.0506, 2.0328]])
+
"""
a = np.asanyarray(a)
mns = a.mean(axis=axis)
@@ 1722,32 +1898,37 @@
def zmap(scores, compare, axis=0, ddof=0):
"""
 Calculates the zscores relative to the mean and standard deviation
 of second input.
+ Calculates the relative zscores.
 Returns an array of zscores, i.e. scores that are standardized to zero
+ Returns an array of zscores, i.e., scores that are standardized to zero
mean and unit variance, where mean and variance are calculated from the
comparison array.
Parameters

 scores : arraylike
 The input for which z scores are calculated
 compare : arraylike
 The input from which the mean and standard deviation of the
 normalization are taken, assumed to have same dimension as scores
 axis : integer or None, {optional, default 0)
 axis over which mean and std of compare array are calculated
+ scores : array_like
+ The input for which zscores are calculated.
+ compare : array_like
+ The input from which the mean and standard deviation of the
+ normalization are taken; assumed to have the same dimension as
+ `scores`.
+ axis : int or None, optional
+ Axis over which mean and variance of `compare` are calculated.
+ Default is 0.
+ ddof : int, optional
+ Degrees of freedom correction in the calculation of the
+ standard deviation. Default is 0.
Returns

zscore : array_like
 zscore in the same shape as scores
+ Zscores, in the same shape as `scores`.
Notes

 This function does not convert array classes, and works also with
 matrices and masked arrays.
+ This function preserves ndarray subclasses, and works also with
+ matrices and masked arrays (it uses `asanyarray` instead of `asarray`
+ for parameters).
"""
scores, compare = map(np.asanyarray, [scores, compare])
@@ 1767,16 +1948,39 @@
#####################################
def threshold(a, threshmin=None, threshmax=None, newval=0):
 """Clip array to a given value.
+ """
+ Clip array to a given value.
Similar to numpy.clip(), except that values less than threshmin or
greater than threshmax are replaced by newval, instead of by
threshmin and threshmax respectively.
+ Similar to numpy.clip(), except that values less than `threshmin` or
+ greater than `threshmax` are replaced by `newval`, instead of by
+ `threshmin` and `threshmax` respectively.
Returns: a, with values less than threshmin or greater than threshmax
 replaced with newval
+ Parameters
+ 
+ a : array_like
+ Data to threshold.
+ threshmin : float, int or None, optional
+ Minimum threshold, defaults to None.
+ threshmax : float, int or None, optional
+ Maximum threshold, defaults to None.
+ newval : float or int, optional
+ Value to put in place of values in `a` outside of bounds.
+ Defaults to 0.
"""
+ Returns
+ 
+ out : ndarray
+ The clipped input array, with values less than `threshmin` or
+ greater than `threshmax` replaced with `newval`.
+
+ Examples
+ 
+ >>> a = np.array([9, 9, 6, 3, 1, 6, 1, 0, 0, 8])
+ >>> from scipy import stats
+ >>> stats.threshold(a, threshmin=2, threshmax=8, newval=1)
+ array([1, 1, 6, 3, 1, 6, 1, 1, 1, 8])
+
+ """
a = asarray(a).copy()
mask = zeros(a.shape, dtype=bool)
if threshmin is not None:
@@ 1789,35 +1993,39 @@
def sigmaclip(a, low=4., high=4.):
 """Iterative sigmaclipping of array elements.
+ """
+ Iterative sigmaclipping of array elements.
The output array contains only those elements of the input array `c`
that satisfy the conditions ::
mean(c)  std(c)*low < c < mean(c) + std(c)*high
+ Starting from the full sample, all elements outside the critical range are
+ removed. The iteration continues with a new critical range until no
+ elements are outside the range.
+
Parameters

a : array_like
 data array, will be raveled if not 1d
 low : float
 lower bound factor of sigma clipping
 high : float
 upper bound factor of sigma clipping
+ Data array, will be raveled if not 1D.
+ low : float, optional
+ Lower bound factor of sigma clipping. Default is 4.
+ high : float, optional
+ Upper bound factor of sigma clipping. Default is 4.
Returns

 c : array
 input array with clipped elements removed
+ c : ndarray
+ Input array with clipped elements removed.
critlower : float
 lower threshold value use for clipping
+ Lower threshold value used for clipping.
critlupper : float
 upper threshold value use for clipping
+ Upper threshold value used for clipping.

Examples

 >>> a = np.concatenate((np.linspace(9.5,10.5,31),np.linspace(0,20,5)))
+ >>> a = np.concatenate((np.linspace(9.5,10.5,31), np.linspace(0,20,5)))
>>> fact = 1.5
>>> c, low, upp = sigmaclip(a, fact, fact)
>>> c
@@ 1851,15 +2059,36 @@
def trimboth(a, proportiontocut):
"""
Slices off the passed proportion of items from BOTH ends of the passed
array (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND
'rightmost' 10% of scores. You must presort the array if you want
"proper" trimming. Slices off LESS if proportion results in a
noninteger slice index (i.e., conservatively slices off
proportiontocut).
+ Slices off a proportion of items from both ends of an array.
Returns: trimmed version of array a
"""
+ Slices off the passed proportion of items from both ends of the passed
+ array (i.e., with `proportiontocut` = 0.1, slices leftmost 10% **and**
+ rightmost 10% of scores). You must presort the array if you want
+ 'proper' trimming. Slices off less if proportion results in a
+ noninteger slice index (i.e., conservatively slices off
+ `proportiontocut`).
+
+ Parameters
+ 
+ a : array_like
+ Data to trim.
+ proportiontocut : float or int
+ Proportion of total data set to trim of each end.
+
+ Returns
+ 
+ out : ndarray
+ Trimmed version of array `a`.
+
+ Examples
+ 
+ >>> from scipy import stats
+ >>> a = np.arange(20)
+ >>> b = stats.trimboth(a, 0.1)
+ >>> b.shape
+ (16,)
+
+ """
a = asarray(a)
lowercut = int(proportiontocut*len(a))
uppercut = len(a)  lowercut
@@ 1973,35 +2202,35 @@
"""
Performs a 1way ANOVA.
 The onway ANOVA tests the null hypothesis that 2 or more groups have
+ The oneway ANOVA tests the null hypothesis that two or more groups have
the same population mean. The test is applied to samples from two or
more groups, possibly with differing sizes.
Parameters

sample1, sample2, ... : array_like
 The sample measurements should be given as arguments.
+ The sample measurements for each group.
Returns

Fvalue : float
 The computed Fvalue of the test
+ The computed Fvalue of the test.
pvalue : float
 The associated pvalue from the Fdistribution
+ The associated pvalue from the Fdistribution.
Notes

The ANOVA test has important assumptions that must be satisfied in order
for the associated pvalue to be valid.
 1. The samples are independent
 2. Each sample is from a normally distributed population
+ 1. The samples are independent.
+ 2. Each sample is from a normally distributed population.
3. The population standard deviations of the groups are all equal. This
 property is known as homocedasticity.
+ property is known as homoscedasticity.
If these assumptions are not true for a given set of data, it may still be
possible to use the KruskalWallis Htest (`stats.kruskal`_) although with
 some loss of power
+ some loss of power.
The algorithm is from Heiman[2], pp.3947.
@@ 2095,12 +2324,12 @@
Calculates a Spearman rankorder correlation coefficient and the pvalue
to test for noncorrelation.
 The Spearman correlation is a nonparametric measure of the linear
 relationship between two datasets. Unlike the Pearson correlation, the
 Spearman correlation does not assume that both datasets are normally
+ The Spearman correlation is a nonparametric measure of the monotonicity
+ of the relationship between two datasets. Unlike the Pearson correlation,
+ the Spearman correlation does not assume that both datasets are normally
distributed. Like other correlation coefficients, this one varies
between 1 and +1 with 0 implying no correlation. Correlations of 1 or
 +1 imply an exact linear relationship. Positive correlations imply that
+ +1 imply an exact monotonic relationship. Positive correlations imply that
as x increases, so does y. Negative correlations imply that as x
increases, y decreases.
@@ 2109,37 +2338,34 @@
as the one computed from these datasets. The pvalues are not entirely
reliable but are probably reasonable for datasets larger than 500 or so.
 spearmanr currently does not do any tie correction, and is only correct
 if there are no ties in the data.

Parameters

a, b : 1D or 2D array_like, b is optional
One or two 1D or 2D arrays containing multiple variables and
 observations. Each column of m represents a variable, and each row
 entry a single observation of those variables. Also see axis below.
 Both arrays need to have the same length in the `axis` dimension.

+ observations. Each column of `a` and `b` represents a variable, and
+ each row entry a single observation of those variables. See also
+ `axis`. Both arrays need to have the same length in the `axis`
+ dimension.
axis : int or None, optional
If axis=0 (default), then each column represents a variable, with
observations in the rows. If axis=0, the relationship is transposed:
each row represents a variable, while the columns contain observations.
 If axis=None, then both arrays will be raveled
+ If axis=None, then both arrays will be raveled.
Returns

 rho: float or array (2D square)
 Spearman correlation matrix or correlation coefficient (if only 2 variables
 are given as parameters. Correlation matrix is square with length
 equal to total number of variables (columns or rows) in a and b
 combined
+ rho: float or ndarray (2D square)
+ Spearman correlation matrix or correlation coefficient (if only 2
+ variables are given as parameters). Correlation matrix is square with
+ length equal to total number of variables (columns or rows) in a and b
+ combined.
pvalue : float
The twosided pvalue for a hypothesis test whose null hypothesis is
 that two sets of data are uncorrelated, has same dimension as rho
+ that two sets of data are uncorrelated, has same dimension as rho.
Notes

 changes in scipy 0.8: rewrite to add tiehandling, and axis
+ Changes in scipy 0.8.0: rewrite to add tiehandling, and axis.
References

@@ 2151,7 +2377,6 @@
Examples


>>> spearmanr([1,2,3,4,5],[5,6,7,8,7])
(0.82078268166812329, 0.088587005313543798)
>>> np.random.seed(1234321)
@@ 2221,19 +2446,39 @@
implying no correlation. Correlations of 1 or +1 imply a determinative
relationship.
+ This function uses a shortcut formula but produces the same result as
+ `pearsonr`.
+
Parameters

 x : array of bools
 y : array of floats
+ x : array_like of bools
+ Input array.
+ y : array_like
+ Input array.
Returns

 (pointbiserial r,
 2tailed pvalue)
+ r : float
+ R value
+ pvalue : float
+ 2tailed pvalue
References

http://www.childrensmercy.org/stats/definitions/biserial.htm
+
+ Examples
+ 
+ >>> from scipy import stats
+ >>> a = np.array([0, 0, 0, 1, 1, 1, 1])
+ >>> b = np.arange(7)
+ >>> stats.pointbiserialr(a, b)
+ (0.8660254037844386, 0.011724811003954652)
+ >>> stats.pearsonr(a, b)
+ (0.86602540378443871, 0.011724811003954626)
+ >>> np.corrcoef(a, b)
+ array([[ 1. , 0.8660254],
+ [ 0.8660254, 1. ]])
"""
## Test data: http://support.sas.com/ctx/samples/index.jsp?sid=490&tab=output
@@ 3372,28 +3617,75 @@
#####################################
def ss(a, axis=0):
 """Squares each value in the passed array, adds these squares, and
 returns the result.
+ """
+ Squares each element of the input array, and returns the sum(s) of that.
Parameters

 a : array
 axis : int or None
+ a : array_like
+ Input array.
+ axis : int or None, optional
+ The axis along which to calculate. If None, use whole array.
+ Default is 0, i.e. along the first axis.
Returns

 The sum along the given axis for (a*a).
+ ss : ndarray
+ The sum along the given axis for (a**2).
+
+ See also
+ 
+ square_of_sums : The square(s) of the sum(s) (the opposite of `ss`).
+
+ Examples
+ 
+ >>> from scipy import stats
+ >>> a = np.array([1., 2., 5.])
+ >>> stats.ss(a)
+ 30.0
+
+ And calculating along an axis:
+
+ >>> b = np.array([[1., 2., 5.], [2., 5., 6.]])
+ >>> stats.ss(b, axis=1)
+ array([ 30., 65.])
+
"""
a, axis = _chk_asarray(a, axis)
return np.sum(a*a, axis)
def square_of_sums(a, axis=0):
 """Adds the values in the passed array, squares that sum, and returns the
result.
+ """
+ Sums elements of the input array, and returns the square(s) of that sum.
Returns: the square of the sum over axis.
"""
+ Parameters
+ 
+ a : array_like
+ Input array.
+ axis : int or None, optional
+ If axis is None, ravel `a` first. If `axis` is an integer, this will
+ be the axis over which to operate. Defaults to 0.
+
+ Returns
+ 
+ ss : float or ndarray
+ The square of the sum over `axis`.
+
+ See also
+ 
+ ss : The sum of squares (the opposite of `square_of_sums`).
+
+ Examples
+ 
+ >>> from scipy import stats
+ >>> a = np.arange(20).reshape(5,4)
+ >>> stats.square_of_sums(a)
+ array([ 1600., 2025., 2500., 3025.])
+ >>> stats.square_of_sums(a, axis=None)
+ 36100.0
+
+ """
a, axis = _chk_asarray(a, axis)
s = np.sum(a,axis)
if not np.isscalar(s):
@@ 3421,25 +3713,28 @@
return as_, it
def rankdata(a):
 """Ranks the data in a, dealing with ties appropriately.
+ """
+ Ranks the data, dealing with ties appropriately.
Equal values are assigned a rank that is the average of the ranks that
would have been otherwise assigned to all of the values within that set.
Ranks begin at 1, not 0.
 Example
 
 In [15]: stats.rankdata([0, 2, 2, 3])
 Out[15]: array([ 1. , 2.5, 2.5, 4. ])

Parameters

 a : array
+ a : array_like
This array is first flattened.
Returns

 An array of length equal to the size of a, containing rank scores.
+ rankdata : ndarray
+ An array of length equal to the size of `a`, containing rank scores.
+
+ Examples
+ 
+ >>> stats.rankdata([0, 2, 2, 3])
+ array([ 1. , 2.5, 2.5, 4. ])
+
"""
a = np.ravel(a)
n = len(a)
More information about the Scipysvn
mailing list