[Numpy-svn] r5077 - trunk/numpy/ma

numpy-svn@scip... numpy-svn@scip...
Thu Apr 24 01:57:15 CDT 2008


Author: pierregm
Date: 2008-04-24 01:57:13 -0500 (Thu, 24 Apr 2008)
New Revision: 5077

Removed:
   trunk/numpy/ma/morestats.py
   trunk/numpy/ma/mstats.py
Log:
suppressed mstats and morestats: the modules are now part of scipy.stats

Deleted: trunk/numpy/ma/morestats.py
===================================================================
--- trunk/numpy/ma/morestats.py	2008-04-24 05:14:41 UTC (rev 5076)
+++ trunk/numpy/ma/morestats.py	2008-04-24 06:57:13 UTC (rev 5077)
@@ -1,424 +0,0 @@
-"""
-Generic statistics functions, with support to MA.
-
-:author: Pierre GF Gerard-Marchant
-:contact: pierregm_at_uga_edu
-:date: $Date: 2007-10-29 17:18:13 +0200 (Mon, 29 Oct 2007) $
-:version: $Id: morestats.py 3473 2007-10-29 15:18:13Z jarrod.millman $
-"""
-__author__ = "Pierre GF Gerard-Marchant ($Author: jarrod.millman $)"
-__version__ = '1.0'
-__revision__ = "$Revision: 3473 $"
-__date__     = '$Date: 2007-10-29 17:18:13 +0200 (Mon, 29 Oct 2007) $'
-
-
-import numpy
-from numpy import bool_, float_, int_, ndarray, \
-    sqrt,\
-    arange, empty,\
-    r_
-from numpy import array as narray
-import numpy.core.numeric as numeric
-from numpy.core.numeric import concatenate
-
-import numpy.ma as MA
-from numpy.ma.core import masked, nomask, MaskedArray, masked_array
-from numpy.ma.extras import apply_along_axis, dot, median
-from numpy.ma.mstats import trim_both, trimmed_stde, mquantiles, stde_median
-
-from scipy.stats.distributions import norm, beta, t, binom
-from scipy.stats.morestats import find_repeats
-
-__all__ = ['hdquantiles', 'hdmedian', 'hdquantiles_sd',
-           'trimmed_mean_ci', 'mjci', 'rank_data']
-
-
-#####--------------------------------------------------------------------------
-#---- --- Quantiles ---
-#####--------------------------------------------------------------------------
-def hdquantiles(data, prob=list([.25,.5,.75]), axis=None, var=False,):
-    """Computes quantile estimates with the Harrell-Davis method, where the estimates
-are calculated as a weighted linear combination of order statistics.
-
-Parameters
-----------
-    data: ndarray
-        Data array.
-    prob: sequence
-        Sequence of quantiles to compute.
-    axis : int
-        Axis along which to compute the quantiles. If None, use a flattened array.
-    var : boolean
-        Whether to return the variance of the estimate.
-
-Returns
--------
-    A (p,) array of quantiles (if ``var`` is False), or a (2,p) array of quantiles
-    and variances (if ``var`` is True), where ``p`` is the number of quantiles.
-
-Notes
------
-    The function is restricted to 2D arrays.
-
-    """
-    def _hd_1D(data,prob,var):
-        "Computes the HD quantiles for a 1D array. Returns nan for invalid data."
-        xsorted = numpy.squeeze(numpy.sort(data.compressed().view(ndarray)))
-        # Don't use length here, in case we have a numpy scalar
-        n = xsorted.size
-        #.........
-        hd = empty((2,len(prob)), float_)
-        if n < 2:
-            hd.flat = numpy.nan
-            if var:
-                return hd
-            return hd[0]
-        #.........
-        v = arange(n+1) / float(n)
-        betacdf = beta.cdf
-        for (i,p) in enumerate(prob):
-            _w = betacdf(v, (n+1)*p, (n+1)*(1-p))
-            w = _w[1:] - _w[:-1]
-            hd_mean = dot(w, xsorted)
-            hd[0,i] = hd_mean
-            #
-            hd[1,i] = dot(w, (xsorted-hd_mean)**2)
-            #
-        hd[0, prob == 0] = xsorted[0]
-        hd[0, prob == 1] = xsorted[-1]
-        if var:
-            hd[1, prob == 0] = hd[1, prob == 1] = numpy.nan
-            return hd
-        return hd[0]
-    # Initialization & checks ---------
-    data = masked_array(data, copy=False, dtype=float_)
-    p = numpy.array(prob, copy=False, ndmin=1)
-    # Computes quantiles along axis (or globally)
-    if (axis is None) or (data.ndim == 1):
-        result = _hd_1D(data, p, var)
-    else:
-        assert data.ndim <= 2, "Array should be 2D at most !"
-        result = apply_along_axis(_hd_1D, axis, data, p, var)
-    #
-    return masked_array(result, mask=numpy.isnan(result))
-
-#..............................................................................
-def hdmedian(data, axis=-1, var=False):
-    """Returns the Harrell-Davis estimate of the median along the given axis.
-
-Parameters
-----------
-    data: ndarray
-        Data array.
-    axis : int
-        Axis along which to compute the quantiles. If None, use a flattened array.
-    var : boolean
-        Whether to return the variance of the estimate.
-
-    """
-    result = hdquantiles(data,[0.5], axis=axis, var=var)
-    return result.squeeze()
-
-
-#..............................................................................
-def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
-    """Computes the standard error of the Harrell-Davis quantile estimates by jackknife.
-
-
-Parameters
-----------
-    data: ndarray
-        Data array.
-    prob: sequence
-        Sequence of quantiles to compute.
-    axis : int
-        Axis along which to compute the quantiles. If None, use a flattened array.
-
-Notes
------
-    The function is restricted to 2D arrays.
-
-    """
-    def _hdsd_1D(data,prob):
-        "Computes the std error for 1D arrays."
-        xsorted = numpy.sort(data.compressed())
-        n = len(xsorted)
-        #.........
-        hdsd = empty(len(prob), float_)
-        if n < 2:
-            hdsd.flat = numpy.nan
-        #.........
-        vv = arange(n) / float(n-1)
-        betacdf = beta.cdf
-        #
-        for (i,p) in enumerate(prob):
-            _w = betacdf(vv, (n+1)*p, (n+1)*(1-p))
-            w = _w[1:] - _w[:-1]
-            mx_ = numpy.fromiter([dot(w,xsorted[r_[range(0,k),
-                                                   range(k+1,n)].astype(int_)])
-                                  for k in range(n)], dtype=float_)
-            mx_var = numpy.array(mx_.var(), copy=False, ndmin=1) * n / float(n-1)
-            hdsd[i] = float(n-1) * sqrt(numpy.diag(mx_var).diagonal() / float(n))
-        return hdsd
-    # Initialization & checks ---------
-    data = masked_array(data, copy=False, dtype=float_)
-    p = numpy.array(prob, copy=False, ndmin=1)
-    # Computes quantiles along axis (or globally)
-    if (axis is None):
-        result = _hdsd_1D(data.compressed(), p)
-    else:
-        assert data.ndim <= 2, "Array should be 2D at most !"
-        result = apply_along_axis(_hdsd_1D, axis, data, p)
-    #
-    return masked_array(result, mask=numpy.isnan(result)).ravel()
-
-
-#####--------------------------------------------------------------------------
-#---- --- Confidence intervals ---
-#####--------------------------------------------------------------------------
-
-def trimmed_mean_ci(data, proportiontocut=0.2, alpha=0.05, axis=None):
-    """Returns the selected confidence interval of the trimmed mean along the
-given axis.
-
-Parameters
-----------
-    data : sequence
-        Input data. The data is transformed to a masked array
-    proportiontocut : float
-        Proportion of the data to cut from each side of the data .
-        As a result, (2*proportiontocut*n) values are actually trimmed.
-    alpha : float
-        Confidence level of the intervals.
-    axis : int
-        Axis along which to cut. If None, uses a flattened version of the input.
-
-    """
-    data = masked_array(data, copy=False)
-    trimmed = trim_both(data, proportiontocut=proportiontocut, axis=axis)
-    tmean = trimmed.mean(axis)
-    tstde = trimmed_stde(data, proportiontocut=proportiontocut, axis=axis)
-    df = trimmed.count(axis) - 1
-    tppf = t.ppf(1-alpha/2.,df)
-    return numpy.array((tmean - tppf*tstde, tmean+tppf*tstde))
-
-#..............................................................................
-def mjci(data, prob=[0.25,0.5,0.75], axis=None):
-    """Returns the Maritz-Jarrett estimators of the standard error of selected
-experimental quantiles of the data.
-
-Parameters
------------
-    data: ndarray
-        Data array.
-    prob: sequence
-        Sequence of quantiles to compute.
-    axis : int
-        Axis along which to compute the quantiles. If None, use a flattened array.
-
-    """
-    def _mjci_1D(data, p):
-        data = data.compressed()
-        sorted = numpy.sort(data)
-        n = data.size
-        prob = (numpy.array(p) * n + 0.5).astype(int_)
-        betacdf = beta.cdf
-        #
-        mj = empty(len(prob), float_)
-        x = arange(1,n+1, dtype=float_) / n
-        y = x - 1./n
-        for (i,m) in enumerate(prob):
-            (m1,m2) = (m-1, n-m)
-            W = betacdf(x,m-1,n-m) - betacdf(y,m-1,n-m)
-            C1 = numpy.dot(W,sorted)
-            C2 = numpy.dot(W,sorted**2)
-            mj[i] = sqrt(C2 - C1**2)
-        return mj
-    #
-    data = masked_array(data, copy=False)
-    assert data.ndim <= 2, "Array should be 2D at most !"
-    p = numpy.array(prob, copy=False, ndmin=1)
-    # Computes quantiles along axis (or globally)
-    if (axis is None):
-        return _mjci_1D(data, p)
-    else:
-        return apply_along_axis(_mjci_1D, axis, data, p)
-
-#..............................................................................
-def mquantiles_cimj(data, prob=[0.25,0.50,0.75], alpha=0.05, axis=None):
-    """Computes the alpha confidence interval for the selected quantiles of the
-data, with Maritz-Jarrett estimators.
-
-Parameters
-----------
-    data: ndarray
-        Data array.
-    prob: sequence
-        Sequence of quantiles to compute.
-    alpha : float
-        Confidence level of the intervals.
-    axis : integer
-        Axis along which to compute the quantiles. If None, use a flattened array.
-    """
-    alpha = min(alpha, 1-alpha)
-    z = norm.ppf(1-alpha/2.)
-    xq = mquantiles(data, prob, alphap=0, betap=0, axis=axis)
-    smj = mjci(data, prob, axis=axis)
-    return (xq - z * smj, xq + z * smj)
-
-
-#.............................................................................
-def median_cihs(data, alpha=0.05, axis=None):
-    """Computes the alpha-level confidence interval for the median of the data,
-following the Hettmansperger-Sheather method.
-
-Parameters
-----------
-    data : sequence
-        Input data. Masked values are discarded. The input should be 1D only, or
-        axis should be set to None.
-    alpha : float
-        Confidence level of the intervals.
-    axis : integer
-        Axis along which to compute the quantiles. If None, use a flattened array.
-    """
-    def _cihs_1D(data, alpha):
-        data = numpy.sort(data.compressed())
-        n = len(data)
-        alpha = min(alpha, 1-alpha)
-        k = int(binom._ppf(alpha/2., n, 0.5))
-        gk = binom.cdf(n-k,n,0.5) - binom.cdf(k-1,n,0.5)
-        if gk < 1-alpha:
-            k -= 1
-            gk = binom.cdf(n-k,n,0.5) - binom.cdf(k-1,n,0.5)
-        gkk = binom.cdf(n-k-1,n,0.5) - binom.cdf(k,n,0.5)
-        I = (gk - 1 + alpha)/(gk - gkk)
-        lambd = (n-k) * I / float(k + (n-2*k)*I)
-        lims = (lambd*data[k] + (1-lambd)*data[k-1],
-                lambd*data[n-k-1] + (1-lambd)*data[n-k])
-        return lims
-    data = masked_array(data, copy=False)
-    # Computes quantiles along axis (or globally)
-    if (axis is None):
-        result = _cihs_1D(data.compressed(), p, var)
-    else:
-        assert data.ndim <= 2, "Array should be 2D at most !"
-        result = apply_along_axis(_cihs_1D, axis, data, alpha)
-    #
-    return result
-
-#..............................................................................
-def compare_medians_ms(group_1, group_2, axis=None):
-    """Compares the medians from two independent groups along the given axis.
-
-The comparison is performed using the McKean-Schrader estimate of the standard
-error of the medians.
-
-Parameters
-----------
-    group_1 : {sequence}
-        First dataset.
-    group_2 : {sequence}
-        Second dataset.
-    axis : {integer}
-        Axis along which the medians are estimated. If None, the arrays are flattened.
-
-Returns
--------
-    A (p,) array of comparison values.
-
-    """
-    (med_1, med_2) = (median(group_1, axis=axis), median(group_2, axis=axis))
-    (std_1, std_2) = (stde_median(group_1, axis=axis),
-                      stde_median(group_2, axis=axis))
-    W = abs(med_1 - med_2) / sqrt(std_1**2 + std_2**2)
-    return 1 - norm.cdf(W)
-
-
-#####--------------------------------------------------------------------------
-#---- --- Ranking ---
-#####--------------------------------------------------------------------------
-
-#..............................................................................
-def rank_data(data, axis=None, use_missing=False):
-    """Returns the rank (also known as order statistics) of each data point
-    along the given axis.
-
-    If some values are tied, their rank is averaged.
-    If some values are masked, their rank is set to 0 if use_missing is False,
-    or set to the average rank of the unmasked values if use_missing is True.
-
-    Parameters
-    ----------
-        data : sequence
-            Input data. The data is transformed to a masked array
-        axis : integer
-            Axis along which to perform the ranking.
-            If None, the array is first flattened. An exception is raised if
-            the axis is specified for arrays with a dimension larger than 2
-        use_missing : boolean
-            Whether the masked values have a rank of 0 (False) or equal to the
-            average rank of the unmasked values (True).
-    """
-    #
-    def _rank1d(data, use_missing=False):
-        n = data.count()
-        rk = numpy.empty(data.size, dtype=float_)
-        idx = data.argsort()
-        rk[idx[:n]] = numpy.arange(1,n+1)
-        #
-        if use_missing:
-            rk[idx[n:]] = (n+1)/2.
-        else:
-            rk[idx[n:]] = 0
-        #
-        repeats = find_repeats(data)
-        for r in repeats[0]:
-            condition = (data==r).filled(False)
-            rk[condition] = rk[condition].mean()
-        return rk
-    #
-    data = masked_array(data, copy=False)
-    if axis is None:
-        if data.ndim > 1:
-            return _rank1d(data.ravel(), use_missing).reshape(data.shape)
-        else:
-            return _rank1d(data, use_missing)
-    else:
-        return apply_along_axis(_rank1d, axis, data, use_missing)
-
-###############################################################################
-if __name__ == '__main__':
-
-    if 0:
-        from numpy.ma.testutils import assert_almost_equal
-        data = [0.706560797,0.727229578,0.990399276,0.927065621,0.158953014,
-            0.887764025,0.239407086,0.349638551,0.972791145,0.149789972,
-            0.936947700,0.132359948,0.046041972,0.641675031,0.945530547,
-            0.224218684,0.771450991,0.820257774,0.336458052,0.589113496,
-            0.509736129,0.696838829,0.491323573,0.622767425,0.775189248,
-            0.641461450,0.118455200,0.773029450,0.319280007,0.752229111,
-            0.047841438,0.466295911,0.583850781,0.840581845,0.550086491,
-            0.466470062,0.504765074,0.226855960,0.362641207,0.891620942,
-            0.127898691,0.490094097,0.044882048,0.041441695,0.317976349,
-            0.504135618,0.567353033,0.434617473,0.636243375,0.231803616,
-            0.230154113,0.160011327,0.819464108,0.854706985,0.438809221,
-            0.487427267,0.786907310,0.408367937,0.405534192,0.250444460,
-            0.995309248,0.144389588,0.739947527,0.953543606,0.680051621,
-            0.388382017,0.863530727,0.006514031,0.118007779,0.924024803,
-            0.384236354,0.893687694,0.626534881,0.473051932,0.750134705,
-            0.241843555,0.432947602,0.689538104,0.136934797,0.150206859,
-            0.474335206,0.907775349,0.525869295,0.189184225,0.854284286,
-            0.831089744,0.251637345,0.587038213,0.254475554,0.237781276,
-            0.827928620,0.480283781,0.594514455,0.213641488,0.024194386,
-            0.536668589,0.699497811,0.892804071,0.093835427,0.731107772]
-        #
-        assert_almost_equal(hdquantiles(data,[0., 1.]),
-                            [0.006514031, 0.995309248])
-        hdq = hdquantiles(data,[0.25, 0.5, 0.75])
-        assert_almost_equal(hdq, [0.253210762, 0.512847491, 0.762232442,])
-        hdq = hdquantiles_sd(data,[0.25, 0.5, 0.75])
-        assert_almost_equal(hdq, [0.03786954, 0.03805389, 0.03800152,], 4)
-        #
-        data = numpy.array(data).reshape(10,10)
-        hdq = hdquantiles(data,[0.25,0.5,0.75],axis=0)

Deleted: trunk/numpy/ma/mstats.py
===================================================================
--- trunk/numpy/ma/mstats.py	2008-04-24 05:14:41 UTC (rev 5076)
+++ trunk/numpy/ma/mstats.py	2008-04-24 06:57:13 UTC (rev 5077)
@@ -1,462 +0,0 @@
-"""
-Generic statistics functions, with support to MA.
-
-:author: Pierre GF Gerard-Marchant
-:contact: pierregm_at_uga_edu
-:date: $Date: 2007-10-29 17:18:13 +0200 (Mon, 29 Oct 2007) $
-:version: $Id: mstats.py 3473 2007-10-29 15:18:13Z jarrod.millman $
-"""
-__author__ = "Pierre GF Gerard-Marchant ($Author: jarrod.millman $)"
-__version__ = '1.0'
-__revision__ = "$Revision: 3473 $"
-__date__     = '$Date: 2007-10-29 17:18:13 +0200 (Mon, 29 Oct 2007) $'
-
-
-import numpy
-from numpy import bool_, float_, int_, ndarray, \
-    sqrt
-from numpy import array as narray
-import numpy.core.numeric as numeric
-from numpy.core.numeric import concatenate
-
-import numpy.ma
-from numpy.ma.core import masked, nomask, MaskedArray, masked_array
-from numpy.ma.extras import apply_along_axis, dot, median as mmedian
-
-__all__ = ['cov','meppf','plotting_positions','meppf','mquantiles',
-           'stde_median','trim_tail','trim_both','trimmed_mean','trimmed_stde',
-           'winsorize']
-
-#####--------------------------------------------------------------------------
-#---- -- Trimming ---
-#####--------------------------------------------------------------------------
-
-def winsorize(data, alpha=0.2):
-    """Returns a Winsorized version of the input array.
-
-    The (alpha/2.) lowest values are set to the (alpha/2.)th percentile,
-    and the (alpha/2.) highest values are set to the (1-alpha/2.)th
-    percentile.
-    Masked values are skipped.
-
-    Parameters
-    ----------
-        data : ndarray
-            Input data to Winsorize. The data is first flattened.
-        alpha : float
-            Percentage of total Winsorization: alpha/2. on the left,
-            alpha/2. on the right
-
-    """
-    data = masked_array(data, copy=False).ravel()
-    idxsort = data.argsort()
-    (nsize, ncounts) = (data.size, data.count())
-    ntrim = int(alpha * ncounts)
-    (xmin,xmax) = data[idxsort[[ntrim, ncounts-nsize-ntrim-1]]]
-    return masked_array(numpy.clip(data, xmin, xmax), mask=data._mask)
-
-#..............................................................................
-def trim_both(data, proportiontocut=0.2, axis=None):
-    """Trims the data by masking the int(trim*n) smallest and int(trim*n)
-    largest values of data along the given axis, where n is the number
-    of unmasked values.
-
-    Parameters
-    ----------
-        data : ndarray
-            Data to trim.
-        proportiontocut : float
-            Percentage of trimming. If n is the number of unmasked values
-            before trimming, the number of values after trimming is:
-                (1-2*trim)*n.
-        axis : int
-            Axis along which to perform the trimming.
-            If None, the input array is first flattened.
-
-    Notes
-    -----
-        The function works only for arrays up to 2D.
-
-    """
-    #...................
-    def _trim_1D(data, trim):
-        "Private function: return a trimmed 1D array."
-        nsize = data.size
-        ncounts = data.count()
-        ntrim = int(trim * ncounts)
-        idxsort = data.argsort()
-        data[idxsort[:ntrim]] = masked
-        data[idxsort[ncounts-nsize-ntrim:]] = masked
-        return data
-    #...................
-    data = masked_array(data, copy=False, subok=True)
-    data.unshare_mask()
-    if (axis is None):
-        return _trim_1D(data.ravel(), proportiontocut)
-    else:
-        assert data.ndim <= 2, "Array should be 2D at most !"
-        return apply_along_axis(_trim_1D, axis, data, proportiontocut)
-
-#..............................................................................
-def trim_tail(data, proportiontocut=0.2, tail='left', axis=None):
-    """Trims the data by masking int(trim*n) values from ONE tail of the
-    data along the given axis, where n is the number of unmasked values.
-
-    Parameters
-    ----------
-        data : ndarray
-            Data to trim.
-        proportiontocut : float
-            Percentage of trimming. If n is the number of unmasked values
-            before trimming, the number of values after trimming is
-            (1-trim)*n.
-        tail : string
-            Trimming direction, in ('left', 'right').
-            If left, the ``proportiontocut`` lowest values are set to the
-            corresponding percentile. If right, the ``proportiontocut``
-            highest values are used instead.
-        axis : int
-            Axis along which to perform the trimming.
-            If None, the input array is first flattened.
-
-    Notes
-    -----
-        The function works only for arrays up to 2D.
-
-    """
-    #...................
-    def _trim_1D(data, trim, left):
-        "Private function: return a trimmed 1D array."
-        nsize = data.size
-        ncounts = data.count()
-        ntrim = int(trim * ncounts)
-        idxsort = data.argsort()
-        if left:
-            data[idxsort[:ntrim]] = masked
-        else:
-            data[idxsort[ncounts-nsize-ntrim:]] = masked
-        return data
-    #...................
-    data = masked_array(data, copy=False, subok=True)
-    data.unshare_mask()
-    #
-    if not isinstance(tail, str):
-        raise TypeError("The tail argument should be in ('left','right')")
-    tail = tail.lower()[0]
-    if tail == 'l':
-        left = True
-    elif tail == 'r':
-        left=False
-    else:
-        raise ValueError("The tail argument should be in ('left','right')")
-    #
-    if (axis is None):
-        return _trim_1D(data.ravel(), proportiontocut, left)
-    else:
-        assert data.ndim <= 2, "Array should be 2D at most !"
-        return apply_along_axis(_trim_1D, axis, data, proportiontocut, left)
-
-#..............................................................................
-def trimmed_mean(data, proportiontocut=0.2, axis=None):
-    """Returns the trimmed mean of the data along the given axis.
-    Trimming is performed on both ends of the distribution.
-
-    Parameters
-    ----------
-        data : ndarray
-            Data to trim.
-        proportiontocut : float
-            Proportion of the data to cut from each side of the data .
-            As a result, (2*proportiontocut*n) values are actually trimmed.
-        axis : int
-            Axis along which to perform the trimming.
-            If None, the input array is first flattened.
-
-    """
-    return trim_both(data, proportiontocut=proportiontocut, axis=axis).mean(axis=axis)
-
-#..............................................................................
-def trimmed_stde(data, proportiontocut=0.2, axis=None):
-    """Returns the standard error of the trimmed mean for the input data,
-    along the given axis. Trimming is performed on both ends of the distribution.
-
-    Parameters
-    ----------
-        data : ndarray
-            Data to trim.
-        proportiontocut : float
-            Proportion of the data to cut from each side of the data .
-            As a result, (2*proportiontocut*n) values are actually trimmed.
-        axis : int
-            Axis along which to perform the trimming.
-            If None, the input array is first flattened.
-
-    Notes
-    -----
-        The function works with arrays up to 2D.
-
-    """
-    #........................
-    def _trimmed_stde_1D(data, trim=0.2):
-        "Returns the standard error of the trimmed mean for a 1D input data."
-        winsorized = winsorize(data)
-        nsize = winsorized.count()
-        winstd = winsorized.std(ddof=1)
-        return winstd / ((1-2*trim) * numpy.sqrt(nsize))
-    #........................
-    data = masked_array(data, copy=False, subok=True)
-    data.unshare_mask()
-    if (axis is None):
-        return _trimmed_stde_1D(data.ravel(), proportiontocut)
-    else:
-        assert data.ndim <= 2, "Array should be 2D at most !"
-        return apply_along_axis(_trimmed_stde_1D, axis, data, proportiontocut)
-
-#.............................................................................
-def stde_median(data, axis=None):
-    """Returns the McKean-Schrader estimate of the standard error of the sample
-median along the given axis.
-
-    Parameters
-    ----------
-        data : ndarray
-            Data to trim.
-        axis : int
-            Axis along which to perform the trimming.
-            If None, the input array is first flattened.
-
-    """
-    def _stdemed_1D(data):
-        sorted = numpy.sort(data.compressed())
-        n = len(sorted)
-        z = 2.5758293035489004
-        k = int(round((n+1)/2. - z * sqrt(n/4.),0))
-        return ((sorted[n-k] - sorted[k-1])/(2.*z))
-    #
-    data = masked_array(data, copy=False, subok=True)
-    if (axis is None):
-        return _stdemed_1D(data)
-    else:
-        assert data.ndim <= 2, "Array should be 2D at most !"
-        return apply_along_axis(_stdemed_1D, axis, data)
-
-
-#####--------------------------------------------------------------------------
-#---- --- Quantiles ---
-#####--------------------------------------------------------------------------
-
-
-def mquantiles(data, prob=list([.25,.5,.75]), alphap=.4, betap=.4, axis=None):
-    """Computes empirical quantiles for a *1xN* data array.
-Samples quantile are defined by:
-*Q(p) = (1-g).x[i] +g.x[i+1]*
-where *x[j]* is the jth order statistic,
-with *i = (floor(n*p+m))*, *m=alpha+p*(1-alpha-beta)* and *g = n*p + m - i)*.
-
-Typical values of (alpha,beta) are:
-
-    - (0,1)    : *p(k) = k/n* : linear interpolation of cdf (R, type 4)
-    - (.5,.5)  : *p(k) = (k+1/2.)/n* : piecewise linear function (R, type 5)
-    - (0,0)    : *p(k) = k/(n+1)* : (R type 6)
-    - (1,1)    : *p(k) = (k-1)/(n-1)*. In this case, p(k) = mode[F(x[k])].
-      That's R default (R type 7)
-    - (1/3,1/3): *p(k) = (k-1/3)/(n+1/3)*. Then p(k) ~ median[F(x[k])].
-      The resulting quantile estimates are approximately median-unbiased
-      regardless of the distribution of x. (R type 8)
-    - (3/8,3/8): *p(k) = (k-3/8)/(n+1/4)*. Blom.
-      The resulting quantile estimates are approximately unbiased
-      if x is normally distributed (R type 9)
-    - (.4,.4)  : approximately quantile unbiased (Cunnane)
-    - (.35,.35): APL, used with PWM
-
-Parameters
-----------
-    x : sequence
-        Input data, as a sequence or array of dimension at most 2.
-    prob : sequence
-        List of quantiles to compute.
-    alpha : float
-        Plotting positions parameter.
-    beta : float
-        Plotting positions parameter.
-    axis : int
-        Axis along which to perform the trimming. If None, the input array is first
-        flattened.
-    """
-    def _quantiles1D(data,m,p):
-        x = numpy.sort(data.compressed())
-        n = len(x)
-        if n == 0:
-            return masked_array(numpy.empty(len(p), dtype=float_), mask=True)
-        elif n == 1:
-            return masked_array(numpy.resize(x, p.shape), mask=nomask)
-        aleph = (n*p + m)
-        k = numpy.floor(aleph.clip(1, n-1)).astype(int_)
-        gamma = (aleph-k).clip(0,1)
-        return (1.-gamma)*x[(k-1).tolist()] + gamma*x[k.tolist()]
-
-    # Initialization & checks ---------
-    data = masked_array(data, copy=False)
-    p = narray(prob, copy=False, ndmin=1)
-    m = alphap + p*(1.-alphap-betap)
-    # Computes quantiles along axis (or globally)
-    if (axis is None):
-        return _quantiles1D(data, m, p)
-    else:
-        assert data.ndim <= 2, "Array should be 2D at most !"
-        return apply_along_axis(_quantiles1D, axis, data, m, p)
-
-
-def plotting_positions(data, alpha=0.4, beta=0.4):
-    """Returns the plotting positions (or empirical percentile points) for the
-    data.
-    Plotting positions are defined as (i-alpha)/(n-alpha-beta), where:
-        - i is the rank order statistics
-        - n is the number of unmasked values along the given axis
-        - alpha and beta are two parameters.
-
-    Typical values for alpha and beta are:
-        - (0,1)    : *p(k) = k/n* : linear interpolation of cdf (R, type 4)
-        - (.5,.5)  : *p(k) = (k-1/2.)/n* : piecewise linear function (R, type 5)
-        - (0,0)    : *p(k) = k/(n+1)* : Weibull (R type 6)
-        - (1,1)    : *p(k) = (k-1)/(n-1)*. In this case, p(k) = mode[F(x[k])].
-          That's R default (R type 7)
-        - (1/3,1/3): *p(k) = (k-1/3)/(n+1/3)*. Then p(k) ~ median[F(x[k])].
-          The resulting quantile estimates are approximately median-unbiased
-          regardless of the distribution of x. (R type 8)
-        - (3/8,3/8): *p(k) = (k-3/8)/(n+1/4)*. Blom.
-          The resulting quantile estimates are approximately unbiased
-          if x is normally distributed (R type 9)
-        - (.4,.4)  : approximately quantile unbiased (Cunnane)
-        - (.35,.35): APL, used with PWM
-
-Parameters
-----------
-    x : sequence
-        Input data, as a sequence or array of dimension at most 2.
-    prob : sequence
-        List of quantiles to compute.
-    alpha : float
-        Plotting positions parameter.
-    beta : float
-        Plotting positions parameter.
-
-    """
-    data = masked_array(data, copy=False).reshape(1,-1)
-    n = data.count()
-    plpos = numpy.empty(data.size, dtype=float_)
-    plpos[n:] = 0
-    plpos[data.argsort()[:n]] = (numpy.arange(1,n+1) - alpha)/(n+1-alpha-beta)
-    return masked_array(plpos, mask=data._mask)
-
-meppf = plotting_positions
-
-
-def cov(x, y=None, rowvar=True, bias=False, strict=False):
-    """Estimates the covariance matrix.
-
-
-Normalization is by (N-1) where N is the number of observations (unbiased
-estimate).  If bias is True then normalization is by N.
-
-Parameters
-----------
-    x : ndarray
-        Input data. If x is a 1D array, returns the variance. If x is a 2D array,
-        returns the covariance matrix.
-    y : ndarray
-        Optional set of variables.
-    rowvar : boolean
-        If rowvar is true, then each row is a variable with observations in columns.
-        If rowvar is False, each column is a variable and the observations are in
-        the rows.
-    bias : boolean
-        Whether to use a biased or unbiased estimate of the covariance.
-        If bias is True, then the normalization is by N, the number of observations.
-        Otherwise, the normalization is by (N-1)
-    strict : {boolean}
-        If strict is True, masked values are propagated: if a masked value appears in
-        a row or column, the whole row or column is considered masked.
-    """
-    X = narray(x, ndmin=2, subok=True, dtype=float)
-    if X.shape[0] == 1:
-        rowvar = True
-    if rowvar:
-        axis = 0
-        tup = (slice(None),None)
-    else:
-        axis = 1
-        tup = (None, slice(None))
-    #
-    if y is not None:
-        y = narray(y, copy=False, ndmin=2, subok=True, dtype=float)
-        X = concatenate((X,y),axis)
-    #
-    X -= X.mean(axis=1-axis)[tup]
-    n = X.count(1-axis)
-    #
-    if bias:
-        fact = n*1.0
-    else:
-        fact = n-1.0
-    #
-    if not rowvar:
-        return (dot(X.T, X.conj(), strict=False) / fact).squeeze()
-    else:
-        return (dot(X, X.T.conj(), strict=False) / fact).squeeze()
-
-
-def idealfourths(data, axis=None):
-    """Returns an estimate of the interquartile range of the data along the given
-axis, as computed with the ideal fourths.
-    """
-    def _idf(data):
-        x = numpy.sort(data.compressed())
-        n = len(x)
-        (j,h) = divmod(n/4. + 5/12.,1)
-        qlo = (1-h)*x[j] + h*x[j+1]
-        k = n - j
-        qup = (1-h)*x[k] + h*x[k-1]
-        return qup - qlo
-    data = masked_array(data, copy=False)
-    if (axis is None):
-        return _idf(data)
-    else:
-        return apply_along_axis(_idf, axis, data)
-
-
-def rsh(data, points=None):
-    """Evaluates Rosenblatt's shifted histogram estimators for each point
-on the dataset 'data'.
-
-Parameters
-    data : sequence
-        Input data. Masked values are ignored.
-    points : sequence
-        Sequence of points where to evaluate Rosenblatt shifted histogram.
-        If None, use the data.
-    """
-    data = masked_array(data, copy=False)
-    if points is None:
-        points = data
-    else:
-        points = numpy.array(points, copy=False, ndmin=1)
-    if data.ndim != 1:
-        raise AttributeError("The input array should be 1D only !")
-    n = data.count()
-    h = 1.2 * idealfourths(data) / n**(1./5)
-    nhi = (data[:,None] <= points[None,:] + h).sum(0)
-    nlo = (data[:,None] < points[None,:] - h).sum(0)
-    return (nhi-nlo) / (2.*n*h)
-
-################################################################################
-if __name__ == '__main__':
-    from numpy.ma.testutils import assert_almost_equal
-    if 1:
-        a = numpy.ma.arange(1,101)
-        a[1::2] = masked
-        b = numpy.ma.resize(a, (100,100))
-        assert_almost_equal(mquantiles(b), [25., 50., 75.])
-        assert_almost_equal(mquantiles(b, axis=0), numpy.ma.resize(a,(3,100)))
-        assert_almost_equal(mquantiles(b, axis=1),
-                            numpy.ma.resize([24.9, 50., 75.1], (100,3)))



More information about the Numpy-svn mailing list