[Scipy-svn] r6698 - in trunk/scipy/stats: . tests

scipy-svn@scip... scipy-svn@scip...
Sat Sep 11 14:00:43 CDT 2010


Author: warren.weckesser
Date: 2010-09-11 14:00:43 -0500 (Sat, 11 Sep 2010)
New Revision: 6698

Modified:
   trunk/scipy/stats/morestats.py
   trunk/scipy/stats/tests/test_morestats.py
Log:
BUG: stats.levene() and stats.fligner() raised exceptions in the case center='trimmed' (ticket #1270).

Modified: trunk/scipy/stats/morestats.py
===================================================================
--- trunk/scipy/stats/morestats.py	2010-09-10 14:27:02 UTC (rev 6697)
+++ trunk/scipy/stats/morestats.py	2010-09-11 19:00:43 UTC (rev 6698)
@@ -1,5 +1,7 @@
 # Author:  Travis Oliphant, 2002
 #
+# Further updates and enhancements by many SciPy developers.
+#
 
 import math
 import statlib
@@ -833,6 +835,10 @@
     center : {'mean', 'median', 'trimmed'}, optional
         Which function of the data to use in the test.  The default
         is 'median'.
+    proportiontocut : float, optional
+        When `center` is 'trimmed', this gives the proportion of data points
+        to cut from each end. (See `scipy.stats.trim_mean`.)
+        Default is 0.05.
 
     Returns
     -------
@@ -863,24 +869,35 @@
               Statistical Association, 69, 364-367
 
     """
+    # Handle keyword arguments.
+    center = 'median'
+    proportiontocut = 0.05
+    for kw, value in kwds.items():
+        if kw not in ['center', 'proportiontocut']:
+            raise TypeError("levene() got an unexpected keyword argument '%s'" % kw)
+        if kw == 'center':
+            center = value
+        else:
+            proportiontocut = value
+
     k = len(args)
     if k < 2:
-        raise ValueError, "Must enter at least two input sample vectors."
+        raise ValueError("Must enter at least two input sample vectors.")
     Ni = zeros(k)
     Yci = zeros(k,'d')
-    if 'center' in kwds.keys():
-        center = kwds['center']
-    else:
-        center = 'median'
+
     if not center in ['mean','median','trimmed']:
-        raise ValueError, "Keyword argument <center> must be 'mean', 'median'"\
-              + "or 'trimmed'."
+        raise ValueError("Keyword argument <center> must be 'mean', 'median'"
+              + "or 'trimmed'.")
+
     if center == 'median':
         func = lambda x: np.median(x, axis=0)
     elif center == 'mean':
         func = lambda x: np.mean(x, axis=0)
-    else:
-        func = stats.trim_mean
+    else: # center == 'trimmed'
+        args = tuple(stats.trimboth(arg, proportiontocut) for arg in args)
+        func = lambda x: np.mean(x, axis=0)
+
     for j in range(k):
         Ni[j] = len(args[j])
         Yci[j] = func(args[j])
@@ -997,6 +1014,10 @@
         keyword argument controlling which function of the data
         is used in computing the test statistic.  The default
         is 'median'.
+    proportiontocut : float, optional
+        When `center` is 'trimmed', this gives the proportion of data points
+        to cut from each end. (See `scipy.stats.trim_mean`.)
+        Default is 0.05.
 
     Returns
     -------
@@ -1021,22 +1042,32 @@
            71(353), 210-213.
 
     """
+    # Handle keyword arguments.
+    center = 'median'
+    proportiontocut = 0.05
+    for kw, value in kwds.items():
+        if kw not in ['center', 'proportiontocut']:
+            raise TypeError("fligner() got an unexpected keyword argument '%s'" % kw)
+        if kw == 'center':
+            center = value
+        else:
+            proportiontocut = value
+
     k = len(args)
     if k < 2:
-        raise ValueError, "Must enter at least two input sample vectors."
-    if 'center' in kwds.keys():
-        center = kwds['center']
-    else:
-        center = 'median'
+        raise ValueError("Must enter at least two input sample vectors.")
+
     if not center in ['mean','median','trimmed']:
-        raise ValueError, "Keyword argument <center> must be 'mean', 'median'"\
-              + "or 'trimmed'."
+        raise ValueError("Keyword argument <center> must be 'mean', 'median'"
+              + "or 'trimmed'.")
+
     if center == 'median':
         func = lambda x: np.median(x, axis=0)
     elif center == 'mean':
         func = lambda x: np.mean(x, axis=0)
-    else:
-        func = stats.trim_mean
+    else: # center == 'trimmed'
+        args = tuple(stats.trimboth(arg, proportiontocut) for arg in args)
+        func = lambda x: np.mean(x, axis=0)
 
     Ni = asarray([len(args[j]) for j in range(k)])
     Yci = asarray([func(args[j]) for j in range(k)])
@@ -1048,16 +1079,15 @@
     for i in range(k):
         allZij.extend(list(Zij[i]))
         g.append(len(allZij))
+    
+    ranks = stats.rankdata(allZij)
+    a = distributions.norm.ppf(ranks/(2*(Ntot+1.0)) + 0.5)
 
-    a = distributions.norm.ppf(stats.rankdata(allZij)/(2*(Ntot+1.0)) + 0.5)
-
     # compute Aibar
     Aibar = _apply_func(a,g,sum) / Ni
     anbar = np.mean(a, axis=0)
     varsq = np.var(a,axis=0, ddof=1)
-
     Xsq = sum(Ni*(asarray(Aibar)-anbar)**2.0,axis=0)/varsq
-
     pval = distributions.chi2.sf(Xsq,k-1) # 1 - cdf
     return Xsq, pval
 
@@ -1158,7 +1188,6 @@
     return F, pval
 
 
-
 def wilcoxon(x,y=None):
     """
     Calculate the Wilcoxon signed-rank test

Modified: trunk/scipy/stats/tests/test_morestats.py
===================================================================
--- trunk/scipy/stats/tests/test_morestats.py	2010-09-10 14:27:02 UTC (rev 6697)
+++ trunk/scipy/stats/tests/test_morestats.py	2010-09-11 19:00:43 UTC (rev 6698)
@@ -1,15 +1,18 @@
 # Author:  Travis Oliphant, 2002
 #
+# Further enhancements and tests added by numerous SciPy developers.
+#
 
 from numpy.testing import TestCase, run_module_suite, assert_array_equal, \
-    assert_almost_equal, assert_array_less, assert_array_almost_equal
+    assert_almost_equal, assert_array_less, assert_array_almost_equal, \
+    assert_raises
 
-
 import scipy.stats as stats
 
 import numpy as np
 from numpy.random import RandomState
 
+
 g1 = [1.006, 0.996, 0.998, 1.000, 0.992, 0.993, 1.002, 0.999, 0.994, 1.000]
 g2 = [0.998, 1.006, 1.000, 1.002, 0.997, 0.998, 0.996, 1.000, 1.006, 0.988]
 g3 = [0.991, 0.987, 0.997, 0.999, 0.995, 0.994, 1.000, 0.999, 0.996, 0.996]
@@ -79,22 +82,53 @@
 
 class TestBartlett(TestCase):
     def test_data(self):
-        args = []
-        for k in range(1,11):
-            args.append(eval('g%d'%k))
+        args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
         T, pval = stats.bartlett(*args)
         assert_almost_equal(T,20.78587342806484,7)
         assert_almost_equal(pval,0.0136358632781,7)
 
 class TestLevene(TestCase):
+
     def test_data(self):
-        args = []
-        for k in range(1,11):
-            args.append(eval('g%d'%k))
+        args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
         W, pval = stats.levene(*args)
         assert_almost_equal(W,1.7059176930008939,7)
         assert_almost_equal(pval,0.0990829755522,7)
 
+    def test_trimmed1(self):
+        """Test that center='trimmed' gives the same result as center='mean' when proportiontocut=0."""
+        W1, pval1 = stats.levene(g1, g2, g3, center='mean')
+        W2, pval2 = stats.levene(g1, g2, g3, center='trimmed', proportiontocut=0.0)
+        assert_almost_equal(W1, W2)
+        assert_almost_equal(pval1, pval2)
+    
+    def test_trimmed2(self):
+        x = [1.2, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0]
+        y = [0.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 200.0]
+        # Use center='trimmed'
+        W1, pval1 = stats.levene(x, y, center='trimmed', proportiontocut=0.125)
+        # Trim the data here, and use center='mean'
+        W2, pval2 = stats.levene(x[1:-1], y[1:-1], center='mean')
+        # Result should be the same.
+        assert_almost_equal(W1, W2)
+        assert_almost_equal(pval1, pval2)
+
+    def test_equal_mean_median(self):
+        x = np.linspace(-1,1,21)
+        y = x**3
+        W1, pval1 = stats.levene(x, y, center='mean')
+        W2, pval2 = stats.levene(x, y, center='median')
+        assert_almost_equal(W1, W2)        
+        assert_almost_equal(pval1, pval2)
+
+    def test_bad_keywod(self):
+        x = np.linspace(-1,1,21)
+        assert_raises(TypeError, stats.levene, x, x, portiontocut=0.1)
+
+    def test_bad_center_value(self):
+        x = np.linspace(-1,1,21)
+        assert_raises(ValueError, stats.levene, x, x, center='trim')        
+
 class TestBinomP(TestCase):
     def test_data(self):
         pval = stats.binom_test(100,250)
@@ -111,15 +145,58 @@
         assert_array_equal(res,[1,2,3,4])
         assert_array_equal(nums,[3,3,2,2])
 
-def test_fligner():
-    #numbers from R: fligner.test in package stats
-    x1=np.arange(5)
-    assert_array_almost_equal(stats.fligner(x1,x1**2),
-                       (3.2282229927203536, 0.072379187848207877), 11)
 
+class TestFligner(TestCase):
+
+    def test_data(self):
+        # numbers from R: fligner.test in package stats
+        x1 = np.arange(5)
+        assert_array_almost_equal(stats.fligner(x1,x1**2),
+                           (3.2282229927203536, 0.072379187848207877), 11)
+
+    def test_trimmed1(self):
+        """Test that center='trimmed' gives the same result as center='mean' when proportiontocut=0."""
+        Xsq1, pval1 = stats.fligner(g1, g2, g3, center='mean')
+        Xsq2, pval2 = stats.fligner(g1, g2, g3, center='trimmed', proportiontocut=0.0)
+        assert_almost_equal(Xsq1, Xsq2)
+        assert_almost_equal(pval1, pval2)
+        
+    def test_trimmed2(self):
+        x = [1.2, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0]
+        y = [0.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 200.0]
+        # Use center='trimmed'
+        Xsq1, pval1 = stats.fligner(x, y, center='trimmed', proportiontocut=0.125)
+        # Trim the data here, and use center='mean'
+        Xsq2, pval2 = stats.fligner(x[1:-1], y[1:-1], center='mean')
+        # Result should be the same.
+        assert_almost_equal(Xsq1, Xsq2)
+        assert_almost_equal(pval1, pval2) 
+
+    # The following test looks reasonable at first, but fligner() uses the
+    # function stats.rankdata(), and in one of the cases in this test,
+    # there are ties, while in the other (because of normal rounding
+    # errors) there are not.  This difference leads to differences in the
+    # third significant digit of W.
+    #
+    #def test_equal_mean_median(self):
+    #    x = np.linspace(-1,1,21)
+    #    y = x**3
+    #    W1, pval1 = stats.fligner(x, y, center='mean')
+    #    W2, pval2 = stats.fligner(x, y, center='median')
+    #    assert_almost_equal(W1, W2)
+    #    assert_almost_equal(pval1, pval2)
+
+    def test_bad_keywod(self):
+        x = np.linspace(-1,1,21)
+        assert_raises(TypeError, stats.fligner, x, x, portiontocut=0.1)
+
+    def test_bad_center_value(self):
+        x = np.linspace(-1,1,21)
+        assert_raises(ValueError, stats.levene, x, x, center='trim')
+
 def test_mood():
-    #numbers from R: mood.test in package stats
-    x1=np.arange(5)
+    # numbers from R: mood.test in package stats
+    x1 = np.arange(5)
     assert_array_almost_equal(stats.mood(x1,x1**2),
             (-1.3830857299399906, 0.16663858066771478), 11)
 



More information about the Scipy-svn mailing list