# [Scipy-svn] r6701 - in trunk/scipy/stats: . tests

scipy-svn@scip... scipy-svn@scip...
Sat Sep 11 18:16:47 CDT 2010

```
Author: warren.weckesser
Date: 2010-09-11 18:16:47 -0500 (Sat, 11 Sep 2010)
New Revision: 6701

Modified:
trunk/scipy/stats/morestats.py
trunk/scipy/stats/tests/test_morestats.py
Log:
ENH: stats.morestats: updated 'raise' statements, tweaked code a bit, added tests that incorrect arguments raise the expected exceptions.

Modified: trunk/scipy/stats/morestats.py
===================================================================
--- trunk/scipy/stats/morestats.py	2010-09-11 19:20:41 UTC (rev 6700)
+++ trunk/scipy/stats/morestats.py	2010-09-11 23:16:47 UTC (rev 6701)
@@ -157,7 +157,7 @@
x = ravel(data)
n = len(x)
if (n < 2):
-        raise ValueError, "Need at least 2 data-points."
+        raise ValueError("Need at least 2 data-points.")
xbar = x.mean()
C = x.var()
if (n > 1000): # gaussian approximations for large n
@@ -206,8 +206,8 @@
The nth k-statistic is the unique symmetric unbiased estimator of the nth
cumulant kappa_n
"""
-    if n>4 or n<1:
-        raise ValueError, "k-statistics only supported for 1<=n<=4"
+    if n > 4 or n < 1:
+        raise ValueError("k-statistics only supported for 1<=n<=4")
n = int(n)
S = zeros(n+1,'d')
data = ravel(data)
@@ -225,21 +225,21 @@
4*N*(N+1)*S[1]*S[3] + N*N*(N+1)*S[4]) / \
(N*(N-1.0)*(N-2.0)*(N-3.0))
else:
-        raise ValueError, "Should not be here."
+        raise ValueError("Should not be here.")

def kstatvar(data,n=2):
"""Returns an unbiased estimator of the variance of the k-statistic:  n=1 or 2
"""
data = ravel(data)
N = len(data)
-    if n==1:
+    if n == 1:
return kstat(data,n=2)*1.0/N
-    elif n==2:
+    elif n == 2:
k2 = kstat(data,n=2)
k4 = kstat(data,n=4)
return (2*k2*k2*N + (N-1)*k4)/(N*(N+1))
else:
-        raise ValueError, "Only n=1 or n=2 supported."
+        raise ValueError("Only n=1 or n=2 supported.")

#__all__ = ['probplot','ppcc_max','ppcc_plot','boxcox','boxcox_llf',
@@ -262,9 +262,9 @@
i = arange(2,N)
Ui[1:-1] = (i-0.3175)/(N+0.365)
try:
-        ppf_func = eval('distributions.%s.ppf'%dist)
+        ppf_func = eval('distributions.%s.ppf' % dist)
except AttributeError:
-        raise dist, "is not a valid distribution with a ppf."
+        raise ValueError("%s is not a valid distribution with a ppf." % dist)
if sparams is None:
sparams = ()
if isscalar(sparams):
@@ -312,7 +312,7 @@
try:
ppf_func = eval('distributions.%s.ppf'%dist)
except AttributeError:
-        raise dist, "is not a valid distribution with a ppf."
+        raise ValueError("%s is not a valid distribution with a ppf." % dist)
"""
res = inspect.getargspec(ppf_func)
if not ('loc' == res[0][-2] and 'scale' == res[0][-1] and \
@@ -386,18 +386,18 @@
while (rootfunc(newlm,x,target) > 0.0) and (N < 500):
newlm += 0.1
N +=1
-    if (N==500):
-        raise RuntimeError, "Could not find endpoint."
+    if N == 500:
+        raise RuntimeError("Could not find endpoint.")
lmplus = optimize.brentq(rootfunc,lmax,newlm,args=(x,target))
newlm = lmax-0.5
N = 0
while (rootfunc(newlm,x,target) > 0.0) and (N < 500):
newlm += 0.1
N +=1
-    if (N==500):
-        raise RuntimeError, "Could not find endpoint."
-    lmminus = optimize.brentq(rootfunc,newlm,lmax,args=(x,target))
-    return lmminus,lmplus
+    if N == 500:
+        raise RuntimeError("Could not find endpoint.")
+    lmminus = optimize.brentq(rootfunc, newlm, lmax, args=(x,target))
+    return lmminus, lmplus

def boxcox(x,lmbda=None,alpha=None):
"""Return a positive dataset tranformed by a Box-Cox power transformation.
@@ -411,7 +411,7 @@
lambda as the third output argument.
"""
if any(x < 0):
-        raise ValueError, "Data must be positive."
+        raise ValueError("Data must be positive.")
if lmbda is not None:  # single transformation
lmbda = lmbda*(x==x)
y = where(lmbda == 0, log(x), (x**lmbda - 1)/lmbda)
@@ -506,7 +506,7 @@
"""
N = len(x)
if N < 3:
-        raise ValueError, "Data must be at least length 3."
+        raise ValueError("Data must be at least length 3.")
if a is None:
a = zeros(N,'f')
init = 0
@@ -603,7 +603,8 @@

"""
if not dist in ['norm','expon','gumbel','extreme1','logistic']:
-        raise ValueError, "Invalid distribution."
+        raise ValueError("Invalid distribution; dist must be 'norm', "
+                            "'expon', 'gumbel', 'extreme1' or 'logistic'.")
y = sort(x)
xbar = np.mean(x, axis=0)
N = len(y)
@@ -632,7 +633,7 @@
z = distributions.logistic.cdf(w)
sig = array([25,10,5,2.5,1,0.5])
critical = around(_Avals_logistic / (1.0+0.25/N),3)
-    elif (dist == 'gumbel') or (dist == 'extreme1'):
+    else:  # (dist == 'gumbel') or (dist == 'extreme1'):
#the following is incorrect, see ticket:1097
##        def fixedsolve(th,xj,N):
##            val = stats.sum(xj)*1.0/N
@@ -647,9 +648,7 @@
z = distributions.gumbel_l.cdf(w)
sig = array([25,10,5,2.5,1])
critical = around(_Avals_gumbel / (1.0 + 0.2/sqrt(N)),3)
-    else:
-        raise ValueError("dist has to be one of 'norm','expon','logistic'",
-                         "'gumbel','extreme1'")
+
i = arange(1,N+1)
S = sum((2*i-1.0)/N*(log(z)+log(1-z[::-1])),axis=0)
A2 = -N-S
@@ -722,10 +721,10 @@
x,y = asarray(x),asarray(y)
n = len(x)
m = len(y)
-    if (m < 1):
-        raise ValueError, "Not enough other observations."
-    if (n < 1):
-        raise ValueError, "Not enough test observations."
+    if m < 1:
+        raise ValueError("Not enough other observations.")
+    if n < 1:
+        raise ValueError("Not enough test observations.")
N = m+n
xy = r_[x,y]  # combine
rank = stats.rankdata(xy)
@@ -804,7 +803,7 @@
"""
k = len(args)
if k < 2:
-        raise ValueError, "Must enter at least two input sample vectors."
+        raise ValueError("Must enter at least two input sample vectors.")
Ni = zeros(k)
ssq = zeros(k,'d')
for j in range(k):
@@ -966,13 +965,13 @@
elif len(x) == 1:
x = x[0]
if n is None or n < x:
-            raise ValueError, "n must be >= x"
+            raise ValueError("n must be >= x")
n = np.int_(n)
else:
-        raise ValueError, "Incorrect length for x."
+        raise ValueError("Incorrect length for x.")

if (p > 1.0) or (p < 0.0):
-        raise ValueError, "p must be in range [0,1]"
+        raise ValueError("p must be in range [0,1]")

d = distributions.binom.pmf(x,n,p)
rerr = 1+1e-7
@@ -1128,8 +1127,8 @@
m = len(y)
xy = r_[x,y]
N = m+n
-    if (N < 3):
-        raise ValueError, "Not enough observations."
+    if N < 3:
+        raise ValueError("Not enough observations.")
ranks = stats.rankdata(xy)
Ri = ranks[:n]
M = sum((Ri - (N+1.0)/2)**2,axis=0)
@@ -1162,7 +1161,7 @@
"""
k = len(args)
if k < 2:
-        raise ValueError, "Must enter at least two input sample vectors."
+        raise ValueError("Must enter at least two input sample vectors.")
if 'equal_var' in kwds.keys():
if kwds['equal_var']: evar = 1
else: evar = 0
@@ -1229,7 +1228,7 @@
else:
x, y = map(asarray, (x, y))
if len(x) <> len(y):
-            raise ValueError, 'Unequal N in wilcoxon.  Aborting.'
+            raise ValueError('Unequal N in wilcoxon.  Aborting.')
d = x-y
d = compress(not_equal(d,0),d,axis=-1) # Keep all non-zero differences
count = len(d)
@@ -1275,8 +1274,8 @@
"""
N = len(cnt)
if N < 2:
-        raise ValueError, "At least two moments must be given to" + \
-              "approximate the pdf."
+        raise ValueError("At least two moments must be given to " +
+              "approximate the pdf.")
totp = poly1d(1)
sig = sqrt(cnt[1])
mu = cnt[0]

Modified: trunk/scipy/stats/tests/test_morestats.py
===================================================================
--- trunk/scipy/stats/tests/test_morestats.py	2010-09-11 19:20:41 UTC (rev 6700)
+++ trunk/scipy/stats/tests/test_morestats.py	2010-09-11 23:16:47 UTC (rev 6701)
@@ -24,6 +24,7 @@
g9 = [1.002, 0.998, 0.996, 0.995, 0.996, 1.004, 1.004, 0.998, 0.999, 0.991]
g10= [0.991, 0.995, 0.984, 0.994, 0.997, 0.997, 0.991, 0.998, 1.004, 0.997]

+
class TestShapiro(TestCase):
def test_basic(self):
x1 = [0.11,7.87,4.61,10.14,7.95,3.14,0.46,
@@ -39,6 +40,12 @@
assert_almost_equal(w,0.9590270,6)
assert_almost_equal(pw,0.52460,3)

+    def test_bad_arg(self):
+        # Length of x is less than 3.
+        x = [1]
+        assert_raises(ValueError, stats.shapiro, x)
+
+
class TestAnderson(TestCase):
def test_normal(self):
rs = RandomState(1234567890)
@@ -58,7 +65,12 @@
A,crit,sig = stats.anderson(x2,'expon')
assert_array_less(crit[:-1], A)

+    def test_bad_arg(self):
+        assert_raises(ValueError, stats.anderson, [1], dist='plate_of_shrimp')
+
+
class TestAnsari(TestCase):
+
def test_small(self):
x = [1,2,3,3,4]
y = [3,2,6,1,6,1,4,1]
@@ -80,13 +92,24 @@
assert_almost_equal(W,10.0,11)
assert_almost_equal(pval,0.533333333333333333,7)

+    def test_bad_arg(self):
+        assert_raises(ValueError, stats.ansari, [], [1])
+        assert_raises(ValueError, stats.ansari, [1], [])
+
+
class TestBartlett(TestCase):
+
def test_data(self):
args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
T, pval = stats.bartlett(*args)
assert_almost_equal(T,20.78587342806484,7)
assert_almost_equal(pval,0.0136358632781,7)

+    def test_bad_arg(self):
+        """Too few args raises ValueError."""
+        assert_raises(ValueError, stats.bartlett, [1])
+
+
class TestLevene(TestCase):

def test_data(self):
@@ -127,9 +150,14 @@

def test_bad_center_value(self):
x = np.linspace(-1,1,21)
-        assert_raises(ValueError, stats.levene, x, x, center='trim')
+        assert_raises(ValueError, stats.levene, x, x, center='trim')
+
+    def test_too_few_args(self):
+        assert_raises(ValueError, stats.levene, [1])

+
class TestBinomP(TestCase):
+
def test_data(self):
pval = stats.binom_test(100,250)
assert_almost_equal(pval,0.0018833009350757682,11)
@@ -138,6 +166,21 @@
pval = stats.binom_test([682,243],p=3.0/4)
assert_almost_equal(pval,0.38249155957481695,11)

+    def test_bad_len_x(self):
+        """Length of x must be 1 or 2."""
+        assert_raises(ValueError, stats.binom_test, [1,2,3])
+
+    def test_bad_n(self):
+        """len(x) is 1, but n is invalid."""
+        # Missing n
+        assert_raises(ValueError, stats.binom_test, [100])
+        # n less than x[0]
+        assert_raises(ValueError, stats.binom_test, [100], n=50)
+
+    def test_bad_p(self):
+        assert_raises(ValueError, stats.binom_test, [50, 50], p=2.0)
+
+
class TestFindRepeats(TestCase):
def test_basic(self):
a = [1,2,3,4,1,2,3,4,1,2,5]
@@ -194,11 +237,60 @@
x = np.linspace(-1,1,21)
assert_raises(ValueError, stats.fligner, x, x, center='trim')

+    def test_bad_num_args(self):
+        """Too few args raises ValueError."""
+        assert_raises(ValueError, stats.fligner, [1])
+
+
def test_mood():
# numbers from R: mood.test in package stats
x1 = np.arange(5)
assert_array_almost_equal(stats.mood(x1,x1**2),
(-1.3830857299399906, 0.16663858066771478), 11)

+def test_mood_bad_arg():
+    """Raise ValueError when the sum of the lengths of the args is less than 3."""
+    assert_raises(ValueError, stats.mood, [1], [])
+
+def test_oneway_bad_arg():
+    """Raise ValueError if fewer than two args are given."""
+    assert_raises(ValueError, stats.oneway, [1])
+
+def test_wilcoxon_bad_arg():
+    """Raise ValueError when two args of different lengths are given."""
+    assert_raises(ValueError, stats.wilcoxon, [1], [1,2])
+
+def test_mvsdist_bad_arg():
+    """Raise ValueError if fewer than two data points are given."""
+    data = [1]
+    assert_raises(ValueError, stats.mvsdist, data)
+
+def test_kstat_bad_arg():
+    """Raise ValueError if n > 4 or n < 1."""
+    data = [1]
+    n = 10
+    assert_raises(ValueError, stats.kstat, data, n=n)
+
+def test_kstatvar_bad_arg():
+    """Raise ValueError if n is not 1 or 2."""
+    data = [1]
+    n = 10
+    assert_raises(ValueError, stats.kstatvar, data, n=n)
+
+def test_probplot_bad_arg():
+    """Raise ValueError when given an invalid distribution."""
+    data = [1]
+    assert_raises(ValueError, stats.probplot, data, dist="plate_of_shrimp")
+
+def test_ppcc_max_bad_arg():
+    """Raise ValueError when given an invalid distribution."""
+    data = [1]
+    assert_raises(ValueError, stats.ppcc_max, data, dist="plate_of_shrimp")
+
+def test_boxcox_bad_arg():
+    """Raise ValueError if any data value is negative."""
+    x = np.array([-1])
+    assert_raises(ValueError, stats.boxcox, x)
+
if __name__ == "__main__":
run_module_suite()

```

More information about the Scipy-svn mailing list