# [Scipy-svn] r2250 - trunk/Lib/stats

scipy-svn at scipy.org scipy-svn at scipy.org
Mon Oct 9 19:30:21 CDT 2006

Author: oliphant
Date: 2006-10-09 19:30:16 -0500 (Mon, 09 Oct 2006)
New Revision: 2250

Modified:
trunk/Lib/stats/continuous.lyx
trunk/Lib/stats/morestats.py
Log:

Modified: trunk/Lib/stats/continuous.lyx
===================================================================
--- trunk/Lib/stats/continuous.lyx	2006-10-09 22:37:42 UTC (rev 2249)
+++ trunk/Lib/stats/continuous.lyx	2006-10-10 00:30:16 UTC (rev 2250)
@@ -690,6 +690,39 @@

\layout Subsection

+Median and mode
+\layout Standard
+
+The mean,
+\begin_inset Formula $m_{n}$
+\end_inset
+
+ is defined as the point at which half of the density is on one side and
+ half on the other.
+ In other words,
+\begin_inset Formula $F\left(m_{n}\right)=\frac{1}{2}$
+\end_inset
+
+ so that
+\begin_inset Formula $+m_{n}=G\left(\frac{1}{2}\right).$
+
+\end_inset
+
+\begin_inset Formula $m_{d}$
+\end_inset
+
+, is defined as the value for which the probability density function reaches
+ it's peak
+\begin_inset Formula $+m_{d}=\arg\max_{x}f\left(x\right).$
+
+\end_inset
+
+
+\layout Subsection
+
Fitting data
\layout Standard

@@ -2296,7 +2329,8 @@
\mu & = & \frac{\Gamma\left(a+\frac{1}{c}\right)}{\Gamma\left(a\right)}\\
\mu_{2} & = & \frac{\Gamma\left(a+\frac{2}{c}\right)}{\Gamma\left(a\right)}-\mu^{2}\\
\gamma_{1} & = & \frac{\Gamma\left(a+\frac{3}{c}\right)/\Gamma\left(a\right)-3\mu\mu_{2}-\mu^{3}}{\mu_{2}^{3/2}}\\
-\gamma_{2} & = & \frac{\Gamma\left(a+\frac{4}{c}\right)/\Gamma\left(a\right)-4\mu\mu_{3}-6\mu^{2}\mu_{2}-\mu^{4}}{\mu_{2}^{2}}-3\end{eqnarray*}
+\gamma_{2} & = & \frac{\Gamma\left(a+\frac{4}{c}\right)/\Gamma\left(a\right)-4\mu\mu_{3}-6\mu^{2}\mu_{2}-\mu^{4}}{\mu_{2}^{2}}-3\\
+m_{d} & = & \left(\frac{ac-1}{c}\right)^{1/c}.\end{eqnarray*}

\end_inset

@@ -2905,6 +2939,12 @@
\end_inset

+\begin_inset Formula $+m_{d}=\frac{1}{a+1}$
+
+\end_inset
+
+
\layout Standard

Modified: trunk/Lib/stats/morestats.py
===================================================================
--- trunk/Lib/stats/morestats.py	2006-10-09 22:37:42 UTC (rev 2249)
+++ trunk/Lib/stats/morestats.py	2006-10-10 00:30:16 UTC (rev 2250)
@@ -16,6 +16,7 @@
import numpy
import types
import scipy.optimize as optimize
+import scipy.special as special
import futil
import numpy as sb

@@ -50,17 +51,18 @@

Assumes 1-d data all has same mean and variance and uses Jeffrey's prior
for variance and std.
-
alpha gives the probability that the returned interval contains
the true parameter.

Uses peak of conditional pdf as starting center.

Returns (peak, (a, b)) for each of mean, variance and standard deviation.
+    Requires 2 or more data-points.
"""
x = ravel(data)
n = len(x)
assert(n > 1)
+    assert(alpha < 1 and alpha > 0)
n = float(n)
@@ -73,29 +75,35 @@
mp = xbar
#
fac = n*C/2.0
-    peak = 2/(n+1.)
-    a = (n-1)/2.0
-    F_peak = distributions.invgamma.cdf(peak,a)
+    a = (n-1)/2.0
+    if (n > 3): # use mean of distribution as center
+        peak = 2/(n-3.0)
+        F_peak = distributions.invgamma.cdf(peak,a)
+    else: # use median
+        F_peak = -1.0
+    if (F_peak < alpha/2.0):
+        peak = distributions.invgamma.ppf(0.5,a)
+        F_peak = 0.5
q1 = F_peak - alpha/2.0
q2 = F_peak + alpha/2.0
-    if (q1 < 0):  # non-symmetric area
-        q2 = alpha
-        va = 0.0
-    else:
-        va = fac*distributions.invgamma.ppf(q1,a)
+    if (q2 > 1): q2 = 1.0
+    va = fac*distributions.invgamma.ppf(q1,a)
vb = fac*distributions.invgamma.ppf(q2,a)
vp = peak*fac
#
fac = sqrt(fac)
-    peak = sqrt(2./n)
-    F_peak = distributions.gengamma.cdf(peak,a,-2)
+    if (n > 2):
+        peak = special.gamma(a-0.5) / special.gamma(a)
+        F_peak = distributions.gengamma.cdf(peak,a,-2)
+    else: # use median
+        F_peak = -1.0
+    if (F_peak < alpha/2.0):
+        peak = distributions.gengamma.ppf(0.5,a,-2)
+        F_peak = 0.5
q1 = F_peak - alpha/2.0
q2 = F_peak + alpha/2.0
-    if (q1 < 0):
-        q2 = alpha
-        sta = 0.0
-    else:
-        sta = fac*distributions.gengamma.ppf(q1,a,-2)
+    if (q2 > 1): q2 = 1.0
+    sta = fac*distributions.gengamma.ppf(q1,a,-2)
stb = fac*distributions.gengamma.ppf(q2,a,-2)
stp = peak*fac