[Scipy-tickets] [SciPy] #620: scipy.stats.distributions.binom.pmf returns incorrect values

SciPy scipy-tickets@scipy....
Mon Dec 1 05:10:25 CST 2008


#620: scipy.stats.distributions.binom.pmf returns incorrect values
----------------------------+-----------------------------------------------
 Reporter:  robertbjornson  |        Owner:  somebody
     Type:  defect          |       Status:  new     
 Priority:  normal          |    Milestone:  0.7.0   
Component:  scipy.stats     |      Version:          
 Severity:  normal          |   Resolution:          
 Keywords:                  |  
----------------------------+-----------------------------------------------
Comment (by pbrod):

 Below you will find the implementation binom._pmf as described in Loader
 (2000) that will give the desired accuracy for small probabilities:

 {{{
 In [43]: n=10; error_rate=1.0e-5
 In [43]: print [scipy.stats.distributions.binom.pmf(i, n, error_rate) for
 i in range(n+1)]
 [0.99990000449988004, 9.9991000359991725e-005, 4.4996400125997598e-009,
 1.1999160025199592e-013, 2.0998740031499488e-018, 2.5198740025199631e-023,
 2.0999160012599896e-028, 1.1999640003600129e-033, 4.4999100004500268e-039,
 9.999899999999941e-045, 9.9999999999999443e-051]
 }}}

 as well as large n:

 {{{
 In [53]: pmfn = scipy.stats.binom.pmf(np.arange(5001),5000, 0.99)
 In [54]: pmfn.sum()
 Out[54]: 1.0
 In [55]: np.sum(np.isnan(pmfn))
 Out[55]: 0

 }}}

 Replace binom_gen._pmf in scipy.stats.distribution with:
 {{{
 def _pmf(self,x,n,pr):
         """ Return PMF

         Reference
         --------------
          Catherine Loader (2000).
          "Fast and Accurate Computation of Binomial Probabilities";
            url =
 "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.35.2719" }
         """
         # if (p==0.0) return( (x==0) ? 1.0 : 0.0);
         # if (p==1.0) return( (x==n) ? 1.0 : 0.0);
         # if (x==0) return(exp(n.*log1p(-p)));
         # if (x==n) return(exp(n.*log(p)));
         PI2 = 2.*pi #6.283185307179586476925286;
         yborder = (x==0.)*exp(n*log1p(-pr))+(x==n)*exp(n*log(pr))
         nx = n-x
         nq = n*(1.-pr)
         lc = stirlerr(n) - stirlerr(x) - stirlerr(nx) - bd0(x,n*pr) -
 bd0(nx,nq)
         inside = (0.<pr) & (pr<1.) & (0.<x) & (x < n)
         return where(inside,exp(lc)*sqrt(n/(PI2*x*nx)),yborder)
 }}}

 where

 {{{

 def stirlerr(n):
     """Return error of Stirling approximation, i.e., log(n!) - log(
 sqrt(2*pi*n)*(n/exp(1))**n )

     Example
     -------
     >>> stirlerr(2)
     array([ 0.0413407])

     See also
     ---------
     binom


     Reference
     -----------
     Catherine Loader (2000).
     'Fast and Accurate Computation of Binomial Probabilities'
     <http://www.citeseer.ist.psu.edu/312695.html>
     """

     S0 = 0.083333333333333333333   # /* 1/12 */
     S1 = 0.00277777777777777777778 # /* 1/360 */
     S2 = 0.00079365079365079365079365 # /* 1/1260 */
     S3 = 0.000595238095238095238095238 # /* 1/1680 */
     S4 = 0.0008417508417508417508417508  # /* 1/1188 */

     logical_and = numpy.logical_and
     atleast_1d = numpy.atleast_1d
     gammaln = special.gammaln
     pi = numpy.pi
     exp = numpy.exp
     sqrt = numpy.sqrt
     log = numpy.log

     n1 = atleast_1d(n)
 #    if numpy.isscalar(n):
 #        n1 = asfarray([n])
 #    else:
 #        n1 = asfarray(n)

     y = gammaln(n1+1) - log(sqrt(2*pi*n1)*(n1/exp(1))**n1 )


     nn = n1*n1

     n500    = 500<n1
     y[n500] = (S0-S1/nn[n500])/n1[n500]
     n80     = logical_and(80<n1 , n1<=500)
     if any(n80):
         y[n80]  = (S0-(S1-S2/nn[n80])/nn[n80])/n1[n80]
     n35     = logical_and(35<n1, n1<=80)
     if any(n35):
         nn35   = nn[n35]
         y[n35] = (S0-(S1-(S2-S3/nn35)/nn35)/nn35)/n1[n35]

     n15      = logical_and(15<n1, n1<=35)
     if any(n15):
         nn15   = nn[n15]
         y[n15] = (S0-(S1-(S2-(S3-S4/nn15)/nn15)/nn15)/nn15)/n1[n15]

     return y

 }}}

 and

 {{{
 def bd0(x,npr):
     """
     Return deviance term x*log(x/npr) + npr - x

     See also
     --------
     stirlerr,
     binom.pmf,
     poisson.pmf

     Reference
     ---------
     Catherine Loader (2000).
     'Fast and Accurate Computation of Binomial Probabilities'
     <http//www.citeseer.ist.psu.edu/312695.html>
     """
     def bd0_iter(x,np1):
         xmnp = x-np1
         v = (xmnp)/(x+np1)
         s1 = (xmnp)*v
         s = np.zeros_like(s1)
         ej = 2*x*v
         #v2 = v*v
         v = v*v
         j = 0
         ix, = (s!=s1).nonzero()
         while ix.size>0:
             j += 1
             s[ix]  = s1[ix].copy()
             ej[ix] = ej[ix]*v[ix]
             s1[ix] = s[ix]+ej[ix]/(2.*j+1.0)
             ix, = (s1!=s).nonzero()
         return s1
     x1,npr1 = atleast_1d(x,npr)
     y = x1*log(x1/npr1)+npr1-x1
     sml = nonzero(abs(x1-npr1)<0.1*(x1+npr1))
     if sml.size>0:
         if x1.size!=1:
             x1 = x1[sml]
         if npr1.size!=1:
             npr1 = npr1[sml]
         y.put(sml,bd0_iter(x1,npr1))
     return y

 }}}

-- 
Ticket URL: <http://scipy.org/scipy/scipy/ticket/620#comment:4>
SciPy <http://www.scipy.org/>
SciPy is open-source software for mathematics, science, and engineering.


More information about the Scipy-tickets mailing list