[SciPy-User] fisherexact.py returns None

josef.pktd@gmai... josef.pktd@gmai...
Sun Dec 20 16:04:51 CST 2009


On Sun, Dec 20, 2009 at 4:44 PM, Pete Shepard <peter.shepard@gmail.com> wrote:
> I think the indent issue was due to copying and pasting also, attached is
> the code.

your attached file has inconsistent indentation all over the place
(according to IDLE, which refuses to run the file, or maybe there are
additional tabs instead of white space)
your return statement is indented relative to the if , as mentioned in
my previous message. If you unindent then the None disappears.

I attach my indent corrected version.

What editor are you using? It is possible that you are seeing
different formatting than I do on Windows.

Josef

>
> On Sun, Dec 20, 2009 at 1:05 PM, <josef.pktd@gmail.com> wrote:
>>
>> On Sun, Dec 20, 2009 at 3:50 PM, Pete Shepard <peter.shepard@gmail.com>
>> wrote:
>> > Thanks for the reply Josef,
>> >
>> > Numpy version =1:1.3.0-3
>> > SciPy version=0.7.0-2
>> >
>> >
>> > Here is the code I am using, interstingly line 99 chokes in the
>> > testFisherExact() : example, don't know if these problems are related
>> >
>> >
>> > # -*- coding: utf-8 -*-
>> >
>> > #!/usr/bin/python
>> >
>> >
>> > import re
>> > import sys, os
>> > from scipy.stats import *
>> > #from scipy import array,searchsorted, histogram
>> > import re
>> > import operator
>> > import matplotlib
>> > matplotlib.use('PDF')
>> > import pylab
>> > "
>> >
>> > def fisherExact(c) :
>> >             """Performs a Fisher exact test on a 2x2 contingency table
>> > in
>> > list of lists
>> >                format.  Returns a tuple of (odds ratio, two-tailed
>> > P-value).
>> >
>> >                Examples:
>> >                >>> fisherExact([[100, 2], [1000, 5]])
>> >                (0.25, 0.13007593634330314)
>> >                """
>> >             oddsRatio = c[0][0] * c[1][1] / float(c[1][0] * c[0][1])
>> >             n1 = c[0][0] + c[0][1]
>> >             n2 = c[1][0] + c[1][1]
>> >             n  = c[0][0] + c[1][0]
>> >
>> >             mode = int(float((n + 1) * (n1 + 1)) / (n1 + n2 + 2))
>> >             pExact = hypergeom.pmf(c[0][0], n1 + n2, n1, n)
>> >             pMode = hypergeom.pmf(c[0][0], n1 + n2, n1, n)
>> >
>> >             if c[0][0] == mode :
>> >                     return oddsRatio, 1.0
>> >             elif c[0][0] < mode :
>> >                     pLower = hypergeom.cdf(c[0][0], n1 + n2, n1, n)
>> >
>> >                     # Binary search for where to begin upper half.
>> >                     min = mode
>> >                     max = n
>> >                     guess = -1
>> >                     while min != max :
>> >                             guess = max if (max == min + 1 and guess ==
>> > min)
>> > else \
>> >                                             (max + min) / 2
>> >
>> >                             pGuess = hypergeom.pmf(guess, n1 + n2, n1,
>> > n)
>> >                             if pGuess <= pExact and hypergeom.pmf(guess
>> > - 1,
>> > n1 + n2, n1, n) > pExact :
>> >                                     break
>> >                             elif pGuess < pExact :
>> >                                     max = guess
>> >                             else :
>> >                                     min = guess
>> >
>> >                     if guess == -1 and min == max :
>> >                             guess = min
>> >
>> >                     return oddsRatio, pLower + hypergeom.sf(guess - 1,
>> > n1 +
>> > n2, n1, n)
>> >             else :
>> >                     pUpper = hypergeom.sf(c[0][0] - 1, n1 + n2, n1, n);
>> >
>> >                     # Special case to prevent binary search from getting
>> > stuck.
>> >                     if hypergeom.pmf(0, n1 + n2, n1, n) > pExact :
>> >                             return oddsRatio, pUpper
>> >
>> >                     # Binary search for where to begin lower half.
>> >                     min = 0
>> >                     max = mode
>> >                     guess = -1
>> >                     while min != max :
>> >                             guess = max if (max == min + 1 and guess ==
>> > min)
>> > else \
>> >                                             (max + min) / 2
>> >                             pGuess = hypergeom.pmf(guess, n1 + n2, n1,
>> > n);
>> >
>> >                             if pGuess <= pExact and hypergeom.pmf(guess
>> > + 1,
>> > n1 + n2, n1, n) > pExact :
>> >                                     break;
>> >                             elif pGuess <= pExact  :
>> >                                     min = guess
>> >                             else :
>> >                                     max = guess
>> >
>> >                     if guess == -1 and min == max :
>> >                             guess = min
>> >                         return oddsRatio, pUpper + hypergeom.cdf(guess,
>> > n1 +
>> > n2, n1, n)
>>
>> this return line has the wrong indent, it should be at the same column
>> as the   if guess == -1 and min == max :
>>
>> Note: I had to reformat the indent to 4 spaces because IDLE
>> complained. In these cases, it would be better to also attach the
>> file, then I would be sure I have the same file than with copy and
>> paste from the mail reader.
>>
>> Tell me if that solves the problem, I would think it's unlikely that
>> your versions of scipy is to old for this.
>>
>> Josef
>>
>> >
>> > def testFisherExact() :
>> >     """Just some tests to show that fisherExact() works correctly."""
>> >     def approxEqual(n1, n2) :
>> >         return abs(n1 - n2) < 0.01
>> >
>> >     res = fisherExact([[100, 2], [1000, 5]])
>> >     assert(approxEqual(res[1], 0.1301))
>> >     assert(approxEqual(res[0], 0.25))
>> >     res = fisherExact([[2, 7], [8, 2]])
>> >     assert(approxEqual(res[1], 0.0230141))
>> >     assert(approxEqual(res[0], 4.0 / 56))
>> >     res = fisherExact([[100, 2], [1000, 5]])
>> >     #assert(approxEqual(res[1],  0.1973244))
>> >         res = fisherExact([[5, 15], [20, 20]])
>> >     assert(approxEqual(res[1], 0.0958044))
>> >     res = fisherExact([[5, 16], [20, 25]])
>> >     assert(approxEqual(res[1], 0.1725862))
>> >     res = fisherExact([[10, 5], [10, 1]])
>> >     assert(approxEqual(res[1], 0.1973244))
>> >
>> > D=[[6, 1], [1, 6]]
>> > testFisherExact()
>> > p=fisherExact(D)
>> > print p
>> > On Sun, Dec 20, 2009 at 10:37 AM, <josef.pktd@gmail.com> wrote:
>> >>
>> >> On Sun, Dec 20, 2009 at 12:26 PM, Pete Shepard
>> >> <peter.shepard@gmail.com>
>> >> wrote:
>> >> > Hello,
>> >> >
>> >> > I am using a fisher exact test I got from
>> >> > http://projects.scipy.org/scipy/attachment/ticket/956/fisher.py. This
>> >> > program takes in two tuples and returns an odds ratio and a  p-value.
>> >> > Most
>> >> > tuples are handled nicely by the script but certain tuples eg "[[6,
>> >> > 1],
>> >> > [1,
>> >> > 6]]" return "None". I am wondering if anyone knows why this is true?
>> >>
>> >> I get this, with and without the patch of thomas in the comments
>> >>
>> >> >>> fisherExact([[6, 1], [1, 6]])
>> >> (36.0, 0.029137529137528768)
>> >>
>> >> Do you have an exact example? Which version of numpy and scipy?
>> >> Looking at the code, I don't see any reason why it should return None.
>> >>
>> >> Josef
>> >> >
>> >> > TIA
>> >> >
>> >> > _______________________________________________
>> >> > SciPy-User mailing list
>> >> > SciPy-User@scipy.org
>> >> > http://mail.scipy.org/mailman/listinfo/scipy-user
>> >> >
>> >> >
>> >> _______________________________________________
>> >> SciPy-User mailing list
>> >> SciPy-User@scipy.org
>> >> http://mail.scipy.org/mailman/listinfo/scipy-user
>> >
>> >
>> > _______________________________________________
>> > SciPy-User mailing list
>> > SciPy-User@scipy.org
>> > http://mail.scipy.org/mailman/listinfo/scipy-user
>> >
>> >
>> _______________________________________________
>> SciPy-User mailing list
>> SciPy-User@scipy.org
>> http://mail.scipy.org/mailman/listinfo/scipy-user
>
>
> _______________________________________________
> SciPy-User mailing list
> SciPy-User@scipy.org
> http://mail.scipy.org/mailman/listinfo/scipy-user
>
>
-------------- next part --------------
# -*- coding: utf-8 -*-

#!/usr/bin/python


import re
import sys, os
from scipy.stats import *
#from scipy import array,searchsorted, histogram
import re
import operator
import matplotlib
matplotlib.use('PDF')
import pylab


def fisherExact(c) :
    """Performs a Fisher exact test on a 2x2 contingency table in list of lists
       format.  Returns a tuple of (odds ratio, two-tailed P-value).
      
       Examples:
       >>> fisherExact([[100, 2], [1000, 5]])
       (0.25, 0.13007593634330314)
       """
    oddsRatio = c[0][0] * c[1][1] / float(c[1][0] * c[0][1])
    n1 = c[0][0] + c[0][1]
    n2 = c[1][0] + c[1][1]
    n  = c[0][0] + c[1][0]
  
    mode = int(float((n + 1) * (n1 + 1)) / (n1 + n2 + 2))
    pExact = hypergeom.pmf(c[0][0], n1 + n2, n1, n)
    pMode = hypergeom.pmf(c[0][0], n1 + n2, n1, n)
  
    if c[0][0] == mode :
        return oddsRatio, 1.0
    elif c[0][0] < mode :
        pLower = hypergeom.cdf(c[0][0], n1 + n2, n1, n)
      
        # Binary search for where to begin upper half.
        min = mode
        max = n
        guess = -1
        while min != max :
                guess = max if (max == min + 1 and guess == min) else \
                                (max + min) / 2

                pGuess = hypergeom.pmf(guess, n1 + n2, n1, n)
                if pGuess <= pExact and hypergeom.pmf(guess - 1, n1 + n2, n1, n) > pExact :
                        break
                elif pGuess < pExact :
                        max = guess
                else :
                        min = guess

        if guess == -1 and min == max :
                guess = min

        return oddsRatio, pLower + hypergeom.sf(guess - 1, n1 + n2, n1, n)
    else :
        pUpper = hypergeom.sf(c[0][0] - 1, n1 + n2, n1, n);

        # Special case to prevent binary search from getting stuck.
        if hypergeom.pmf(0, n1 + n2, n1, n) > pExact :
                return oddsRatio, pUpper

        # Binary search for where to begin lower half.
        min = 0
        max = mode
        guess = -1
        while min != max :
            guess = max if (max == min + 1 and guess == min) else \
                            (max + min) / 2
            pGuess = hypergeom.pmf(guess, n1 + n2, n1, n);

            if pGuess <= pExact and hypergeom.pmf(guess + 1, n1 + n2, n1, n) > pExact :
                    break;
            elif pGuess <= pExact  :
                    min = guess
            else :
                    max = guess

        if guess == -1 and min == max :
            guess = min
        return oddsRatio, pUpper + hypergeom.cdf(guess, n1 + n2, n1, n)
   
def testFisherExact() :
    """Just some tests to show that fisherExact() works correctly."""
    def approxEqual(n1, n2) :
        return abs(n1 - n2) < 0.01

    res = fisherExact([[100, 2], [1000, 5]])
    assert(approxEqual(res[1], 0.1301))
    assert(approxEqual(res[0], 0.25))
    res = fisherExact([[2, 7], [8, 2]])
    assert(approxEqual(res[1], 0.0230141))
    assert(approxEqual(res[0], 4.0 / 56))
    res = fisherExact([[100, 2], [1000, 5]])
    #assert(approxEqual(res[1],  0.1973244))
    res = fisherExact([[5, 15], [20, 20]])
    assert(approxEqual(res[1], 0.0958044))
    res = fisherExact([[5, 16], [20, 25]])           
    assert(approxEqual(res[1], 0.1725862))           
    res = fisherExact([[10, 5], [10, 1]])
    assert(approxEqual(res[1], 0.1973244))
   
D=[[6, 1], [1, 6]]
testFisherExact()
p=fisherExact(D)
print p 


More information about the SciPy-User mailing list