[SciPy-user] Generate random samples from a non-standard distribution

Michael Nandris mnandris@btinternet....
Mon Oct 22 16:42:59 CDT 2007


I'm sure there are shorter ways of doing this,

for example, search for "ifa.hawaii.edu markov lecture10" to find some shorter code for Markov models

######################################
from __future__ import division
from numpy.random import multinomial
from numpy import dot
from MA import take            # requires python-numeric-ext
from scipy.stats import rv_discrete
import timeit
from numpy import NaN

"""
src: Mathematical Modeling and Computer Simulation by Maki and Thompson page 179
Consider a Markov Chain with 4 states and transiton matrix T. Use simulation to estimate the 5-step transition matrix.
There are at least 2 ways of doing this (very slowly, using scipy.rv_discrete, or quite fast using numpy.multinomial [which has a bug that may or may not have been fixed in the official release, plus an unsolved bug:- it cant handle zeros, which require substituting with 1e-16; but it does give the right answer.
"""

# One-step transition matrix of the 4-state Markov chain from Maki &
# Thompson p.179; each row sums to 1.
T = [[ 0.6, 0.3, 0.0, 0.1 ],        
      [ 0.5, 0.0, 0.5, 0.0 ],
      [ 0.0, 0.2, 0.2, 0.6 ],
      [ 0.2, 0.0, 0.8, 0.0 ]] 

def recursor(n, matrix=None):
    """Return the n-step transition matrix: `matrix` raised to the n-th
    power by repeated dot products.

    Parameters
    ----------
    n : int
        Number of steps; must be >= 1.
    matrix : square array-like, optional
        One-step transition matrix; defaults to the module-level T.

    Raises
    ------
    ValueError
        If n < 1 (the original reduce() over an empty list also raised).
    """
    if matrix is None:
        matrix = T
    if n < 1:
        raise ValueError("n must be >= 1")
    # Iterative product avoids the Python-2-only builtin reduce() and
    # does not build a throwaway list of n matrix references.
    result = matrix
    for _ in range(n - 1):
        result = dot(result, matrix)
    return result

def analyticalSolution():
    """Print the exact 5-step transition matrix.

    Agrees with the answer on p180 of Maki & Thompson.
    """
    # Parenthesized print is identical under Python 2 for a single
    # argument and keeps the script runnable under Python 3.
    print(recursor(5))

# Same probabilities as T, but with exact zeros replaced by 1e-16 (and
# the larger entries reduced by 1e-16 so each row still sums to 1),
# because numpy.random.multinomial cannot handle zero entries here.
INPUT = [[ 0.6-(1e-16), 0.3, 1e-16, 0.1 ],        # input for multinomial is same as T
      [ 0.5-(1e-16), 1e-16, 0.5-(1e-16), 1e-16 ],
      [ 1e-16, 0.2, 0.2, 0.6-(1e-16) ],
      [ 0.2-(1e-16), 1e-16, 0.8-(1e-16), 1e-16 ]] 

# State labels of the 4-state chain.
STATES = [0,1,2,3]
# Number of simulated chains per starting state.
SIZE = 10000
# 1/SIZE as a true fraction (relies on ``from __future__ import division``).
MAGNITUDE = 1/SIZE

def showResults(d, scale=None):
    """Print (and return) the tallies in `d` as scaled relative frequencies.

    Empties `d` as a side effect (every entry is popped).

    Parameters
    ----------
    d : dict
        Mapping state -> count.
    scale : float, optional
        Multiplier applied to each count; defaults to the module-level
        MAGNITUDE (= 1/SIZE, a true fraction thanks to
        ``from __future__ import division``).

    Returns
    -------
    list of str
        The rounded scaled counts, in popitem() order.
    """
    if scale is None:
        scale = MAGNITUDE
    L = []
    while d:
        k, v = d.popitem()
        # BUG FIX: this append belongs inside the loop -- the original had
        # it outside, so only the final popped value was ever recorded.
        L.append(str(round(v * scale, 5)))   # adjust the rounding precision here if needed
    print(L)
    return L

class ScipyStatsMarkovSim(object):
    """Estimate one row of the 5-step transition matrix by simulation,
    drawing every transition with scipy.stats.rv_discrete (slow path).

    Parameters
    ----------
    i : int
        Index of the starting state (selects the row of T).
    """

    def __init__(self, i):
        # (The original posting's indentation was mangled here; bodies
        # restored to standard 4-space indent.)
        self.i = i
        # Tally of terminal states after 5 steps, keyed by state index.
        self.ans = dict.fromkeys(STATES, 0)

    def markov(self):
        """Simulate SIZE chains of 5 steps from state self.i and print
        the normalised tally of terminal states via showResults()."""
        # Plain row indexing replaces the old MA.take(T, (i,))[0] --
        # T is a list of lists, so T[self.i] is the same row.
        first = list(T[self.i])
        # One bulk draw gives the state after the FIRST step for all
        # SIZE chains at once.
        sample = rv_discrete(name='sample',
                             values=(STATES, first)).rvs(size=SIZE)
        for state in sample:
            self.ans[self.nthStep(state)] += 1
        return showResults(self.ans)

    def nthStep(self, inpt):
        """Walk four further transitions from state `inpt` and return the
        state reached (the 5th state of the chain overall)."""
        state = inpt
        for _ in range(4):
            row = list(T[state])
            state = rv_discrete(name='sample',
                                values=(STATES, row)).rvs(size=1)[0]
        return state

class NumpyRandomMarkovSim(object):
    """Estimate one row of the 5-step transition matrix by simulation,
    drawing transitions with numpy.random.multinomial (fast path).

    Uses INPUT, the zero-free copy of T, because multinomial cannot
    handle exact zero probabilities.
    """

    def markov(self, i):
        """Simulate SIZE five-step chains starting from state i and print
        the normalised tally of terminal states via showResults()."""
        # (Indentation of the original posting was mangled here; bodies
        # restored to standard 4-space indent.)
        ans = dict.fromkeys(STATES, 0)
        first = INPUT[i]
        # A single multinomial draw distributes all SIZE chains over the
        # four possible first-step destinations at once (length-4 vector).
        counts = multinomial(SIZE, first)
        for state, n in enumerate(counts):   # n chains landed on `state`
            for _ in range(n):
                ans[self.nthStep(state)] += 1
        return showResults(ans)

    def nthStep(self, inpt):
        """Walk four further transitions from state `inpt` and return the
        state reached (the 5th state of the chain overall)."""
        state = inpt
        for _ in range(4):
            # multinomial(1, p) yields a one-hot vector; index(1) is the
            # sampled state.
            state = list(multinomial(1, INPUT[state])).index(1)
        return state

def testScipy():
    """Takes 32 seconds to execute a quarter of the workload."""
    # Only the row for starting state 0 is simulated; the remaining three
    # states stay disabled because this rv_discrete path is so slow.
    ScipyStatsMarkovSim(0).markov()
    # ScipyStatsMarkovSim(1).markov()
    # ScipyStatsMarkovSim(2).markov()
    # ScipyStatsMarkovSim(3).markov()

def testNumpy():
    """Takes 3 seconds to execute 4 times the workload."""
    # Simulate every row of the 5-step matrix, one starting state at a
    # time, with a fresh simulator per state (the class holds no state,
    # so this matches the original four separate constructions).
    for state in (0, 1, 2, 3):
        NumpyRandomMarkovSim().markov(state)

if __name__=='__main__':
    # Time the exact matrix-power solution.
    t = timeit.Timer('analyticalSolution()', 'from __main__ import analyticalSolution')
    PASSES = 10
    elapsed = t.timeit(number=PASSES)
    # BUG FIX: timeit.Timer.timeit returns wall-clock SECONDS, so
    # elapsed/PASSES is seconds per pass -- the original message
    # mislabelled it as micro-seconds.
    print("analyticalSolution() takes %0.3f seconds per pass" % (elapsed / PASSES))

    # The rv_discrete-based simulation stays disabled: it is far slower
    # (see the docstrings of testScipy/testNumpy).
    # t = timeit.Timer('testScipy()', 'from __main__ import testScipy')
    # PASSES = 1
    # elapsed = t.timeit(number=PASSES)
    # print("scipy.markov() takes %0.3f seconds per pass" % (elapsed / PASSES))

    # Time the multinomial-based simulation.
    PASSES = 1
    s = timeit.Timer('testNumpy()', 'from __main__ import testNumpy')
    elapsed2 = s.timeit(number=PASSES)
    print("numpy.markov() takes %0.3f seconds per pass" % (elapsed2 / PASSES))
    
########################################
# distgen.py
from numpy import random
from pylab import show, plot, hist
from scipy import polyfit, polyval
from scipy.stats.distributions import cauchy

def polynomialRegression(x, y, deg=20):
    """Least-squares polynomial fit of y against x.

    Parameters
    ----------
    x, y : array-like
        Sample points and observed values (same length).
    deg : int, optional
        Polynomial degree. Defaults to 20, the value the original script
        hard-coded; note that such high degrees are numerically
        ill-conditioned for smooth data.

    Returns
    -------
    tuple
        ``(x, fitted)`` -- the x values unchanged, and the fitted
        polynomial evaluated at each x.
    """
    coeffs = polyfit(x, y, deg)
    return x, polyval(coeffs, x)

# Draw one million samples from a Gumbel(loc=1000, scale=0.5)
# distribution.  To explore a different shape, swap in any other
# numpy.random generator -- e.g. beta(2, 2.5), weibull(2),
# standard_cauchy(2), zipf(2), vonmises(2, 3), wald(2, 3), rayleigh(),
# standard_gamma(2), binomial(1000, 0.5).
opt = [random.gumbel(1000, 0.5) for _ in range(1000000)]

"""
multinomial(...)
    Multinomial distribution.
    
    multinomial(n, pvals, size=None) -> random values
    
    pvals is a sequence of probabilities that should sum to 1 (however, the
    last element is always assumed to account for the remaining probability
    as long as sum(pvals[:-1]) <= 1).
"""
# Histogram the samples, then smooth the bin counts with the polynomial fit.
data = hist(opt, 10000)

y = data[0]  # bin counts
# NOTE(review): data[1] holds the histogram bins; in modern matplotlib
# hist returns one more bin EDGE than count, so x may need trimming to
# len(y) before fitting -- confirm against the matplotlib version in use.
x = data[1]

# Reuse the helper defined above instead of repeating the
# polyfit/polyval pair inline (same degree-20 fit as before).
x, besty = polynomialRegression(x, y)

plot(x, besty)

show()

#########################################
#from __future__ import division
from Numeric import *
from pylab import hist, xlabel, ylabel, show
import random

def MetHastDirich( param, M, B, start ):
    """
    Uses a Metropolis-Hastings method to generate a chain of values from a Dirichlet distribution.
    Input(a four element list):
        'param' - vector of parameters for the Dirichlet dist., all > 0,
        'M' - total number of steps kept in the MH chain after burn-in,
        'B' - number of steps used for the burn-in,
        'start' - vector of starting values which has length equal to the
        param vector minus 1 (NOTE: sum(start) must be < = 1).
    Output(a two element list):
        1) the array of generated values
        2) the number of accepted candidates out of B+M tries.
    """

    step_cnt = 0
    accept_ind = 0        ## To keep track of number of accepted candidates.
    x = start
    rv = len(param)-1     ## Last random variable is constrained.
    data_matrix = zeros( (M,rv), typecode = 'f' )

    for i in range( B + M ):
        step_cnt = step_cnt + 1
        y = []
        cumy = 0

        ## Generate a candidate over support space (0,1) under restriction of summation=1
        for j in range( rv ):
            present_y = ( 1 - cumy ) * random.uniform( 0, 1 )
            cumy = cumy + present_y
            y.append( present_y )

        alpha_prob=((1-sum( y )) / (1.0 -sum( x )))**( param[rv]-1.0 )

        for k in range(rv):
            tterm_1 = ( y[k] / x[k] )**( param[k]-1 )
            if k == 0:
                    tterm_2 = 1
                else:
                    tterm_2 = ( 1-sum( y[0:(k-1)] )) / ( 1-sum( x[0:k-1] ) )
                alpha_prob = alpha_prob * tterm_1 * tterm_2

        alpha_prob_final = min( [alpha_prob,1] )
        u = random.uniform( 0, 1 )

        if u <= alpha_prob_final:                  ## Candidate accepted.
            x=y                                    ## Jump to new position.
            accept_ind=accept_ind+1
        if step_cnt > B:
            data_matrix[ (step_cnt-B-1),...] = y   ## Fill-in a full row
        else:                                      ## of the matrix.
            if step_cnt > B:
                data_matrix[(step_cnt-B-1),...]=x  ## Remain at old position.
    return [data_matrix,accept_ind]

## Use the above function to compute a chain of observations for the given argument values below:

# Dirichlet parameters; the commented values are an alternative test case.
parameters=array([9,13,16,12]) # [2.,2.,3.,3.])
# Kept-chain length and burn-in, reduced from 100000/10000 for a quick run.
M = 100 # 000
B = 10 #000
# Starting point for the 3 free components; sum(start) must be <= 1.
start = array([0.1,0.6,0.1]) # [.25,.25,.25])
MH_out = MetHastDirich(parameters,M,B,start)
chain = MH_out[0]             # (M, 3) array of kept samples
count_successes = MH_out[1]   # number of accepted candidates out of B+M

# NOTE(review): chain is 2-D; pylab.hist may treat each column as a
# separate dataset -- confirm this produces the intended plot.
hist( chain, 10000 )

#plot( chain, count_successes )
#title('MetHastDirich( [2.0,2.0,3.0,3.0]), 100000, 10000, [0.99,0.99,0.99]' ) # [0.25,0.25,0.25]')
xlabel('Datum')
ylabel('Frequency')
show()

"David M. Cooke" <cookedm@physics.mcmaster.ca> wrote: "Manu Hack"  writes:

> Hi,
>
> I've been googling around but not sure how to generate random samples
> given the density function of a random variable.  I was looking around
> something like scipy.stats.rv_continuous but couldn't find anything
> useful.
>
> Also, if one wants to do Markov Chain Monte Carlo, is scipy possible
> and if not, what should be a workaround (if still want to do in
> Python).  I heard something like openbugs but seems that debian
> doesn't have a package.

Look at scipy.sandbox.montecarlo (you'll have to add a line with
'montecarlo' on it to scipy/sandbox/enabled_packages.txt to compile it,
and copy some files from numpy/random first).

For instance, scipy.sandbox.montecarlo.intsampler takes a list or array
of weights, and can sample from those. Here's a clip from the docstring:

>>> table = [10, 15, 20]
#representing this pmf:
#x       0       1       2
#p(x)    10/45   15/45   20/45
#The output will be something like:
>>> sampler = intsampler(table)
>>> sampler.sample(10)
array([c, b, b, b, b, b, c, b, b, b], dtype=object)

I use it by sampling from my PDF on a grid, and using those points as
the PMF.

-- 
|>|\/|<
/------------------------------------------------------------------\
|David M. Cooke              http://arbutus.physics.mcmaster.ca/dmc/
|cookedm@physics.mcmaster.ca
_______________________________________________
SciPy-user mailing list
SciPy-user@scipy.org
http://projects.scipy.org/mailman/listinfo/scipy-user

-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://projects.scipy.org/pipermail/scipy-user/attachments/20071022/2858dee4/attachment.html 


More information about the SciPy-user mailing list