# [SciPy-user] Generate random samples from a non-standard distribution

Michael Nandris mnandris@btinternet....
Mon Oct 22 16:42:59 CDT 2007

```i'm sure there are shorter ways of doing this,

like here--> google: ifa.hawaii.edu markov lecture10 for some shorter code on markov models

######################################
from __future__ import division
from numpy.random import multinomial
from numpy import dot
from MA import take            # requires python-numeric-ext
from scipy.stats import rv_discrete
import timeit
from numpy import NaN

"""
src: Mathematical Modeling and Computer Simulation by Maki and Thompson page 179
Consider a Markov Chain with 4 states and transition matrix T. Use simulation to estimate the 5-step transition matrix.
There are at least 2 ways of doing this: very slowly, using scipy.rv_discrete, or quite fast using numpy.multinomial (which has a bug that may or may not have been fixed in the official release, plus an unsolved bug: it can't handle zeros, which require substituting with 1e-16; but it does give the right answer).
"""

# 4-state Markov transition matrix from Maki & Thompson p.179.
# Row i gives P(next state | current state == i); each row sums to 1.
T = [[ 0.6, 0.3, 0.0, 0.1 ],
[ 0.5, 0.0, 0.5, 0.0 ],
[ 0.0, 0.2, 0.2, 0.6 ],
[ 0.2, 0.0, 0.8, 0.0 ]]

def recursor(n, transition=None):
    """Return the n-step transition matrix: transition multiplied by itself n times.

    Parameters:
        n          -- number of steps (must be >= 1)
        transition -- square matrix to iterate; defaults to the module-level T

    Returns the matrix product transition^n as produced by numpy.dot.
    """
    from functools import reduce  # reduce is not a builtin on Python 3
    if transition is None:
        transition = T
    return reduce(dot, [transition] * n)

def analyticalSolution():
    """Print the exact 5-step transition matrix.

    Analytical solution agrees with the answer on p.180 of Maki & Thompson.
    print() with a single argument behaves the same on Python 2 and 3.
    """
    print(recursor(5))

# INPUT mirrors T, but every exact 0.0 is replaced by 1e-16 (with the largest
# row entry reduced by 1e-16 so each row still sums to 1): the vintage
# numpy.random.multinomial used here cannot handle zero probabilities.
INPUT = [[ 0.6-(1e-16), 0.3, 1e-16, 0.1 ],        # input for multinomial is same as T
[ 0.5-(1e-16), 1e-16, 0.5-(1e-16), 1e-16 ],
[ 1e-16, 0.2, 0.2, 0.6-(1e-16) ],
[ 0.2-(1e-16), 1e-16, 0.8-(1e-16), 1e-16 ]]

STATES = [0,1,2,3]    # state labels for the 4-state chain
SIZE = 10000          # number of simulated chains per starting state
MAGNITUDE = 1/SIZE    # true division (from __future__ import division): count -> frequency

def showResults(d, magnitude=None):
    """Print and return the scaled counts in `d` as rounded strings.

    Parameters:
        d         -- dict mapping state -> count; left unmodified (the earlier
                     version emptied it with popitem)
        magnitude -- scale factor applied to each count; defaults to the
                     module-level MAGNITUDE (1/SIZE), i.e. relative frequency

    Requires 'from __future__ import division' under Python 2.
    The rounding precision (5 decimal places) is fixed; adjust by hand if
    SIZE changes by orders of magnitude.
    """
    if magnitude is None:
        magnitude = MAGNITUDE
    L = [str(round(v * magnitude, 5)) for v in d.values()]
    print(L)
    return L

class ScipyStatsMarkovSim(object):
    """Estimate one row of the 5-step transition matrix by simulation,
    drawing every step with scipy.stats.rv_discrete (correct but slow).

    Uses module-level T, STATES, SIZE and showResults.
    """

    def __init__(self, i):
        self.i = i                                    # starting state
        self.ans = dict(zip(STATES, (0, 0, 0, 0)))    # end-state counts

    def markov(self):
        """Run SIZE five-step chains from state self.i; print frequencies."""
        # T[i] is already a plain row list; the old MA.take() round-trip
        # (list(take(T,(i,))[0])) produced exactly the same thing.
        first = T[self.i]
        sample = rv_discrete(name='sample', values=(STATES, first)).rvs(size=SIZE)
        for s in sample:
            self.ans[self.nthStep(s)] += 1
        return showResults(self.ans)

    def nthStep(self, inpt):
        """Advance 4 more steps from state `inpt`; return the 5th state.

        Replaces the copy-pasted second/third/fourth/fifth chain with a loop.
        """
        state = inpt
        for _ in range(4):
            row = T[state]
            state = rv_discrete(name='sample', values=(STATES, row)).rvs(size=1)[0]
        return state

class NumpyRandomMarkovSim(object):
    """Estimate one row of the 5-step transition matrix by simulation,
    drawing every step with numpy.random.multinomial (fast).

    Uses module-level INPUT (T with zeros replaced by 1e-16), STATES,
    SIZE and showResults.
    """

    def markov(self, i):
        """Run SIZE five-step chains from state `i`; print frequencies."""
        ans = dict(zip(STATES, (0, 0, 0, 0)))
        # One multinomial throw distributes all SIZE chains over the 4
        # possible first-step states at once.
        t = multinomial(SIZE, INPUT[i])               # length-4 count vector
        for state, n in enumerate(t):                 # n chains start step 2 in `state`
            for _ in range(n):                        # range, not xrange: Py2 and Py3
                ans[self.nthStep(state)] += 1
        return showResults(ans)

    def nthStep(self, inpt):
        """Advance 4 more steps from state `inpt`; return the 5th state.

        Each single-trial multinomial draw is a one-hot vector; index(1)
        recovers the sampled state. Replaces the copy-pasted step chain.
        """
        state = inpt
        for _ in range(4):
            state = list(multinomial(1, INPUT[state])).index(1)
        return state

def testScipy():
    """Benchmark the rv_discrete-based simulator.

    Only the first start state is exercised: this path takes roughly 32
    seconds for a quarter of the full workload, so the remaining three
    rows are left commented out.
    """
    ScipyStatsMarkovSim(0).markov()
    #ScipyStatsMarkovSim(1).markov()
    #ScipyStatsMarkovSim(2).markov()
    #ScipyStatsMarkovSim(3).markov()

def testNumpy():
    """Benchmark the multinomial-based simulator.

    Runs all four start states (about 3 seconds for 4x the workload the
    scipy version manages in 32 seconds).
    """
    for start_state in (0, 1, 2, 3):
        NumpyRandomMarkovSim().markov(start_state)

if __name__ == '__main__':
    # Time the exact matrix-power solution.
    t = timeit.Timer('analyticalSolution()', 'from __main__ import analyticalSolution')
    PASSES = 10
    elapsed = t.timeit(number=PASSES)
    # timeit.Timer.timeit returns SECONDS; the original message said
    # "micro-seconds", which mislabelled the measurement.
    print("analyticalSolution() takes %0.3f seconds per pass" % (elapsed / PASSES))

    # The scipy-based simulation is far slower; uncomment to benchmark it.
    #t = timeit.Timer('testScipy()', 'from __main__ import testScipy')
    #PASSES = 1
    #elapsed = t.timeit(number=PASSES)
    #print("scipy.markov() takes %0.3f seconds per pass" % (elapsed / PASSES))

    PASSES = 1
    s = timeit.Timer('testNumpy()', 'from __main__ import testNumpy')
    elapsed2 = s.timeit(number=PASSES)
    print("numpy.markov() takes %0.3f seconds per pass" % (elapsed2 / PASSES))

########################################
# distgen.py
from numpy import random
from pylab import show, plot, hist
from scipy import polyfit, polyval
from scipy.stats.distributions import cauchy

def polynomialRegression(x, y, degree=20):
    """Least-squares polynomial fit of y against x.

    Parameters:
        x, y   -- sample coordinates (sequences of equal length)
        degree -- polynomial degree; defaults to the previously
                  hard-coded value of 20

    Returns (x, fitted_y) where fitted_y evaluates the fit at each x.
    """
    coeffs = polyfit(x, y, degree)
    return x, polyval(coeffs, x)

# Draw 1,000,000 Gumbel(loc=1000, scale=0.5) samples in a single vectorized
# call instead of a million scalar calls in a Python loop.
# Other distributions tried in the original (swap in as desired):
#   random.beta(2, 2.5), random.weibull(2), random.standard_cauchy(2),
#   random.zipf(2), random.vonmises(2, 3), random.wald(2, 3),
#   random.rayleigh(), random.standard_gamma(2), random.binomial(1000, 0.5)
opt = list(random.gumbel(1000, 0.5, size=1000000))

"""
multinomial(...)
Multinomial distribution.

multinomial(n, pvals, size=None) -> random values

pvals is a sequence of probabilities that should sum to 1 (however, the
last element is always assumed to account for the remaining probability
as long as sum(pvals[:-1]) <= 1).
"""
data = hist(opt,10000)

x = data[1]
y = data[0]

coeffs = polyfit( x, y, 20 )
besty = polyval( coeffs, x )

plot(x,besty)

show()

#########################################
#from __future__ import division
from Numeric import *
from pylab import hist, xlabel, ylabel, show
import random

def MetHastDirich(param, M, B, start):
    """
    Uses a Metropolis-Hastings method to generate a chain of values from a
    Dirichlet distribution.
    Input (a four element list):
        'param' - vector of parameters for the Dirichlet dist., all > 0,
        'M'     - total number of steps kept in the MH chain after burn-in,
        'B'     - number of steps used for the burn-in,
        'start' - vector of starting values of length len(param)-1
                  (NOTE: sum(start) must be <= 1).
    Output (a two element list):
        1) the (M, len(param)-1) array of generated values
        2) the number of accepted candidates out of B+M tries.
    """
    step_cnt = 0
    accept_ind = 0        # number of accepted candidates
    x = start             # current chain position (last component is implicit)
    rv = len(param) - 1   # last random variable is constrained by summation
    # 'f' as the second positional argument is accepted by both Numeric
    # (typecode) and numpy (dtype), so this works with either import.
    data_matrix = zeros((M, rv), 'f')

    for _ in range(B + M):
        step_cnt = step_cnt + 1

        # Stick-breaking proposal: a candidate over (0,1)^rv whose
        # components sum to less than 1.
        y = []
        cumy = 0
        for j in range(rv):
            present_y = (1 - cumy) * random.uniform(0, 1)
            cumy = cumy + present_y
            y.append(present_y)

        # Acceptance ratio of Dirichlet densities at y vs the current x.
        alpha_prob = ((1 - sum(y)) / (1.0 - sum(x))) ** (param[rv] - 1.0)
        for k in range(rv):
            tterm_1 = (y[k] / x[k]) ** (param[k] - 1)
            if k == 0:
                tterm_2 = 1
            else:
                # NOTE(review): these slice bounds ([0:k-1] excludes the
                # (k-1)-th term) look off by one relative to the usual
                # Dirichlet factorization — confirm against the density
                # before trusting the stationary distribution.
                tterm_2 = (1 - sum(y[0:(k-1)])) / (1 - sum(x[0:k-1]))
            alpha_prob = alpha_prob * tterm_1 * tterm_2

        alpha_prob_final = min([alpha_prob, 1])
        u = random.uniform(0, 1)

        if u <= alpha_prob_final:                      # candidate accepted
            accept_ind = accept_ind + 1
            # BUG FIX: the chain must MOVE to the accepted candidate.
            # The original never updated x, so every rejection wrote the
            # starting point and the sampler was not a Markov chain at all.
            x = y
            if step_cnt > B:
                data_matrix[(step_cnt - B - 1), ...] = y   # record full row
        else:                                          # candidate rejected
            if step_cnt > B:
                data_matrix[(step_cnt - B - 1), ...] = x   # remain at old position
    return [data_matrix, accept_ind]

## Use the above function to compute a chain of observations for the given argument values below:

# Dirichlet parameters, all > 0 (an earlier run used [2.,2.,3.,3.]).
parameters=array([9,13,16,12]) # [2.,2.,3.,3.])
# Kept steps / burn-in, reduced from 100000 / 10000 for a quick demo run.
M = 100 # 000
B = 10 #000
# start has len(parameters)-1 entries and must sum to <= 1.
start = array([0.1,0.6,0.1]) # [.25,.25,.25])
MH_out = MetHastDirich(parameters,M,B,start)
chain = MH_out[0]            # (M, 3) array of kept chain positions
count_successes = MH_out[1]  # number of accepted candidates out of B+M

hist( chain, 10000 )

#plot( chain, count_successes )
#title('MetHastDirich( [2.0,2.0,3.0,3.0]), 100000, 10000, [0.99,0.99,0.99]' ) # [0.25,0.25,0.25]')
xlabel('Datum')
ylabel('Frequency')
show()

"David M. Cooke" <cookedm@physics.mcmaster.ca> wrote: "Manu Hack"  writes:

> Hi,
>
> I've been googling around but not sure how to generate random samples
> given the density function of a random variable.  I was looking around
> something like scipy.stats.rv_continuous but couldn't find anything
> useful.
>
> Also, if one wants to do Markov Chain Monte Carlo, is scipy possible
> and if not, what should be a workaround (if still want to do in
> Python).  I heard something like openbugs but seems that debian
> doesn't have a package.

Look at scipy.sandbox.montecarlo (you'll have to add a line with
'montecarlo' on it to scipy/sandbox/enabled_packages.txt to compile it,
and copy some files from numpy/random first).

For instance, scipy.sandbox.montecarlo.intsampler takes a list or array
of weights, and can sample from those. Here's a clip from the docstring:

>>> table = [10, 15, 20]
#representing this pmf:
#x       0       1       2
#p(x)    10/45   15/45   20/45
#The output will be something like:
>>> sampler = intsampler(table)
>>> sampler.sample(10)
array([c, b, b, b, b, b, c, b, b, b], dtype=object)

I use it by sampling from my PDF on a grid, and using those points as
the PMF.

--
|>|\/|<
/------------------------------------------------------------------\
|David M. Cooke              http://arbutus.physics.mcmaster.ca/dmc/
|cookedm@physics.mcmaster.ca
_______________________________________________
SciPy-user mailing list
SciPy-user@scipy.org
http://projects.scipy.org/mailman/listinfo/scipy-user

-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://projects.scipy.org/pipermail/scipy-user/attachments/20071022/2858dee4/attachment.html
```