[SciPy-user] histogramdd once more
Chris Lee
c.j.lee@tnw.utwente...
Fri Feb 15 07:51:40 CST 2008
Hi All,
I didn't strip all of the debug print comments from the earlier snippet.
I think this is clean
def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
"""histogramdd(sample, bins=10, range=None, normed=False, weights=None)
Return the N-dimensional histogram of the sample.
Parameters:
sample : sequence or array
A sequence containing N arrays or an NxM array. Input data.
bins : sequence or scalar
A sequence of edge arrays, a sequence of bin counts, or a scalar
which is the bin count for all dimensions. Default is 10.
range : sequence
A sequence of lower and upper bin edges. Default is [min, max].
normed : boolean
If False, return the number of samples in each bin, if True,
returns the density.
weights : array
Array of weights. The weights are normed only if normed is
True.
Should the sum of the weights not equal N, the total bin
count will
not be equal to the number of samples.
Returns:
hist : array
Histogram array.
edges : list
List of arrays defining the lower bin edges.
SeeAlso:
histogram
Example
>>> x = random.randn(100,3)
>>> hist3d, edges = histogramdd(x, bins = (5, 6, 7))
"""
try:
# Sample is an ND-array.
N, D = sample.shape
except (AttributeError, ValueError):
# Sample is a sequence of 1D arrays.
sample = atleast_2d(sample).T
N, D = sample.shape
nbin = empty(D, int)
edges = D*[None]
dedges = D*[None]
if weights is not None:
weights = asarray(weights)
try:
M = len(bins)
if M != D:
raise AttributeError, 'The dimension of bins must be a equal
to the dimension of the sample x.'
except TypeError:
bins = D*[bins]
# Select range for each dimension
# Used only if number of bins is given.
if range is None:
smin = atleast_1d(array(sample.min(0), float))
smax = atleast_1d(array(sample.max(0), float))
else:
smin = zeros(D)
smax = zeros(D)
for i in arange(D):
smin[i], smax[i] = range[i]
# Make sure the bins have a finite width.
for i in arange(len(smin)):
if smin[i] == smax[i]:
smin[i] = smin[i] - .5
smax[i] = smax[i] + .5
# Create edge arrays
for i in arange(D):
if isscalar(bins[i]):
nbin[i] = bins[i] + 2 # +2 for outlier bins
edges[i] = linspace(smin[i], smax[i], nbin[i]-1)
else:
edges[i] = asarray(bins[i], float)
nbin[i] = len(edges[i])+1 # +1 for outlier bins
dedges[i] = diff(edges[i])
nbin = asarray(nbin)
# Compute the bin number each sample falls into.
Ncount = {}
for i in arange(D):
Ncount[i] = digitize(sample[:,i], edges[i])
# Using digitize, values that fall on an edge are put in the right bin.
# For the rightmost bin, we want values equal to the right
# edge to be counted in the last bin, and not as an outlier.
outliers = zeros(N, int)
for i in arange(D):
# Rounding precision
decimal = int(-log10(dedges[i].min())) +6
# Find which points are on the rightmost edge.
on_edge = where(around(sample[:,i], decimal) ==
around(edges[i][-1], decimal))[0]
# Shift these points one bin to the left.
Ncount[i][on_edge] -= 1
# Flattened histogram matrix (1D)
hist = zeros(nbin.prod(), int)
# Compute the sample indices in the flattened histogram matrix.
ni = nbin.argsort()
shape = []
xy = zeros(N, int)
for i in arange(0, D-1):
xy += Ncount[ni[i]] * nbin[ni[i+1:]].prod()
xy += Ncount[ni[-1]]
# Compute the number of repetitions in xy and assign it to the
flattened histmat.
if len(xy) == 0:
return zeros(nbin-2, int), edges
flatcount = bincount(xy, weights)
a = arange(len(flatcount))
hist[a] = flatcount
# Shape into a proper matrix
hist = hist.reshape(sort(nbin))
mustPermute = True
while mustPermute:
nothingChanged = True
for i in arange(nbin.size):
j = ni[i]
if j != i:
nothingChanged = False
hist = hist.swapaxes(i,j)
ni[i],ni[j] = ni[j],ni[i]
if nothingChanged:
mustPermute = False
#print "THis is after swapping the axis ", hist.shape
# Remove outliers (indices 0 and -1 for each dimension).
core = D*[slice(1,-1)]
hist = hist[core]
# Normalize if normed is True
if normed:
s = hist.sum()
for i in arange(D):
shape = ones(D, int)
shape[i] = nbin[i]-2
hist = hist / dedges[i].reshape(shape)
hist /= s
return hist, edges
--
**********************************************
* Chris Lee *
* Laser physics and nonlinear optics group *
* MESA+ Institute *
* University of Twente *
* Phone: ++31 (0)53 489 3968 *
* fax: ++31 (0) 53 489 1102 *
**********************************************
More information about the SciPy-user
mailing list