[Scipy-svn] r3377 - trunk/scipy/sandbox/dhuard

scipy-svn@scip... scipy-svn@scip...
Thu Sep 27 13:37:28 CDT 2007


Author: dhuard
Date: 2007-09-27 13:37:13 -0500 (Thu, 27 Sep 2007)
New Revision: 3377

Added:
   trunk/scipy/sandbox/dhuard/histogram.f
   trunk/scipy/sandbox/dhuard/histogram.py
   trunk/scipy/sandbox/dhuard/test_histogram.py
Log:
added histogram functions.

Added: trunk/scipy/sandbox/dhuard/histogram.f
===================================================================
--- trunk/scipy/sandbox/dhuard/histogram.f	2007-09-27 15:49:05 UTC (rev 3376)
+++ trunk/scipy/sandbox/dhuard/histogram.f	2007-09-27 18:37:13 UTC (rev 3377)
@@ -0,0 +1,348 @@
+C*******************************************************************
+C RETURN THE HISTOGRAM OF ARRAY X, THAT IS, THE NUMBER OF ELEMENTS
+C IN X FALLING INTO EACH BIN.
+C THE BIN ARRAY CONSISTS IN N BINS STARTING AT BIN0 WITH WIDTH DELTA.
+C HISTO H : | LOWER OUTLIERS | 1 | 2 | 3 | ... |  N  | UPPER OUTLIERS |
+C INDEX i : |        1       | 2 | 3 | 4 | ... | N+1 |      N+2       |
+
+      SUBROUTINE FIXED_BINSIZE(X, BIN0, DELTA, N, NX, H)
+
+C PARAMETERS
+C ----------
+C X : ARRAY 
+C BIN0 : LEFT BIN EDGE
+C DELTA : BIN WIDTH
+C N : NUMBER OF BINS
+C H : HISTOGRAM
+
+      IMPLICIT NONE
+      INTEGER :: N, NX, i, K
+      DOUBLE PRECISION ::  X(NX), BIN0, DELTA
+      INTEGER :: H(N+2), UP, LOW
+
+CF2PY INTEGER INTENT(IN) :: N
+CF2PY INTEGER INTENT(HIDE) :: NX = LEN(X)
+CF2PY DOUBLE PRECISION DIMENSION(NX), INTENT(IN) :: X
+CF2PY DOUBLE PRECISION INTENT(IN) :: BIN0, DELTA
+CF2PY INTEGER DIMENSION(N+2), INTENT(OUT) :: H
+
+
+      DO i=1,N+2
+        H(i) = 0
+      ENDDO
+      
+C     OUTLIERS INDICES
+      UP = N+2
+      LOW = 1
+
+      DO i=1,NX
+        IF (X(i) >= BIN0) THEN
+          K = INT((X(i)-BIN0)/DELTA)+1
+          IF (K <= N) THEN
+            H(K+1) = H(K+1) + 1
+          ELSE 
+            H(UP) = H(UP) + 1
+          ENDIF
+        ELSE 
+          H(LOW) = H(LOW) + 1
+        ENDIF
+      ENDDO
+
+      END SUBROUTINE
+
+
+
+C*******************************************************************
+C RETURN THE WEIGHTED HISTOGRAM OF ARRAY X, THAT IS, THE SUM OF THE 
+C WEIGHTS OF THE ELEMENTS OF X FALLING INTO EACH BIN.
+C THE BIN ARRAY CONSISTS IN N BINS STARTING AT BIN0 WITH WIDTH DELTA.
+C HISTO H : | LOWER OUTLIERS | 1 | 2 | 3 | ... |  N  | UPPER OUTLIERS |
+C INDEX i : |        1       | 2 | 3 | 4 | ... | N+1 |      N+2       |
+
+      SUBROUTINE WEIGHTED_FIXED_BINSIZE(X, W, BIN0, DELTA, N, NX, H)
+
+C PARAMETERS
+C ----------
+C X : ARRAY 
+C W : WEIGHTS
+C BIN0 : LEFT BIN EDGE
+C DELTA : BIN WIDTH
+C N : NUMBER OF BINS
+C H : HISTOGRAM
+
+      IMPLICIT NONE
+      INTEGER :: N, NX, i, K
+      DOUBLE PRECISION ::  X(NX), W(NX), BIN0, DELTA, H(N+2)
+      INTEGER :: UP, LOW
+
+CF2PY INTEGER INTENT(IN) :: N
+CF2PY INTEGER INTENT(HIDE) :: NX = LEN(X)
+CF2PY DOUBLE PRECISION DIMENSION(NX), INTENT(IN) :: X, W
+CF2PY DOUBLE PRECISION INTENT(IN) :: BIN0, DELTA
+CF2PY DOUBLE PRECISION DIMENSION(N+2), INTENT(OUT) :: H
+
+
+      DO i=1,N+2
+        H(i) = 0.D0
+      ENDDO
+      
+C     OUTLIERS INDICES
+      UP = N+2
+      LOW = 1
+
+      DO i=1,NX
+        IF (X(i) >= BIN0) THEN
+          K = INT((X(i)-BIN0)/DELTA)+1
+          IF (K <= N) THEN
+            H(K+1) = H(K+1) + W(i)
+          ELSE 
+            H(UP) = H(UP) + W(i)
+          ENDIF
+        ELSE 
+          H(LOW) = H(LOW) + W(i)
+        ENDIF
+      ENDDO
+
+      END SUBROUTINE
+
+
+C*****************************************************************************
+C COMPUTE N DIMENSIONAL FLATTENED HISTOGRAM
+
+      SUBROUTINE FIXED_BINSIZE_ND(X, BIN0, DELTA, N, COUNT, NX,D,NC)
+
+C PARAMETERS
+C ----------
+C X : ARRAY (NXD)
+C BIN0 : LEFT BIN EDGES (D)      
+C DELTA : BIN WIDTH (D)
+C N : NUMBER OF BINS (D)
+C COUNT : FLATTENED HISTOGRAM (NC)
+C NC : PROD(N(:)+2)
+
+      IMPLICIT NONE
+      INTEGER :: NX, D, NC,N(D), i, j, k, T
+      DOUBLE PRECISION :: X(NX,D), BIN0(D), DELTA(D)
+      INTEGER :: INDEX(NX), ORDER(D), MULT, COUNT(NC)
+
+
+CF2PY DOUBLE PRECISION DIMENSION(NX,D), INTENT(IN) :: X
+CF2PY DOUBLE PRECISION DIMENSION(D) :: BIN0, DELTA
+CF2PY INTEGER INTENT(IN) :: N
+CF2PY INTEGER DIMENSION(NC), INTENT(OUT) :: COUNT
+CF2PY INTEGER INTENT(HIDE) :: NX=SHAPE(X,1)
+CF2PY INTEGER INTENT(HIDE) :: D=SHAPE(X,2)
+
+
+C     INITIALIZE INDEX
+      DO i=1, NX
+        INDEX(i) = 0
+      ENDDO
+
+C     INITIALIZE COUNT
+      DO i=1,NC
+        COUNT(i) = 0
+      ENDDO
+
+C     ORDER THE BIN SIZE ARRAY N(D)
+      CALL QSORTI(ORDER, D, N)
+
+C     INITIALIZE THE DIMENSIONAL MULTIPLIER
+      MULT=1
+
+C     FIND THE FLATTENED INDEX OF EACH SAMPLE
+      DO j=1, D
+        k = ORDER(j)
+        MULT=MULT*N(k)
+
+        DO i=1, NX 
+          IF (X(i,k) >= BIN0(k)) THEN
+            T = INT((X(i, k)-BIN0(k))/DELTA(k))+1
+            IF (T <= N(k)) THEN
+              T = T+1
+            ELSE
+              T = N(k)+2
+            ENDIF
+          ELSE
+            T = 1
+          ENDIF
+
+          INDEX(i) = INDEX(I) + T*MULT
+        ENDDO
+      ENDDO
+
+C     COUNT THE NUMBER OF SAMPLES FALLING INTO EACH BIN
+      DO i=1,NX
+        COUNT(INDEX(i)) =  COUNT(INDEX(i)) + 1
+      ENDDO
+
+      END SUBROUTINE 
+
+
+C From HDK@psuvm.psu.edu Thu Dec  8 15:27:16 MST 1994
+C 
+C The following was converted from Algol recursive to Fortran iterative
+C by a colleague at Penn State (a long time ago - Fortran 66, please
+C excuse the GoTo's). The following code also corrects a bug in the
+C Quicksort algorithm published in the ACM (see Algorithm 402, CACM,
+C Sept. 1970, pp 563-567; also you younger folks who weren't born at
+C that time might find interesting the history of the Quicksort
+C algorithm beginning with the original published in CACM, July 1961,
+C pp 321-322, Algorithm 64). Note that the following algorithm sorts
+C integer data; actual data is not moved but sort is affected by sorting
+C a companion index array (see leading comments). The data type being
+C sorted can be changed by changing one line; see comments after
+C declarations and subsequent one regarding comparisons(Fortran
+C 77 takes care of character comparisons of course, so that comment
+C is merely historical from the days when we had to write character
+C compare subprograms, usually in assembler language for a specific
+C mainframe platform at that time). But the following algorithm is
+C good, still one of the best available.
+
+
+      SUBROUTINE QSORTI (ORD,N,A)
+C
+C==============SORTS THE ARRAY A(I),I=1,2,...,N BY PUTTING THE
+C   ASCENDING ORDER VECTOR IN ORD.  THAT IS ASCENDING ORDERED A
+C   IS A(ORD(I)),I=1,2,...,N; DESCENDING ORDER A IS A(ORD(N-I+1)),
+C   I=1,2,...,N .  THIS SORT RUNS IN TIME PROPORTIONAL TO N LOG N .
+C
+C
+C     ACM QUICKSORT - ALGORITHM #402 - IMPLEMENTED IN FORTRAN 66 BY
+C                                 WILLIAM H. VERITY, WHV@PSUVM.PSU.EDU
+C                                 CENTER FOR ACADEMIC COMPUTING
+C                                 THE PENNSYLVANIA STATE UNIVERSITY
+C                                 UNIVERSITY PARK, PA.  16802
+C
+      IMPLICIT INTEGER (A-Z)
+C
+      DIMENSION ORD(N),POPLST(2,20)
+      INTEGER X,XX,Z,ZZ,Y
+C
+C     TO SORT DIFFERENT INPUT TYPES, CHANGE THE FOLLOWING
+C     SPECIFICATION STATEMENTS; FOR EXAMPLE, FOR FORTRAN CHARACTER
+C     USE THE FOLLOWING:  CHARACTER *(*) A(N)
+C
+      INTEGER A(N)
+C
+      NDEEP=0
+      U1=N
+      L1=1
+      DO 1  I=1,N
+    1 ORD(I)=I
+    2 IF (U1.LE.L1) RETURN
+C
+    3 L=L1
+      U=U1
+C
+C PART
+C
+    4 P=L
+      Q=U
+C     FOR CHARACTER SORTS, THE FOLLOWING 3 STATEMENTS WOULD BECOME
+C     X = ORD(P)
+C     Z = ORD(Q)
+C     IF (A(X) .LE. A(Z)) GO TO 2
+C
+C     WHERE "CLE" IS A LOGICAL FUNCTION WHICH RETURNS "TRUE" IF THE
+C     FIRST ARGUMENT IS LESS THAN OR EQUAL TO THE SECOND, BASED ON "LEN"
+C     CHARACTERS.
+C
+      X=A(ORD(P))
+      Z=A(ORD(Q))
+      IF (X.LE.Z) GO TO 5
+      Y=X
+      X=Z
+      Z=Y
+      YP=ORD(P)
+      ORD(P)=ORD(Q)
+      ORD(Q)=YP
+    5 IF (U-L.LE.1) GO TO 15
+      XX=X
+      IX=P
+      ZZ=Z
+      IZ=Q
+C
+C LEFT
+C
+    6 P=P+1
+      IF (P.GE.Q) GO TO 7
+      X=A(ORD(P))
+      IF (X.GE.XX) GO TO 8
+      GO TO 6
+    7 P=Q-1
+      GO TO 13
+C
+C RIGHT
+C
+    8 Q=Q-1
+      IF (Q.LE.P) GO TO 9
+      Z=A(ORD(Q))
+      IF (Z.LE.ZZ) GO TO 10
+      GO TO 8
+    9 Q=P
+      P=P-1
+      Z=X
+      X=A(ORD(P))
+C
+C DIST
+C
+   10 IF (X.LE.Z) GO TO 11
+      Y=X
+      X=Z
+      Z=Y
+      IP=ORD(P)
+      ORD(P)=ORD(Q)
+      ORD(Q)=IP
+   11 IF (X.LE.XX) GO TO 12
+      XX=X
+      IX=P
+   12 IF (Z.GE.ZZ) GO TO 6
+      ZZ=Z
+      IZ=Q
+      GO TO 6
+C
+C OUT
+C
+   13 CONTINUE
+      IF (.NOT.(P.NE.IX.AND.X.NE.XX)) GO TO 14
+      IP=ORD(P)
+      ORD(P)=ORD(IX)
+      ORD(IX)=IP
+   14 CONTINUE
+      IF (.NOT.(Q.NE.IZ.AND.Z.NE.ZZ)) GO TO 15
+      IQ=ORD(Q)
+      ORD(Q)=ORD(IZ)
+      ORD(IZ)=IQ
+   15 CONTINUE
+      IF (U-Q.LE.P-L) GO TO 16
+      L1=L
+      U1=P-1
+      L=Q+1
+      GO TO 17
+   16 U1=U
+      L1=Q+1
+      U=P-1
+   17 CONTINUE
+      IF (U1.LE.L1) GO TO 18
+C
+C START RECURSIVE CALL
+C
+      NDEEP=NDEEP+1
+      POPLST(1,NDEEP)=U
+      POPLST(2,NDEEP)=L
+      GO TO 3
+   18 IF (U.GT.L) GO TO 4
+C
+C POP BACK UP IN THE RECURSION LIST
+C
+      IF (NDEEP.EQ.0) GO TO 2
+      U=POPLST(1,NDEEP)
+      L=POPLST(2,NDEEP)
+      NDEEP=NDEEP-1
+      GO TO 18
+C
+C END SORT
+C END QSORT
+C
+      END

Added: trunk/scipy/sandbox/dhuard/histogram.py
===================================================================
--- trunk/scipy/sandbox/dhuard/histogram.py	2007-09-27 15:49:05 UTC (rev 3376)
+++ trunk/scipy/sandbox/dhuard/histogram.py	2007-09-27 18:37:13 UTC (rev 3377)
@@ -0,0 +1,272 @@
+import numpy as np
+import subprocess
+
+try:
+	import flib
+except:
+	print 'Building the flib fortran library.'
+	subprocess.call('f2py -c histogram.f -m flib', shell=True)
+	import flib
+
+def histogram(a, bins=10, range=None, normed=False, weights=None, axis=None, strategy=None):
+    """histogram(a, bins=10, range=None, normed=False, weights=None, axis=None) 
+                                                                   -> H, dict
+    
+    Return the distribution of sample.
+    
+    :Parameters:
+      - `a` : Array sample.
+      - `bins` : Number of bins, or an array of bin edges, in which case the 
+                range is not used. If 'Scott' or 'Freeman' is passed, then 
+                the named method is used to find the optimal number of bins.
+      - `range` : Lower and upper bin edges, default: [min, max].
+      - `normed` :Boolean, if False, return the number of samples in each bin,
+                if True, return the density.  
+      - `weights` : Sample weights. The weights are normed only if normed is 
+                True. Should weights.sum() not equal len(a), the total bin count 
+                will not be equal to the number of samples.
+      - `axis` : Specifies the dimension along which the histogram is computed. 
+                Defaults to None, which aggregates the entire sample array.
+      - `strategy` : Histogramming method (binsize, searchsorted or digitize).
+    
+    :Return:
+      - `H` : The number of samples in each bin. 
+        If normed is True, H is a frequency distribution.
+      - dict{ 'edges':      The bin edges, including the rightmost edge.
+        'upper':      Upper outliers.
+        'lower':      Lower outliers.
+        'bincenters': Center of bins. 
+        'strategy': the histogramming method employed.}
+    
+    :Examples:
+      >>> x = random.rand(100,10)
+      >>> H, D = histogram(x, bins=10, range=[0,1], normed=True)
+      >>> H2, D = histogram(x, bins=10, range=[0,1], normed=True, axis=0)
+    
+    :SeeAlso: histogramnd
+    """
+    weighted = weights is not None
+    
+    a = np.asarray(a)
+    if axis is None:
+        a = np.atleast_1d(a.ravel())
+        if weighted:
+            weights = np.atleast_1d(weights.ravel())
+        axis = 0 
+        
+    # Define the range    
+    if range is None:
+        mn, mx = a.min(), a.max()
+        if mn == mx:
+            mn = mn - .5
+            mx = mx + .5
+        range = [mn, mx]
+    
+    # Find the optimal number of bins.
+    if bins is None or type(bins) == str:
+        bins = _optimize_binning(a, range, bins)
+        
+    # Compute the bin edges if they are not given explicitely.    
+    # For the rightmost bin, we want values equal to the right 
+    # edge to be counted in the last bin, and not as an outlier. 
+    # Hence, we shift the last bin by a tiny amount.
+    if not np.iterable(bins):
+        dr = np.diff(range)/bins*1e-10
+        edges = np.linspace(range[0], range[1]+dr, bins+1, endpoint=True)
+    else:
+        edges = np.asarray(bins, float)
+    
+    dedges = np.diff(edges)
+    bincenters = edges[:-1] + dedges/2.
+    
+    # Number of bins
+    nbin = len(edges)-1
+    
+        # Measure of bin precision.
+    decimal = int(-np.log10(dedges.min())+10)
+        
+    # Choose the fastest histogramming method
+    even = (len(set(np.around(dedges, decimal))) == 1)
+    if strategy is None:
+        if even:
+            strategy = 'binsize'
+        else:
+            if nbin > 30: # approximative threshold
+                strategy = 'searchsort'
+            else:
+                strategy = 'digitize'
+    else:
+        if strategy not in ['binsize', 'digitize', 'searchsort']:
+            raise 'Unknown histogramming strategy.', strategy
+        if strategy == 'binsize' and not even:
+            raise 'This binsize strategy cannot be used for uneven bins.'
+        
+    # Parameters for the fixed_binsize functions.
+    start = float(edges[0])
+    binwidth = float(dedges[0])
+       
+    # Looping to reduce memory usage
+    block = 66600 
+    slices = [slice(None)]*a.ndim
+    for i in np.arange(0,len(a),block):
+        slices[axis] = slice(i,i+block)
+        at = a[slices]
+        if weighted:
+            at = np.concatenate((at, weights[slices]), axis)        
+            if strategy == 'binsize':   
+                count = np.apply_along_axis(_splitinmiddle,axis,at,
+                    flib.weighted_fixed_binsize,start,binwidth,nbin)               
+            elif strategy == 'searchsort':
+                count = np.apply_along_axis(_splitinmiddle,axis,at, \
+                        _histogram_searchsort_weighted, edges)
+            elif strategy == 'digitize':
+                    count = np.apply_along_axis(_splitinmiddle,axis,at,\
+                        _histogram_digitize,edges,normed)
+        else:
+            if strategy == 'binsize':
+                count = np.apply_along_axis(flib.fixed_binsize,axis,at,start,binwidth,nbin)
+            elif strategy == 'searchsort':
+                count = np.apply_along_axis(_histogram_searchsort,axis,at,edges)
+            elif strategy == 'digitize':
+                count = np.apply_along_axis(_histogram_digitize,axis,at,None,edges,
+                        normed)
+                    
+        if i == 0:
+            total = count
+        else:
+            total += count
+        
+    # Outlier count
+    upper = total.take(np.array([-1]), axis)
+    lower = total.take(np.array([0]), axis)
+    
+    # Non-outlier count
+    core = a.ndim*[slice(None)]
+    core[axis] = slice(1, -1)
+    hist = total[core]
+    
+    if normed:
+        normalize = lambda x: np.atleast_1d(x/(x*dedges).sum())
+        hist = np.apply_along_axis(normalize, axis, hist)
+
+    return hist, {'edges':edges, 'lower':lower, 'upper':upper, \
+        'bincenters':bincenters, 'strategy':strategy}
+        
+
+
+def _histogram_fixed_binsize(a, start, width, n):
+    """histogram_even(a, start, width, n) -> histogram
+    
+    Return an histogram where the first bin counts the number of lower
+    outliers and the last bin the number of upper outliers. Works only with 
+    fixed width bins. 
+    
+    :Parameters:
+      a : array
+        Array of samples.
+      start : float
+        Left-most bin edge.
+      width : float
+        Width of the bins. All bins are considered to have the same width.
+      n : int
+        Number of bins. 
+    
+    :Return:
+      H : array
+        Array containing the number of elements in each bin. H[0] is the number
+        of samples smaller than start and H[-1] the number of samples 
+        greater than start + n*width.
+    """    
+                 
+    return flib.fixed_binsize(a, start, width, n)
+
+
+def _histogram_binsize_weighted(a, w, start, width, n):
+    """histogram_even_weighted(a, start, width, n) -> histogram
+    
+    Return an histogram where the first bin counts the number of lower
+    outliers and the last bin the number of upper outliers. Works only with 
+    fixed width bins. 
+    
+    :Parameters:
+      a : array
+        Array of samples.
+      w : array
+        Weights of samples.
+      start : float
+        Left-most bin edge.
+      width : float
+        Width of the bins. All bins are considered to have the same width.
+      n : int
+        Number of bins. 
+    
+    :Return:
+      H : array
+        Array containing the number of elements in each bin. H[0] is the number
+        of samples smaller than start and H[-1] the number of samples 
+        greater than start + n*width.
+    """    
+    return flib.weighted_fixed_binsize(a, w, start, width, n)
+       
+def _histogram_searchsort(a, bins):
+    n = np.sort(a).searchsorted(bins)
+    n = np.concatenate([n, [len(a)]])
+    count = np.concatenate([[n[0]], n[1:]-n[:-1]])
+    return count
+    
+def _histogram_searchsort_weighted(a, w, bins):
+    i = np.sort(a).searchsorted(bins)
+    sw = w[np.argsort(a)]
+    i = np.concatenate([i, [len(a)]])
+    n = np.concatenate([[0],sw.cumsum()])[i]
+    count = np.concatenate([[n[0]], n[1:]-n[:-1]])
+    return count
+
+def _splitinmiddle(x, function, *args, **kwds):
+    x1,x2 = np.hsplit(x, 2)
+    return function(x1,x2,*args, **kwds)
+
+def _histogram_digitize(a, w, edges, normed):
+    """Internal routine to compute the 1d weighted histogram for uneven bins.
+    a: sample
+    w: weights
+    edges: bin edges
+    weighted: Means that the weights are appended to array a. 
+    Return the bin count or frequency if normed.
+    """
+    weighted = w is not None
+    nbin = edges.shape[0]+1
+    if weighted:
+        count = np.zeros(nbin, dtype=w.dtype)
+        if normed:    
+            count = np.zeros(nbin, dtype=float)
+            w = w/w.mean()
+    else:
+        count = np.zeros(nbin, int)
+            
+    binindex = np.digitize(a, edges)
+        
+    # Count the number of identical indices.
+    flatcount = np.bincount(binindex, w)
+    
+    # Place the count in the histogram array.
+    count[:len(flatcount)] = flatcount
+       
+    return count
+
+
+def _optimize_binning(x, range, method='Freedman'):
+    """Find the optimal number of bins.
+    Available methods : Freedman, Scott
+    """
+    N = x.shape[0]
+    if method.lower()=='freedman':
+        s=np.sort(x) 
+        IQR = s[int(N*.75)] - s[int(N*.25)] # Interquantile range (75% -25%)
+        width = 2* IQR*N**(-1./3)
+        
+    elif method.lower()=='scott':
+        width = 3.49 * x.std()* N**(-1./3)
+    else:
+        raise 'Method must be Scott or Freedman', method
+    return int(np.diff(range)/width)

Added: trunk/scipy/sandbox/dhuard/test_histogram.py
===================================================================
--- trunk/scipy/sandbox/dhuard/test_histogram.py	2007-09-27 15:49:05 UTC (rev 3376)
+++ trunk/scipy/sandbox/dhuard/test_histogram.py	2007-09-27 18:37:13 UTC (rev 3377)
@@ -0,0 +1,99 @@
+from numpy.testing import *
+from histogram import _histogram_fixed_binsize, _histogram_digitize,\
+    _histogram_searchsort, histogram,_optimize_binning
+import numpy as np
+from numpy.random import rand
+
+class test_histogram1d_functions(NumpyTestCase):
+    def check_consistency(self):
+        n = 100
+        r = rand(n)*12-1
+        bins = range(11)
+        a = _histogram_fixed_binsize(r, bins[0], bins[1]-bins[0], len(bins)-1)
+        b = _histogram_digitize(r, None, np.array(bins), False)
+        c = _histogram_searchsort(r,bins)
+        assert_array_equal(a,b)
+        assert_array_equal(c,b)
+        
+class test_histogram(NumpyTestCase):
+    def check_simple(self):
+        n=100
+        v=rand(n)
+        (a,b)=histogram(v)
+        #check if the sum of the bins equals the number of samples
+        assert_equal(np.sum(a,axis=0),n)
+        #check that the bin counts are evenly spaced when the data is from a linear function
+        (a,b)=histogram(np.linspace(0,10,100))
+        assert_array_equal(a,10)
+        #Check the construction of the bin array
+        a, b = histogram(v, bins=4, range=[.2,.8])
+        assert_array_almost_equal(b['edges'],np.linspace(.2, .8, 5),8)
+        #Check the number of outliers
+        assert_equal((v<.2).sum(), b['lower'])
+        assert_equal((v>.8).sum(),b['upper'])
+        #Check the normalization
+        bins = [0,.5,.75,1]
+        a,b = histogram(v, bins, normed=True)
+        assert_almost_equal((a*np.diff(bins)).sum(), 1)
+        
+    def check_axis(self):
+        n,m = 100,20
+        v = rand(n,m)
+        a,b = histogram(v, bins=5)
+        # Check dimension is reduced (axis=None).
+        assert_equal(a.ndim, 1)
+        #Check total number of count is equal to the number of samples.
+        assert_equal(a.sum(), n*m)
+        a,b = histogram(v, bins = 7, axis=0)
+        # Check shape of new array is ok.
+        assert(a.ndim == 2)
+        assert_array_equal(a.shape,[7, m])
+        # Check normalization is consistent 
+        a,b = histogram(v, bins = 7, axis=0, normed=True)
+        assert_array_almost_equal((a.T*np.diff(b['edges'])).sum(1), np.ones((m)),5)
+        a,b = histogram(v, bins = 7, axis=1, normed=True)
+        assert_array_equal(a.shape, [n,7])
+        assert_array_almost_equal((a*np.diff(b['edges'])).sum(1), np.ones((n)))
+        # Check results are consistent with 1d estimate
+        a1, b1 = histogram(v[0,:], bins=b['edges'], normed=True)
+        assert_array_almost_equal(a1, a[0,:],7)
+            
+    def check_weights(self):
+        # Check weights = constant gives the same answer as no weights.
+        v = rand(100)
+        w = np.ones(100)*5
+        a,b = histogram(v)
+        na,nb = histogram(v, normed=True)
+        wa,wb = histogram(v, weights=w)
+        nwa,nwb = histogram(v, weights=w, normed=True)
+        assert_array_equal(a*5, wa)
+        assert_array_almost_equal(na, nwa,8)
+        # Check weights are properly applied.
+        v = np.linspace(0,10,10)
+        w = np.concatenate((np.zeros(5), np.ones(5)))
+        wa,wb = histogram(v, bins=np.linspace(0,10.01, 11),weights=w)
+        assert_array_almost_equal(wa, w)
+        
+    def check_strategies(self):
+        v = rand(100)
+        ae,be = histogram(v, strategy='binsize')
+        ab,bb = histogram(v, strategy='digitize')
+        as,bs = histogram(v, strategy='searchsort')
+        assert_array_equal(ae, ab)
+        assert_array_equal(ae, as)
+        
+        w = rand(100)
+        ae,be = histogram(v, weights=w, strategy='binsize')
+        ab,bb = histogram(v, weights=w, strategy='digitize')
+        as,bs = histogram(v, weights=w, strategy='searchsort')
+        assert_array_almost_equal(ae, ab,8)
+        assert_array_almost_equal(ae, as,8)
+    
+    def check_automatic_binning(self):
+        v = rand(100)
+        h,b = histogram(v, 'Scott')
+        h,b = histogram(v, 'Freedman')
+                            
+        
+if __name__ == "__main__":
+    NumpyTest().run()



More information about the Scipy-svn mailing list