[Scipy-svn] r4405 - in trunk/scipy/cluster: . tests

scipy-svn@scip... scipy-svn@scip...
Tue Jun 3 01:50:53 CDT 2008


Author: damian.eads
Date: 2008-06-03 01:50:49 -0500 (Tue, 03 Jun 2008)
New Revision: 4405

Modified:
   trunk/scipy/cluster/hierarchy.py
   trunk/scipy/cluster/tests/test_hierarchy.py
Log:
Removed unnecessary imports in hierarchy and refactored its code. Wrote more tests.

Modified: trunk/scipy/cluster/hierarchy.py
===================================================================
--- trunk/scipy/cluster/hierarchy.py	2008-06-03 04:41:10 UTC (rev 4404)
+++ trunk/scipy/cluster/hierarchy.py	2008-06-03 06:50:49 UTC (rev 4405)
@@ -175,7 +175,7 @@
 """
 
 import numpy as np
-import _hierarchy_wrap, scipy, types, math, sys, scipy.stats
+import _hierarchy_wrap, types, math, sys
 
 _cpy_non_euclid_methods = {'single': 0, 'complete': 1, 'average': 2,
                            'weighted': 6}
@@ -199,7 +199,7 @@
     observations.
     """
     #n = np.double(X.shape[1])
-    return scipy.stats.var(X, axis=0) # * n / (n - 1.0)
+    return np.var(X, axis=0, ddof=1) # * n / (n - 1.0)
 
 def _copy_array_if_base_present(a):
     """
@@ -638,6 +638,8 @@
     functions in this library.
     """
 
+    Z = numpy.asarray(Z)
+
     is_valid_linkage(Z, throw=True, name='Z')
 
     # The number of original objects is equal to the number of rows minus
@@ -795,6 +797,8 @@
 
         ||u-v||_p = (\sum {|u_i - v_i|^p})^(1/p).
     """
+    u = np.asarray(u)
+    v = np.asarray(v)
     if p < 1:
         raise ValueError("p must be at least 1")
     return math.pow((abs(u-v)**p).sum(), 1.0/p)
@@ -805,6 +809,8 @@
 
       Computes the Euclidean distance between two n-vectors u and v, ||u-v||_2
     """
+    u = np.asarray(u)
+    v = np.asarray(v)
     q=np.matrix(u-v)
     return np.sqrt((q*q.T).sum())
 
@@ -815,6 +821,8 @@
       Computes the squared Euclidean distance between two n-vectors u and v,
         (||u-v||_2)^2.
     """
+    u = np.asarray(u)
+    v = np.asarray(v)
     return ((u-v)*(u-v).T).sum()
 
 def cosine(u, v):
@@ -824,8 +832,10 @@
       Computes the Cosine distance between two n-vectors u and v,
         (1-uv^T)/(||u||_2 * ||v||_2).
     """
-    return (1.0 - (scipy.dot(u, v.T) / \
-                   (np.sqrt(scipy.dot(u, u.T)) * np.sqrt(scipy.dot(v, v.T)))))
+    u = np.asarray(u)
+    v = np.asarray(v)
+    return (1.0 - (np.dot(u, v.T) / \
+                   (np.sqrt(np.dot(u, u.T)) * np.sqrt(np.dot(v, v.T)))))
 
 def correlation(u, v):
     """
@@ -844,9 +854,9 @@
     vmu = v.mean()
     um = u - umu
     vm = v - vmu
-    return 1.0 - (scipy.dot(um, vm) /
-                  (np.sqrt(scipy.dot(um, um)) \
-                   * np.sqrt(scipy.dot(vm, vm))))
+    return 1.0 - (np.dot(um, vm) /
+                  (np.sqrt(np.dot(um, um)) \
+                   * np.sqrt(np.dot(vm, vm))))
 
 def hamming(u, v):
     """
@@ -864,6 +874,8 @@
 
       for k < n.
     """
+    u = np.asarray(u)
+    v = np.asarray(v)
     return (u != v).mean()
 
 def jaccard(u, v):
@@ -883,6 +895,8 @@
 
       for k < n.
     """
+    u = np.asarray(u)
+    v = np.asarray(v)
     return (np.double(np.bitwise_and((u != v),
                      np.bitwise_or(u != 0, v != 0)).sum()) 
             /  np.double(np.bitwise_or(u != 0, v != 0).sum()))
@@ -904,6 +918,9 @@
 
       for k < n.
     """
+    u = np.asarray(u)
+    v = np.asarray(v)
+    n = len(u)
     (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
 
     return (ntf + nft - ntt + n) / (ntf + nft + n)
@@ -916,6 +933,8 @@
       n-vectors u and v. V is a m-dimensional vector of component
       variances. It is usually computed among a larger collection vectors.
     """
+    u = np.asarray(u)
+    v = np.asarray(v)
     V = np.asarray(V)
     if len(V.shape) != 1 or V.shape[0] != u.shape[0] or u.shape[0] != v.shape[0]:
         raise TypeError('V must be a 1-D array of the same dimension as u and v.')
@@ -928,6 +947,8 @@
       Computes the Manhattan distance between two n-vectors u and v,
          \sum {u_i-v_i}.
     """
+    u = np.asarray(u)
+    v = np.asarray(v)
     return abs(u-v).sum()
 
 def mahalanobis(u, v, VI):
@@ -938,7 +959,9 @@
         (u-v)VI(u-v)^T
       where VI is the inverse covariance matrix.
     """
-    V = np.asarray(V)
+    u = np.asarray(u)
+    v = np.asarray(v)
+    VI = np.asarray(VI)
     return np.sqrt(np.dot(np.dot((u-v),VI),(u-v).T).sum())
 
 def chebyshev(u, v):
@@ -948,6 +971,8 @@
       Computes the Chebyshev distance between two n-vectors u and v,
         \max {|u_i-v_i|}.
     """
+    u = np.asarray(u)
+    v = np.asarray(v)
     return max(abs(u-v))
 
 def braycurtis(u, v):
@@ -957,6 +982,8 @@
       Computes the Bray-Curtis distance between two n-vectors u and v,
         \sum{|u_i-v_i|} / \sum{|u_i+v_i|}.
     """
+    u = np.asarray(u)
+    v = np.asarray(v)
     return abs(u-v).sum() / abs(u+v).sum()
 
 def canberra(u, v):
@@ -980,22 +1007,30 @@
         ntf = (u * not_v).sum()
         ntt = (u * v).sum()
     elif u.dtype == np.bool:
-        not_u = scipy.bitwise_not(u)
-        not_v = scipy.bitwise_not(v)
-        nff = scipy.bitwise_and(not_u, not_v).sum()
-        nft = scipy.bitwise_and(not_u, v).sum()
-        ntf = scipy.bitwise_and(u, not_v).sum()
-        ntt = scipy.bitwise_and(u, v).sum()
+        not_u = ~u
+        not_v = ~v
+        nff = (not_u & not_v).sum()
+        nft = (not_u & v).sum()
+        ntf = (u & not_v).sum()
+        ntt = (u & v).sum()
     else:
         raise TypeError("Arrays being compared have unknown type.")
 
     return (nff, nft, ntf, ntt)
 
 def _nbool_correspond_ft_tf(u, v):
-    not_u = scipy.bitwise_not(u)
-    not_v = scipy.bitwise_not(v)
-    nft = scipy.bitwise_and(not_u, v).sum()
-    ntf = scipy.bitwise_and(u, not_v).sum()
+    if u.dtype == np.int or u.dtype == np.float_ or u.dtype == np.double:
+        not_u = 1.0 - u
+        not_v = 1.0 - v
+        nff = (not_u * not_v).sum()
+        nft = (not_u * v).sum()
+        ntf = (u * not_v).sum()
+        ntt = (u * v).sum()
+    else:
+        not_u = ~u
+        not_v = ~v
+        nft = (not_u & v).sum()
+        ntf = (u & not_v).sum()
     return (nft, ntf)
 
 def yule(u, v):
@@ -1015,6 +1050,8 @@
 
          R = 2.0 * (c_{TF} + c_{FT}).
     """
+    u = np.asarray(u)
+    v = np.asarray(v)
     (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
     print nff, nft, ntf, ntt
     return float(2.0 * ntf * nft) / float(ntt * nff + ntf * nft)
@@ -1034,6 +1071,8 @@
 
       for k < n.
     """
+    u = np.asarray(u)
+    v = np.asarray(v)
     (nft, ntf) = _nbool_correspond_ft_tf(u, v)
     return float(nft + ntf) / float(len(u))
 
@@ -1054,9 +1093,14 @@
 
       for k < n.
     """
-    ntt = scipy.bitwise_and(u, v).sum()
+    u = np.asarray(u)
+    v = np.asarray(v)
+    if u.dtype == np.bool:
+        ntt = (u & v).sum()
+    else:
+        ntt = (u * v).sum()
     (nft, ntf) = _nbool_correspond_ft_tf(u, v)
-    return float(ntf + nft)/float(2.0 * ntt + ntf + nft)
+    return float(ntf + nft) / float(2.0 * ntt + ntf + nft)
 
 def rogerstanimoto(u, v):
     """
@@ -1078,6 +1122,8 @@
          R = 2.0 * (c_{TF} + c_{FT}).
 
     """
+    u = np.asarray(u)
+    v = np.asarray(v)
     (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
     return float(2.0 * (ntf + nft)) / float(ntt + nff + (2.0 * (ntf + nft)))
 
@@ -1089,7 +1135,12 @@
       u and v, (n - c_{TT}) / n where c_{ij} is the number of occurrences
       of u[k] == i and v[k] == j for k < n.
     """
-    ntt = scipy.bitwise_and(u, v).sum()
+    u = np.asarray(u)
+    v = np.asarray(v)
+    if u.dtype == np.bool:
+        ntt = (u & v).sum()
+    else:
+        ntt = (u * v).sum()
     return float(len(u) - ntt) / float(len(u))
 
 def sokalmichener(u, v):
@@ -1101,8 +1152,14 @@
       u[k] == i and v[k] == j for k < n and R = 2 * (c_{TF} + c{FT}) and
       S = c_{FF} + c_{TT}.
     """
-    ntt = scipy.bitwise_and(u, v).sum()
-    nff = scipy.bitwise_and(scipy.bitwise_not(u), scipy.bitwise_not(v)).sum()
+    u = np.asarray(u)
+    v = np.asarray(v)
+    if u.dtype == np.bool:
+        ntt = (u & v).sum()
+        nff = (~u & ~v).sum()
+    else:
+        ntt = (u * v).sum()
+        nff = ((1.0 - u) * (1.0 - v)).sum()
     (nft, ntf) = _nbool_correspond_ft_tf(u, v)
     return float(2.0 * (ntf + nft))/float(ntt + nff + 2.0 * (ntf + nft))
 
@@ -1114,33 +1171,29 @@
       u and v, 2R / (c_{TT} + 2R) where c_{ij} is the number of occurrences
       of u[k] == i and v[k] == j for k < n and R = 2 * (c_{TF} + c{FT}).
     """
-    ntt = scipy.bitwise_and(u, v).sum()
+    u = np.asarray(u)
+    v = np.asarray(v)
+    if u.dtype == np.bool:
+        ntt = (u & v).sum()
+    else:
+        ntt = (u * v).sum()
     (nft, ntf) = _nbool_correspond_ft_tf(u, v)
     return float(2.0 * (ntf + nft))/float(ntt + 2.0 * (ntf + nft))
 
-# V means pass covariance
-_pdist_metric_info = {'euclidean': ['double'],
-                      'seuclidean': ['double'],
-                      'sqeuclidean': ['double'],
-                      'minkowski': ['double'],
-                      'cityblock': ['double'],
-                      'cosine': ['double'],
-                      'correlation': ['double'],
-                      'hamming': ['double','bool'],
-                      'jaccard': ['double', 'bool'],
-                      'chebyshev': ['double'],
-                      'canberra': ['double'],
-                      'braycurtis': ['double'],
-                      'mahalanobis': ['bool'],
-                      'yule': ['bool'],
-                      'matching': ['bool'],
-                      'dice': ['bool'],
-                      'kulsinski': ['bool'],
-                      'rogerstanimoto': ['bool'],
-                      'russellrao': ['bool'],
-                      'sokalmichener': ['bool'],
-                      'sokalsneath': ['bool']}
+def _convert_to_bool(X):
+    if X.dtype != np.bool:
+        X = np.bool_(X)
+    if not X.flags.contiguous:
+        X = X.copy()
+    return X
 
+def _convert_to_double(X):
+    if X.dtype != np.double:
+        X = np.double(X)
+    if not X.flags.contiguous:
+        X = X.copy()
+    return X
+
 def pdist(X, metric='euclidean', p=2, V=None, VI=None):
     """ Y = pdist(X, method='euclidean', p=2)
 
@@ -1322,12 +1375,12 @@
 
     X = np.asarray(X)
 
-    if np.issubsctype(X, np.floating) and not np.issubsctype(X, np.double):
-        raise TypeError('Floating point arrays must be 64-bit (got %r).' %
-        (X.dtype.type,))
+    #if np.issubsctype(X, np.floating) and not np.issubsctype(X, np.double):
+    #    raise TypeError('Floating point arrays must be 64-bit (got %r).' %
+    #    (X.dtype.type,))
 
     # The C code doesn't do striding.
-    [X] = _copy_arrays_if_base_present([X])
+    [X] = _copy_arrays_if_base_present([_convert_to_double(X)])
 
     s = X.shape
 
@@ -1365,38 +1418,33 @@
     elif mtype is types.StringType:
         mstr = metric.lower()
 
-        if X.dtype != np.double and \
-               (mstr != 'hamming' and mstr != 'jaccard'):
-            TypeError('A double array must be passed.')
+        #if X.dtype != np.double and \
+        #       (mstr != 'hamming' and mstr != 'jaccard'):
+        #    TypeError('A double array must be passed.')
         if mstr in set(['euclidean', 'euclid', 'eu', 'e']):
-            _hierarchy_wrap.pdist_euclidean_wrap(X, dm)
-        elif mstr in set(['sqeuclidean']):
-            _hierarchy_wrap.pdist_euclidean_wrap(X, dm)
+            _hierarchy_wrap.pdist_euclidean_wrap(_convert_to_double(X), dm)
+        elif mstr in set(['sqeuclidean', 'sqe', 'sqeuclid']):
+            _hierarchy_wrap.pdist_euclidean_wrap(_convert_to_double(X), dm)
             dm = dm ** 2.0
         elif mstr in set(['cityblock', 'cblock', 'cb', 'c']):
             _hierarchy_wrap.pdist_city_block_wrap(X, dm)
         elif mstr in set(['hamming', 'hamm', 'ha', 'h']):
-            if X.dtype == np.double:
-                _hierarchy_wrap.pdist_hamming_wrap(X, dm)
-            elif X.dtype == bool:
-                _hierarchy_wrap.pdist_hamming_bool_wrap(X, dm)
+            if X.dtype == np.bool:
+                _hierarchy_wrap.pdist_hamming_bool_wrap(_convert_to_bool(X), dm)
             else:
-                raise TypeError('Invalid input array value type %s '
-                                'for hamming.' % str(X.dtype))
+                _hierarchy_wrap.pdist_hamming_wrap(_convert_to_double(X), dm)
         elif mstr in set(['jaccard', 'jacc', 'ja', 'j']):
-            if X.dtype == np.double:
-                _hierarchy_wrap.pdist_jaccard_wrap(X, dm)
-            elif X.dtype == np.bool:
-                _hierarchy_wrap.pdist_jaccard_bool_wrap(X, dm)
+            if X.dtype == np.bool:
+                _hierarchy_wrap.pdist_jaccard_bool_wrap(_convert_to_bool(X), dm)
             else:
-                raise TypeError('Invalid input array value type %s for '
-                                'jaccard.' % str(X.dtype))
+                _hierarchy_wrap.pdist_jaccard_wrap(_convert_to_double(X), dm)
         elif mstr in set(['chebychev', 'chebyshev', 'cheby', 'cheb', 'ch']):
-            _hierarchy_wrap.pdist_chebyshev_wrap(X, dm)
+            _hierarchy_wrap.pdist_chebyshev_wrap(_convert_to_double(X), dm)
         elif mstr in set(['minkowski', 'mi', 'm']):
-            _hierarchy_wrap.pdist_minkowski_wrap(X, dm, p)
+            _hierarchy_wrap.pdist_minkowski_wrap(_convert_to_double(X), dm, p)
         elif mstr in set(['seuclidean', 'se', 's']):
             if V is not None:
+                V = np.asarray(V)
                 if type(V) is not _array_type:
                     raise TypeError('Variance vector V must be a numpy array')
                 if V.dtype != np.double:
@@ -1406,17 +1454,17 @@
                 if V.shape[0] != n:
                     raise ValueError('Variance vector V must be of the same dimension as the vectors on which the distances are computed.')
                 # The C code doesn't do striding.
-                [VV] = _copy_arrays_if_base_present([V])
+                [VV] = _copy_arrays_if_base_present([_convert_to_double(V)])
             else:
                 VV = _unbiased_variance(X)
-            _hierarchy_wrap.pdist_seuclidean_wrap(X, VV, dm)
+            _hierarchy_wrap.pdist_seuclidean_wrap(_convert_to_double(X), VV, dm)
         # Need to test whether vectorized cosine works better.
         # Find out: Is there a dot subtraction operator so I can
         # subtract matrices in a similar way to multiplying them?
         # Need to get rid of as much unnecessary C code as possible.
         elif mstr in set(['cosine_old', 'cos_old']):
             norms = np.sqrt(np.sum(X * X, axis=1))
-            _hierarchy_wrap.pdist_cosine_wrap(X, dm, norms)
+            _hierarchy_wrap.pdist_cosine_wrap(_convert_to_double(X), dm, norms)
         elif mstr in set(['cosine', 'cos']):
             norms = np.sqrt(np.sum(X * X, axis=1))
             nV = norms.reshape(m, 1)
@@ -1431,9 +1479,10 @@
             X2 = X - X.mean(1)[:,np.newaxis]
             #X2 = X - np.matlib.repmat(np.mean(X, axis=1).reshape(m, 1), 1, n)
             norms = np.sqrt(np.sum(X2 * X2, axis=1))
-            _hierarchy_wrap.pdist_cosine_wrap(X2, dm, norms)
+            _hierarchy_wrap.pdist_cosine_wrap(_convert_to_double(X2), _convert_to_double(dm), _convert_to_double(norms))
         elif mstr in set(['mahalanobis', 'mahal', 'mah']):
             if VI is not None:
+                VI = _convert_to_double(np.asarray(VI))
                 if type(VI) != _array_type:
                     raise TypeError('VI must be a numpy array.')
                 if VI.dtype != np.double:
@@ -1441,29 +1490,29 @@
                 [VI] = _copy_arrays_if_base_present([VI])
             else:
                 V = np.cov(X.T)
-                VI = np.linalg.inv(V).T.copy()
+                VI = _convert_to_double(np.linalg.inv(V).T.copy())
             # (u-v)V^(-1)(u-v)^T
-            _hierarchy_wrap.pdist_mahalanobis_wrap(X, VI, dm)
+            _hierarchy_wrap.pdist_mahalanobis_wrap(_convert_to_double(X), VI, dm)
         elif mstr == 'canberra':
-            _hierarchy_wrap.pdist_canberra_wrap(X, dm)
+            _hierarchy_wrap.pdist_canberra_wrap(_convert_to_bool(X), dm)
         elif mstr == 'braycurtis':
-            _hierarchy_wrap.pdist_bray_curtis_wrap(X, dm)
+            _hierarchy_wrap.pdist_bray_curtis_wrap(_convert_to_bool(X), dm)
         elif mstr == 'yule':
-            _hierarchy_wrap.pdist_yule_bool_wrap(X, dm)
+            _hierarchy_wrap.pdist_yule_bool_wrap(_convert_to_bool(X), dm)
         elif mstr == 'matching':
-            _hierarchy_wrap.pdist_matching_bool_wrap(X, dm)
+            _hierarchy_wrap.pdist_matching_bool_wrap(_convert_to_bool(X), dm)
         elif mstr == 'kulsinski':
-            _hierarchy_wrap.pdist_kulsinski_bool_wrap(X, dm)
+            _hierarchy_wrap.pdist_kulsinski_bool_wrap(_convert_to_bool(X), dm)
         elif mstr == 'dice':
-            _hierarchy_wrap.pdist_dice_bool_wrap(X, dm)
+            _hierarchy_wrap.pdist_dice_bool_wrap(_convert_to_bool(X), dm)
         elif mstr == 'rogerstanimoto':
-            _hierarchy_wrap.pdist_rogerstanimoto_bool_wrap(X, dm)
+            _hierarchy_wrap.pdist_rogerstanimoto_bool_wrap(_convert_to_bool(X), dm)
         elif mstr == 'russellrao':
-            _hierarchy_wrap.pdist_russellrao_bool_wrap(X, dm)
+            _hierarchy_wrap.pdist_russellrao_bool_wrap(_convert_to_bool(X), dm)
         elif mstr == 'sokalmichener':
-            _hierarchy_wrap.pdist_sokalmichener_bool_wrap(X, dm)
+            _hierarchy_wrap.pdist_sokalmichener_bool_wrap(_convert_to_bool(X), dm)
         elif mstr == 'sokalsneath':
-            _hierarchy_wrap.pdist_sokalsneath_bool_wrap(X, dm)
+            _hierarchy_wrap.pdist_sokalsneath_bool_wrap(_convert_to_bool(X), dm)
         elif metric == 'test_euclidean':
             dm = pdist(X, euclidean)
         elif metric == 'test_sqeuclidean':
@@ -1499,12 +1548,16 @@
             dm = pdist(X, matching)
         elif metric == 'test_dice':
             dm = pdist(X, dice)
+        elif metric == 'test_kulsinski':
+            dm = pdist(X, kulsinski)
         elif metric == 'test_rogerstanimoto':
             dm = pdist(X, rogerstanimoto)
         elif metric == 'test_russellrao':
             dm = pdist(X, russellrao)
         elif metric == 'test_sokalsneath':
             dm = pdist(X, sokalsneath)
+        elif metric == 'test_sokalmichener':
+            dm = pdist(X, sokalmichener)
         else:
             raise ValueError('Unknown Distance Metric: %s' % mstr)
     else:
@@ -1919,7 +1972,7 @@
       Returns the number of original observations that correspond to a
       square, non-condensed distance matrix D.
     """
-    is_valid_dm(D, tol=scipy.inf, throw=True, name='D')
+    is_valid_dm(D, tol=np.inf, throw=True, name='D')
     return D.shape[0]
 
 def numobs_y(Y):
@@ -2123,10 +2176,10 @@
     # p <= 20, size="12"
     # 20 < p <= 30, size="10"
     # 30 < p <= 50, size="8"
-    # 50 < p <= scipy.inf, size="6"
+    # 50 < p <= np.inf, size="6"
 
-    _dtextsizes = {20: 12, 30: 10, 50: 8, 85: 6, scipy.inf: 5}
-    _drotation =  {20: 0,          40: 45,       scipy.inf: 90}
+    _dtextsizes = {20: 12, 30: 10, 50: 8, 85: 6, np.inf: 5}
+    _drotation =  {20: 0,          40: 45,       np.inf: 90}
     _dtextsortedkeys = list(_dtextsizes.keys())
     _dtextsortedkeys.sort()
     _drotationsortedkeys = list(_drotation.keys())
@@ -2162,7 +2215,7 @@
         ivw = len(ivl) * 10
         # Depenendent variable plot height
         dvw = mh + mh * 0.05
-        ivticks = scipy.arange(5, len(ivl)*10+5, 10)
+        ivticks = np.arange(5, len(ivl)*10+5, 10)
         if orientation == 'top':
             axis.set_ylim([0, dvw])
             axis.set_xlim([0, ivw])
@@ -2558,7 +2611,7 @@
 
     if truncate_mode == 'mtica' or truncate_mode == 'level':
         if p <= 0:
-            p = scipy.inf
+            p = np.inf
     if get_leaves:
         lvs = []
     else:
@@ -2658,7 +2711,7 @@
 
 
 def _dendrogram_calculate_info(Z, p, truncate_mode, \
-                               colorthreshold=scipy.inf, get_leaves=True, \
+                               colorthreshold=np.inf, get_leaves=True, \
                                orientation='top', labels=None, \
                                count_sort=False, distance_sort=False, \
                                show_leaf_counts=False, i=-1, iv=0.0, \
@@ -2940,6 +2993,7 @@
       Note that when Z[:,2] is monotonic, Z[:,2] and MD should not differ.
       See linkage for more information on this issue.
     """
+    Z = np.asarray(Z)
     is_valid_linkage(Z, throw=True, name='Z')
 
     n = Z.shape[0] + 1
@@ -2957,6 +3011,8 @@
       inconsistency matrix. MI is a monotonic (n-1)-sized numpy array of
       doubles.
     """
+    Z = np.asarray(Z)
+    R = np.asarray(R)
     is_valid_linkage(Z, throw=True, name='Z')
     is_valid_im(R, throw=True, name='R')
 
@@ -2975,6 +3031,8 @@
     is the maximum over R[Q(j)-n, i] where Q(j) the set of all node ids
     corresponding to nodes below and including j.
     """
+    Z = np.asarray(Z)
+    R = np.asarray(R)
     is_valid_linkage(Z, throw=True, name='Z')
     is_valid_im(R, throw=True, name='R')
     if type(i) is not types.IntType:

Modified: trunk/scipy/cluster/tests/test_hierarchy.py
===================================================================
--- trunk/scipy/cluster/tests/test_hierarchy.py	2008-06-03 04:41:10 UTC (rev 4404)
+++ trunk/scipy/cluster/tests/test_hierarchy.py	2008-06-03 06:50:49 UTC (rev 4405)
@@ -68,7 +68,8 @@
               "linkage-single-tdist.txt",
               "linkage-complete-tdist.txt",
               "linkage-average-tdist.txt",
-              "linkage-weighted-tdist.txt"]
+              "linkage-weighted-tdist.txt",
+              "random-bool-data.txt"]
 
 _tdist = numpy.array([[0,    662,  877,  255,  412,  996],
                       [662,  0,    295,  468,  268,  400],
@@ -101,73 +102,10 @@
 #print numpy.abs(Y_test1 - Y_right).max()
 
 class TestPdist(TestCase):
+    """
+    Test suite for the pdist function.
+    """
 
-    def test_pdist_raises_type_error_float32(self):
-        "Testing whether passing a float32 observation array generates an exception."
-        X = numpy.zeros((10, 10), dtype=numpy.float32)
-        try:
-            pdist(X, 'euclidean')
-        except TypeError:
-            pass
-        except:
-            self.fail("float32 observation matrices should generate an error in pdist.")
-
-    def test_pdist_raises_type_error_longdouble(self):
-        "Testing whether passing a longdouble observation array generates an exception."
-        X = numpy.zeros((10, 10), dtype=numpy.longdouble)
-        try:
-            pdist(X, 'euclidean')
-        except TypeError:
-            pass
-        except:
-            self.fail("longdouble observation matrices should generate an error in pdist.")
-
-    def test_pdist_var_raises_type_error_float32(self):
-        "Testing whether passing a float32 variance matrix generates an exception."
-        X = numpy.zeros((10, 10))
-        V = numpy.zeros((10, 10), dtype=numpy.float32)
-        try:
-            pdist(X, 'seuclidean', V=V)
-        except TypeError:
-            pass
-        except:
-            self.fail("float32 V matrices should generate an error in pdist('seuclidean').")
-
-    def test_pdist_var_raises_type_error_longdouble(self):
-        "Testing whether passing a longdouble variance matrix generates an exception."
-        X = numpy.zeros((10, 10))
-        V = numpy.zeros((10, 10), dtype=numpy.longdouble)
-
-        try:
-            pdist(X, 'seuclidean', V=V)
-        except TypeError:
-            pass
-        except:
-            self.fail("longdouble matrices should generate an error in pdist('seuclidean').")
-
-    def test_pdist_ivar_raises_type_error_float32(self):
-        "Testing whether passing a float32 variance matrix generates an exception."
-        X = numpy.zeros((10, 10))
-        VI = numpy.zeros((10, 10), dtype=numpy.float32)
-        try:
-            pdist(X, 'mahalanobis', VI=VI)
-        except TypeError:
-            pass
-        except:
-            self.fail("float32 matrices should generate an error in pdist('mahalanobis').")
-
-    def test_pdist_ivar_raises_type_error_longdouble(self):
-        "Testing whether passing a longdouble variance matrix generates an exception."
-        X = numpy.zeros((10, 10))
-        VI = numpy.zeros((10, 10), dtype=numpy.longdouble)
-
-        try:
-            pdist(X, 'mahalanobis', VI=VI)
-        except TypeError:
-            pass
-        except:
-            self.fail("longdouble matrices should generate an error in pdist('mahalanobis').")
-
     ################### pdist: euclidean
     def test_pdist_euclidean_random(self):
         "Tests pdist(X, 'euclidean') on random data."
@@ -179,6 +117,16 @@
         Y_test1 = pdist(X, 'euclidean')
         self.failUnless(within_tol(Y_test1, Y_right, eps))
 
+    def test_pdist_euclidean_random_float32(self):
+        "Tests pdist(X, 'euclidean') on random data (float32)."
+        eps = 1e-07
+        # Get the data: the input matrix and the right output.
+        X = numpy.float32(eo['pdist-double-inp'])
+        Y_right = eo['pdist-euclidean']
+
+        Y_test1 = pdist(X, 'euclidean')
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
     def test_pdist_euclidean_random_nonC(self):
         "Tests pdist(X, 'test_euclidean') [the non-C implementation] on random data."
         eps = 1e-07
@@ -188,7 +136,7 @@
         Y_test2 = pdist(X, 'test_euclidean')
         self.failUnless(within_tol(Y_test2, Y_right, eps))
 
-    def test_pdist_euclidean_iris(self):
+    def test_pdist_euclidean_iris_double(self):
         "Tests pdist(X, 'euclidean') on the Iris data set."
         eps = 1e-07
         # Get the data: the input matrix and the right output.
@@ -198,6 +146,17 @@
         Y_test1 = pdist(X, 'euclidean')
         self.failUnless(within_tol(Y_test1, Y_right, eps))
 
+    def test_pdist_euclidean_iris_float32(self):
+        "Tests pdist(X, 'euclidean') on the Iris data set. (float32)"
+        eps = 1e-06
+        # Get the data: the input matrix and the right output.
+        X = numpy.float32(eo['iris'])
+        Y_right = eo['pdist-euclidean-iris']
+
+        Y_test1 = pdist(X, 'euclidean')
+        print numpy.abs(Y_right - Y_test1).max()
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
     def test_pdist_euclidean_iris_nonC(self):
         "Tests pdist(X, 'test_euclidean') [the non-C implementation] on the Iris data set."
         eps = 1e-07
@@ -218,6 +177,16 @@
         Y_test1 = pdist(X, 'seuclidean')
         self.failUnless(within_tol(Y_test1, Y_right, eps))
 
+    def test_pdist_seuclidean_random_float32(self):
+        "Tests pdist(X, 'seuclidean') on random data (float32)."
+        eps = 1e-05
+        # Get the data: the input matrix and the right output.
+        X = numpy.float32(eo['pdist-double-inp'])
+        Y_right = eo['pdist-seuclidean']
+
+        Y_test1 = pdist(X, 'seuclidean')
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
     def test_pdist_seuclidean_random_nonC(self):
         "Tests pdist(X, 'test_sqeuclidean') [the non-C implementation] on random data."
         eps = 1e-05
@@ -237,6 +206,16 @@
         Y_test1 = pdist(X, 'seuclidean')
         self.failUnless(within_tol(Y_test1, Y_right, eps))
 
+    def test_pdist_seuclidean_iris_float32(self):
+        "Tests pdist(X, 'seuclidean') on the Iris data set (float32)."
+        eps = 1e-05
+        # Get the data: the input matrix and the right output.
+        X = numpy.float32(eo['iris'])
+        Y_right = eo['pdist-seuclidean-iris']
+
+        Y_test1 = pdist(X, 'seuclidean')
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
     def test_pdist_seuclidean_iris_nonC(self):
         "Tests pdist(X, 'test_seuclidean') [the non-C implementation] on the Iris data set."
         eps = 1e-05
@@ -253,7 +232,16 @@
         # Get the data: the input matrix and the right output.
         X = eo['pdist-double-inp']
         Y_right = eo['pdist-cosine']
+        Y_test1 = pdist(X, 'cosine')
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
 
+    def test_pdist_cosine_random_float32(self):
+        "Tests pdist(X, 'cosine') on random data. (float32)"
+        eps = 1e-08
+        # Get the data: the input matrix and the right output.
+        X = numpy.float32(eo['pdist-double-inp'])
+        Y_right = eo['pdist-cosine']
+
         Y_test1 = pdist(X, 'cosine')
         self.failUnless(within_tol(Y_test1, Y_right, eps))
 
@@ -277,6 +265,18 @@
         self.failUnless(within_tol(Y_test1, Y_right, eps))
         #print "cosine-iris", numpy.abs(Y_test1 - Y_right).max()
 
+    def test_pdist_cosine_iris_float32(self):
+        "Tests pdist(X, 'cosine') on the Iris data set."
+        eps = 1e-07
+        # Get the data: the input matrix and the right output.
+        X = numpy.float32(eo['iris'])
+        Y_right = eo['pdist-cosine-iris']
+
+        Y_test1 = pdist(X, 'cosine')
+        print numpy.abs(Y_test1 - Y_right).max()
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+        #print "cosine-iris", numpy.abs(Y_test1 - Y_right).max()
+
     def test_pdist_cosine_iris_nonC(self):
         "Tests pdist(X, 'test_cosine') [the non-C implementation] on the Iris data set."
         eps = 1e-08
@@ -293,7 +293,16 @@
         # Get the data: the input matrix and the right output.
         X = eo['pdist-double-inp']
         Y_right = eo['pdist-cityblock']
+        Y_test1 = pdist(X, 'cityblock')
+        #print "cityblock", numpy.abs(Y_test1 - Y_right).max()
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
 
+    def test_pdist_cityblock_random_float32(self):
+        "Tests pdist(X, 'cityblock') on random data. (float32)"
+        eps = 1e-06
+        # Get the data: the input matrix and the right output.
+        X = numpy.float32(eo['pdist-double-inp'])
+        Y_right = eo['pdist-cityblock']
         Y_test1 = pdist(X, 'cityblock')
         #print "cityblock", numpy.abs(Y_test1 - Y_right).max()
         self.failUnless(within_tol(Y_test1, Y_right, eps))
@@ -318,6 +327,17 @@
         self.failUnless(within_tol(Y_test1, Y_right, eps))
         #print "cityblock-iris", numpy.abs(Y_test1 - Y_right).max()
 
+    def test_pdist_cityblock_iris_float32(self):
+        "Tests pdist(X, 'cityblock') on the Iris data set. (float32)"
+        eps = 1e-06
+        # Get the data: the input matrix and the right output.
+        X = numpy.float32(eo['iris'])
+        Y_right = eo['pdist-cityblock-iris']
+
+        Y_test1 = pdist(X, 'cityblock')
+        print "cityblock-iris-float32", numpy.abs(Y_test1 - Y_right).max()
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
     def test_pdist_cityblock_iris_nonC(self):
         "Tests pdist(X, 'test_cityblock') [the non-C implementation] on the Iris data set."
         eps = 1e-14
@@ -339,6 +359,17 @@
         #print "correlation", numpy.abs(Y_test1 - Y_right).max()
         self.failUnless(within_tol(Y_test1, Y_right, eps))
 
+    def test_pdist_correlation_random_float32(self):
+        "Tests pdist(X, 'correlation') on random data. (float32)"
+        eps = 1e-07
+        # Get the data: the input matrix and the right output.
+        X = numpy.float32(eo['pdist-double-inp'])
+        Y_right = eo['pdist-correlation']
+
+        Y_test1 = pdist(X, 'correlation')
+        #print "correlation", numpy.abs(Y_test1 - Y_right).max()
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
     def test_pdist_correlation_random_nonC(self):
         "Tests pdist(X, 'test_correlation') [the non-C implementation] on random data."
         eps = 1e-07
@@ -359,6 +390,17 @@
         #print "correlation-iris", numpy.abs(Y_test1 - Y_right).max()
         self.failUnless(within_tol(Y_test1, Y_right, eps))
 
+    def test_pdist_correlation_iris_float32(self):
+        "Tests pdist(X, 'correlation') on the Iris data set. (float32)"
+        eps = 1e-07
+        # Get the data: the input matrix and the right output.
+        # Cast the *input* (not the reference output) to float32, as every
+        # other float32 variant of these tests does; casting Y_right tested
+        # nothing about float32 support in pdist.
+        X = numpy.float32(eo['iris'])
+        Y_right = eo['pdist-correlation-iris']
+
+        Y_test1 = pdist(X, 'correlation')
+        # NOTE(review): eps=1e-07 may be too tight for float32 input —
+        # confirm by running; the debug print stays commented out.
+        #print "correlation-iris-float32", numpy.abs(Y_test1 - Y_right).max()
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
     def test_pdist_correlation_iris_nonC(self):
         "Tests pdist(X, 'test_correlation') [the non-C implementation] on the Iris data set."
         eps = 1e-08
@@ -382,6 +424,17 @@
         #print "minkowski", numpy.abs(Y_test1 - Y_right).max()
         self.failUnless(within_tol(Y_test1, Y_right, eps))
 
+    def test_pdist_minkowski_random_float32(self):
+        "Tests pdist(X, 'minkowski') with p=3.2 on random data. (float32)"
+        eps = 1e-05
+        # Get the data: the input matrix and the right output.
+        X = numpy.float32(eo['pdist-double-inp'])
+        Y_right = eo['pdist-minkowski-3.2']
+
+        # The third positional argument is the Minkowski exponent p.
+        Y_test1 = pdist(X, 'minkowski', 3.2)
+        #print "minkowski", numpy.abs(Y_test1 - Y_right).max()
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
     def test_pdist_minkowski_random_nonC(self):
         "Tests pdist(X, 'test_minkowski') [the non-C implementation] on random data."
         eps = 1e-05
@@ -397,7 +450,16 @@
         # Get the data: the input matrix and the right output.
         X = eo['iris']
         Y_right = eo['pdist-minkowski-3.2-iris']
+        Y_test1 = pdist(X, 'minkowski', 3.2)
+        #print "minkowski-iris-3.2", numpy.abs(Y_test1 - Y_right).max()
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
 
+    def test_pdist_minkowski_3_2_iris_float32(self):
+        "Tests pdist(X, 'minkowski') with p=3.2 on iris data. (float32)"
+        # The p value is encoded in the method name so it cannot collide
+        # with the p=5.8 float32 minkowski test; a duplicate method name
+        # makes Python keep only the later definition, silently skipping
+        # this test.
+        # NOTE(review): eps=1e-07 may be too tight for float32 input —
+        # confirm by running.
+        eps = 1e-07
+        # Get the data: the input matrix and the right output.
+        X = numpy.float32(eo['iris'])
+        Y_right = eo['pdist-minkowski-3.2-iris']
         Y_test1 = pdist(X, 'minkowski', 3.2)
         #print "minkowski-iris-3.2", numpy.abs(Y_test1 - Y_right).max()
         self.failUnless(within_tol(Y_test1, Y_right, eps))
@@ -417,11 +479,21 @@
         # Get the data: the input matrix and the right output.
         X = eo['iris']
         Y_right = eo['pdist-minkowski-5.8-iris']
-
         Y_test1 = pdist(X, 'minkowski', 5.8)
         #print "minkowski-iris-5.8", numpy.abs(Y_test1 - Y_right).max()
         self.failUnless(within_tol(Y_test1, Y_right, eps))
 
+    def test_pdist_minkowski_5_8_iris_float32(self):
+        "Tests pdist(X, 'minkowski') with p=5.8 on iris data. (float32)"
+        # The p value is encoded in the method name so it cannot collide
+        # with (and silently shadow) the p=3.2 float32 minkowski test.
+        eps = 1e-06
+        # Get the data: the input matrix and the right output.
+        X = numpy.float32(eo['iris'])
+        Y_right = eo['pdist-minkowski-5.8-iris']
+
+        Y_test1 = pdist(X, 'minkowski', 5.8)
+        # Debug output kept commented out, matching the sibling tests.
+        #print "minkowski-iris-5.8", numpy.abs(Y_test1 - Y_right).max()
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
     def test_pdist_minkowski_iris_nonC(self):
         "Tests pdist(X, 'test_minkowski') [the non-C implementation] on iris data."
         eps = 1e-07
@@ -443,6 +515,17 @@
         #print "hamming", numpy.abs(Y_test1 - Y_right).max()
         self.failUnless(within_tol(Y_test1, Y_right, eps))
 
+    def test_pdist_hamming_random_float32(self):
+        "Tests pdist(X, 'hamming') on random data. (float32)"
+        eps = 1e-07
+        # Get the data: the input matrix and the right output.
+        # Boolean input cast to float32; hamming accepts non-boolean arrays.
+        X = numpy.float32(eo['pdist-boolean-inp'])
+        Y_right = eo['pdist-hamming']
+
+        Y_test1 = pdist(X, 'hamming')
+        #print "hamming", numpy.abs(Y_test1 - Y_right).max()
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
     def test_pdist_hamming_random_nonC(self):
         "Tests pdist(X, 'test_hamming') [the non-C implementation] on random data."
         eps = 1e-07
@@ -460,7 +543,16 @@
         # Get the data: the input matrix and the right output.
         X = numpy.float64(eo['pdist-boolean-inp'])
         Y_right = eo['pdist-hamming']
+        Y_test1 = pdist(X, 'hamming')
+        #print "hamming", numpy.abs(Y_test1 - Y_right).max()
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
 
+    def test_pdist_dhamming_random_float32(self):
+        "Tests pdist(X, 'hamming') on random data. (float32)"
+        eps = 1e-07
+        # Get the data: the input matrix and the right output.
+        # 'd' presumably marks the floating-point-input variant of the
+        # hamming test (the non-float32 one casts to float64) — TODO confirm.
+        X = numpy.float32(eo['pdist-boolean-inp'])
+        Y_right = eo['pdist-hamming']
         Y_test1 = pdist(X, 'hamming')
         #print "hamming", numpy.abs(Y_test1 - Y_right).max()
         self.failUnless(within_tol(Y_test1, Y_right, eps))
@@ -487,6 +579,17 @@
         #print "jaccard", numpy.abs(Y_test1 - Y_right).max()
         self.failUnless(within_tol(Y_test1, Y_right, eps))
 
+    def test_pdist_jaccard_random_float32(self):
+        "Tests pdist(X, 'jaccard') on random data. (float32)"
+        eps = 1e-08
+        # Get the data: the input matrix and the right output.
+        # Boolean reference input cast to float32.
+        X = numpy.float32(eo['pdist-boolean-inp'])
+        Y_right = eo['pdist-jaccard']
+
+        Y_test1 = pdist(X, 'jaccard')
+        #print "jaccard", numpy.abs(Y_test1 - Y_right).max()
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
     def test_pdist_jaccard_random_nonC(self):
         "Tests pdist(X, 'test_jaccard') [the non-C implementation] on random data."
         eps = 1e-08
@@ -509,6 +612,17 @@
         #print "jaccard", numpy.abs(Y_test1 - Y_right).max()
         self.failUnless(within_tol(Y_test1, Y_right, eps))
 
+    def test_pdist_djaccard_random_float32(self):
+        "Tests pdist(X, 'jaccard') on random data. (float32)"
+        eps = 1e-08
+        # Get the data: the input matrix and the right output.
+        # 'd' presumably marks the floating-point-input variant of the
+        # jaccard test — TODO confirm.
+        X = numpy.float32(eo['pdist-boolean-inp'])
+        Y_right = eo['pdist-jaccard']
+
+        Y_test1 = pdist(X, 'jaccard')
+        #print "jaccard", numpy.abs(Y_test1 - Y_right).max()
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
     def test_pdist_djaccard_random_nonC(self):
         "Tests pdist(X, 'test_jaccard') [the non-C implementation] on random data."
         eps = 1e-08
@@ -531,6 +645,17 @@
         #print "chebychev", numpy.abs(Y_test1 - Y_right).max()
         self.failUnless(within_tol(Y_test1, Y_right, eps))
 
+    def test_pdist_chebychev_random_float32(self):
+        "Tests pdist(X, 'chebychev') on random data. (float32)"
+        eps = 1e-07
+        # Get the data: the input matrix and the right output.
+        X = numpy.float32(eo['pdist-double-inp'])
+        Y_right = eo['pdist-chebychev']
+
+        Y_test1 = pdist(X, 'chebychev')
+        # Debug output kept commented out, matching the sibling tests.
+        #print "chebychev", numpy.abs(Y_test1 - Y_right).max()
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
+
     def test_pdist_chebychev_random_nonC(self):
         "Tests pdist(X, 'test_chebychev') [the non-C implementation] on random data."
         eps = 1e-08
@@ -547,20 +672,19 @@
         # Get the data: the input matrix and the right output.
         X = eo['iris']
         Y_right = eo['pdist-chebychev-iris']
-
         Y_test1 = pdist(X, 'chebychev')
         #print "chebychev-iris", numpy.abs(Y_test1 - Y_right).max()
         self.failUnless(within_tol(Y_test1, Y_right, eps))
 
-    def test_pdist_chebychev_iris_nonC(self):
-        "Tests pdist(X, 'test_chebychev') [the non-C implementation] on the Iris data set."
-        eps = 1e-15
+    def test_pdist_chebychev_iris_float32(self):
+        "Tests pdist(X, 'chebychev') on the Iris data set. (float32)"
+        eps = 1e-06
         # Get the data: the input matrix and the right output.
-        X = eo['iris']
+        X = numpy.float32(eo['iris'])
         Y_right = eo['pdist-chebychev-iris']
-        Y_test2 = pdist(X, 'test_chebychev')
-        #print "test-chebychev-iris", numpy.abs(Y_test2 - Y_right).max()
-        self.failUnless(within_tol(Y_test2, Y_right, eps))
+        Y_test1 = pdist(X, 'chebychev')
+        # Debug output kept commented out, matching the sibling tests.
+        #print "chebychev-iris-float32", numpy.abs(Y_test1 - Y_right).max()
+        self.failUnless(within_tol(Y_test1, Y_right, eps))
 
     def test_pdist_chebychev_iris_nonC(self):
         "Tests pdist(X, 'test_chebychev') [the non-C implementation] on the Iris data set."
@@ -590,6 +714,20 @@
         self.failUnless(numpy.abs(m - (2.0/3.0)) <= 1e-10)
         self.failUnless(numpy.abs(m2 - (2.0/3.0)) <= 1e-10)
 
+    def test_pdist_matching_match(self):
+        "Tests pdist('matching') to see if the two implementations match on random boolean input data."
+        D = eo['random-bool-data']
+        B = numpy.bool_(D)
+        print B.shape, B.dtype
+        eps = 1e-10
+        # y1: C implementation; y2/y3: pure-Python implementation on the
+        # boolean-cast and on the raw input, respectively.
+        y1 = pdist(B, "matching")
+        y2 = pdist(B, "test_matching")
+        y3 = pdist(D, "test_matching")
+        print numpy.abs(y1-y2).max()
+        print numpy.abs(y1-y3).max()
+        self.failUnless(within_tol(y1, y2, eps))
+        self.failUnless(within_tol(y2, y3, eps))
+
     def test_pdist_jaccard_mtica1(self):
         "Tests jaccard(*,*) with mtica example #1."
         m = jaccard(numpy.array([1, 0, 1, 1, 0]),
@@ -608,6 +746,19 @@
         self.failUnless(numpy.abs(m - (2.0/3.0)) <= 1e-10)
         self.failUnless(numpy.abs(m2 - (2.0/3.0)) <= 1e-10)
 
+    def test_pdist_jaccard_match(self):
+        "Tests pdist('jaccard') to see if the two implementations match on random double input data."
+        D = eo['random-bool-data']
+        print D.shape, D.dtype
+        eps = 1e-10
+        # y1: C implementation; y2/y3: pure-Python implementation on the
+        # raw and on the boolean-cast input, respectively.
+        y1 = pdist(D, "jaccard")
+        y2 = pdist(D, "test_jaccard")
+        y3 = pdist(numpy.bool_(D), "test_jaccard")
+        print numpy.abs(y1-y2).max()
+        print numpy.abs(y2-y3).max()
+        self.failUnless(within_tol(y1, y2, eps))
+        self.failUnless(within_tol(y2, y3, eps))
+
     def test_pdist_yule_mtica1(self):
         "Tests yule(*,*) with mtica example #1."
         m = yule(numpy.array([1, 0, 1, 1, 0]),
@@ -628,6 +779,19 @@
         self.failUnless(numpy.abs(m - 2.0) <= 1e-10)
         self.failUnless(numpy.abs(m2 - 2.0) <= 1e-10)
 
+    def test_pdist_yule_match(self):
+        "Tests pdist('yule') to see if the two implementations match on random double input data."
+        D = eo['random-bool-data']
+        print D.shape, D.dtype
+        eps = 1e-10
+        # y1: C implementation; y2/y3: pure-Python implementation on the
+        # raw and on the boolean-cast input, respectively.
+        y1 = pdist(D, "yule")
+        y2 = pdist(D, "test_yule")
+        y3 = pdist(numpy.bool_(D), "test_yule")
+        print numpy.abs(y1-y2).max()
+        print numpy.abs(y2-y3).max()
+        self.failUnless(within_tol(y1, y2, eps))
+        self.failUnless(within_tol(y2, y3, eps))
+
     def test_pdist_dice_mtica1(self):
         "Tests dice(*,*) with mtica example #1."
         m = dice(numpy.array([1, 0, 1, 1, 0]),
@@ -648,6 +812,19 @@
         self.failUnless(numpy.abs(m - 0.5) <= 1e-10)
         self.failUnless(numpy.abs(m2 - 0.5) <= 1e-10)
 
+    def test_pdist_dice_match(self):
+        "Tests pdist('dice') to see if the two implementations match on random double input data."
+        D = eo['random-bool-data']
+        print D.shape, D.dtype
+        eps = 1e-10
+        y1 = pdist(D, "dice")
+        y2 = pdist(D, "test_dice")
+        # y3 must run on the boolean-cast input, as in the other *_match
+        # tests; computing it on D again merely duplicated y2 and made the
+        # second assertion vacuous.
+        y3 = pdist(numpy.bool_(D), "test_dice")
+        print numpy.abs(y1-y2).max()
+        print numpy.abs(y2-y3).max()
+        self.failUnless(within_tol(y1, y2, eps))
+        self.failUnless(within_tol(y2, y3, eps))
+
     def test_pdist_sokalsneath_mtica1(self):
         "Tests sokalsneath(*,*) with mtica example #1."
         m = sokalsneath(numpy.array([1, 0, 1, 1, 0]),
@@ -668,6 +845,19 @@
         self.failUnless(numpy.abs(m - (4.0/5.0)) <= 1e-10)
         self.failUnless(numpy.abs(m2 - (4.0/5.0)) <= 1e-10)
 
+    def test_pdist_sokalsneath_match(self):
+        "Tests pdist('sokalsneath') to see if the two implementations match on random double input data."
+        D = eo['random-bool-data']
+        print D.shape, D.dtype
+        eps = 1e-10
+        # y1: C implementation; y2/y3: pure-Python implementation on the
+        # raw and on the boolean-cast input, respectively.
+        y1 = pdist(D, "sokalsneath")
+        y2 = pdist(D, "test_sokalsneath")
+        y3 = pdist(numpy.bool_(D), "test_sokalsneath")
+        print numpy.abs(y1-y2).max()
+        print numpy.abs(y2-y3).max()
+        self.failUnless(within_tol(y1, y2, eps))
+        self.failUnless(within_tol(y2, y3, eps))
+
     def test_pdist_rogerstanimoto_mtica1(self):
         "Tests rogerstanimoto(*,*) with mtica example #1."
         m = rogerstanimoto(numpy.array([1, 0, 1, 1, 0]),
@@ -688,6 +878,18 @@
         self.failUnless(numpy.abs(m - (4.0/5.0)) <= 1e-10)
         self.failUnless(numpy.abs(m2 - (4.0/5.0)) <= 1e-10)
 
+    def test_pdist_rogerstanimoto_match(self):
+        "Tests pdist('rogerstanimoto') to see if the two implementations match on random double input data."
+        D = eo['random-bool-data']
+        print D.shape, D.dtype
+        eps = 1e-10
+        # y1: C implementation; y2/y3: pure-Python implementation on the
+        # raw and on the boolean-cast input, respectively.
+        y1 = pdist(D, "rogerstanimoto")
+        y2 = pdist(D, "test_rogerstanimoto")
+        y3 = pdist(numpy.bool_(D), "test_rogerstanimoto")
+        print numpy.abs(y1-y2).max()
+        print numpy.abs(y2-y3).max()
+        self.failUnless(within_tol(y1, y2, eps))
+        self.failUnless(within_tol(y2, y3, eps))
 
     def test_pdist_russellrao_mtica1(self):
         "Tests russellrao(*,*) with mtica example #1."
@@ -709,6 +911,43 @@
         self.failUnless(numpy.abs(m - (2.0/3.0)) <= 1e-10)
         self.failUnless(numpy.abs(m2 - (2.0/3.0)) <= 1e-10)
 
+    def test_pdist_russellrao_match(self):
+        "Tests pdist('russellrao') to see if the two implementations match on random double input data."
+        D = eo['random-bool-data']
+        print D.shape, D.dtype
+        eps = 1e-10
+        # y1: C implementation; y2/y3: pure-Python implementation on the
+        # raw and on the boolean-cast input, respectively.
+        y1 = pdist(D, "russellrao")
+        y2 = pdist(D, "test_russellrao")
+        y3 = pdist(numpy.bool_(D), "test_russellrao")
+        print numpy.abs(y1-y2).max()
+        print numpy.abs(y2-y3).max()
+        self.failUnless(within_tol(y1, y2, eps))
+        self.failUnless(within_tol(y2, y3, eps))
+
+    def test_pdist_sokalmichener_match(self):
+        "Tests pdist('sokalmichener') to see if the two implementations match on random double input data."
+        D = eo['random-bool-data']
+        print D.shape, D.dtype
+        eps = 1e-10
+        # y1: C implementation; y2/y3: pure-Python implementation on the
+        # raw and on the boolean-cast input, respectively.
+        y1 = pdist(D, "sokalmichener")
+        y2 = pdist(D, "test_sokalmichener")
+        y3 = pdist(numpy.bool_(D), "test_sokalmichener")
+        print numpy.abs(y1-y2).max()
+        print numpy.abs(y2-y3).max()
+        self.failUnless(within_tol(y1, y2, eps))
+        self.failUnless(within_tol(y2, y3, eps))
+
+    def test_pdist_kulsinski_match(self):
+        "Tests pdist('kulsinski') to see if the two implementations match on random double input data."
+        D = eo['random-bool-data']
+        print D.shape, D.dtype
+        eps = 1e-10
+        y1 = pdist(D, "kulsinski")
+        y2 = pdist(D, "test_kulsinski")
+        y3 = pdist(numpy.bool_(D), "test_kulsinski")
+        print numpy.abs(y1-y2).max()
+        print numpy.abs(y2-y3).max()
+        self.failUnless(within_tol(y1, y2, eps))
+        # Also compare against the boolean-cast result, as every other
+        # *_match test does; y3 was computed but never checked.
+        self.failUnless(within_tol(y2, y3, eps))
+
 class TestSquareForm(TestCase):
 
     ################### squareform



More information about the Scipy-svn mailing list