# [Scipy-svn] r4406 - trunk/scipy/cluster

scipy-svn@scip... scipy-svn@scip...
Tue Jun 3 02:35:11 CDT 2008

Author: damian.eads
Date: 2008-06-03 02:35:10 -0500 (Tue, 03 Jun 2008)
New Revision: 4406

Modified:
trunk/scipy/cluster/hierarchy.py
Log:
Removed unnecessary imports in hierarchy and refactored its code. Wrote more tests.

Modified: trunk/scipy/cluster/hierarchy.py
===================================================================
--- trunk/scipy/cluster/hierarchy.py	2008-06-03 06:50:49 UTC (rev 4405)
+++ trunk/scipy/cluster/hierarchy.py	2008-06-03 07:35:10 UTC (rev 4406)
@@ -638,7 +638,7 @@
functions in this library.
"""

-    Z = numpy.asarray(Z)
+    Z = np.asarray(Z)

is_valid_linkage(Z, throw=True, name='Z')

@@ -993,6 +993,8 @@
Computes the Canberra distance between two n-vectors u and v,
\sum{|u_i-v_i|} / \sum{|u_i|+|v_i|}.
"""
+    u = np.asarray(u)
+    v = np.asarray(v)
return abs(u-v).sum() / (abs(u).sum() + abs(v).sum())

def _nbool_correspond_all(u, v):
@@ -1518,6 +1520,8 @@
elif metric == 'test_sqeuclidean':
if V is None:
V = _unbiased_variance(X)
+            else:
+                V = np.asarray(V)
dm = pdist(X, lambda u, v: seuclidean(u, v, V))
elif metric == 'test_braycurtis':
dm = pdist(X, braycurtis)
@@ -1525,6 +1529,8 @@
if VI is None:
V = np.cov(X.T)
VI = np.linalg.inv(V)
+            else:
+                VI = np.asarray(VI)
[VI] = _copy_arrays_if_base_present([VI])
# (u-v)V^(-1)(u-v)^T
dm = pdist(X, (lambda u, v: mahalanobis(u, v, VI)))
@@ -1591,6 +1597,8 @@
Also returns the cophenetic distance matrix in condensed form.

"""
+    Z = np.asarray(Z)
+
nargs = len(args)

if nargs < 1:
@@ -1648,6 +1656,7 @@
This function behaves similarly to the MATLAB(TM) inconsistent
function.
"""
+    Z = np.asarray(Z)

Zs = Z.shape
is_valid_linkage(Z, throw=True, name='Z')
@@ -1680,6 +1689,7 @@
the number of original observations (leaves) in the non-singleton
cluster i.
"""
+    Z = np.asarray(Z)
Zs = Z.shape
Zpart = Z[:,0:2]
Zd = Z[:,2].reshape(Zs[0], 1)
@@ -1701,6 +1711,7 @@
last column removed and the cluster indices converted to use
1..N indexing.
"""
+    Z = np.asarray(Z)
is_valid_linkage(Z, throw=True, name='Z')

return np.hstack([Z[:,0:2] + 1, Z[:,2]])
@@ -1713,6 +1724,7 @@
if for every cluster s and t joined, the distance between them is
no less than the distance between any previously joined clusters.
"""
+    Z = np.asarray(Z)
is_valid_linkage(Z, throw=True, name='Z')

# We expect the i'th value to be greater than its successor.
@@ -1727,6 +1739,7 @@
must be nonnegative. The link counts R[:,2] must be positive and
no greater than n-1.
"""
+    R = np.asarray(R)
valid = True
try:
if type(R) is not _array_type:
@@ -1786,6 +1799,7 @@
variable.

"""
+    Z = np.asarray(Z)
valid = True
try:
if type(Z) is not _array_type:
@@ -1847,6 +1861,7 @@
referencing the offending variable.

"""
+    y = np.asarray(y)
valid = True
try:
if type(y) is not _array_type:
@@ -1908,7 +1923,7 @@
the offending variable.

"""
-
+    D = np.asarray(D)
valid = True
try:
if type(D) is not _array_type:
@@ -1962,6 +1977,7 @@
Returns the number of original observations that correspond to a
linkage matrix Z.
"""
+    Z = np.asarray(Z)
is_valid_linkage(Z, throw=True, name='Z')
return (Z.shape[0] + 1)

@@ -1972,6 +1988,7 @@
Returns the number of original observations that correspond to a
square, non-condensed distance matrix D.
"""
+    D = np.asarray(D)
is_valid_dm(D, tol=np.inf, throw=True, name='D')
return D.shape[0]

@@ -1982,6 +1999,7 @@
Returns the number of original observations that correspond to a
condensed distance matrix Y.
"""
+    Y = np.asarray(Y)
is_valid_y(Y, throw=True, name='Y')
d = int(np.ceil(np.sqrt(Y.shape[0] * 2)))
return d
@@ -1996,6 +2014,8 @@
check in algorithms that make extensive use of linkage and distance
matrices that must correspond to the same set of original observations.
"""
+    Z = np.asarray(Z)
+    Y = np.asarray(Y)
return numobs_y(Y) == numobs_Z(Z)

def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None):
@@ -2055,6 +2075,7 @@
cluster(Z, t=3, criterion='maxclust_monocrit', monocrit=MI)

"""
+    Z = np.asarray(Z)
is_valid_linkage(Z, throw=True, name='Z')

n = Z.shape[0] + 1
@@ -2068,6 +2089,7 @@
if R is None:
R = inconsistent(Z, depth)
else:
+            R = np.asarray(R)
is_valid_im(R, throw=True, name='R')
# The C code does not support striding using strides, so
# the dimensions are used instead.
@@ -2137,14 +2159,17 @@

This function is similar to MATLAB(TM) clusterdata function.
"""
+    X = np.asarray(X)

if type(X) is not _array_type or len(X.shape) != 2:
-        raise TypeError('X must be an n by m numpy array.')
+        raise TypeError('The observation matrix X must be an n by m numpy array.')

Y = pdist(X, metric=distance)
Z = linkage(Y, method=method)
if R is None:
R = inconsistent(Z, d=depth)
+    else:
+        R = np.asarray(R)
T = fcluster(Z, criterion=criterion, depth=depth, R=R, t=t)
return T

@@ -2155,6 +2180,7 @@
Returns a list of leaf node ids as they appear in the tree from
left to right. Z is a linkage matrix.
"""
+    Z = np.asarray(Z)
is_valid_linkage(Z, throw=True, name='Z')
n = Z.shape[0] + 1
ML = np.zeros((n,), dtype=np.int32)
@@ -2593,6 +2619,7 @@
#         or results in a crossing, an exception will be thrown. Passing
#         None orders leaf nodes based on the order they appear in the
#         pre-order traversal.
+    Z = np.asarray(Z)

is_valid_linkage(Z, throw=True, name='Z')
Zs = Z.shape
@@ -2956,6 +2983,9 @@
Returns True iff two different cluster assignments T1 and T2 are
equivalent. T1 and T2 must be arrays of the same size.
"""
+    T1 = np.asarray(T1)
+    T2 = np.asarray(T2)
+
if type(T1) is not _array_type:
raise TypeError('T1 must be a numpy array.')
if type(T2) is not _array_type:
@@ -3068,6 +3098,8 @@
i < n, i corresponds to an original observation, otherwise it
corresponds to a non-singleton cluster.
"""
+    Z = np.asarray(Z)
+    T = np.asarray(T)
if type(T) != _array_type or T.dtype != np.int:
raise TypeError('T must be a one-dimensional numpy array of integers.')
is_valid_linkage(Z, throw=True, name='Z')



More information about the Scipy-svn mailing list