[Scipy-svn] r5165 - trunk/scipy/cluster
scipysvn@scip...
scipysvn@scip...
Sat Nov 22 04:40:38 CST 2008
Author: damian.eads
Date: 2008-11-22 04:40:35 -0600 (Sat, 22 Nov 2008)
New Revision: 5165
Modified:
trunk/scipy/cluster/hierarchy.py
Log:
RSTifying docs in hierarchy.
Modified: trunk/scipy/cluster/hierarchy.py
===================================================================
--- trunk/scipy/cluster/hierarchy.py 2008-11-22 08:03:44 UTC (rev 5164)
+++ trunk/scipy/cluster/hierarchy.py 2008-11-22 10:40:35 UTC (rev 5165)
@@ -1274,60 +1274,80 @@
def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None):
"""
+ Forms flat clusters from the hierarchical clustering defined by
+ the linkage matrix ``Z``. The threshold ``t`` is a required parameter.
 T = fcluster(Z, t, criterion, depth=2, R=None, monocrit=None):
+ :Arguments:
 Forms flat clusters from the hierarchical clustering defined by
 the linkage matrix Z. The threshold t is a required parameter.
+  Z : ndarray
+ The hierarchical clustering encoded with the matrix returned
+ by the ``linkage`` function.
 T is a vector of length n; T[i] is the flat cluster number to which
 original observation i belongs.
+  t : double
+ The threshold to apply when forming flat clusters.
 The criterion parameter can be any of the following values,
+  criterion : string (optional)
+ The criterion to use in forming flat clusters. This can
+ be any of the following values:
 * 'inconsistent': If a cluster node and all its decendents have an
 inconsistent value less than or equal to c then all its leaf
 descendents belong to the same flat cluster. When no nonsingleton
 cluster meets this criterion, every node is assigned to its
 own cluster. The depth parameter is the maximum depth to perform
 the inconsistency calculation; it has no meaning for the other
 criteria.
+ * 'inconsistent': If a cluster node and all its
+ descendants have an inconsistent value less than or equal
+ to ``t`` then all its leaf descendants belong to the
+ same flat cluster. When no non-singleton cluster meets
+ this criterion, every node is assigned to its own
+ cluster. (Default)
 * 'distance': Forms flat clusters so that the original
 observations in each flat cluster have no greater a cophenetic
 distance than t.
+ * 'distance': Forms flat clusters so that the original
+ observations in each flat cluster have no greater a
+ cophenetic distance than ``t``.
 * 'maxclust': Finds a minimum threshold r so that the cophenetic
 distance between any two original observations in the same flat
 cluster is no more than r and no more than t flat clusters are
 formed.
+ * 'maxclust': Finds a minimum threshold ``r`` so that
+ the cophenetic distance between any two original
+ observations in the same flat cluster is no more than
+ ``r`` and no more than ``t`` flat clusters are formed.
 * 'monocrit': Forms a flat cluster from a cluster node c with
 index i when monocrit[j] <= t. monocrit must be monotonic.
+ * 'monocrit': Forms a flat cluster from a cluster node ``c``
+ with index ``i`` when ``monocrit[i] <= t``.
 monocrit is a (n1) numpy vector of doubles; monocrit[i] is
 the criterion upon which nonsingleton i is thresholded. The
 monocrit vector must be monotonic, i.e. given a node c with
 index i, for all node indices j corresponding to nodes below c,
 monocrit[i] >= monocrit[j].
+ For example, to threshold on the maximum mean distance
+ as computed in the inconsistency matrix R with a
+ threshold of 0.8 do::
 For example, to threshold on the maximum mean distance as computed
 in the inconsistency matrix R with a threshold of 0.8 do
+ MR = maxRstat(Z, R, 3)
+ fcluster(Z, t=0.8, criterion='monocrit', monocrit=MR)
 MR = maxRstat(Z, R, 3)
 cluster(Z, t=0.8, criterion='monocrit', monocrit=MR)
+ * 'maxclust_monocrit': Forms a flat cluster from a
+ nonsingleton cluster node ``c`` when ``monocrit[i] <=
+ r`` for all cluster indices ``i`` below and including
+ ``c``. ``r`` is minimized such that no more than ``t``
+ flat clusters are formed. monocrit must be
+ monotonic. For example, to minimize the threshold ``t`` on
+ maximum inconsistency values so that no more than 3 flat
+ clusters are formed, do::
 * 'maxclust_monocrit': Forms a flat cluster from a nonsingleton
 cluster node c when monocrit[i] <= r for all cluster indices i below
 and including c. r is minimized such that no more than t flat clusters
 are formed. monocrit must be monotonic.
+ MI = maxinconsts(Z, R)
+ fcluster(Z, t=3, criterion='maxclust_monocrit', monocrit=MI)
 For example, to minimize the threshold t on maximum inconsistency
 values so that no more than 3 flat clusters are formed, do:
+  depth : int (optional)
+ The maximum depth to perform the inconsistency calculation.
+ It has no meaning for the other criteria. (default=2)
 MI = maxinconsts(Z, R)
 cluster(Z, t=3, criterion='maxclust_monocrit', monocrit=MI)
+  R : ndarray (optional)
+ The inconsistency matrix to use for the 'inconsistent'
+ criterion. This matrix is computed if not provided.
+  monocrit : ndarray (optional)
+ A ``(n-1)`` numpy vector of doubles. ``monocrit[i]`` is the
+ statistic upon which non-singleton ``i`` is thresholded. The
+ monocrit vector must be monotonic, i.e. given a node ``c`` with
+ index ``i``, for all node indices ``j`` corresponding to nodes
+ below ``c``, ``monocrit[i] >= monocrit[j]``.
+
+ :Returns:
+
+  T : ndarray
+ A vector of length ``n``. ``T[i]`` is the flat cluster number to
+ which original observation ``i`` belongs.
"""
Z = np.asarray(Z, order='c')
is_valid_linkage(Z, throw=True, name='Z')
@@ -1367,50 +1387,66 @@
def fclusterdata(X, t, criterion='inconsistent', \
metric='euclidean', depth=2, method='single', R=None):
"""
 T = fclusterdata(X, t)
+ ``T = fclusterdata(X, t)``
 Clusters the original observations in the n by m data matrix X
 (n observations in m dimensions), using the euclidean distance
 metric to calculate distances between original observations,
 performs hierarchical clustering using the single linkage
 algorithm, and forms flat clusters using the inconsistency
 method with t as the cutoff threshold.
+ Clusters the original observations in the ``n`` by ``m`` data
+ matrix ``X`` (``n`` observations in ``m`` dimensions), using the
+ euclidean distance metric to calculate distances between original
+ observations, performs hierarchical clustering using the single
+ linkage algorithm, and forms flat clusters using the inconsistency
+ method with t as the cutoff threshold.
 A onedimensional numpy array T of length n is returned. T[i]
 is the index of the flat cluster to which the original
 observation i belongs.
+ A one-dimensional numpy array ``T`` of length ``n`` is
+ returned. ``T[i]`` is the index of the flat cluster to which the
+ original observation ``i`` belongs.
 T = fclusterdata(X, t, criterion='inconsistent', method='single',
 metric='euclid', depth=2, R=None)
+ :Arguments:
 Clusters the original observations in the n by m data matrix X using
 the thresholding criterion, linkage method, and distance metric
 specified.
+  X : ndarray
+ The ``n`` by ``m`` data matrix with ``n`` observations in
+ ``m`` dimensions.
 Named parameters are described below.
+  t : double
+ The threshold to apply when forming flat clusters.
 criterion: specifies the criterion for forming flat clusters.
 Valid values are 'inconsistent', 'distance', or
 'maxclust' cluster formation algorithms. See
 cluster for descriptions.
 method: the linkage method to use. See linkage for
 descriptions.
+  criterion : string
+ Specifies the criterion for forming flat clusters. Valid
+ values are 'inconsistent', 'distance', or 'maxclust' cluster
+ formation algorithms. See ``fcluster`` for descriptions.
 metric: the distance metric for calculating pairwise
 distances. See distance.pdist for descriptions and
 linkage to verify compatibility with the linkage
 method.
+  method : string
+ The linkage method to use (single, complete, average,
+ weighted, median, centroid, ward). See ``linkage`` for more
+ information.
 t: the cutoff threshold for the cluster function or
 the maximum number of clusters (criterion='maxclust').
+  metric : string
+ The distance metric for calculating pairwise distances. See
+ distance.pdist for descriptions and linkage to verify
+ compatibility with the linkage method.
 depth: the maximum depth for the inconsistency calculation.
 See inconsistent for more information.
+  t : double
+ The cutoff threshold for the cluster function or the
+ maximum number of clusters (criterion='maxclust').
 R: the inconsistency matrix. It will be computed if
 necessary if it is not passed.
+  depth : int
+ The maximum depth for the inconsistency calculation. See
+ ``inconsistent`` for more information.
+  R : ndarray
+ The inconsistency matrix. It will be computed if necessary
+ if it is not passed.
+
+
+ :Returns:
+
+  T : ndarray
+ A vector of length ``n``. ``T[i]`` is the flat cluster number to
+ which original observation ``i`` belongs.
+
+ Notes
+ -----
+
This function is similar to MATLAB(TM) clusterdata function.
"""
X = np.asarray(X, order='c', dtype=np.double)
@@ -1429,10 +1465,19 @@
def leaves_list(Z):
"""
 L = leaves_list(Z):
+ Returns a list of leaf node ids (corresponding to observation
+ vector index) as they appear in the tree from left to right. Z is
+ a linkage matrix.
 Returns a list of leaf node ids as they appear in the tree from
 left to right. Z is a linkage matrix.
+ :Arguments:
+
+  Z : ndarray
+ The hierarchical clustering encoded as a matrix. See
+ ``linkage`` for more information.
+
+ :Returns:
+  L : ndarray
+ The list of leaf node ids.
"""
Z = np.asarray(Z, order='c')
is_valid_linkage(Z, throw=True, name='Z')
@@ -1651,10 +1696,14 @@
def set_link_color_palette(palette):
"""
 set_link_color_palette(palette):

Changes the list of matplotlib color codes to use when coloring
links with the dendrogram colorthreshold feature.
+
+ :Arguments:
+  palette : A list of matplotlib color codes. The order of
+ the color codes is the order in which the colors are cycled
+ through when color thresholding in the dendrogram.
+
"""
if type(palette) not in (types.ListType, types.TupleType):
@@ -2239,8 +2288,25 @@
def is_isomorphic(T1, T2):
"""
 Returns True iff two different cluster assignments T1 and T2 are
 equivalent. T1 and T2 must be arrays of the same size.
+
+ Determines if two different cluster assignments ``T1`` and
+ ``T2`` are equivalent.
+
+ :Arguments:
+  T1 : ndarray
+ An assignment of singleton cluster ids to flat cluster
+ ids.
+
+  T2 : ndarray
+ An assignment of singleton cluster ids to flat cluster
+ ids.
+
+ :Returns:
+
+  b : boolean
+ Whether the flat cluster assignments ``T1`` and ``T2`` are
+ equivalent.
+
"""
T1 = np.asarray(T1, order='c')
T2 = np.asarray(T2, order='c')
More information about the Scipy-svn
mailing list