[Scipy-svn] r6890 - trunk/scipy/maxentropy

scipy-svn@scip... scipy-svn@scip...
Sun Nov 14 04:00:53 CST 2010

Author: rgommers
Date: 2010-11-14 04:00:53 -0600 (Sun, 14 Nov 2010)
New Revision: 6890

Modified:
trunk/scipy/maxentropy/__init__.py
trunk/scipy/maxentropy/maxentropy.py
trunk/scipy/maxentropy/maxentutils.py
Log:
DOC: merge wiki edits for maxentropy module.

Modified: trunk/scipy/maxentropy/__init__.py
===================================================================
--- trunk/scipy/maxentropy/__init__.py	2010-11-14 10:00:33 UTC (rev 6889)
+++ trunk/scipy/maxentropy/__init__.py	2010-11-14 10:00:53 UTC (rev 6890)
@@ -1,3 +1,91 @@
+"""
+Routines for fitting maximum entropy models
+===========================================
+
+Contains two classes for fitting maximum entropy models (also known
+as "exponential family" models) subject to linear constraints on the
+expectations of arbitrary feature statistics.  One class, "model", is
+for small discrete sample spaces, using explicit summation. The other,
+"bigmodel", is for sample spaces that are either continuous (and
+perhaps high-dimensional) or discrete but too large to sum over, and
+uses importance sampling or conditional Monte Carlo methods.
+
+The maximum entropy model has exponential form
+
+..
+   p(x) = exp(theta^T f(x)) / Z(theta)
+
+.. math::
+   p\\left(x\\right)=\\frac{\\exp\\left(\\theta^{T}f\\left(x\\right)\\right)}
+                          {Z\\left(\\theta\\right)}
+
+with a real parameter vector theta of the same length as the feature
+statistic f(x). For more background, see, for example, Cover and
+Thomas (1991), *Elements of Information Theory*.
+
+See the file bergerexample.py for a walk-through of how to use these
+routines when the sample space is small enough to be enumerated.
+
+See bergerexamplesimulated.py for a similar walk-through using
+simulation.
+
+Copyright: Ed Schofield, 2003-2006
+License: BSD-style (see LICENSE.txt in main source directory)
+
+Modules
+-------
+
+.. autosummary::
+   :toctree: generated/
+
+   maxentropy -
+   maxentutils -
+
+Classes
+-------
+
+.. autosummary::
+   :toctree: generated/
+
+   DivergenceError -
+   basemodel -
+   bigmodel -
+   conditionalmodel -
+   model -
+
+Functions
+---------
+
+.. autosummary::
+   :toctree: generated/
+
+   arrayexp -
+   arrayexpcomplex -
+   columnmeans -
+   columnvariances -
+   densefeaturematrix -
+   densefeatures -
+   dotprod -
+   flatten -
+   innerprod -
+   innerprodtranspose -
+   logsumexp -
+   logsumexp_naive -
+   robustlog -
+   rowmeans -
+   sample_wr -
+   sparsefeaturematrix -
+   sparsefeatures -
+
+Objects
+-------
+
+.. autosummary::
+   :toctree: generated/
+
+   division -
+
+"""
from info import __doc__
from maxentropy import *

Modified: trunk/scipy/maxentropy/maxentropy.py
===================================================================
--- trunk/scipy/maxentropy/maxentropy.py	2010-11-14 10:00:33 UTC (rev 6889)
+++ trunk/scipy/maxentropy/maxentropy.py	2010-11-14 10:00:53 UTC (rev 6890)
@@ -790,7 +790,9 @@
class conditionalmodel(model):
"""
A conditional maximum-entropy (exponential-form) model p(x|w) on a
-    discrete sample space.  This is useful for classification problems:
+    discrete sample space.
+
+    This is useful for classification problems:
given the context w, what is the probability of each class x?

The form of such a model is::

Modified: trunk/scipy/maxentropy/maxentutils.py
===================================================================
--- trunk/scipy/maxentropy/maxentutils.py	2010-11-14 10:00:33 UTC (rev 6889)
+++ trunk/scipy/maxentropy/maxentutils.py	2010-11-14 10:00:53 UTC (rev 6890)
@@ -1,5 +1,7 @@
-"""maxentutils.py: Utility routines for the maximum entropy module.  Most
-of them are either Python replacements for the corresponding Fortran
+"""
+Utility routines for the maximum entropy module.
+
+Most of them are either Python replacements for the corresponding Fortran
routines or wrappers around matrices to allow the maxent module to
manipulate ndarrays, scipy sparse matrices, and PySparse matrices with a
common interface.
@@ -9,6 +11,7 @@

Copyright: Ed Schofield, 2003-2006
License: BSD-style (see LICENSE.txt in main source directory)
+
"""

# Future imports must come before any code in 2.5
@@ -121,12 +124,15 @@

def arrayexp(x):
-    """Returns the elementwise antilog of the real array x.  We try to
-    exponentiate with numpy.exp() and, if that fails, with python's
-    math.exp().  numpy.exp() is about 10 times faster but throws an
-    OverflowError exception for numerical underflow (e.g. exp(-800),
+    """
+    Returns the elementwise antilog of the real array x.
+
+    We try to exponentiate with numpy.exp() and, if that fails, with
+    python's math.exp().  numpy.exp() is about 10 times faster but throws
+an OverflowError exception for numerical underflow (e.g. exp(-800)),
whereas python's math.exp() just returns zero, which is much more helpful.
+
"""
try:
ex = numpy.exp(x)
@@ -139,12 +145,15 @@
return ex

def arrayexpcomplex(x):
-    """Returns the elementwise antilog of the vector x.  We try to
-    exponentiate with numpy.exp() and, if that fails, with python's
+    """
+    Returns the elementwise antilog of the vector x.
+
+    We try to exponentiate with numpy.exp() and, if that fails, with python's
math.exp().  numpy.exp() is about 10 times faster but throws an
OverflowError exception for numerical underflow (e.g. exp(-800)),
whereas python's math.exp() just returns zero, which is much more helpful.
+
"""
try:
ex = numpy.exp(x).real
@@ -272,12 +281,15 @@

def dotprod(u,v):
-    """This is a wrapper around general dense or sparse dot products.
+    """
+    This is a wrapper around general dense or sparse dot products.
+
It is not necessary except as a common interface for supporting
ndarray, scipy spmatrix, and PySparse arrays.

Returns the dot product of the (1 x m) sparse array u with the
(m x 1) (dense) numpy array v.
+
"""
#print "Taking the dot product u.v, where"
#print "u has shape " + str(u.shape)
@@ -294,7 +306,9 @@

def innerprod(A,v):
-    """This is a wrapper around general dense or sparse dot products.
+    """
+    This is a wrapper around general dense or sparse dot products.
+
It is not necessary except as a common interface for supporting
ndarray, scipy spmatrix, and PySparse arrays.

@@ -302,6 +316,7 @@
with the n-element dense array v.  This is a wrapper for A.dot(v) for
dense arrays and spmatrix objects, and for A.matvec(v, result) for
PySparse matrices.
+
"""

# We assume A is sparse.
@@ -337,7 +352,9 @@

def innerprodtranspose(A,v):
-    """This is a wrapper around general dense or sparse dot products.
+    """
+    This is a wrapper around general dense or sparse dot products.
+
It is not necessary except as a common interface for supporting
ndarray, scipy spmatrix, and PySparse arrays.

@@ -346,6 +363,7 @@
function is efficient for large matrices A.  This is a wrapper for
u.T.dot(v) for dense arrays and spmatrix objects, and for
u.matvec_transp(v, result) for pysparse matrices.
+
"""

(m, n) = A.shape
@@ -382,8 +400,10 @@

def rowmeans(A):
-    """This is a wrapper for general dense or sparse dot products.  It is
-    only necessary as a common interface for supporting ndarray,
+    """
+    This is a wrapper for general dense or sparse dot products.
+
+    It is only necessary as a common interface for supporting ndarray,
scipy spmatrix, and PySparse arrays.

Returns a dense (m x 1) vector representing the mean of the rows of A,
@@ -392,6 +412,7 @@
>>> a = numpy.array([[1,2],[3,4]], float)
>>> rowmeans(a)
array([ 1.5,  3.5])
+
"""
if type(A) is numpy.ndarray:
return A.mean(1)
@@ -406,8 +427,10 @@
return rowsum / float(n)

def columnmeans(A):
-    """This is a wrapper for general dense or sparse dot products.  It is
-    only necessary as a common interface for supporting ndarray,
+    """
+    This is a wrapper for general dense or sparse dot products.
+
+    It is only necessary as a common interface for supporting ndarray,
scipy spmatrix, and PySparse arrays.

Returns a dense (1 x n) vector with the column averages of A, which can
@@ -416,6 +439,7 @@
>>> a = numpy.array([[1,2],[3,4]],'d')
>>> columnmeans(a)
array([ 2.,  3.])
+
"""
if type(A) is numpy.ndarray:
return A.mean(0)
@@ -430,8 +454,10 @@
return columnsum / float(m)

def columnvariances(A):
-    """This is a wrapper for general dense or sparse dot products.  It
-    is not necessary except as a common interface for supporting ndarray,
+    """
+    This is a wrapper for general dense or sparse dot products.
+
+    It is not necessary except as a common interface for supporting ndarray,
scipy spmatrix, and PySparse arrays.

Returns a dense (1 x n) vector with unbiased estimators for the column
@@ -441,6 +467,7 @@
>>> a = numpy.array([[1,2], [3,4]], 'd')
>>> columnvariances(a)
array([ 2.,  2.])
+
"""
if type(A) is numpy.ndarray:
return numpy.std(A,0)**2