# [Scipy-svn] r2308 - in trunk/Lib/sandbox/models: . family tests

scipy-svn at scipy.org scipy-svn at scipy.org
Tue Nov 7 15:51:34 CST 2006

```Author: jonathan.taylor
Date: 2006-11-07 15:50:20 -0600 (Tue, 07 Nov 2006)
New Revision: 2308

Modified:
trunk/Lib/sandbox/models/cox.py
trunk/Lib/sandbox/models/formula.py
trunk/Lib/sandbox/models/tests/test_regression.py
trunk/Lib/sandbox/models/tests/test_robust.py
Log:
some name changes: changing CamelCase to lower_case

Modified: trunk/Lib/sandbox/models/cox.py
===================================================================
--- trunk/Lib/sandbox/models/cox.py	2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/cox.py	2006-11-07 21:50:20 UTC (rev 2308)
@@ -3,7 +3,7 @@
import numpy as N
from scipy.sandbox.models import survival, model

-class DiscreteRV:
+class discrete:

"""
A simple little class for working with discrete random vectors.
@@ -15,13 +15,13 @@
self.x = N.array([self.x])
self.n = self.x.shape[0]
if w is None:
-            w = N.ones(self.n, N.float64)
+            w = N.ones(self.n, N.float64)
else:
if w.shape[0] != self.n:
raise ValueError, 'incompatible shape for weights w'
if N.any(N.less(w, 0)):
raise ValueError, 'weights should be non-negative'
-            self.w = w / w.sum()
+        self.w = w / w.sum()

def mean(self, f=None):
if f is None:
@@ -31,11 +31,11 @@
return (fx * self.w).sum()

def cov(self):
-        mu = self.moment()
+        mu = self.mean()
dx = self.x - N.multiply.outer(mu, self.x.shape[1])
return N.dot(dx, N.transpose(dx))

-class Observation(survival.RightCensored):
+class observation(survival.right_censored):

def __getitem__(self, item):
if self.namespace is not None:
@@ -45,18 +45,17 @@

def __init__(self, time, delta, namespace=None):
self.namespace = namespace
-        survival.RightCensored.__init__(self, time, delta)
+        survival.right_censored.__init__(self, time, delta)

def __call__(self, formula, time=None, **extra):
return formula(namespace=self, time=time, **extra)

-class ProportionalHazards(model.LikelihoodModel):
+class coxph(model.likelihood_model):

def __init__(self, subjects, formula, time_dependent=False):
self.subjects, self.formula = subjects, formula
self.time_dependent = time_dependent
self.initialize(self.subjects)
-

def initialize(self, subjects):

@@ -142,7 +141,7 @@

if ties == 'breslow':
w = N.exp(N.dot(Z, b))
-                rv = DiscreteRV(Z[risk], w=w[risk])
+                rv = discrete(Z[risk], w=w[risk])
score -= rv.mean() * d
elif ties == 'efron':
w = N.exp(N.dot(Z, b))
@@ -150,7 +149,7 @@
for j in range(d):
efron_w = w
efron_w[fail] -= i * w[fail] / d
-                    rv = DiscreteRV(Z[risk], w=efron_w[risk])
+                    rv = discrete(Z[risk], w=efron_w[risk])
score -= rv.mean()
elif ties == 'cox':
raise NotImplementedError, 'Cox tie breaking method not implemented'
@@ -175,7 +174,7 @@

if ties == 'breslow':
w = N.exp(N.dot(Z, b))
-                rv = DiscreteRV(Z[risk], w=w[risk])
+                rv = discrete(Z[risk], w=w[risk])
info += rv.cov()
elif ties == 'efron':
w = N.exp(N.dot(Z, b))
@@ -183,7 +182,7 @@
for j in range(d):
efron_w = w
efron_w[fail] -= i * w[fail] / d
-                    rv = DiscreteRV(Z[risk], w=efron_w[risk])
+                    rv = discrete(Z[risk], w=efron_w[risk])
info += rv.cov()
elif ties == 'cox':
raise NotImplementedError, 'Cox tie breaking method not implemented'
@@ -200,7 +199,7 @@
Y = R.standard_exponential((2*n,)) / lin
delta = R.binomial(1, 0.9, size=(2*n,))

-    subjects = [Observation(Y[i], delta[i]) for i in range(2*n)]
+    subjects = [observation(Y[i], delta[i]) for i in range(2*n)]
for i in range(2*n):
subjects[i].X = X[i]

@@ -208,7 +207,7 @@
x = F.Quantitative('X')
f = F.Formula(x)

-    c = ProportionalHazards(subjects, f)
+    c = coxph(subjects, f)

c.cache()
c.newton([0.4])

===================================================================
--- trunk/Lib/sandbox/models/family/links.py	2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/family/links.py	2006-11-07 21:50:20 UTC (rev 2308)
@@ -3,11 +3,9 @@

-    def __init__(self):
-        pass
+    def initialize(self, Y):
+        return N.asarray(Y).mean() * N.ones(Y.shape)

-    pass
-

"""
@@ -17,26 +15,16 @@
"""

tol = 1.0e-10
-    init = 1.0e-03

-    def clean(self, p, inverse=False, initialize=False):
-        if initialize:
-            tol = Logit.tol
-        else:
-            tol = Logit.init
+    def clean(self, p):
+        return N.clip(p, Logit.tol, 1. - Logit.tol)

-        if not inverse:
-            return N.clip(p, tol, 1. - tol)
-        else:
-            l = self(tol); u = self(1 - tol)
-            return N.clip(p, l, u)
-
-    def __call__(self, p, initialize=True, **extra):
-        p = self.clean(p, **extra)
+    def __call__(self, p):
+        p = self.clean(p)
return N.log(p / (1. - p))

def inverse(self, z):
-        t = N.exp(self.clean(z, inverse=True))
+        t = N.exp(z)
return t / (1. + t)

def deriv(self, p):
@@ -57,7 +45,7 @@
def __init__(self, power=1.):
self.power = power

-    def __call__(self, x, **extra):
+    def __call__(self, x):
return N.power(x, self.power)

def inverse(self, x):
@@ -108,16 +96,10 @@
"""

tol = 1.0e-10
-    init = 1.0e-03

def clean(self, x):
-        if initialize:
-            tol = Logit.tol
-        else:
-            tol = Logit.init
+        return N.clip(x, Logit.tol, N.inf)

-        return N.clip(x, tol, N.inf)
-
def __call__(self, x, **extra):
x = self.clean(x)
return N.log(x)
@@ -143,7 +125,7 @@
def __init__(self, dbn=scipy.stats.norm):
self.dbn = dbn

-    def __call__(self, p, **extra):
+    def __call__(self, p):
p = self.clean(p)
return self.dbn.ppf(p)

@@ -181,7 +163,7 @@

"""

-    def __call__(self, p, **extra):
+    def __call__(self, p):
p = self.clean(p)
return N.log(-N.log(p))

Modified: trunk/Lib/sandbox/models/formula.py
===================================================================
--- trunk/Lib/sandbox/models/formula.py	2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/formula.py	2006-11-07 21:50:20 UTC (rev 2308)
@@ -4,7 +4,7 @@

terms = {}

-class Term:
+class term:

"""
This class is very simple: it is just a named term in a model formula.
@@ -15,7 +15,7 @@

"""

-    def __init__(self, name, func=None, termname=None, namespace=terms):
+    def __init__(self, name, func=None, termname=None, namespace={}):

self.name = name

@@ -39,22 +39,22 @@

"""
-        Formula(self) + Formula(other)
+        formula(self) + formula(other)
"""
-        other = Formula(other)
+        other = formula(other)
return other + self

def __mul__(self, other):
"""
-        Formula(self) * Formula(other)
+        formula(self) * formula(other)
"""

if other.name is 'intercept':
-            return Formula(self)
+            return formula(self)
elif self.name is 'intercept':
-            return Formula(other)
+            return formula(other)

-        other = Formula(other)
+        other = formula(other)
return other * self

def names(self):
@@ -67,28 +67,32 @@
else:
return list(self.name)

-    def __call__(self, namespace=terms, usefn=True, **extra):
+    def __call__(self, namespace=terms, usefn=True, args=(), kw={}):
"""
Return the columns associated to self in a design matrix.
The default behaviour is to return namespace[self.termname]
where namespace defaults to globals().

If usefn, and self.func exists then return
-            self.func(namespace=namespace, **extra)
+
+            self.func(*args, **kw)
+
+        with kw['namespace'] = namespace.
"""

+        kw['namespace'] = namespace
if not hasattr(self, 'func') or not usefn:
val = namespace[self.termname]
-            if isinstance(val, Formula):
-                val = val(namespace=namespace, **extra)
+            if isinstance(val, formula):
+                val = val(*args, **kw)
elif callable(val):
-                val = val(**extra)
+                val = val(*args, **kw)
else:
-            val = self.func(namespace=namespace, **extra)
+            val = self.func(*args, **kw)
val = N.asarray(val)
return N.squeeze(val)

-class Factor(Term):
+class factor(term):

"""
A categorical factor.
@@ -96,7 +100,7 @@

def __init__(self, termname, keys, ordinal=False):
"""
-        Factor is initialized with keys, representing all valid
+        factor is initialized with keys, representing all valid
levels of the factor.
"""

@@ -114,7 +118,7 @@
# FIXME: n is not defined here
col = [float(self.keys.index(v[i])) for i in range(n)]
return N.array(col)
-            Term.__init__(self, self.name, func=func)
+            term.__init__(self, self.name, func=func)

else:
def func(namespace=terms):
@@ -124,9 +128,9 @@
col = [float((v[i] == key)) for i in range(len(v))]
value.append(col)
return N.array(value)
-            Term.__init__(self, ['(%s==%s)' % (self.termname, str(key)) for key in self.keys], func=func, termname=self.termname)
+            term.__init__(self, ['(%s==%s)' % (self.termname, str(key)) for key in self.keys], func=func, termname=self.termname)

-    def __call__(self, namespace=terms, values=False, **extra):
+    def __call__(self, namespace=terms, values=False, args=(), kw={}):
"""
Return either the columns in the design matrix, or the
actual values of the factor, if values==True.
@@ -135,9 +139,9 @@
if namespace is None:
namespace = globals()
if not values:
-            return Term.__call__(self, namespace=namespace, usefn=True, **extra)
+            return term.__call__(self, namespace=namespace, usefn=True, args=args, kw=kw)
else:
-            return Term.__call__(self, namespace=namespace, usefn=False, **extra)
+            return term.__call__(self, namespace=namespace, usefn=False, args=args, kw=kw)

def verify(self, values):
"""
@@ -149,19 +153,19 @@

"""
-        Formula(self) + Formula(other)
+        formula(self) + formula(other)

-        When adding \'intercept\' to a Factor, this just returns self.
+        When adding \'intercept\' to a factor, this just returns self.
"""

if other.name is 'intercept':
-            return Formula(self)
+            return formula(self)
else:

def main_effect(self, reference=None):
"""
-        Return the 'main effect' columns of a Factor, choosing
+        Return the 'main effect' columns of a factor, choosing
a reference column number to remove.
"""

@@ -181,12 +185,12 @@
keep.pop(reference)
__names = self.names()
_names = ['%s-%s' % (__names[keep[i]], __names[reference]) for i in range(len(keep))]
-        return Term(_names, func=func, termname='%s:maineffect' % self.termname)
+        return term(_names, func=func, termname='%s:maineffect' % self.termname)

-class Quantitative(Term):
+class quantitative(term):

"""
-    A subclass of Term that presumes namespace[self.termname] is
+    A subclass of term that presumes namespace[self.termname] is
an ndarray.

Basically used for __pow__ method and (looking forward) for splines.
@@ -195,7 +199,7 @@

def __pow__(self, power):
"""
-        Raise the quantitative Term's values to an integer power, i.e.
+        Raise the quantitative term's values to an integer power, i.e.
polynomial.
"""
try:
@@ -211,13 +215,13 @@
def func(obj=self, namespace=terms, power=power, **extra):
x = N.asarray(obj(namespace=namespace, **extra))
return N.power(x, power)
-        value = Term(name, func=func)
+        value = term(name, func=func)
value.power = power
return value

-class FuncQuant(Quantitative):
+class func_quant(quantitative):
"""
-    A Term for a quantitative function of a Term.
+    A term for a quantitative function of a term.
"""

counter = 0
@@ -238,18 +242,18 @@
except:
termname = 'f%d(%s)' % (FuncQuant.counter, quant.name)
FuncQuant.counter += 1
-        Term.__init__(self, termname, func=func)
+        term.__init__(self, termname, func=func)

-class Formula:
+class formula:

"""

-    A Formula object for manipulating design matrices in regression models,
-    essentially consisting of a list of Term instances.
+    A formula object for manipulating design matrices in regression models,
+    essentially consisting of a list of term instances.

The object supports addition and multiplication which correspond
to concatenation and pairwise multiplication, respectively,
-    of the columns of the two Formulas.
+    of the columns of the two formulas.
"""

def _terms_changed(self):
@@ -258,19 +262,19 @@

def __init__(self, terms):
"""
-        Create a Formula from either:
+        Create a formula from either:

-        i) a Formula object
-        ii) a sequence of Term instances
-        iii) one Term
+        i) a formula object
+        ii) a sequence of term instances
+        iii) one term

"""

-        if isinstance(terms, Formula):
+        if isinstance(terms, formula):
self.terms = copy.copy(list(terms.terms))
elif type(terms) is types.ListType:
self.terms = terms
-        elif isinstance(terms, Term):
+        elif isinstance(terms, term):
self.terms = [terms]
else:
raise ValueError
@@ -279,18 +283,18 @@

def __str__(self):
"""
-        String representation of list of termnames of a Formula.
+        String representation of list of termnames of a formula.
"""
value = []
for term in self.terms:
value += [term.termname]
return '<formula: %s>' % ' + '.join(value)

-    def __call__(self, namespace=terms, nrow=-1, **extra):
+    def __call__(self, namespace=terms, nrow=-1, args=(), kw={}):
"""
-        Create (transpose) of the design matrix of the Formula within
-        namespace. Extra arguments are passed to each Term instance. If
-        the Formula just contains an intercept, then the keyword
+        Create (transpose) of the design matrix of the formula within
+        namespace. Extra arguments are passed to each term instance. If
+        the formula just contains an intercept, then the keyword
argument 'n' indicates the number of rows (observations).
"""

@@ -299,7 +303,7 @@
allvals = []
intercept = False
for term in self.terms:
-            val = term(namespace=namespace, **extra)
+            val = term(namespace=namespace, args=args, kw=kw)
if term.termname == 'intercept':
intercept = True
elif val.ndim == 1:
@@ -330,7 +334,7 @@
Determine whether a given term is in a formula.
"""

-        if not isinstance(term, Formula):
+        if not isinstance(term, formula):
return term.termname in self.termnames()
elif len(term.terms) == 1:
term = term.terms[0]
@@ -365,7 +369,7 @@

def names(self):
"""
-        Return a list of the names in the Formula. The order of the
+        Return a list of the names in the formula. The order of the
names corresponds to the order of the columns when self
is evaluated.
"""
@@ -378,7 +382,7 @@
def termnames(self):
"""
Return a list of the term names in the formula. These
-        are the names of each Term instance in self.
+        are the names of each term instance in self.
"""

names = []
@@ -386,21 +390,21 @@
names += [term.termname]
return names

-    def design(self, namespace=terms, **keywords):
+    def design(self, namespace=terms, args=(), kw={}):
"""
transpose(self(namespace=namespace, **keywords))
"""
-        return N.transpose(self(namespace=namespace, **keywords))
+        return self(namespace=namespace, args=args, kw=kw).T

def __mul__(self, other, nested=False):
"""
-        This returns a Formula whose columns are the pairwise
+        This returns a formula whose columns are the pairwise
product of the columns of self and other.

TO DO: check for nesting relationship. Should not be too difficult.
"""

-        other = Formula(other)
+        other = formula(other)

selftermnames = self.termnames()
othertermnames = other.termnames()
@@ -423,9 +427,9 @@
othernames = other.terms[j].names()

if self.terms[i].name is 'intercept':
-                    term = other.terms[j]
+                    _term = other.terms[j]
elif other.terms[j].name is 'intercept':
-                    term = self.terms[i]
+                    _term = self.terms[i]

else:
names = []
@@ -451,36 +455,36 @@
value.append(selfval[r] * otherval[s])

return N.array(value)
-                    term = Term(names, func=func, termname=termname)
-                terms.append(term)
+                    _term = term(names, func=func, termname=termname)
+                terms.append(_term)

-        return Formula(terms)
+        return formula(terms)

"""
-        Return a Formula whose columns are the
+        Return a formula whose columns are the
concatenation of the columns of self and other.

-        Terms in the formula are sorted alphabetically.
+        terms in the formula are sorted alphabetically.
"""

-        other = Formula(other)
+        other = formula(other)
terms = self.terms + other.terms
pieces = [(term.name, term) for term in terms]
pieces.sort()
terms = [piece[1] for piece in pieces]
-        return Formula(terms)
+        return formula(terms)

def __sub__(self, other):

"""
-        Return a Formula with all terms in other removed from self.
-        If other contains Term instances not in Formula, this
+        Return a formula with all terms in other removed from self.
+        If other contains term instances not in formula, this
function does not raise an exception.
"""

-        other = Formula(other)
+        other = formula(other)
terms = copy.copy(self.terms)

for term in other.terms:
@@ -488,7 +492,7 @@
if terms[i].termname == term.termname:
terms.pop(i)
break
-        return Formula(terms)
+        return formula(terms)

def isnested(A, B, namespace=globals()):
"""
@@ -524,9 +528,9 @@

def _intercept_fn(nrow=1, **extra):
return N.ones((1,nrow))
-I = Term('intercept', func=_intercept_fn)
+I = term('intercept', func=_intercept_fn)
I.__doc__ = """
-Intercept term in a Formula. If intercept is the
+Intercept term in a formula. If intercept is the
only term in the formula, then a keywords argument
\'nrow\' is needed.

@@ -536,7 +540,7 @@
>>> I(nrow=5)
array([1, 1, 1, 1, 1])

->>> f=Formula(I)
+>>> f=formula(I)
>>> f(nrow=5)
array([1, 1, 1, 1, 1])

Modified: trunk/Lib/sandbox/models/tests/test_regression.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_regression.py	2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/tests/test_regression.py	2006-11-07 21:50:20 UTC (rev 2308)
@@ -1,6 +1,6 @@
import unittest
from numpy.random import standard_normal
-from scipy.sandbox.models.regression import OLSModel, ARModel
+from scipy.sandbox.models.regression import ols_model, ar_model
from numpy.testing import *

W = standard_normal
@@ -10,14 +10,14 @@
def testOLS(self):
X = W((40,10))
Y = W((40,))
-        model = OLSModel(design=X)
+        model = ols_model(design=X)
results = model.fit(Y)
self.assertEquals(results.df_resid, 30)

def testAR(self):
X = W((40,10))
Y = W((40,))
-        model = ARModel(design=X, rho=0.4)
+        model = ar_model(design=X, rho=0.4)
results = model.fit(Y)
self.assertEquals(results.df_resid, 30)

@@ -25,7 +25,7 @@
X = W((40,10))
X[:,0] = X[:,1] + X[:,2]
Y = W((40,))
-        model = OLSModel(design=X)
+        model = ols_model(design=X)
results = model.fit(Y)
self.assertEquals(results.df_resid, 31)

@@ -33,14 +33,10 @@
X = W((40,10))
X[:,0] = X[:,1] + X[:,2]
Y = W((40,))
-        model = ARModel(design=X, rho=0.9)
+        model = ar_model(design=X, rho=0.9)
results = model.fit(Y)
self.assertEquals(results.df_resid, 31)

-def suite():
-    suite = unittest.makeSuite(RegressionTest)
-    return suite
-

if __name__ == '__main__':
ScipyTest.run()

Modified: trunk/Lib/sandbox/models/tests/test_robust.py
===================================================================
--- trunk/Lib/sandbox/models/tests/test_robust.py	2006-11-07 09:07:45 UTC (rev 2307)
+++ trunk/Lib/sandbox/models/tests/test_robust.py	2006-11-07 21:50:20 UTC (rev 2308)
@@ -1,4 +1,4 @@
-import models as S
+import scipy.sandbox.models as S
import unittest
import numpy.random as R
import numpy as N
@@ -10,7 +10,7 @@
def testRobust(self):
X = W((40,10))
Y = W((40,))
-        model = S.rlm.RobustLinearModel(design=X)
+        model = S.rlm(design=X)
results = model.fit(Y)
self.assertEquals(results.df_resid, 30)

@@ -18,15 +18,10 @@
X = W((40,10))
X[:,0] = X[:,1] + X[:,2]
Y = W((40,))
-        model = S.rlm.RobustLinearModel(design=X)
+        model = S.rlm(design=X)
results = model.fit(Y)
self.assertEquals(results.df_resid, 31)

-def suite():
-    suite = unittest.makeSuite(RegressionTest)
-    return suite
-
-
if __name__ == '__main__':
unittest.main()

```