[Scipy-svn] r3933 - in trunk/scipy/stats/models: . tests

scipy-svn@scip... scipy-svn@scip...
Tue Feb 12 23:04:06 CST 2008


Author: jonathan.taylor
Date: 2008-02-12 23:04:04 -0600 (Tue, 12 Feb 2008)
New Revision: 3933

Modified:
   trunk/scipy/stats/models/contrast.py
   trunk/scipy/stats/models/formula.py
   trunk/scipy/stats/models/tests/test_formula.py
Log:
made namespace conventions less ambiguous

Modified: trunk/scipy/stats/models/contrast.py
===================================================================
--- trunk/scipy/stats/models/contrast.py	2008-02-13 02:39:15 UTC (rev 3932)
+++ trunk/scipy/stats/models/contrast.py	2008-02-13 05:04:04 UTC (rev 3933)
@@ -1,3 +1,5 @@
+import copy
+
 import numpy as N
 from numpy.linalg import pinv
 from scipy.stats.models import utils
@@ -78,8 +80,9 @@
         then evaldesign can be set to False.
         """
 
-        self.term.namespace = self.formula.namespace
-        T = N.transpose(N.array(self.term(*args, **kw)))
+        t = copy.copy(self.term)
+        t.namespace = self.formula.namespace
+        T = N.transpose(N.array(t(*args, **kw)))
 
         if T.ndim == 1:
             T.shape = (T.shape[0], 1)

Modified: trunk/scipy/stats/models/formula.py
===================================================================
--- trunk/scipy/stats/models/formula.py	2008-02-13 02:39:15 UTC (rev 3932)
+++ trunk/scipy/stats/models/formula.py	2008-02-13 05:04:04 UTC (rev 3933)
@@ -22,6 +22,31 @@
     defaults to formula.default_namespace.
     When called in an instance of formula,
     the namespace used is that formula's namespace.
+
+    Inheritance of the namespace under +,*,- operations:
+    ----------------------------------------------------
+
+    By default, the namespace is empty, which means it must be
+    specified before evaluating the design matrix. 
+
+    When it is unambiguous, the namespaces of objects are derived from the
+    context. 
+
+    Rules:
+    ------
+
+    i) "X * I", "X + I", "X**i": these inherit X's namespace
+    ii) "F.main_effect()": this inherits the Factor F's namespace
+    iii) "A-B": this inherits A's namespace
+    iv) if A.namespace == B.namespace, then A+B inherits this namespace
+    v) if A.namespace == B.namespace, then A*B inherits this namespace
+
+    Equality of namespaces:
+    -----------------------
+
+    This is done by comparing the namespaces directly, if
+    an exception is raised in the check of equality, they are
+    assumed not to be equal.
     """
 
     def __pow__(self, power):
@@ -80,9 +105,10 @@
         """
         Formula(self) + Formula(other)
         """
-        other = Formula(other, namespace=self.namespace)
+        other = Formula(other)
         f = other + self
-        f.namespace = self.namespace
+        if _namespace_equal(other.namespace, self.namespace):
+            f.namespace = self.namespace
         return f
 
     def __mul__(self, other):
@@ -95,9 +121,10 @@
         elif self.name is 'intercept':
             f = Formula(other, namespace=other.namespace)
         else:
-            other = Formula(other, namespace=self.namespace)
+            other = Formula(other, namespace=other.namespace)
             f = other * self
-        f.namespace = self.namespace
+        if _namespace_equal(other.namespace, self.namespace):
+            f.namespace = self.namespace
         return f
 
     def names(self):
@@ -124,7 +151,8 @@
         else:
             val = self.func
         if callable(val):
-            if hasattr(val, "namespace"):
+            if isinstance(val, (Term, Formula)):
+                val = copy.copy(val)
                 val.namespace = self.namespace
             val = val(*args, **kw)
 
@@ -172,7 +200,8 @@
         v = self.namespace[self._name]
         while True:
             if callable(v):
-                if hasattr(v, "namespace"):
+                if isinstance(v, (Term, Formula)):
+                    v = copy.copy(v)
                     v.namespace = self.namespace
                 v = v(*args, **kw)
             else: break
@@ -376,7 +405,7 @@
         intercept = False
         iindex = 0
         for t in self.terms:
-
+            t = copy.copy(t)
             t.namespace = namespace
             val = t(*args, **kw)
 
@@ -414,7 +443,7 @@
             else:
                 allvals = I(nrow=nrow)
                 allvals.shape = (1,) + allvals.shape
-        return allvals
+        return N.squeeze(allvals)
 
     def hasterm(self, query_term):
         """
@@ -495,7 +524,7 @@
         TO DO: check for nesting relationship. Should not be too difficult.
         """
 
-        other = Formula(other, namespace=self.namespace)
+        other = Formula(other)
 
         selftermnames = self.termnames()
         othertermnames = other.termnames()
@@ -520,7 +549,6 @@
                 if self.terms[i].name is 'intercept':
                     _term = other.terms[j]
                     _term.namespace = other.namespace
-
                 elif other.terms[j].name is 'intercept':
                     _term = self.terms[i]
                     _term.namespace = self.namespace
@@ -548,16 +576,17 @@
 
                     sumterms = self + other
                     sumterms.terms = [self, other] # enforce the order we want
-                    sumterms.namespace = self.namespace
 
-                    _term = Quantitative(names, func=sumterms, termname=termname,
+                    _term = Quantitative(names, func=sumterms,
+                                         termname=termname,
                                          transform=product_func)
-                    _term.namespace = self.namespace
 
+                    if _namespace_equal(self.namespace, other.namespace):
+                        _term.namespace = self.namespace
 
                 terms.append(_term)
 
-        return Formula(terms, namespace=self.namespace)
+        return Formula(terms)
 
     def __add__(self, other):
 
@@ -568,12 +597,15 @@
         terms in the formula are sorted alphabetically.
         """
 
-        other = Formula(other, namespace=self.namespace)
+        other = Formula(other)
         terms = self.terms + other.terms
         pieces = [(term.name, term) for term in terms]
         pieces.sort()
         terms = [piece[1] for piece in pieces]
-        return Formula(terms, namespace=self.namespace)
+        f = Formula(terms)
+        if _namespace_equal(self.namespace, other.namespace):
+            f.namespace = self.namespace
+        return f
 
     def __sub__(self, other):
 
@@ -583,7 +615,7 @@
         function does not raise an exception.
         """
 
-        other = Formula(other, namespace=self.namespace)
+        other = Formula(other)
         terms = copy.copy(self.terms)
 
         for term in other.terms:
@@ -591,9 +623,11 @@
                 if terms[i].termname == term.termname:
                     terms.pop(i)
                     break
-        return Formula(terms, namespace=self.namespace)
+        f = Formula(terms)
+        f.namespace = self.namespace
+        return f
 
-def isnested(A, B, namespace=globals()):
+def isnested(A, B, namespace=None):
     """
     Is factor B nested within factor A or vice versa: a very crude test
     which depends on the namespace.
@@ -603,9 +637,13 @@
 
     """
 
-    a = A(namespace, values=True)[0]
-    b = B(namespace, values=True)[0]
+    if namespace is not None:
+        A = copy.copy(A); A.namespace = namespace
+        B = copy.copy(B); B.namespace = namespace
 
+    a = A(values=True)[0]
+    b = B(values=True)[0]
+
     if len(a) != len(b):
         raise ValueError, 'A() and B() should be sequences of the same length'
 
@@ -645,17 +683,25 @@
 
 """
 
-def interactions(terms, order=2):
+def interactions(terms, order=[1,2]):
     """
-    Output all pairwise interactions up to a given order of a
+    Output all pairwise interactions of given order of a
     sequence of terms.
 
-    If order is greater than len(terms), it is treated as len(terms).
+    The argument order is a sequence specifying which order
+    of interactions should be generated -- the default
+    creates main effects and two-way interactions. If order
+    is an integer, it is changed to range(1,order+1), so
+    order=3 is equivalent to order=[1,2,3], generating
+    all one, two and three-way interactions.
+    
+    If any entry of order is greater than len(terms), it is
+    effectively treated as len(terms).
 
     >>> print interactions([Term(l) for l in ['a', 'b', 'c']])
     <formula: a*b + a*c + b*c + a + b + c>
     >>>
-    >>> print interactions([Term(l) for l in ['a', 'b', 'c']], order=5)
+    >>> print interactions([Term(l) for l in ['a', 'b', 'c']], order=range(5))
     <formula: a*b + a*b*c + a*c + b*c + a + b + c>
     >>>
 
@@ -664,10 +710,13 @@
 
     values = {}
 
+    if N.asarray(order).shape == ():
+        order = range(1, int(order)+1)
+
     # First order
 
-    for o in range(order):
-        I = N.indices((l,)*(o+1))
+    for o in order:
+        I = N.indices((l,)*(o))
         I.shape = (I.shape[0], N.product(I.shape[1:]))
         for m in range(I.shape[1]):
 
@@ -681,8 +730,15 @@
                     v *= ll[ii+1]
                 values[tuple(I[:,m])] = v
 
-    value = values[(0,)]; del(values[(0,)])
+    key = values.keys()[0]
+    value = values[key]; del(values[key])
     
     for v in values.values():
         value += v
     return value
+
+def _namespace_equal(space1, space2):
+    try:
+        return space1 == space2
+    except:
+        return False

Modified: trunk/scipy/stats/models/tests/test_formula.py
===================================================================
--- trunk/scipy/stats/models/tests/test_formula.py	2008-02-13 02:39:15 UTC (rev 3932)
+++ trunk/scipy/stats/models/tests/test_formula.py	2008-02-13 05:04:04 UTC (rev 3933)
@@ -66,6 +66,7 @@
     def test_namespace(self):
         space1 = {'X':N.arange(50), 'Y':N.arange(50)*2}
         space2 = {'X':N.arange(20), 'Y':N.arange(20)*2}
+        space3 = {'X':N.arange(30), 'Y':N.arange(30)*2}
         X = formula.Term('X')
         Y = formula.Term('Y')
 
@@ -79,15 +80,44 @@
 
         f.namespace = space1
         self.assertEqual(f().shape, (2,50))
-        assert_almost_equal(Y(), N.arange(50)*2)
+        assert_almost_equal(Y(), N.arange(20)*2)
         assert_almost_equal(X(), N.arange(50))
 
         f.namespace = space2
         self.assertEqual(f().shape, (2,20))
         assert_almost_equal(Y(), N.arange(20)*2)
-        assert_almost_equal(X(), N.arange(20))
+        assert_almost_equal(X(), N.arange(50))
 
+        f.namespace = space3
+        self.assertEqual(f().shape, (2,30))
+        assert_almost_equal(Y(), N.arange(20)*2)
+        assert_almost_equal(X(), N.arange(50))
 
+        xx = X**2
+        self.assertEqual(xx().shape, (50,))
+
+        xx.namespace = space3
+        self.assertEqual(xx().shape, (30,))
+
+        xx = X * formula.I
+        self.assertEqual(xx().shape, (50,))
+        xx.namespace = space3
+        self.assertEqual(xx().shape, (30,))
+
+        xx = X * X
+        self.assertEqual(xx.namespace, X.namespace)
+
+        xx = X + Y
+        self.assertEqual(xx.namespace, {})
+
+        Y.namespace = {'X':N.arange(50), 'Y':N.arange(50)*2}
+        xx = X + Y
+        self.assertEqual(xx.namespace, {})
+
+        Y.namespace = X.namespace
+        xx = X+Y
+        self.assertEqual(xx.namespace, Y.namespace)
+
     def test_termcolumns(self):
         t1 = formula.Term("A")
         t2 = formula.Term("B")
@@ -112,33 +142,30 @@
         self.assertEquals(x.shape, (40, 10))
 
     def test_product(self):
-        prod = self.terms[0] * self.terms[2]
-        self.formula += prod
-        x = self.formula.design()
-        p = self.formula['A*C']
-        col = self.formula.termcolumns(prod, dict=False)
+        prod = self.formula['A'] * self.formula['C']
+        f = self.formula + prod
+        f.namespace = self.namespace
+        x = f.design()
+        p = f['A*C']
+        p.namespace = self.namespace
+        col = f.termcolumns(prod, dict=False)
         assert_almost_equal(N.squeeze(x[:,col]), self.X[:,0] * self.X[:,2])
         assert_almost_equal(N.squeeze(p()), self.X[:,0] * self.X[:,2])
 
     def test_intercept1(self):
         prod = self.terms[0] * self.terms[2]
-        self.formula += formula.I
-        icol = self.formula.names().index('intercept')
-        assert_almost_equal(self.formula()[icol], N.ones((40,)))
+        f = self.formula + formula.I
+        icol = f.names().index('intercept')
+        f.namespace = self.namespace
+        assert_almost_equal(f()[icol], N.ones((40,)))
 
-    def test_intercept2(self):
-        prod = self.terms[0] * self.terms[2]
-        self.formula += formula.I
-        icol = self.formula.names().index('intercept')
-        assert_almost_equal(self.formula()[icol], N.ones((40,)))
-
     def test_intercept3(self):
-        prod = self.terms[0] * formula.I
+        t = self.formula['A']
+        t.namespace = self.namespace
+        prod = t * formula.I
         prod.namespace = self.formula.namespace
-        assert_almost_equal(N.squeeze(prod()), self.terms[0]())
+        assert_almost_equal(N.squeeze(prod()), t())
 
-
-
     def test_contrast1(self):
         term = self.terms[0] + self.terms[2]
         c = contrast.Contrast(term, self.formula)
@@ -202,6 +229,7 @@
         fac = formula.Factor('ff', f)
         fac.namespace = {'ff':f}
         m = fac.main_effect(reference=1)
+        m.namespace = fac.namespace
         self.assertEquals(m().shape, (2,30))
 
     def test_factor4(self):
@@ -209,6 +237,7 @@
         fac = formula.Factor('ff', f)
         fac.namespace = {'ff':f}
         m = fac.main_effect(reference=2)
+        m.namespace = fac.namespace
         r = N.array([N.identity(3)]*10)
         r.shape = (30,3)
         r = r.T
@@ -251,7 +280,7 @@
     def test_contrast4(self):
 
         f = self.formula + self.terms[5] + self.terms[5]
-
+        f.namespace = self.namespace
         estimable = False
 
         c = contrast.Contrast(self.terms[5], f)
@@ -267,6 +296,12 @@
         f = formula.interactions([formula.Term(l) for l in ['a', 'b', 'c', 'd']], order=3)
         assert_equal(set(f.termnames()), set(['a', 'b', 'c', 'd', 'a*b', 'a*c', 'a*d', 'b*c', 'b*d', 'c*d', 'a*b*c', 'a*c*d', 'a*b*d', 'b*c*d']))
 
+        f = formula.interactions([formula.Term(l) for l in ['a', 'b', 'c', 'd']], order=[1,2,3])
+        assert_equal(set(f.termnames()), set(['a', 'b', 'c', 'd', 'a*b', 'a*c', 'a*d', 'b*c', 'b*d', 'c*d', 'a*b*c', 'a*c*d', 'a*b*d', 'b*c*d']))
+
+        f = formula.interactions([formula.Term(l) for l in ['a', 'b', 'c', 'd']], order=[3])
+        assert_equal(set(f.termnames()), set(['a*b*c', 'a*c*d', 'a*b*d', 'b*c*d']))
+
     def test_subtract(self):
         f = formula.interactions([formula.Term(l) for l in ['a', 'b', 'c']])
         ff = f - f['a*b']



More information about the Scipy-svn mailing list