[Numpy-discussion] labeled array

Keith Goodman kwgoodman@gmail....
Tue May 6 11:44:33 CDT 2008


I'm trying to design a labeled array class. A labeled array contains a
2d array and two lists. One list labels the rows of the array (e.g.
variable names) and another list labels the columns of the array (e.g.
dates).

You can sum (or multiply, divide, subtract, etc.) two labeled arrays
that have different shapes. The shape of the sum will be NxM where N
is the number of labeled rows the two arrays have in common and M is
the number of labeled columns the two arrays have in common.

Does something like this already exist?

If not, should I be building this on top of the array class? I'm new
to OO programming so I don't know what I mean by "on top of". Anyway,
here's a prototype:

Oh, one last question. Does anyone see a way to speed up the __align
method? The __align method reduces two labeled arrays to the rows and
columns they have in common, which is the shape the sum (or product,
etc.) will have.

import numpy as np

class Larry:
    """Meet Larry, he's a labeled 2d array.

    A Larry holds a 2d numpy array ``x`` and two lists of unique labels:
    ``row`` labels the rows of ``x`` and ``col`` labels its columns.

    Binary arithmetic (``+``, ``-``, ``*``, ``/``) between two Larrys works
    on the labels they have in common: the result has one row per shared row
    label and one column per shared column label, both in sorted order, and
    its data is a new float array (the operands are left untouched).
    """

    def __init__(self, x, row, col):
        """
        x    len(row) by len(col) 2d array
        row  list of row names, such as variable names, no duplicates
        col  list of column names, such as dates, no duplicates

        The three arguments are stored as given (no copies are made).
        Raises AssertionError if the shapes disagree or a label repeats.
        """
        lrow = len(row)
        lcol = len(col)
        assert x.shape[0] == lrow, 'Number of x rows must equal length of row'
        assert x.shape[1] == lcol, (
            'Number of x columns must equal length of col')
        assert len(frozenset(row)) == lrow, 'row elements must be unique'
        assert len(frozenset(col)) == lcol, 'col elements must be unique'
        self.x = x
        self.row = row
        self.col = col

    def log(self):
        "In-place log"
        # The second positional argument is the ufunc output array.
        np.log(self.x, self.x)

    def exp(self):
        "In-place exp"
        # The second positional argument is the ufunc output array.
        np.exp(self.x, self.x)

    def sum(self, axis=None, dtype=None):
        "Returns sum of x"
        return self.x.sum(axis, dtype)

    def cumsum(self, axis=1, dtype=None):
        "In-place cumsum over axis 0 or 1"
        assert axis in (0, 1), 'axis must be 0 or 1'
        self.x = self.x.cumsum(axis, dtype, out=self.x)

    def __neg__(self):
        "Return a new Larry holding -x; labels are copied."
        # Unary minus already allocates a new array, so no explicit copy.
        return Larry(-self.x, list(self.row), list(self.col))

    def __pos__(self):
        "Return a new Larry holding a copy of x; labels are copied."
        return Larry(self.x.copy(), list(self.row), list(self.col))

    def abs(self):
        "In-place absolute value of x"
        self.x = np.absolute(self.x, self.x)

    def __abs__(self):
        "Return a new Larry holding abs(x); labels are copied."
        # np.abs without an output argument returns a new array.
        return Larry(np.abs(self.x), list(self.row), list(self.col))

    def __eq__(self, other):
        """
        True when both Larrys have the same row labels, the same column
        labels and equal data under each (row, col) label pair.

        The order of the labels does not matter: other's data is reordered
        to self's label order before the element-wise comparison. The
        comparison itself is numpy's ``==``, so a NaN never equals a NaN.
        """
        if not isinstance(other, Larry):
            return NotImplemented
        # Different label sets (hence possibly different shapes) can never
        # be equal; bail out before touching the data.
        if frozenset(self.row) != frozenset(other.row):
            return False
        if frozenset(self.col) != frozenset(other.col):
            return False
        y = self.__subarray(other, self.row, self.col)
        return bool((self.x == y).all())

    def __ne__(self, other):
        "Negation of __eq__ (Python 2 does not derive it automatically)."
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __add__(self, other):
        "Element-wise sum over the labels common to self and other."
        x, y, row, col = self.__align(other)
        x += y
        return Larry(x, row, col)

    def __sub__(self, other):
        "Element-wise difference over the labels common to self and other."
        x, y, row, col = self.__align(other)
        x -= y
        return Larry(x, row, col)

    def __div__(self, other):
        "Element-wise quotient over the labels common to self and other."
        # x is a float array (see __align), so this is always true division.
        x, y, row, col = self.__align(other)
        x /= y
        return Larry(x, row, col)

    # The / operator calls __truediv__ on Python 3 and under
    # "from __future__ import division"; __div__ already divides floats.
    __truediv__ = __div__

    def __mul__(self, other):
        "Element-wise product over the labels common to self and other."
        x, y, row, col = self.__align(other)
        x *= y
        return Larry(x, row, col)

    @staticmethod
    def __subarray(larry, row, col):
        """
        Return the data of larry under the given row and col labels, in the
        given order. Every label must be present in larry.
        """
        # One label -> position table per axis replaces a list.index scan
        # for every requested label.
        rpos = dict(zip(larry.row, range(len(larry.row))))
        cpos = dict(zip(larry.col, range(len(larry.col))))
        ridx = [rpos[label] for label in row]
        cidx = [cpos[label] for label in col]
        return larry.x[np.ix_(ridx, cidx)]

    def __align(self, other):
        """
        Reduce self and other to the labels they have in common.

        Returns (x, y, row, col): row and col are the sorted common labels,
        x is a new float array with self's data under those labels and y is
        other's data under the same labels.
        """
        row = sorted(frozenset(self.row) & frozenset(other.row))
        col = sorted(frozenset(self.col) & frozenset(other.col))
        # Accumulate into zeros so that x is always a fresh float array the
        # callers can safely update in place, whatever the dtype of self.x.
        x = np.zeros((len(row), len(col)))
        x += self.__subarray(self, row, col)
        y = self.__subarray(other, row, col)
        return x, y, row, col

def example():
    """
    Build two Larrys of different shapes, add them and print all three.

    a is 2x2 and b is 3x3; c = a + b holds only the row and column labels
    that a and b share.
    """

    def show(name, larry):
        # Print the row labels, column labels and data of larry, each one
        # preceded by a blank line and a 'name.attribute' heading.
        for attr in ('row', 'col', 'x'):
            print('')
            print('%s.%s' % (name, attr))
            print(getattr(larry, attr))

    # Every print below takes exactly one parenthesised argument, so it
    # behaves the same as a Python 2 print statement and as the Python 3
    # print function. print('') stands in for a bare "print".

    x = np.array([[1, np.nan], [3, 4]])
    row = ['one', 'three']
    col = [1, 2]
    a = Larry(x, row, col)

    x = np.array([[5, 6, 7], [8, 9, 10], [11, 12, 13]])
    row = ['one', 'two', 'three']
    col = [1, 2, 3]
    b = Larry(x, row, col)

    c = a + b

    show('a', a)

    print('')
    print('----------')

    show('b', b)

    print('')
    print('----------')
    print('c = a + b')
    print('----------')

    show('c', c)


More information about the Numpy-discussion mailing list