[Scipy-svn] r2198 - in trunk/Lib/io: . tests tests/data

scipy-svn at scipy.org scipy-svn at scipy.org
Thu Sep 14 04:32:00 CDT 2006


Author: stefan
Date: 2006-09-14 04:29:55 -0500 (Thu, 14 Sep 2006)
New Revision: 2198

Added:
   trunk/Lib/io/bytestream.py
   trunk/Lib/io/mio4.py
   trunk/Lib/io/mio5.py
   trunk/Lib/io/miobase.py
   trunk/Lib/io/tests/data/japanese_utf8.txt
   trunk/Lib/io/tests/data/test3dmatrix_6.1_SOL2.mat
   trunk/Lib/io/tests/data/test3dmatrix_7.1_GLNX86.mat
   trunk/Lib/io/tests/data/testcell_6.1_SOL2.mat
   trunk/Lib/io/tests/data/testcell_7.1_GLNX86.mat
   trunk/Lib/io/tests/data/testcellnest_6.1_SOL2.mat
   trunk/Lib/io/tests/data/testcellnest_7.1_GLNX86.mat
   trunk/Lib/io/tests/data/testcomplex_4.2c_SOL2.mat
   trunk/Lib/io/tests/data/testcomplex_6.1_SOL2.mat
   trunk/Lib/io/tests/data/testcomplex_7.1_GLNX86.mat
   trunk/Lib/io/tests/data/testdouble_4.2c_SOL2.mat
   trunk/Lib/io/tests/data/testdouble_6.1_SOL2.mat
   trunk/Lib/io/tests/data/testdouble_7.1_GLNX86.mat
   trunk/Lib/io/tests/data/testmatrix_4.2c_SOL2.mat
   trunk/Lib/io/tests/data/testmatrix_6.1_SOL2.mat
   trunk/Lib/io/tests/data/testmatrix_7.1_GLNX86.mat
   trunk/Lib/io/tests/data/testminus_4.2c_SOL2.mat
   trunk/Lib/io/tests/data/testminus_6.1_SOL2.mat
   trunk/Lib/io/tests/data/testminus_7.1_GLNX86.mat
   trunk/Lib/io/tests/data/testmulti_4.2c_SOL2.mat
   trunk/Lib/io/tests/data/testmulti_7.1_GLNX86.mat
   trunk/Lib/io/tests/data/testobject_6.1_SOL2.mat
   trunk/Lib/io/tests/data/testobject_7.1_GLNX86.mat
   trunk/Lib/io/tests/data/testonechar_4.2c_SOL2.mat
   trunk/Lib/io/tests/data/testonechar_6.1_SOL2.mat
   trunk/Lib/io/tests/data/testonechar_7.1_GLNX86.mat
   trunk/Lib/io/tests/data/testsparse_4.2c_SOL2.mat
   trunk/Lib/io/tests/data/testsparse_6.1_SOL2.mat
   trunk/Lib/io/tests/data/testsparse_7.1_GLNX86.mat
   trunk/Lib/io/tests/data/testsparsecomplex_4.2c_SOL2.mat
   trunk/Lib/io/tests/data/testsparsecomplex_6.1_SOL2.mat
   trunk/Lib/io/tests/data/testsparsecomplex_7.1_GLNX86.mat
   trunk/Lib/io/tests/data/teststring_4.2c_SOL2.mat
   trunk/Lib/io/tests/data/teststring_6.1_SOL2.mat
   trunk/Lib/io/tests/data/teststring_7.1_GLNX86.mat
   trunk/Lib/io/tests/data/teststringarray_4.2c_SOL2.mat
   trunk/Lib/io/tests/data/teststringarray_6.1_SOL2.mat
   trunk/Lib/io/tests/data/teststringarray_7.1_GLNX86.mat
   trunk/Lib/io/tests/data/teststruct_6.1_SOL2.mat
   trunk/Lib/io/tests/data/teststruct_7.1_GLNX86.mat
   trunk/Lib/io/tests/data/teststructarr_6.1_SOL2.mat
   trunk/Lib/io/tests/data/teststructarr_7.1_GLNX86.mat
   trunk/Lib/io/tests/data/teststructnest_6.1_SOL2.mat
   trunk/Lib/io/tests/data/teststructnest_7.1_GLNX86.mat
   trunk/Lib/io/tests/data/testunicode_7.1_GLNX86.mat
   trunk/Lib/io/tests/gen_unittests.m
   trunk/Lib/io/tests/gen_unittests4.m
   trunk/Lib/io/tests/save_test.m
Modified:
   trunk/Lib/io/mio.py
   trunk/Lib/io/tests/data/test3dmatrix_6.5.1_GLNX86.mat
   trunk/Lib/io/tests/data/testcell_6.5.1_GLNX86.mat
   trunk/Lib/io/tests/data/testcellnest_6.5.1_GLNX86.mat
   trunk/Lib/io/tests/data/testcomplex_6.5.1_GLNX86.mat
   trunk/Lib/io/tests/data/testdouble_6.5.1_GLNX86.mat
   trunk/Lib/io/tests/data/testmatrix_6.5.1_GLNX86.mat
   trunk/Lib/io/tests/data/testminus_6.5.1_GLNX86.mat
   trunk/Lib/io/tests/data/testobject_6.5.1_GLNX86.mat
   trunk/Lib/io/tests/data/testonechar_6.5.1_GLNX86.mat
   trunk/Lib/io/tests/data/testsparse_6.5.1_GLNX86.mat
   trunk/Lib/io/tests/data/testsparsecomplex_6.5.1_GLNX86.mat
   trunk/Lib/io/tests/data/teststring_6.5.1_GLNX86.mat
   trunk/Lib/io/tests/data/teststringarray_6.5.1_GLNX86.mat
   trunk/Lib/io/tests/data/teststruct_6.5.1_GLNX86.mat
   trunk/Lib/io/tests/data/teststructarr_6.5.1_GLNX86.mat
   trunk/Lib/io/tests/data/teststructnest_6.5.1_GLNX86.mat
   trunk/Lib/io/tests/test_mio.py
Log:
Improved support for reading MATLAB files (contributed by Matthew Brett).


Added: trunk/Lib/io/bytestream.py
===================================================================
--- trunk/Lib/io/bytestream.py	2006-09-12 20:49:35 UTC (rev 2197)
+++ trunk/Lib/io/bytestream.py	2006-09-14 09:29:55 UTC (rev 2198)
@@ -0,0 +1,69 @@
+# Author: Matthew Brett
+
+''' File-like interface for memmapped array '''
+
+from numpy import *
+
+class ByteStream(object):
+    ''' Overlays file-like interface on memmapped array
+
+    This may speed up array reading from files
+    
+    @byte_array         - uint8 array or string containing bytes
+    '''
+
+    def __init__(self, byte_array):
+        if isinstance(byte_array, ndarray):
+            if not byte_array.dtype == uint8:
+                raise ValueError, 'Need uint8 byte array as array input'
+            self.bytes = byte_array
+        elif isinstance(byte_array, basestring):
+            self.bytes = ndarray(
+                shape=(len(byte_array)),
+                dtype=uint8,
+                buffer=byte_array)
+        else:
+            raise ValueError, "Need string or byte array as input"
+        self.array_len = len(byte_array)
+        self.seek(0)
+
+    # current file position
+    def get_pos(self):
+        return self._pos
+    def set_pos(self, offset):
+        if offset < 0:
+            raise IOError, 'Invalid argument'
+        self._pos = offset
+    pos = property(get_pos,
+                   set_pos,
+                   None,
+                   'get/set current position')
+    
+    def seek(self, offset, whence=0):
+        """ Method emulates seek method of file objects """
+        if whence == 0:
+            self.pos = offset
+        elif whence == 1: # seek relative to the current position
+            self.pos += offset
+        elif whence == 2: # relative to end
+            self.pos = self.array_len + offset
+        else:
+            raise ValueError, 'Invalid value %d for whence parameter' % whence
+
+    def tell(self):
+        return self.pos
+    
+    def read(self, num_bytes=-1):
+        if num_bytes < 0:
+            num_bytes = self.array_len
+        if self.pos >= self.array_len:
+            return []
+        next_pos = min(self.pos + num_bytes, self.array_len)
+        res = self.bytes[self.pos:next_pos]
+        self.pos = next_pos
+        return res
+
+    def write(self, data):
+        assert False, 'Not implemented'
+        
+

Modified: trunk/Lib/io/mio.py
===================================================================
--- trunk/Lib/io/mio.py	2006-09-12 20:49:35 UTC (rev 2197)
+++ trunk/Lib/io/mio.py	2006-09-14 09:29:55 UTC (rev 2198)
@@ -1,790 +1,46 @@
-## Automatically adapted for scipy Oct 05, 2005 by convertcode.py
+# Authors: Travis Oliphant, Matthew Brett
 
-# Author: Travis Oliphant
+"""
+Module for reading and writing matlab .mat files
+"""
 
-import struct, os, sys
-import types
-from tempfile import mkstemp
-import zlib
+import os
+import sys
 
-from numpy import array, asarray, empty, obj2sctype, product, reshape, \
-    squeeze, transpose, zeros, vstack, ndarray, shape, diff, where, uint8, \
-    atleast_1d
-import numpyio
+from numpy import *
 
-try:
-    import scipy.sparse
-    have_sparse = 1
-except ImportError:
-    have_sparse = 0
+from bytestream import ByteStream
+from mio4 import MatFile4Reader, MatFile4Writer
+from mio5 import MatFile5Reader
 
-LittleEndian = (sys.byteorder == 'little')
 
-_unit_imag = {'f': array(1j,'F'), 'd': 1j}
-
-__all__ = ['fopen','loadmat','savemat']
-
-def getsize_type(mtype):
-    if mtype in ['B','uchar','byte','unsigned char','integer*1', 'int8']:
-        mtype = 'B'
-    elif mtype in ['S1', 'char', 'char*1']:
-        mtype = 'B'
-    elif mtype in ['b', 'schar', 'signed char']:
-        mtype = 'b'
-    elif mtype in ['h','short','int16','integer*2']:
-        mtype = 'h'
-    elif mtype in ['H','ushort','uint16','unsigned short']:
-        mtype = 'H'
-    elif mtype in ['i','int']:
-        mtype = 'i'
-    elif mtype in ['I','uint','uint32','unsigned int']:
-        mtype = 'I'
-    elif mtype in ['u4','int32','integer*4']:
-        mtype = 'u4'
-    elif mtype in ['f','float','float32','real*4', 'real']:
-        mtype = 'f'
-    elif mtype in ['d','double','float64','real*8', 'double precision']:
-        mtype = 'd'
-    elif mtype in ['F','complex float','complex*8','complex64']:
-        mtype = 'F'
-    elif mtype in ['D','complex*16','complex128','complex','complex double']:
-        mtype = 'D'
-    else:
-        mtype = obj2sctype(mtype)
-
-    newarr = empty((1,),mtype)
-    return newarr.itemsize, newarr.dtype.char
-
-class fopen(object):
-    """Class for reading and writing binary files into numpy arrays.
-
-    Inputs:
-
-      file_name -- The complete path name to the file to open.
-      permission -- Open the file with given permissions: ('r', 'H', 'a')
-                    for reading, writing, or appending.  This is the same
-                    as the mode argument in the builtin open command.
-      format -- The byte-ordering of the file:
-                (['native', 'n'], ['ieee-le', 'l'], ['ieee-be', 'B']) for
-                native, little-endian, or big-endian respectively.
-
-    Attributes (Read only):
-
-      bs -- non-zero if byte-swapping is performed on read and write.
-      format -- 'native', 'ieee-le', or 'ieee-be'
-      closed -- non-zero if the file is closed.
-      mode -- permissions with which this file was opened
-      name -- name of the file
-    """
-
-#    Methods:
-#
-#      read -- read data from file and return numpy array
-#      write -- write to file from numpy array
-#      fort_read -- read Fortran-formatted binary data from the file.
-#      fort_write -- write Fortran-formatted binary data to the file.
-#      rewind -- rewind to beginning of file
-#      size -- get size of file
-#      seek -- seek to some position in the file
-#      tell -- return current position in file
-#      close -- close the file
-
-    def __init__(self,file_name,permission='rb',format='n'):
-        if 'b' not in permission: permission += 'b'
-        if isinstance(file_name, basestring):
-            self.file = file(file_name, permission)
-        elif isinstance(file_name, file) and not file_name.closed:
-            # first argument is an open file
-            self.file = file_name
-        else:
-            raise TypeError, 'Need filename or open file as input'
-        self.setformat(format)
-        self.zbuffer = None
-        
-    def __del__(self):
-        try:
-            self.file.close()
-        except:
-            pass
-
-    def close(self):
-        self.file.close()
-
-    def seek(self, *args):
-        self.file.seek(*args)
-
-    def tell(self):
-        return self.file.tell()
-        
-    def raw_read(self, size=-1):
-        """Read raw bytes from file as string."""
-        return self.file.read(size)
-
-    def raw_write(self, str):
-        """Write string to file as raw bytes."""
-        return self.file.write(str)
-
-    def setformat(self, format):
-        """Set the byte-order of the file."""
-        if format in ['native','n','default']:
-            self.bs = False
-            self.format = 'native'
-        elif format in ['ieee-le','l','little-endian','le']:
-            self.bs = not LittleEndian
-            self.format = 'ieee-le'
-        elif format in ['ieee-be','B','big-endian','be']:
-            self.bs = LittleEndian
-            self.format = 'ieee-be'
-        else:
-            raise ValueError, "Unrecognized format: " + format
-        return
-
-    def write(self,data,mtype=None,bs=None):
-        """Write to open file object the flattened numpy array data.
-
-        Inputs:
-
-          data -- the numpy array to write.
-          mtype -- a string indicating the binary type to write.
-                   The default is the type of data. If necessary a cast is made.
-                   unsigned byte  : 'B', 'uchar', 'byte' 'unsigned char', 'int8',
-                                    'integer*1'
-                   character      : 'S1', 'char', 'char*1'
-                   signed char    : 'b', 'schar', 'signed char'
-                   short          : 'h', 'short', 'int16', 'integer*2'
-                   unsigned short : 'H', 'ushort','uint16','unsigned short'
-                   int            : 'i', 'int'
-                   unsigned int   : 'I', 'uint32','uint','unsigned int'
-                   int32           : 'u4', 'int32', 'integer*4'
-                   float          : 'f', 'float', 'float32', 'real*4'
-                   double         : 'd', 'double', 'float64', 'real*8'
-                   complex float  : 'F', 'complex float', 'complex*8', 'complex64'
-                   complex double : 'D', 'complex', 'complex double', 'complex*16',
-                                    'complex128'
-        """
-        if bs is None:
-            bs = self.bs
-        else:
-            bs = (bs == 1)
-        if isinstance(data, str):
-            N, buf = len(data), buffer(data)
-            data = ndarray(shape=(N,),dtype='B',buffer=buf)
-        else:
-            data = asarray(data)
-        if mtype is None:
-            mtype = data.dtype.char
-        howmany,mtype = getsize_type(mtype)
-        count = product(data.shape,axis=0)
-        numpyio.fwrite(self.file,count,data,mtype,bs)
-        return
-
-    fwrite = write
-
-    def read(self,count,stype,rtype=None,bs=None,c_is_b=0):
-        """Read data from file and return it in a numpy array.
-
-        Inputs:
-
-          count -- an integer specifying the number of elements of type
-                   stype to read or a tuple indicating the shape of
-                   the output array.
-          stype -- The data type of the stored data (see fwrite method).
-          rtype -- The type of the output array.  Same as stype if None.
-          bs -- Whether or not to byteswap (or use self.bs if None)
-          c_is_b --- If non-zero then the count is an integer
-                   specifying the total number of bytes to read
-                   (must be a multiple of the size of stype).
-
-        Outputs: (output,)
-
-          output -- a numpy array of type rtype.
-        """
-        if bs is None:
-            bs = self.bs
-        else:
-            bs = (bs == 1)
-        howmany,stype = getsize_type(stype)
-        shape = None
-        if c_is_b:
-            if count % howmany != 0:
-                raise ValueError, "When c_is_b is non-zero then " \
-                      "count is bytes\nand must be multiple of basic size."
-            count = count / howmany
-        elif type(count) in [types.TupleType, types.ListType]:
-            shape = list(count)
-            # allow -1 to specify unknown dimension size as in reshape
-            minus_ones = shape.count(-1)
-            if minus_ones == 0:
-                count = product(shape,axis=0)
-            elif minus_ones == 1:
-                now = self.tell()
-                self.seek(0,2)
-                end = self.tell()
-                self.seek(now)
-                remaining_bytes = end - now
-                know_dimensions_size = -product(count,axis=0) * getsize_type(stype)[0]
-                unknown_dimension_size, illegal = divmod(remaining_bytes,
-                                                         know_dimensions_size)
-                if illegal:
-                    raise ValueError("unknown dimension doesn't match filesize")
-                shape[shape.index(-1)] = unknown_dimension_size
-                count = product(shape,axis=0)
-            else:
-                raise ValueError(
-                    "illegal count; can only specify one unknown dimension")
-            shape = tuple(shape)
-        if rtype is None:
-            rtype = stype
-        else:
-            howmany,rtype = getsize_type(rtype)
-        if count == 0:
-            return zeros(0,rtype)
-        retval = numpyio.fread(self.file, count, stype, rtype, bs)
-        if shape is not None:
-            retval = resize(retval, shape)
-        return retval
-
-    fread = read
-
-    def rewind(self,howmany=None):
-        """Rewind a file to its beginning or by a specified amount.
-        """
-        if howmany is None:
-            self.seek(0)
-        else:
-            self.seek(-howmany,1)
-
-    def size(self):
-        """Return the size of the file.
-        """
-        try:
-            sz = self.thesize
-        except AttributeError:
-            curpos = self.tell()
-            self.seek(0,2)
-            sz = self.tell()
-            self.seek(curpos)
-            self.thesize = sz
-        return sz
-
-    def fort_write(self,fmt,*args):
-        """Write a Fortran binary record.
-
-        Inputs:
-
-          fmt -- If a string then it represents the same format string as
-                 used by struct.pack.  The remaining arguments are passed
-                 to struct.pack.
-
-                 If fmt is an array, then this array will be written as
-                 a Fortran record using the output type args[0].
-
-          *args -- Arguments representing data to write.
-        """
-        if self.format == 'ieee-le':
-            nfmt = "<i"
-        elif self.format == 'ieee-be':
-            nfmt = ">i"
-        else:
-            nfmt = "i"
-        if isinstance(fmt, basestring):
-            if self.format == 'ieee-le':
-                fmt = "<"+fmt
-            elif self.format == 'ieee-be':
-                fmt = ">"+fmt
-            str = apply(struct.pack,(fmt,)+args)
-            strlen = struct.pack(nfmt,len(str))
-            self.write(strlen)
-            self.write(str)
-            self.write(strlen)
-        elif type(fmt) == type(array([0])):
-            if len(args) > 0:
-                sz,mtype = getsize_type(args[0])
-            else:
-                sz,mtype = getsize_type(fmt.dtype.char)
-            count = product(fmt.shape,axis=0)
-            strlen = struct.pack(nfmt,count*sz)
-            self.write(strlen)
-            numpyio.fwrite(self.file,count,fmt,mtype,self.bs)
-            self.write(strlen)
-        else:
-            raise TypeError, "Unknown type in first argument"
-
-    def fort_read(self,fmt,dtype=None):
-        """Read a Fortran binary record.
-
-        Inputs:
-
-          fmt -- If dtype is not given this represents a struct.pack
-                 format string to interpret the next record.  Otherwise this
-                 argument is ignored.
-          dtype -- If dtype is not None, then read in the next record as
-                   an array of type dtype.
-
-        Outputs: (data,)
-
-          data -- If dtype is None, then data is a tuple containing the output
-                  of struct.unpack on the next Fortan record.
-                  If dtype is a datatype string, then the next record is
-                  read in as a 1-D array of type datatype.
-        """
-        lookup_dict = {'ieee-le':"<",'ieee-be':">",'native':''}
-        if dtype is None:
-            fmt = lookup_dict[self.format] + fmt
-            numbytes = struct.calcsize(fmt)
-            nn = struct.calcsize("i");
-            if (self.raw_read(nn) == ''):
-                raise ValueError, "Unexpected end of file..."
-            strdata = self.raw_read(numbytes)
-            if strdata == '':
-                raise ValueError, "Unexpected end of file..."
-            data = struct.unpack(fmt,strdata)
-            if (self.raw_read(nn) == ''):
-                raise ValueError, "Unexpected end of file..."
-            return data
-        else:  # Ignore format string and read in next record as an array.
-            fmt = lookup_dict[self.format] + "i"
-            nn = struct.calcsize(fmt)
-            nbytestr = self.raw_read(nn)
-            if nbytestr == '':
-                raise ValueError, "Unexpected end of file..."
-            nbytes = struct.unpack(fmt,nbytestr)[0]
-            howmany, dtype = getsize_type(dtype)
-            ncount = nbytes / howmany
-            if ncount*howmany != nbytes:
-                self.rewind(4)
-                raise ValueError, "A mismatch between the type requested and the data stored."
-            if ncount < 0:
-                raise ValueError, "Negative number of bytes to read:\n    file is probably not opened with correct endian-ness."
-            if ncount == 0:
-                raise ValueError, "End of file?  Zero-bytes to read."
-            retval = numpyio.fread(self.file, ncount, dtype, dtype, self.bs)
-            if len(retval) == 1:
-                retval = retval[0]
-            if (self.raw_read(nn) == ''):
-                raise ValueError, "Unexpected end of file..."
-            return retval
-        
-
-class CompressedFopen(fopen):
-    """ File container for temporary buffer to decompress data """
-    def __init__(self, *args, **kwargs):
-        fd, fname = mkstemp()
-        super(CompressedFopen, self).__init__(
-            os.fdopen(fd, 'w+b'), *args, **kwargs)
-        self.file_name = fname
-        
-    def fill(self, bytes):
-        """ Uncompress buffer in @bytes and write to file """
-        self.rewind()
-        self.raw_write(zlib.decompress(bytes))
-        self.rewind()
-
-    def __del__(self):
-        try:
-            self.file.truncate(0)
-        except:
-            pass
-        try:
-            self.close()
-        except:
-            pass
-        try:
-            os.remove(self.file_name)
-        except:
-            pass
-        
-#### MATLAB Version 5 Support ###########
-
-# Portions of code borrowed and (heavily) adapted
-#    from matfile.py by Heiko Henkelmann
-
-## Notice in matfile.py file
-
-# Copyright (c) 2003 Heiko Henkelmann
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to
-# deal in the Software without restriction, including without limitation the
-# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-# sell copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-
-class mat_struct:    # dummy structure holder
-    pass
-
-class mat_obj:    # dummy object holder
-    pass
-
-miINT8 = 1
-miUINT8 = 2
-miINT16 = 3
-miUINT16 = 4
-miINT32 = 5
-miUINT32 = 6
-miSINGLE = 7
-miDOUBLE = 9
-miINT64 =12
-miUINT64 = 13
-miMATRIX = 14
-miCOMPRESSED = 15
-miUTF8 = 16
-miUTF16 = 17
-miUTF32 = 18
-
-miNumbers = (
-    miINT8,
-    miUINT8,
-    miINT16,
-    miUINT16,
-    miINT32,
-    miUINT32,
-    miSINGLE,
-    miDOUBLE,
-    miINT64,
-    miUINT64,
-    )
-
-miDataTypes = {
-    miINT8 : ('miINT8', 1,'b'),
-    miUINT8 : ('miUINT8', 1,'B'),
-    miINT16 : ('miINT16', 2,'h'),
-    miUINT16 :('miUINT16',2,'H'),
-    miINT32 : ('miINT32',4,'u4'),
-    miUINT32 : ('miUINT32',4,'I'),
-    miSINGLE : ('miSINGLE',4,'f'),
-    miDOUBLE : ('miDOUBLE',8,'d'),
-    miINT64 : ('miINT64',8,'q'),
-    miUINT64 : ('miUINT64',8,'Q'),
-    miMATRIX : ('miMATRIX',0,None),
-    miUTF8 : ('miUTF8',1,'b'),
-    miUTF16 : ('miUTF16',2,'h'),
-    miUTF32 : ('miUTF32',4,'u4'),
-    }
-
-''' Before release v7.1 (release 14) matlab used the system default
-character encoding scheme padded out to 16-bits. Release 14 and later
-use Unicode. When saving character data, matlab R14 checks if it can
-be encoded in 7-bit ascii, and saves in that format if so.'''
-miCodecs = {
-    miUINT8: 'ascii',
-    miUINT16: sys.getdefaultencoding(),
-    miUTF8: 'utf8',
-    miUTF16: 'utf16',
-    miUTF32: 'utf32',
-    } 
-
-mxCELL_CLASS = 1
-mxSTRUCT_CLASS = 2
-mxOBJECT_CLASS = 3
-mxCHAR_CLASS = 4
-mxSPARSE_CLASS = 5
-mxDOUBLE_CLASS = 6
-mxSINGLE_CLASS = 7
-mxINT8_CLASS = 8
-mxUINT8_CLASS = 9
-mxINT16_CLASS = 10
-mxUINT16_CLASS = 11
-mxINT32_CLASS = 12
-mxUINT32_CLASS = 13
-
-mxArrays = (
-    mxCHAR_CLASS,
-    mxDOUBLE_CLASS,
-    mxSINGLE_CLASS,
-    mxINT8_CLASS,
-    mxUINT8_CLASS,
-    mxINT16_CLASS,
-    mxUINT16_CLASS,
-    mxINT32_CLASS,
-    mxUINT32_CLASS,
-    )
+def mat_reader_factory(file_name, append_mat=True):
+    """Create reader for matlab format files
     
-def _parse_header(fid, hdict):
-    correct_endian = (ord('M')<<8) + ord('I')
-                 # if this number is read no BS
-    fid.seek(126)  # skip to endian detector
-    endian_test = fid.read(1,'int16')
-    if (endian_test == correct_endian): openstr = 'n'
-    else:  # must byteswap
-        if LittleEndian:
-            openstr = 'B'
-        else: openstr = 'l'
-    fid.setformat(openstr)  # change byte-order if necessary
-    fid.rewind()
-    hdict['__header__'] = fid.raw_read(124).strip(' \t\n\000')
-    vers = fid.read(1,'int16')
-    hdict['__version__'] = '%d.%d' % (vers >> 8, vers & 0xFF)
-    fid.seek(2,1)  # move to start of data
-    return
+    If name is not a full path name, search for the file on the sys.path
+    list and use the first one found (the current directory is
+    searched first).
 
-def _skip_padding(fid, numbytes, rowsize):
-    """ Skip to next row or @rowsize after previous read of @numbytes """
-    mod = numbytes % rowsize
-    if mod:
-        skip = rowsize-mod
-        fid.seek(skip,1)
-
-def _parse_array_flags(fid):
-    # first 8 bytes are always miUINT32 and 8 --- just a check
-    dtype, nbytes = fid.read(2,'I')
-    if (dtype != miUINT32) or (nbytes != 8):
-        raise IOError, "Invalid MAT file. Perhaps a byte-order problem."
-
-    # read array flags.
-    rawflags = fid.read(2,'I')
-    class_ = rawflags[0] & 0xFF
-    flags = (rawflags[0] & 0xFFFF) >> 8
-    # Global and logical fields are currently ignored
-    if (flags & 8): cmplx = 1
-    else: cmplx = 0
-    if class_ == mxSPARSE_CLASS:
-        nzmax = rawflags[1]
-    else:
-        nzmax = None
-    return class_, cmplx, nzmax
-
-def _parse_mimatrix(fid,bytes):
-    dclass, cmplx, nzmax =_parse_array_flags(fid)
-    dims = _get_element(fid)
-    name = _get_element(fid).tostring()
-    tupdims = tuple(dims[::-1])
-    if dclass in mxArrays:
-        result, unused, dtype =_get_element(fid, return_name_dtype=True)
-        if dclass == mxCHAR_CLASS:
-            en = miCodecs[dtype]
-            try:
-                " ".encode(en)
-            except LookupError:
-                raise TypeError, 'Character encoding %s not supported' % en
-            if dtype == miUINT16:
-                char_len = len("  ".encode(en)) - len(" ".encode(en))
-                if char_len == 1: # Need to downsample from 16 bit
-                    result = result.astype(uint8)
-                elif char_len != 2:
-                    raise TypeError, 'miUNIT16 type cannot use >2 bytes encoding'
-            result = squeeze(transpose(reshape(result,tupdims)))
-            dims = result.shape
-            if len(dims) >= 2: # return array of strings
-                n_dims = dims[:-1]
-                string_arr = reshape(result, (product(n_dims,axis=0), dims[-1]))
-                result = empty(n_dims, dtype=object)
-                for i in range(0, n_dims[-1]):
-                    result[...,i] = string_arr[i].tostring().decode(en)
-            else: # return string
-                result = result.tostring().decode(en)
-        else:
-            if cmplx:
-                imag  =_get_element(fid)
-                try:
-                    result = result + _unit_imag[imag.dtype.char] * imag
-                except KeyError:
-                    result = result + 1j*imag
-            result = squeeze(transpose(reshape(result,tupdims)))
-            
-    elif dclass == mxCELL_CLASS:
-        length = product(dims,axis=0)
-        result = empty(length, dtype=object)
-        for i in range(length):
-            result[i] = _get_element(fid)
-        result = squeeze(transpose(reshape(result,tupdims)))
-        if not result.shape:
-            result = result.item()
-
-    elif dclass == mxSTRUCT_CLASS:
-        length = product(dims,axis=0)
-        result = zeros(length, object)
-        namelength = _get_element(fid)
-        # get field names
-        names = _get_element(fid)
-        splitnames = [names[i:i+namelength] for i in \
-                      xrange(0,len(names),namelength)]
-        fieldnames = [x.tostring().strip('\x00')
-                              for x in splitnames]
-        for i in range(length):
-            result[i] = mat_struct()
-            for element in fieldnames:
-                result[i].__dict__[element]  = _get_element(fid)
-        result = squeeze(transpose(reshape(result,tupdims)))
-        if not result.shape:
-            result = result.item()
-
-        # object is like a structure with but with a class name
-    elif dclass == mxOBJECT_CLASS:
-        class_name = _get_element(fid).tostring()
-        length = product(dims,axis=0)
-        result = zeros(length, object)
-        namelength = _get_element(fid)
-        # get field names
-        names = _get_element(fid)
-        splitnames = [names[i:i+namelength] for i in \
-                      xrange(0,len(names),namelength)]
-        fieldnames = [x.tostring().strip('\x00')
-                              for x in splitnames]
-        for i in range(length):
-            result[i] = mat_obj()
-            result[i]._classname = class_name
-            for element in fieldnames:
-                result[i].__dict__[element] = _get_element(fid)
-        result = squeeze(transpose(reshape(result,tupdims)))
-        if not result.shape:
-            result = result.item()
-
-    elif dclass == mxSPARSE_CLASS:
-        rowind  = _get_element(fid)
-        colind = _get_element(fid)
-        res = _get_element(fid)
-        if cmplx:
-            imag = _get_element(fid)
-            try:
-                res = res + _unit_imag[imag.dtype.char] * imag
-            except (KeyError,AttributeError):
-                res = res + 1j*imag
-        ''' From the matlab API documentation, last found here:
-        http://www.mathworks.com/access/helpdesk/help/techdoc/matlab_external/
-        @rowind are simply the row indices for all the (@res) non-zero
-        entries in the sparse array.  @rowind has nzmax entries, so
-        may well have more entries than len(@res), the actual number
-        of non-zero entries, but @rowind[len(res):] can be discarded
-        and should be 0. @colind has length (number of columns + 1),
-        and is such that, if D = diff(@colind), D[j] gives the number
-        of non-zero entries in column j. Because @rowind values are
-        stored in column order, this gives the column corresponding to
-        each @rowind
-        '''
-        cols = empty((len(res)), dtype=rowind.dtype)
-        col_counts = diff(colind)
-        start_row = 0
-        for i in where(col_counts)[0]:
-            end_row = start_row + col_counts[i]
-            cols[start_row:end_row] = i
-            start_row = end_row
-        ij = vstack((rowind[:len(res)], cols))
-        if have_sparse:
-            result = scipy.sparse.csc_matrix((res,ij), [dims[0],dims[1]])
-        else:
-            result = (dims, ij, res)
-
-    return result, name
-
-# Return a Python object for the element
-def _get_element(fid, return_name_dtype=False):
-    """ Return a python object from next element in @fid
-
-    @fid    - fopen object for matfile
-    @return_name_dtype - if True, return tuple of (element, name, dtype)
-                         if False, return element only
-    """
-    name = None
-    test = fid.raw_read(1)
-    if len(test) == 0:  # nothing left
-        raise EOFError
-    else:
-        fid.rewind(1)
-    # get the data tag
-    raw_tag = int(fid.read(1,'I'))
-    
-    # check for small data element format
-    numbytes = raw_tag >> 16
-    if numbytes > 0:  # small data element format
-        if numbytes > 4:
-            raise IOError, "Problem with MAT file: " \
-                  "too many bytes in small data element format."
-        dtype = int(raw_tag & 0xFFFF)
-        el = fid.read(numbytes,miDataTypes[dtype][2],c_is_b=1)
-        fid.seek(4-numbytes,1)  # skip padding
-    else:
-        # otherwise parse tag
-        dtype = raw_tag
-        numbytes = fid.read(1,'I')
-        
-        if dtype == miCOMPRESSED: # compressed data type
-            if not fid.zbuffer:
-                fid.zbuffer = CompressedFopen(format=fid.format)
-            fid.zbuffer.fill(fid.raw_read(numbytes))
-            _skip_padding(fid, numbytes, 8)
-            return _get_element(fid.zbuffer, return_name_dtype)
-        if dtype != miMATRIX:  # therefore basic data type
-            try:
-                el = fid.read(numbytes,miDataTypes[dtype][2],c_is_b=1)
-            except KeyError:
-                raise ValueError, "Unknown data type"
-            _skip_padding(fid, numbytes, 8)
-        else:
-            # handle miMatrix type
-            el, name = _parse_mimatrix(fid,numbytes)
-
-    if return_name_dtype:
-        return el, name, dtype
-    return el
-
-def _loadv5(fid,basename):
-    # return a dictionary from a Matlab version 5-7.1 file
-    # always contains the variable __header__
-    mdict = {}
-    _parse_header(fid,mdict)
-    var = 0
-    while 1:  # file pointer to start of next data
-        try:
-            var = var + 1
-            el, varname, unused = _get_element(fid, return_name_dtype=True)
-            if varname is None:
-                varname = '%s_%04d' % (basename,var)
-            mdict[varname] = el
-        except EOFError:
-            break
-    return mdict
-
-### END MATLAB v5 support #############
-
-def loadmat(name, mdict=None, appendmat=1, basename='raw'):
-    """Load the MATLAB(tm) mat file.
-
-    If name is a full path name load it in.  Otherwise search for the file
-    on the sys.path list and load the first one found (the current directory
-    is searched first).
-
     v4 (Level 1.0), v6 and v7.1 matfiles are supported.  
 
     Inputs:
 
-      name -- name of the mat file (don't need .mat extension if appendmat=1)
-      dict -- the dictionary to insert into.  If none the variables will be
-              returned in a dictionary.
-      appendmat -- non-zero to append the .mat extension to the end of the
+      file_name -- name of the mat file (don't need .mat extension if append_mat=True)
+      append_mat -- True to append the .mat extension to the end of the
                    given filename.
-      basename -- for MATLAB(tm) v5 matfiles raw data will have this basename.
-
-    Outputs:
-
-      If dict is None, then a dictionary of names and objects representing the
-      stored arrays is returned.
-    """
-
-    if appendmat and name[-4:] == ".mat":
-        name = name[:-4]
-    if os.sep in name:
-        full_name = name
-        if appendmat:
-            full_name = name + ".mat"
+      """
+    if append_mat and file_name[-4:] == ".mat":
+        file_name = file_name[:-4]
+    if os.sep in file_name:
+        full_file_name = file_name
+        if append_mat:
+            full_name = file_name + ".mat"
     else:
         full_name = None
-        junk,name = os.path.split(name)
+        junk,file_name = os.path.split(file_name)
         for path in sys.path:
-            test_name = os.path.join(path,name)
-            if appendmat:
+            test_name = os.path.join(path,file_name)
+            if append_mat:
                 test_name += ".mat"
             try:
                 fid = open(test_name,'rb')
@@ -794,143 +50,40 @@
             except IOError:
                 pass
         if full_name is None:
-            raise IOError, "%s not found on the path." % name
+            raise IOError, "%s not found on the path." % file_name
 
-    fid = fopen(full_name,'rb')
-    test_vals = fid.fread(4,'byte')
+    byte_stream = ByteStream(memmap(full_name))
+    MR = MatFile4Reader(byte_stream)
+    if MR.format_looks_right():
+        return MR
+    return MatFile5Reader(byte_stream)
 
-    if not (0 in test_vals):       # MATLAB version 5 format
-        fid.rewind()
-        thisdict = _loadv5(fid,basename)
-        if mdict is not None:
-            mdict.update(thisdict)
-            return
-        else:
-            return thisdict
-        
-    # The remainder of this function is the v4 codepath
-    testtype = struct.unpack('i',test_vals.tostring())
-    # Check to see if the number is positive and less than 5000.
-    if testtype[0] < 0 or testtype[0] > 4999:
-        # wrong byte-order
-        if LittleEndian:
-            format = 'ieee-be'
-        else:
-            format = 'ieee-le'
-    else:  # otherwise we are O.K.
-        if LittleEndian:
-            format = 'ieee-le'
-        else:
-            format = 'ieee-be'
+def loadmat(file_name,  mdict=None, appendmat=True, basename='raw'):
+    ''' Load Matlab(tm) file
 
-    fid.setformat(format)
-
-    length = fid.size()
-    fid.rewind()  # back to the begining
-
-    defnames = []
-    thisdict = {}
-    while 1:
-        if (fid.tell() == length):
-            break
-        header = fid.fread(5,'int')
-        if len(header) != 5:
-            fid.close()
-            print "Warning: Read error in file."
-            break
-        M,rest = divmod(int(header[0]),1000) # int is for workaround numpy 0.9.9 bug
-        O,rest = divmod(rest,100)
-        P,rest = divmod(rest,10)
-        T = rest
-
-        if (M > 1):
-            fid.close()
-            raise ValueError, "Unsupported binary format."
-        if (O != 0):
-            fid.close()
-            raise ValuError, "Hundreds digit of first integer should be zero."
-
-        if (T not in [0,1]):
-            fid.close()
-            raise ValueError, "Cannot handle sparse matrices, yet."
-
-        storage = {0:'d',1:'f',2:'i',3:'h',4:'H',5:'B'}[P]
-
-        varname = fid.fread(header[-1],'char')[:-1]
-        varname = varname.tostring()
-        defnames.append(varname)
-        numels = header[1]*header[2]
-        if T == 0:             # Text data
-            data = atleast_1d(fid.fread(numels,storage))
-            if header[3]:  # imaginary data
-                data2 = fid.fread(numels,storage)
-                if data.dtype.char == 'f' and data2.dtype.char == 'f':
-                    new = empty(data.shape,'F')
-                    new.real = data
-                    new.imag = data2
-                    data = new
-                    del(new)
-                    del(data2)
-            if len(data) > 1:
-                data=data.reshape((header[2], header[1])                )
-                thisdict[varname] = transpose(squeeze(data))
-            else:
-                thisdict[varname] = data
-        else:
-            data = atleast_1d(fid.fread(numels,storage,'char'))
-            if len(data) > 1:
-                data=data.reshape((header[2], header[1]))
-                thisdict[varname] = transpose(squeeze(data))
-            else:
-                thisdict[varname] = data
-
-    fid.close()
+    See docs for mat_reader_factory for details of input options
+    '''
+    MR = mat_reader_factory(file_name, basename)
+    matfile_dict = MR.get_variables()
     if mdict is not None:
-        print "Names defined = ", defnames
-        mdict.update(thisdict)
+        mdict.update(matfile_dict)
     else:
-        return thisdict
+        mdict = matfile_dict
+    return mdict
 
-
-def savemat(filename, mdict):
+def savemat(file_name, mdict, appendmat=True):
     """Save a dictionary of names and arrays into the MATLAB-style .mat file.
 
     This saves the arrayobjects in the given dictionary to a matlab Version 4
     style .mat file.
+    @appendmat  - if true, appends '.mat' extension to filename, if not present
     """
-    storage = {'D':0,'d':0,'F':1,'f':1,'u4':2,'i':2,'h':3,'B':5}
-    if filename[-4:] != ".mat":
-        filename = filename + ".mat"
-    fid = fopen(filename,'wb')
-    M = not LittleEndian
-    O = 0
-    for variable in mdict.keys():
-        var = mdict[variable]
-        if not isinstance(var, ndarray):
-            continue
-        if var.dtype.char == 'S1':
-            T = 1
-        else:
-            T = 0
-        if var.dtype.char == 'b':
-            var = var.astype('h')
-        P = storage[var.dtype.char]
-        fid.fwrite([M*1000+O*100+P*10+T],'int')
-
-        if len(var.shape) == 1:
-            var=var.reshape((len(var), 1))
-        var = transpose(var)
-
-        if len(var.shape) > 2:
-            var=var.reshape((product(var.shape[:-1],axis=0), var.shape[-1]))
-
-        imagf = var.dtype.char in ['F', 'D']
-        fid.fwrite([var.shape[1], var.shape[0], imagf, len(variable)+1],'int')
-        fid.fwrite(variable+'\x00','char')
-        if imagf:
-            fid.fwrite(var.real)
-            fid.fwrite(var.imag)
-        else:
-            fid.fwrite(var)
-    fid.close()
-    return
+    if appendmat and file_name[-4:] != ".mat":
+        file_name = file_name + ".mat"
+    file_stream = open(file_name, 'wb')
+    MW = MatFile4Writer(file_stream)
+    MW.put_variables(mdict)
+    file_stream.close()
+    
+if __name__ == '__main__':
+    D = savemat('test.mat', {'a': 1})

Added: trunk/Lib/io/mio4.py
===================================================================
--- trunk/Lib/io/mio4.py	2006-09-12 20:49:35 UTC (rev 2197)
+++ trunk/Lib/io/mio4.py	2006-09-14 09:29:55 UTC (rev 2198)
@@ -0,0 +1,268 @@
+''' Classes for read / write of matlab 4 files
+'''
+
+from numpy import *
+
+from miobase import *
+
+miDOUBLE = 0  # data type codes: Mat4ArrayReader.read_header looks them up by the P digit of mopt
+miSINGLE = 1
+miINT32 = 2
+miINT16 = 3
+miUINT16 = 4
+miUINT8 = 5
+
+mdtypes_template = {  # handed to convert_dtypes() in MatFile4Reader.set_dtypes
+    miDOUBLE: 'f8',
+    miSINGLE: 'f4',
+    miINT32: 'i4',
+    miINT16: 'i2',
+    miUINT16: 'u2',
+    miUINT8: 'u1',
+    'header': [('mopt', 'i4'),  # five int32 fields read ahead of each matrix name
+               ('mrows', 'i4'),
+               ('ncols', 'i4'),
+               ('imagf', 'i4'),
+               ('namlen', 'i4')],
+    'U1': 'U1',
+    }
+
+np_to_mtypes = {  # numpy dtype.str minus its byte-order character -> data type code (see matrix_header)
+    'f8': miDOUBLE,
+    'c16': miDOUBLE,
+    'f4': miSINGLE,
+    'c8': miSINGLE,
+    'i4': miINT32,
+    'i2': miINT16,
+    'u2': miUINT16,
+    'u1': miUINT8,
+    'S1': miUINT8,
+    }
+    
+# matrix classes
+mxFULL_CLASS = 0  # stored in header.mclass from the T digit of mopt
+mxCHAR_CLASS = 1
+mxSPARSE_CLASS = 2
+
+order_codes = {  # NOTE(review): not referenced in the visible code; presumably keyed by the M digit of mopt
+    0: '<',
+    1: '>',
+    2: 'VAX D-float', #!
+    3: 'VAX G-float',
+    4: 'Cray', #!!
+    }
+
+class Mat4Header(object):
+    ''' Place holder for Mat4 header '''
+    pass  # no fields of its own; Mat4ArrayReader.read_header sets attributes on instances
+
+class Mat4ArrayReader(MatArrayReader):
+    ''' Class for reading Mat4 arrays
+    '''
+    
+    def __init__(self, *args, **kwargs):
+        super(Mat4ArrayReader,self).__init__(*args, **kwargs)
+        self._getter_classes = {
+            mxFULL_CLASS: Mat4FullGetter,
+            mxCHAR_CLASS: Mat4CharGetter,
+            mxSPARSE_CLASS: Mat4SparseGetter,
+            }
+        
+    def read_header(self):
+        ''' Read and return Mat4 matrix header
+
+        Defines:
+        next_position - start position of next matrix
+        name
+        dtype - numpy dtype of matrix
+        mclass - matlab code for class of matrix
+        dims - shape of matrix as stored (see sparse reader)
+        is_complex - True if data are complex
+        is_char    - True if these are char data
+        '''
+        header = Mat4Header()
+        data = self.read_array(self.dtypes['header'])
+        header.name = self.read_ztstring(data['namlen'])
+        if data['mopt'] < 0 or  data['mopt'] > 5000:
+            ValueError, 'Mat 4 mopt wrong format, byteswapping problem?'
+        M,rest = divmod(data['mopt'], 1000)
+        O,rest = divmod(rest,100)
+        P,rest = divmod(rest,10)
+        T = rest
+        if O != 0:
+            raise ValueError, 'O in MOPT integer should be 0, wrong format?'
+        header.dtype = self.dtypes[P]
+        header.mclass = T
+        header.is_char = None
+        header.is_numeric = None
+        header.original_dtype = None
+        header.dims = (data['mrows'], data['ncols'])
+        header.is_complex = data['imagf'] == 1
+        remaining_bytes = header.dtype.itemsize * product(header.dims)
+        if header.is_complex and not header.mclass == mxSPARSE_CLASS:
+            remaining_bytes *= 2
+        header.next_position = self.mat_stream.tell() + remaining_bytes
+        return header
+
+    def matrix_getter_factory(self):
+        header = self.read_header()
+        return self._getter_classes[header.mclass](self, header)
+
+
+class Mat4MatrixGetter(MatMatrixGetter):  # common base of the Mat4 full, char and sparse getters
+
+    # Mat4 variables never global or logical
+    is_global = False
+    is_logical = False
+    
+    def read_hdr_array(self, *args, **kwargs):
+        ''' Mat4 read array always uses header dtype and dims '''
+        return self.read_array(  # extra arguments (e.g. copy=False) are passed straight through
+            self.header.dtype, self.dims, *args, **kwargs)
+
+
+class Mat4FullGetter(Mat4MatrixGetter):  # getter registered for mxFULL_CLASS
+    def get_raw_array(self):
+        self.header.is_numeric = True
+        if self.header.is_complex:
+            # avoid array copy to save memory
+            res = self.read_hdr_array(copy=False)  # first block read: real part
+            res_j = self.read_hdr_array(copy=False)  # second block read: imaginary part
+            return res + (res_j * 1j)  # the sum is a new complex array
+        else:
+            return self.read_hdr_array()
+
+
+class Mat4CharGetter(Mat4MatrixGetter):  # getter registered for mxCHAR_CLASS
+    def get_raw_array(self):
+        self.header.is_char = True
+        arr = self.read_hdr_array().astype(uint8)  # one byte per character before decoding
+        # ascii to unicode
+        S = arr.tostring().decode('ascii')  # the ascii codec rejects byte values above 127
+        return ndarray(shape=self.dims,  # view the decoded string as an array of single characters
+                       dtype=dtype('U1'),
+                       buffer = array(S)).copy()  # copy so the result owns its data
+
+
+class Mat4SparseGetter(Mat4MatrixGetter):
+    ''' Read sparse matrix type 
+
+    Matlab 4 real sparse arrays are saved in a N+1 by 3 array format,
+    where N is the number of non-zero values.  Column 1 values [0:N]
+    are the (1-based) row indices of each non-zero value, column 2
+    [0:N] are the column indices, column 3 [0:N] are the (real)
+    values.  The last values [-1,0:2] of the row and column indices are
+    shape[0] and shape[1] respectively of the output matrix. The last
+    value for the values column is a padding 0. mrows and ncols values
+    from the header give the shape of the stored matrix, here [N+1,
+    3].  Complex data is saved as a 4 column matrix, where the fourth
+    column contains the imaginary component of the data; the last
+    value is again 0.
+    '''
+    def get_raw_array(self):
+        res = self.read_hdr_array()
+        tmp = res[:-1,:]  # every row except the trailing shape row
+        dims = res[-1,0:2]  # last row holds the output matrix shape
+        ij = transpose(tmp[:,0:2]) - 1 # for matlab 1-based indexing
+        vals = tmp[:,2]
+        if res.shape[1] == 4:  # a fourth column means imaginary parts are present
+            self.header.is_complex = True
+            vals = vals + res[:-1,3] * 1j
+        if have_sparse:
+            return scipy.sparse.csc_matrix((vals,ij), dims)
+        return (dims, ij, vals)  # fallback when have_sparse is false: the raw pieces as a tuple
+
+    
+class MatFile4Reader(MatFileReader):
+    ''' Reader for Mat4 files '''
+    def __init__(self, mat_stream, *args, **kwargs):
+        self._array_reader = Mat4ArrayReader(  # created before the base __init__ runs
+            mat_stream,
+            None,  # dtypes: assigned afterwards in set_dtypes
+            None,  # processor_func: assigned below, after the base __init__
+            )
+        super(MatFile4Reader, self).__init__(mat_stream, *args, **kwargs)
+        self._array_reader.processor_func = self.processor_func
+
+    def set_dtypes(self):
+        self.dtypes = self.convert_dtypes(mdtypes_template)
+        self._array_reader.dtypes = self.dtypes  # keep the array reader's table in step
+
+    def matrix_getter_factory(self):
+        return self._array_reader.matrix_getter_factory()  # delegate to the array reader
+
+    def format_looks_right(self):
+        # Matlab 4 files have a zero somewhere in first 4 bytes
+        self.mat_stream.seek(0)
+        mopt_bytes = self.read_bytes(4)
+        self.mat_stream.seek(0)  # rewind so the check does not consume the stream
+        return 0 in mopt_bytes
+    
+    def guess_byte_order(self):
+        self.mat_stream.seek(0)
+        mopt = self.read_array(dtype('i4'))  # first header field, read as a plain i4
+        self.mat_stream.seek(0)  # rewind so the check does not consume the stream
+        if mopt < 0 or mopt > 5000:  # same range test that read_header applies to mopt
+            return ByteOrder.swapped_code
+        return ByteOrder.native_code
+
+
+class MatFile4Writer(MatFileWriter):  # writes variables in the Mat4 layout: header, name, data
+    codec = 'ascii'
+    
+    def arr_to_matrix(self, arr):
+        ''' Convert numeric array to matlab format '''
+        dts = arr.dtype.str[1:]  # dtype string without its byte-order character
+        if not dts in np_to_mtypes:
+            arr = arr.astype('f8')  # dtypes without a Mat4 code are stored as doubles
+        return atleast_2d(arr)
+        
+    def matrix_header(self, var, name):
+        ''' Return header for matrix array '''
+        header = empty((), mdtypes_template['header'])  # 0-d record with the five header fields
+        dt = var.dtype.str[1:]
+        M = not ByteOrder.little_endian  # 0 when ByteOrder.little_endian is true, else 1
+        O = 0
+        P = np_to_mtypes[dt]  # KeyError for a dtype with no Mat4 code
+        T = dt == 'S1' # could also be sparse -> 2
+        header['mopt'] = M*1000+O*100+P*10+T
+        dims = var.shape  # indexed as [0] and [1], so var must have at least two dimensions
+        header['mrows'] = dims[0]
+        header['ncols'] = dims[1]
+        header['imagf'] = var.dtype.kind == 'c'
+        header['namlen'] = len(name) + 1  # + 1 for the terminating NUL written after the name
+        return header
+    
+    def put_variable(self, var, name):
+        arr = array(var)
+        if arr.dtype.hasobject:
+            raise TypeError, 'Cannot save object arrays in Mat4'
+        if have_sparse:
+            if scipy.sparse.issparse(arr):
+                raise TypeError, 'Cannot save sparse arrays yet'
+        if arr.dtype.kind in ('U', 'S'):
+            arr = self.str_to_chars(arr)
+        else:
+            arr = self.arr_to_matrix(arr)
+        dims = arr.shape
+        if len(dims) > 2:
+            dims = [product(dims[:-1]), dims[-1]]  # fold all leading axes into one
+            arr = reshape(arr, dims)
+        if arr.dtype.kind == 'U':
+            # Recode unicode to ascii
+            dt = 'U' + str(product(dims))  # view the whole array as a single unicode string
+            st_arr = ndarray(shape=(), dtype=dt, buffer=arr)
+            st = st_arr.item().encode('ascii')
+            arr = ndarray(shape=dims, dtype='S1', buffer=st)  # back to one byte per character
+        header = self.matrix_header(arr, name)
+        self.write_bytes(header)
+        self.write_string(name + '\0')
+        if header['imagf']:
+            self.write_bytes(arr.real)  # real block first, then the imaginary block
+            self.write_bytes(arr.imag)
+        else:
+            self.write_bytes(arr)
+            
+    def put_variables(self, mdict):
+        for name, var in mdict.items():
+            self.put_variable(var, name)  # note the (var, name) argument order

Added: trunk/Lib/io/mio5.py
===================================================================
--- trunk/Lib/io/mio5.py	2006-09-12 20:49:35 UTC (rev 2197)
+++ trunk/Lib/io/mio5.py	2006-09-14 09:29:55 UTC (rev 2198)
@@ -0,0 +1,543 @@
+''' Classes for read / write of matlab 5 files
+'''
+
+# Small fragments of current code adapted from matfile.py by Heiko
+# Henkelmann
+
+## Notice in matfile.py file
+
+# Copyright (c) 2003 Heiko Henkelmann
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+import zlib
+from copy import copy as pycopy
+
+from numpy import *
+
+from bytestream import ByteStream
+from miobase import *
+
+miINT8 = 1  # mi* codes: data type held in an element tag (see read_tag / read_element)
+miUINT8 = 2
+miINT16 = 3
+miUINT16 = 4
+miINT32 = 5
+miUINT32 = 6
+miSINGLE = 7
+miDOUBLE = 9
+miINT64 = 12
+miUINT64 = 13
+miMATRIX = 14
+miCOMPRESSED = 15
+miUTF8 = 16
+miUTF16 = 17
+miUTF32 = 18
+
+mxCELL_CLASS = 1  # mx* codes: matrix class, the low byte of flags_class (see read_array_flags)
+mxSTRUCT_CLASS = 2
+mxOBJECT_CLASS = 3
+mxCHAR_CLASS = 4
+mxSPARSE_CLASS = 5
+mxDOUBLE_CLASS = 6
+mxSINGLE_CLASS = 7
+mxINT8_CLASS = 8
+mxUINT8_CLASS = 9
+mxINT16_CLASS = 10
+mxUINT16_CLASS = 11
+mxINT32_CLASS = 12
+mxUINT32_CLASS = 13
+
+mdtypes_template = {  # NOTE(review): presumably converted to numpy dtypes elsewhere before use as self.dtypes
+    miINT8: 'i1',
+    miUINT8: 'u1',
+    miINT16: 'i2',
+    miUINT16: 'u2',
+    miINT32: 'i4',
+    miUINT32: 'u4',
+    miSINGLE: 'f4',
+    miDOUBLE: 'f8',
+    miINT64: 'i8',
+    miUINT64: 'u8',
+    miUTF8: 'u1',
+    miUTF16: 'u2',
+    miUTF32: 'u4',
+    'file_header': [('description', 'S116'),
+                    ('subsystem_offset', 'i8'),
+                    ('version', 'u2'),        
+                    ('endian_test', 'S2')],
+    'tag_mdtype': 'u4',  # first word of a tag, read in read_tag
+    'tag_byte_count': 'u4',  # second word of a tag, read only for the non-small format
+    'array_flags': [('data_type', 'u4'),  # record read in read_array_flags
+                    ('byte_count', 'u4'),
+                    ('flags_class','u4'),
+                    ('nzmax', 'u4')],
+    'U1': 'U1',
+    }
+
+mclass_dtypes_template = {  # matrix class -> dtype string; presumably the source of class_dtypes
+    mxINT8_CLASS: 'i1',
+    mxUINT8_CLASS: 'u1',
+    mxINT16_CLASS: 'i2',
+    mxUINT16_CLASS: 'u2',
+    mxINT32_CLASS: 'i4',
+    mxUINT32_CLASS: 'u4',
+    mxSINGLE_CLASS: 'f4',
+    mxDOUBLE_CLASS: 'f8',
+    }
+
+''' Before release v7.1 (release 14) matlab used the system default
+character encoding scheme padded out to 16-bits. Release 14 and later
+use Unicode. When saving character data, matlab R14 checks if it can
+be encoded in 7-bit ascii, and saves in that format if so.'''
+
+codecs_template = {  # NOTE(review): read_element passes self.codecs[mdtype] to decode(), so presumably reduced to codec names elsewhere
+    miUTF8: {'codec': 'utf_8', 'width': 1},
+    miUTF16: {'codec': 'utf_16', 'width': 2},
+    miUTF32: {'codec': 'utf_32','width': 4},
+    }
+
+miUINT16_codec = sys.getdefaultencoding()  # used by Mat5CharMatrixGetter for uint16 data; NOTE(review): sys is not imported explicitly here, presumably via a star import
+
+mx_numbers = (  # classes that header_to_getter routes to Mat5NumericMatrixGetter
+    mxDOUBLE_CLASS,
+    mxSINGLE_CLASS,
+    mxINT8_CLASS,
+    mxUINT8_CLASS,
+    mxINT16_CLASS,
+    mxUINT16_CLASS,
+    mxINT32_CLASS,
+    mxUINT32_CLASS,
+    )
+
+class mat_struct(object):
+    ''' Placeholder for holding read data from structs '''
+    pass  # Mat5StructMatrixGetter sets _fieldnames on its template and one attribute per field on copies
+
+class mat_obj(object):
+    ''' Placeholder for holding read data from objects '''
+    pass  # NOTE(review): not used in the visible code; presumably filled in by the object getter
+
+class Mat5Tag(object):
+    ''' Placeholder for holding tag information '''
+    pass  # Mat5Arrayreader.read_tag sets mdtype, byte_count and skip on instances
+
+class Mat5Header(object):
+    ''' Placeholder for Mat5 header '''
+    pass  # Mat5Arrayreader.read_header sets the attributes on instances
+
+class Mat5ArrayFlags(object):
+    ''' Place holder for array flags '''
+    pass  # read_array_flags sets mclass, is_logical, is_global, is_complex and nzmax
+
+
+class Mat5Arrayreader(MatArrayReader):
+    ''' Class to get Mat5 arrays
+
+    Provides element reader functions, header reader, matrix reader
+    factory function
+    '''
+
+    def __init__(self, mat_stream, dtypes, processor_func, codecs, class_dtypes):
+        super(Mat5Arrayreader, self).__init__(mat_stream,
+                                              dtypes,
+                                              processor_func,
+                                              )
+        self.codecs = codecs  # looked up by tag mdtype in read_element
+        self.class_dtypes = class_dtypes  # picked up by the matrix getters
+
+    def read_tag(self):
+        tag = Mat5Tag()
+        # Check for small data element first
+        tag.mdtype = int(self.read_array(self.dtypes['tag_mdtype']))
+        byte_count = tag.mdtype >> 16  # non-zero upper 16 bits mark the small format
+        if byte_count: # small data element format
+            if byte_count > 4:
+                raise ValueError, 'Too many bytes for sde format'
+            tag.byte_count = byte_count
+            tag.mdtype = tag.mdtype & 0xFFFF  # the type code is the lower 16 bits
+            tag.skip = 4 - byte_count  # unused remainder of the 4 data bytes
+            return tag
+        tag.byte_count = self.read_array(
+            self.dtypes['tag_byte_count'])
+        tag.skip = tag.byte_count % 8 and 8 - tag.byte_count % 8  # 0 if a multiple of 8, else the gap to the next one
+        return tag
+    
+    def read_element(self, copy=True):
+        tag = self.read_tag()
+        if tag.mdtype == miMATRIX:  # nested matrix: hand over to the matching getter
+            header = self.read_header(tag)
+            return self.header_to_getter(header).get_array()
+        if tag.mdtype in self.codecs: # encoded char data
+           raw_str = self.read_bytes(tag.byte_count)
+           codec = self.codecs[tag.mdtype]
+           if not codec:  # a false entry means the encoding is not supported
+               raise TypeError, 'Do not support encoding %d' % tag.mdtype
+           el = raw_str.tostring().decode(codec)
+        else: # numeric data
+            try:
+                dt = self.dtypes[tag.mdtype]
+            except KeyError:
+                raise TypeError, 'Do not know matlab data code %d' % tag.mdtype
+            el_count = tag.byte_count / dt.itemsize  # number of elements held in the data bytes
+            el = self.read_array(dt, a_shape=(el_count), copy=copy)
+        if tag.skip:
+            self.mat_stream.seek(tag.skip, 1)  # step over the padding that follows the data
+        return el
+
+    def read_header(self, tag):
+        ''' Read header from Mat5 matrix
+        
+        Defines:
+        next_position - start position of next matrix
+        name
+        dtype - numpy dtype of matrix
+        mclass - matlab code for class of matrix
+        dims - shape of matrix as stored (see sparse reader)
+        is_complex - True if data are complex
+        is_char    - True if these are char data
+        is_global  - is a global variable in matlab workspace
+        is_numeric - is basic numeric matrix
+        original_dtype - data type when saved from matlab
+        '''
+        if not tag.mdtype == miMATRIX:
+            raise TypeError, \
+                  'Expecting miMATRIX type here, got %d' %  tag.mdtype
+        header = Mat5Header()
+        header.next_position = (self.mat_stream.pos +  # position just after the tag
+                                tag.byte_count +
+                                tag.skip)
+        header.flags = self.read_array_flags()
+        header.is_complex = header.flags.is_complex
+        header.is_global = header.flags.is_global
+        header.is_logical = header.flags.is_logical
+        header.mclass = header.flags.mclass
+        header.is_numeric = None  # is_numeric, original_dtype and is_char are set later by the getters
+        header.original_dtype = None
+        header.is_char = None
+        header.dims = self.read_element()  # the dimensions and then the name follow the flags
+        header.name = self.read_element().tostring()
+        return header
+    
+    def read_array_flags(self):
+        flags = Mat5ArrayFlags()
+        af = self.read_array(self.dtypes['array_flags'])
+        flags_class = af['flags_class']
+        flags.mclass = flags_class & 0xFF  # low byte is the matrix class
+        flags.is_logical = flags_class >> 9 & 1  # >> binds tighter than &, so this tests bit 9
+        flags.is_global = flags_class >> 10 & 1  # bit 10
+        flags.is_complex = flags_class >> 11 & 1  # bit 11
+        flags.nzmax = af['nzmax']
+        return flags
+
+    def matrix_getter_factory(self):
+        ''' Returns reader for next matrix '''
+        tag = self.read_tag()
+        if tag.mdtype == miCOMPRESSED:  # compressed variables are read through a sub-reader
+            return Mat5ZArrayreader(self, tag).matrix_getter_factory()
+        header = self.read_header(tag)
+        return self.header_to_getter(header)
+    
+    def header_to_getter(self, header):
+        mc = header.mclass
+        if mc in mx_numbers:
+            return Mat5NumericMatrixGetter(self, header)
+        if mc == mxSPARSE_CLASS:
+            return Mat5SparseMatrixGetter(self, header)
+        if mc == mxCHAR_CLASS:
+            return Mat5CharMatrixGetter(self, header)
+        if mc == mxCELL_CLASS:
+            return Mat5CellMatrixGetter(self, header)
+        if mc == mxSTRUCT_CLASS:
+            return Mat5StructMatrixGetter(self, header)
+        if mc == mxOBJECT_CLASS:
+            return Mat5ObjectMatrixGetter(self, header)
+        raise TypeError, 'No reader for class code %s' % mc  # reached when no class above matched
+
+
+class Mat5ZArrayreader(Mat5Arrayreader):
+    ''' Getter for compressed arrays
+
+    Reads and uncompresses gzipped stream on init, providing wrapper
+    for this new sub-stream.  Sets next_position for main stream to
+    allow skipping over this variable (although we have to read and
+    uncompress the whole thing anyway to get the name)
+    '''
+    def __init__(self, array_reader, tag):
+        '''Reads and uncompresses gzipped stream'''
+        data = array_reader.read_bytes(tag.byte_count)  # compressed payload from the parent stream
+        if tag.skip:
+            array_reader.mat_stream.seek(tag.skip, 1)  # step over padding in the parent stream
+        super(Mat5ZArrayreader, self).__init__(
+            ByteStream(zlib.decompress(data.tostring())),  # this reader works on the inflated bytes
+            array_reader.dtypes,
+            array_reader.processor_func,
+            array_reader.codecs,
+            array_reader.class_dtypes)
+        self.next_position = array_reader.mat_stream.tell()  # parent position after the compressed element
+        
+    def header_to_getter(self, header):
+        ''' Set next_position to current position in parent stream '''
+        header.next_position = self.next_position  # replaces the value read_header computed from the sub-stream
+        return super(Mat5ZArrayreader, self).header_to_getter(header)
+        
+
+class Mat5MatrixGetter(MatMatrixGetter):
+    ''' Base class for getting Mat5 matrices
+
+    Gets current read information from passed array_reader
+    '''
+    
+    def __init__(self, array_reader, header):
+        ''' Accepts @array_reader and @header '''
+        super(Mat5MatrixGetter, self).__init__(array_reader, header)
+        self.class_dtypes = array_reader.class_dtypes  # copied from the reader for use by subclasses
+        self.codecs = array_reader.codecs
+        self.is_global = header.is_global
+
+    def read_tag(self):
+        return self.array_reader.read_tag()  # delegate to the array reader
+    
+    def read_element(self, *args, **kwargs):
+        return self.array_reader.read_element(*args, **kwargs)  # delegate, forwarding e.g. copy=False
+
+
+class Mat5NumericMatrixGetter(Mat5MatrixGetter):  # getter chosen for the classes listed in mx_numbers
+    def get_raw_array(self):
+        self.header.is_numeric = True
+        self.header.original_dtype = self.class_dtypes[self.header.mclass]
+        if self.header.is_complex:
+            # avoid array copy to save memory
+            res = self.read_element(copy=False)  # first element read: real part
+            res_j = self.read_element(copy=False)  # second element read: imaginary part
+            res = res + (res_j * 1j)
+        else:
+            res = self.read_element()
+        return ndarray(shape=self.dims,  # reinterpret the flat data with the header dims
+                       dtype=res.dtype,
+                       buffer=res,
+                       order='F')  # Fortran (column-major) element order
+        
+
+class Mat5SparseMatrixGetter(Mat5MatrixGetter):
+    def get_raw_array(self):
+        rowind  = self.read_element()
+        colind = self.read_element()
+        if self.header.is_complex:
+            # avoid array copy to save memory
+            res = self.read_element(copy=False)
+            res_j = self.read_element(copy=False)
+            res = res + (res_j * 1j)
+        else:
+            res = self.read_element()
+        ''' From the matlab API documentation, last found here:
+        http://www.mathworks.com/access/helpdesk/help/techdoc/matlab_external/
+        @rowind are simply the row indices for all the (@res) non-zero
+        entries in the sparse array.  @rowind has nzmax entries, so
+        may well have more entries than len(@res), the actual number
+        of non-zero entries, but @rowind[len(res):] can be discarded
+        and should be 0. @colind has length (number of columns + 1),
+        and is such that, if D = diff(@colind), D[j] gives the number
+        of non-zero entries in column j. Because @rowind values are
+        stored in column order, this gives the column corresponding to
+        each @rowind
+        '''
+        cols = empty((len(res)), dtype=rowind.dtype)
+        col_counts = diff(colind)
+        start_row = 0
+        for i in where(col_counts)[0]:
+            end_row = start_row + col_counts[i]
+            cols[start_row:end_row] = i
+            start_row = end_row
+        ij = vstack((rowind[:len(res)], cols))
+        if have_sparse:
+            result = scipy.sparse.csc_matrix((res,ij),
+                                             self.dims)
+        else:
+            result = (dims, ij, res)
+        return result
+
+
+class Mat5CharMatrixGetter(Mat5MatrixGetter):
+    def get_raw_array(self): # -> array of single unicode chars, shape self.dims
+        self.header.is_char = True # flag read later by the processor function
+        res = self.read_element()
+        # Convert non-string types to unicode
+        if isinstance(res, ndarray):
+            if res.dtype.type == uint16:
+                codec = miUINT16_codec
+                if self.codecs['uint16_len'] == 1: # codec is one byte per char
+                    res = res.astype(uint8) # so narrow before decoding
+            elif res.dtype.type in (uint8, int8):
+                codec = 'ascii'
+            else:
+                raise TypeError, 'Did not expect type %s' % res.dtype
+            res = res.tostring().decode(codec)
+        return ndarray(shape=self.dims,
+                       dtype=dtype('U1'),
+                       buffer=array(res),
+                       order='F').copy()
+
+
+class Mat5CellMatrixGetter(Mat5MatrixGetter):
+    def get_raw_array(self): # -> object array of shape self.dims
+        # Account for fortran indexing of cells
+        tupdims = tuple(self.dims[::-1]) # reversed dims; undone by transpose
+        length = product(self.dims)
+        result = empty(length, dtype=object)
+        for i in range(length): # items are read in the order they are stored
+            result[i] = self.get_item()
+        result = transpose(reshape(result,tupdims))
+        return result
+
+    def get_item(self): # hook: subclasses override to build each element
+        return self.read_element()
+
+
+class Mat5StructMatrixGetter(Mat5CellMatrixGetter):
+    obj_template = mat_struct()
+    def get_raw_array(self):
+        namelength = self.read_element()
+        # get field names
+        names = self.read_element()
+        splitnames = [names[i:i+namelength] for i in \
+                      xrange(0,len(names),namelength)]
+        self.obj_template._fieldnames = [x.tostring().strip('\x00')
+                                        for x in splitnames]
+        return super(Mat5StructMatrixGetter, self).get_raw_array()
+
+    def get_item(self):
+        item = pycopy(self.obj_template)
+        for element in item._fieldnames:
+            item.__dict__[element]  = self.read_element()
+        return item
+
+
+class Mat5ObjectMatrixGetter(Mat5StructMatrixGetter):
+    obj_template = mat_obj() # template copied for every object item
+    def get_raw_array(self): # class name element comes first, then struct data
+        self.obj_template._classname = self.read_element().tostring()
+        return super(Mat5ObjectMatrixGetter, self).get_raw_array()
+
+
+class MatFile5Reader(MatFileReader):
+    ''' Reader for Mat 5 mat files
+
+    Adds the following attribute to base class
+    
+    @uint16_codec       - char codec to use for uint16 char arrays
+                          (defaults to system default codec)
+   '''
+
+    def __init__(self,
+                 mat_stream,
+                 byte_order=None,
+                 base_name='raw',
+                 matlab_compatible=False,
+                 squeeze_me=True,
+                 chars_as_strings=True,
+                 uint16_codec=None
+                 ):
+        self.codecs = {} # must exist before base __init__ triggers set_dtypes
+        self._array_reader = Mat5Arrayreader( # Nones are filled in further down
+            mat_stream,
+            None,
+            None,
+            None,
+            None,
+            )
+        super(MatFile5Reader, self).__init__(
+            mat_stream,
+            byte_order,
+            base_name,
+            matlab_compatible,
+            squeeze_me,
+            chars_as_strings)
+        self._array_reader.processor_func = self.processor_func
+        self.uint16_codec = uint16_codec # property: also fills self.codecs
+
+    def get_uint16_codec(self):
+        return self._uint16_codec
+    def set_uint16_codec(self, uint16_codec):
+        if not uint16_codec: # None or '' selects the interpreter default
+            uint16_codec = sys.getdefaultencoding()
+        # Set length of miUINT16 char encoding
+        self.codecs['uint16_len'] = len("  ".encode(uint16_codec)) \
+                               - len(" ".encode(uint16_codec))
+        self.codecs['uint16_codec'] = uint16_codec
+        self._array_reader.codecs = self.codecs
+        self._uint16_codec = uint16_codec
+    uint16_codec = property(get_uint16_codec,
+                            set_uint16_codec,
+                            None,
+                            'get/set uint16_codec')
+
+    def set_dtypes(self):
+        ''' Set dtypes and codecs '''
+        self.dtypes = self.convert_dtypes(mdtypes_template)
+        self.class_dtypes = self.convert_dtypes(mclass_dtypes_template)
+        codecs = {}
+        postfix = self.order_code == '<' and '_le' or '_be' # endian suffix
+        for k, v in codecs_template.items():
+            codec = v['codec']
+            try:
+                " ".encode(codec) # probe: is this codec known to Python?
+            except LookupError:
+                codecs[k] = None # unknown codec is recorded as None
+                continue
+            if v['width'] > 1: # multi-byte codecs get the endian suffix
+                codec += postfix
+            codecs[k] = codec
+        self.codecs.update(codecs)
+        self.update_array_reader()
+
+    def update_array_reader(self): # push current codecs/dtypes to the reader
+        self._array_reader.codecs = self.codecs
+        self._array_reader.dtypes = self.dtypes
+        self._array_reader.class_dtypes = self.class_dtypes
+        
+    def matrix_getter_factory(self):
+        return self._array_reader.matrix_getter_factory()
+
+    def guess_byte_order(self): # -> '<' or '>'
+        self.mat_stream.seek(126) # the two bytes at 126-127 decide the order
+        mi = self.read_bytes(2).tostring()
+        self.mat_stream.seek(0)
+        return mi == 'IM' and '<' or '>' # 'IM' means little endian
+
+    def file_header(self):
+        ''' Read in mat 5 file header '''
+        hdict = {}
+        hdr = self.read_array(self.dtypes['file_header'])
+        hdict['__header__'] = hdr['description'].strip(' \t\n\000')
+        v_major = hdr['version'] >> 8 # high byte
+        v_minor = hdr['version'] & 0xFF # low byte
+        hdict['__version__'] = '%d.%d' % (v_major, v_minor)
+        return hdict
+        
+    def format_looks_right(self):
+        # Matlab 4 files have a zero somewhere in first 4 bytes
+        self.mat_stream.seek(0)
+        mopt_bytes = self.read_bytes(4)
+        self.mat_stream.seek(0) # leave the stream rewound for the caller
+        return 0 not in mopt_bytes
+
+
+class Mat5Writer(MatFileWriter): # Mat 5 writer; adds nothing to the base yet
+    pass

Added: trunk/Lib/io/miobase.py
===================================================================
--- trunk/Lib/io/miobase.py	2006-09-12 20:49:35 UTC (rev 2197)
+++ trunk/Lib/io/miobase.py	2006-09-14 09:29:55 UTC (rev 2198)
@@ -0,0 +1,337 @@
+# Authors: Travis Oliphant, Matthew Brett
+
+"""
+Base classes for matlab file stream reading
+"""
+
+import sys
+
+from numpy import *
+
+try:
+    import scipy.sparse
+    have_sparse = 1
+except ImportError:
+    have_sparse = 0
+
+
+class ByteOrder(object):
+    ''' Namespace for byte ordering '''
+    little_endian = sys.byteorder == 'little'
+    native_code = little_endian and '<' or '>'
+    swapped_code = little_endian and '>' or '<'
+    
+    def to_numpy_code(code):
+        if code is None:
+            return ByteOrder.native_code
+        if code in ('little', '<', 'l', 'L'):
+            return '<'
+        elif code in ('BIG', '>', 'B', 'b'):
+            return '>'
+        elif code in ('native', '='):
+            return ByteOrder.native_code
+        elif code in ('swapped'):
+            return ByteOrder.swapped_code
+        else:
+            raise ValueError, 'We cannot handle byte order %s' % byte_order
+    to_numpy_code = staticmethod(to_numpy_code)
+
+
+class MatStreamAgent(object):
+    ''' Base object for readers / getters from mat file streams
+
+    Attaches to initialized stream
+
+    Base class for "getters" - which do store state of what they are
+    reading on initialization, and therefore need to be initialized
+    before each read, and "readers" which do not store state, and only
+    need to be initialized once on object creation
+
+    Implements common array reading functions
+
+    Inputs @mat_stream - stream object to read from (needs a read method)
+    '''
+
+    def __init__(self, mat_stream):
+        self.mat_stream = mat_stream
+
+    def read_bytes(self, num_bytes):
+        ''' Get next block of data of length @num_bytes '''
+        res = self.mat_stream.read(num_bytes)
+        # Allow stream to return strings instead of bytes
+        if isinstance(res, basestring):
+            res = ndarray(shape=(len(res)), # wrap the string as a uint8 array
+                          dtype=uint8,
+                          buffer=res)
+        return res
+    
+    def read_array(self, a_dtype, a_shape=(), copy=True):
+        ''' Generic get of byte stream data of known type and shape
+
+        Inputs
+        @a_dtype     - dtype of array
+        @a_shape     - shape of desired array
+        @copy        - copies array if True
+        (buffer is usually read only)
+        a_dtype is assumed to be correct endianness
+        '''
+        num_bytes = a_dtype.itemsize * product(a_shape)
+        data = self.read_bytes(num_bytes)
+        arr = ndarray(shape=a_shape,
+                      dtype=a_dtype,
+                      buffer=data,
+                      order='F')
+        if copy:
+            arr = arr.copy()
+        return arr
+
+    def read_ztstring(self, num_bytes): # read bytes, strip NULs at both ends
+        return self.read_bytes(num_bytes).tostring().strip('\x00')
+
+        
+class MatFileReader(MatStreamAgent):
+    """ Base object for reading mat files
+
+    @initialized byte stream object  - file io interface object
+    @byte_order         - byte order ('native', 'little', 'BIG')
+                          in ('native', '=')
+                          or in ('little', '<')
+                          or in ('BIG', '>')
+    @base_name          - base name for unnamed variables
+    @matlab_compatible  - return arrays as matlab saved them
+    @squeeze_me         - whether to squeeze unit dimensions or not
+    @chars_as_strings   - whether to convert char arrays to string arrays
+
+    To make this class functional, you will need to override the
+    following methods:
+
+    set_dtypes              - sets data types defs from byte order
+    matrix_getter_factory   - gives object to fetch next matrix from stream
+    format_looks_right      - returns True if format looks correct for
+                              this file type (Mat4, Mat5)
+    guess_byte_order        - guesses file byte order from file
+    """
+
+    def __init__(self, mat_stream,
+                 byte_order=None,
+                 base_name='raw',
+                 matlab_compatible=False,
+                 squeeze_me=True,
+                 chars_as_strings=True,
+                 ):
+        # Initialize stream
+        self.mat_stream = mat_stream
+        self.dtypes = {}
+        if not byte_order:
+            byte_order = self.guess_byte_order()
+        self.order_code = byte_order # sets dtypes and other things too
+        self.base_name = base_name
+        self.squeeze_me = squeeze_me
+        self.chars_as_strings = chars_as_strings
+        self.matlab_compatible = matlab_compatible
+        self.processor_func = self.get_processor_func()
+        
+    # matlab_compatible property sets squeeze_me and chars_as_strings
+    def get_matlab_compatible(self):
+        return self._matlab_compatible
+    def set_matlab_compatible(self, matlab_compatible):
+        self._matlab_compatible = matlab_compatible
+        if matlab_compatible:
+            self.squeeze_me = False
+            self.char_as_strings = False
+    matlab_compatible = property(get_matlab_compatible,
+                                 set_matlab_compatible,
+                                 None,
+                                 'get/set matlab_compatible property')
+
+    def get_order_code(self):
+        return self._order_code
+    def set_order_code(self, order_code):
+        order_code = ByteOrder.to_numpy_code(order_code)
+        self._order_code = order_code
+        self.set_dtypes()
+    order_code = property(get_order_code,
+                          set_order_code,
+                          None,
+                          'get/set order code')
+
+    def set_dtypes(self):
+        assert False, 'Not implemented'
+
+    def convert_dtypes(self, dtype_template):
+        dtypes = dtype_template.copy()
+        for k in dtypes:
+            dtypes[k] = dtype(dtypes[k]).newbyteorder(
+                self.order_code)
+        return dtypes
+    
+    def matrix_getter_factory(self):
+        assert False, 'Not implemented'
+    
+    def format_looks_right(self):
+        "Return True if the format looks right for this object"
+        assert False, 'Not implemented'
+
+    def file_header(self):
+        return {}
+    
+    def guess_byte_order(self):
+        assert 0, 'Not implemented'
+
+    def get_processor_func(self):
+        ''' Processing to apply to read matrices
+
+        Function applies options to matrices. We have to pass this
+        function into the reader routines because Matlab 5 matrices
+        occur as submatrices - in cell arrays, structs and objects -
+        so we will not see these in the main variable getting routine
+        here.
+        '''
+        
+        def func(arr, header):
+            if header.is_char and self.chars_as_strings:
+                # Convert char array to string or array of strings
+                dims = arr.shape
+                if len(dims) >= 2: # return array of strings
+                    dtt = self.order_code + 'U'
+                    n_dims = dims[:-1]
+                    str_arr = reshape(arr,
+                                    (product(n_dims),
+                                     dims[-1]))
+                    arr = empty(n_dims, dtype=object)
+                    for i in range(0, n_dims[-1]):
+                        arr[...,i] = self.chars_to_str(str_arr[i])
+                else: # return string
+                    arr = self.chars_to_str(arr)
+            if self.matlab_compatible:
+                # Apply options to replicate matlab's load into workspace
+                if header.is_logical:
+                    arr = arr.astype(bool)
+                elif header.is_numeric:
+                    # Cast as original matlab type
+                    if header.original_dtype:
+                        arr = arr.astype(header.original_dtype)
+            if self.squeeze_me:
+                arr = squeeze(arr)
+                if not arr.shape: # 0d coverted to scalar
+                    arr = arr.item()
+            return arr
+        return func
+
+    def chars_to_str(self, str_arr):
+        ''' Convert string array to string '''
+        dt = dtype('U' + str(product(str_arr.shape)))
+        return ndarray(shape=(),
+                       dtype = dt,
+                       buffer = str_arr.copy()).item()
+
+    def get_variables(self, variable_names=None):
+        ''' get variables from stream as dictionary
+
+        @variable_names   - optional list of variable names to get
+
+        If variable_names is None, then get all variables in file
+        '''
+        self.mat_stream.seek(0)
+        mdict = self.file_header()
+        mdict['__globals__'] = []
+        while not self.end_of_stream():
+            getter = self.matrix_getter_factory()
+            name = getter.name
+            if variable_names and name not in variable_names:
+                getter.to_next()
+                continue
+            res = getter.get_array()
+            mdict[name] = res
+            if getter.is_global:
+                mdict['__globals__'].append(name)
+            if variable_names:
+                variable_names.remove(name)
+                if not variable_names:
+                    break
+        return mdict
+
+    def end_of_stream(self):
+        b = self.read_bytes(1)
+        self.mat_stream.seek(-1,1)
+        return len(b) == 0
+        
+class MatMatrixGetter(MatStreamAgent):
+    """ Base class for matrix getters
+
+    Getters are stateful versions of agents, and record state of
+    current read on initialization, so need to be created for each
+    read - one-shot objects.
+
+    MatrixGetters are initialized with the content of the matrix
+    header
+
+    Accepts
+    @array_reader - array reading object (see below)
+    @header       - header for matrix being read
+    """
+    
+    def __init__(self, array_reader, header):
+        super(MatMatrixGetter, self).__init__(array_reader.mat_stream)
+        self.array_reader = array_reader
+        self.dtypes = array_reader.dtypes
+        self.header = header
+        self.name = header.name
+        self.next_position = header.next_position # target used by to_next
+        self.dims = header.dims
+        self.data_position = self.mat_stream.tell() # stream offset right now
+        
+    def get_array(self):
+        ''' Gets an array from matrix, and applies any necessary processing '''
+        if not self.mat_stream.tell() == self.data_position:
+            self.mat_stream.seek(self.data_position) # rewind to recorded offset
+        arr = self.get_raw_array()
+        return self.array_reader.processor_func(arr, self.header)
+
+    def get_raw_array(self): # abstract: subclasses read the matrix data here
+        assert False, 'Not implemented'
+
+    def to_next(self): # jump to header.next_position without reading the data
+        self.mat_stream.seek(self.next_position)
+
+
+class MatArrayReader(MatStreamAgent):
+    ''' Base class for array readers
+
+    The array_reader contains information about the current reading
+    process, such as byte ordered dtypes and the processing function
+    to apply to matrices as they are read, as well as routines for
+    reading matrix components.
+    '''
+
+    def __init__(self, mat_stream, dtypes, processor_func):
+        self.mat_stream = mat_stream
+        self.dtypes = dtypes
+        self.processor_func = processor_func
+
+    def matrix_getter_factory(self): # abstract: subclasses must override
+        assert False, 'Not implemented'
+
+
+class MatFileWriter(object):
+    ''' Base type for writing mat files '''
+    def __init__(self, file_stream):
+        self.file_stream = file_stream
+
+    def str_to_chars(self, arr):
+        ''' Converts string array to matlab char array '''
+        dims = list(arr.shape)
+        if not dims: # 0-d input is treated as a single string
+            dims = [1]
+        dims.append(int(arr.dtype.str[2:])) # digits after the 2-char type code
+        num_els = product(dims) # NOTE(review): never used below
+        dt = dtype(arr.dtype.kind + '1') # same kind, one char per element
+        return ndarray(shape=dims, dtype=dt, buffer=arr)
+
+    def write_bytes(self, arr):
+        arr.dtype.newbyteorder(ByteOrder.native_code) # NOTE(review): result unused
+        s = arr.tostring(order='F')
+        self.file_stream.write(s)
+
+    def write_string(self, s):
+        self.file_stream.write(s)

Added: trunk/Lib/io/tests/data/japanese_utf8.txt
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/japanese_utf8.txt
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/test3dmatrix_6.1_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/test3dmatrix_6.1_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/io/tests/data/test3dmatrix_6.5.1_GLNX86.mat
===================================================================
(Binary files differ)

Added: trunk/Lib/io/tests/data/test3dmatrix_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/test3dmatrix_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testcell_6.1_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testcell_6.1_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/io/tests/data/testcell_6.5.1_GLNX86.mat
===================================================================
(Binary files differ)

Added: trunk/Lib/io/tests/data/testcell_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testcell_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testcellnest_6.1_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testcellnest_6.1_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/io/tests/data/testcellnest_6.5.1_GLNX86.mat
===================================================================
(Binary files differ)

Added: trunk/Lib/io/tests/data/testcellnest_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testcellnest_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testcomplex_4.2c_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testcomplex_4.2c_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testcomplex_6.1_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testcomplex_6.1_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/io/tests/data/testcomplex_6.5.1_GLNX86.mat
===================================================================
(Binary files differ)

Added: trunk/Lib/io/tests/data/testcomplex_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testcomplex_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testdouble_4.2c_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testdouble_4.2c_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testdouble_6.1_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testdouble_6.1_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/io/tests/data/testdouble_6.5.1_GLNX86.mat
===================================================================
(Binary files differ)

Added: trunk/Lib/io/tests/data/testdouble_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testdouble_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testmatrix_4.2c_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testmatrix_4.2c_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testmatrix_6.1_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testmatrix_6.1_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/io/tests/data/testmatrix_6.5.1_GLNX86.mat
===================================================================
(Binary files differ)

Added: trunk/Lib/io/tests/data/testmatrix_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testmatrix_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testminus_4.2c_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testminus_4.2c_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testminus_6.1_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testminus_6.1_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/io/tests/data/testminus_6.5.1_GLNX86.mat
===================================================================
(Binary files differ)

Added: trunk/Lib/io/tests/data/testminus_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testminus_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testmulti_4.2c_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testmulti_4.2c_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testmulti_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testmulti_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testobject_6.1_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testobject_6.1_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/io/tests/data/testobject_6.5.1_GLNX86.mat
===================================================================
(Binary files differ)

Added: trunk/Lib/io/tests/data/testobject_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testobject_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testonechar_4.2c_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testonechar_4.2c_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testonechar_6.1_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testonechar_6.1_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/io/tests/data/testonechar_6.5.1_GLNX86.mat
===================================================================
(Binary files differ)

Added: trunk/Lib/io/tests/data/testonechar_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testonechar_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testsparse_4.2c_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testsparse_4.2c_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testsparse_6.1_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testsparse_6.1_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/io/tests/data/testsparse_6.5.1_GLNX86.mat
===================================================================
(Binary files differ)

Added: trunk/Lib/io/tests/data/testsparse_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testsparse_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testsparsecomplex_4.2c_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testsparsecomplex_4.2c_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testsparsecomplex_6.1_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testsparsecomplex_6.1_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/io/tests/data/testsparsecomplex_6.5.1_GLNX86.mat
===================================================================
(Binary files differ)

Added: trunk/Lib/io/tests/data/testsparsecomplex_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testsparsecomplex_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/teststring_4.2c_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/teststring_4.2c_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/teststring_6.1_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/teststring_6.1_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/io/tests/data/teststring_6.5.1_GLNX86.mat
===================================================================
(Binary files differ)

Added: trunk/Lib/io/tests/data/teststring_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/teststring_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/teststringarray_4.2c_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/teststringarray_4.2c_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/teststringarray_6.1_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/teststringarray_6.1_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/io/tests/data/teststringarray_6.5.1_GLNX86.mat
===================================================================
(Binary files differ)

Added: trunk/Lib/io/tests/data/teststringarray_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/teststringarray_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/teststruct_6.1_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/teststruct_6.1_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/io/tests/data/teststruct_6.5.1_GLNX86.mat
===================================================================
(Binary files differ)

Added: trunk/Lib/io/tests/data/teststruct_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/teststruct_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/teststructarr_6.1_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/teststructarr_6.1_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/io/tests/data/teststructarr_6.5.1_GLNX86.mat
===================================================================
(Binary files differ)

Added: trunk/Lib/io/tests/data/teststructarr_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/teststructarr_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/teststructnest_6.1_SOL2.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/teststructnest_6.1_SOL2.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: trunk/Lib/io/tests/data/teststructnest_6.5.1_GLNX86.mat
===================================================================
(Binary files differ)

Added: trunk/Lib/io/tests/data/teststructnest_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/teststructnest_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/data/testunicode_7.1_GLNX86.mat
===================================================================
(Binary files differ)


Property changes on: trunk/Lib/io/tests/data/testunicode_7.1_GLNX86.mat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/Lib/io/tests/gen_unittests.m
===================================================================
--- trunk/Lib/io/tests/gen_unittests.m	2006-09-12 20:49:35 UTC (rev 2197)
+++ trunk/Lib/io/tests/gen_unittests.m	2006-09-14 09:29:55 UTC (rev 2198)
@@ -0,0 +1,88 @@
+% Generates mat files for loadmat unit tests
+% This is the version for matlab 5 and higher
+% Uses save_test.m function
+
+% work out data directory prefix, matlab version and file suffix for test files
+global FILEPREFIX FILESUFFIX
+FILEPREFIX = [fullfile(pwd, 'data') filesep];
+temp = ver('MATLAB');
+mlv = temp.Version;
+FILESUFFIX = ['_' mlv '_' computer '.mat'];
+
+% basic double array
+save_test('testdouble', 0:pi/4:2*pi);
+
+% string
+save_test('teststring', '"Do nine men interpret?" "Nine men," I nod.')
+
+% complex
+theta = 0:pi/4:2*pi;
+save_test('testcomplex', cos(theta) + 1j*sin(theta));
+
+% asymmetric array to check indexing
+a = zeros(3, 5);
+a(:,1) = [1:3]';
+a(1,:) = 1:5;
+
+% 2D matrix
+save_test('testmatrix', a);
+
+% negative number - tests signed int
+save_test('testminus', -1);
+
+% single character
+save_test('testonechar', 'r');
+
+% string array
+save_test('teststringarray', ['one  '; 'two  '; 'three']);
+
+% sparse array
+save_test('testsparse', sparse(a));
+
+% sparse complex array
+b = sparse(a);
+b(1,1) = b(1,1) + j;
+save_test('testsparsecomplex', b);
+
+% Two variables in same file
+save([FILEPREFIX 'testmulti' FILESUFFIX], 'a', 'theta')
+
+% struct
+save_test('teststruct', ...
+	  struct('stringfield','Rats live on no evil star.',...
+		 'doublefield',[sqrt(2) exp(1) pi],...
+		 'complexfield',(1+1j)*[sqrt(2) exp(1) pi]));
+
+% cell
+save_test('testcell', ...
+	  {['This cell contains this string and 3 arrays of increasing' ...
+	    ' length'], 1., 1.:2., 1.:3.});
+
+% 3D matrix
+save_test('test3dmatrix', reshape(1:24,[2 3 4]))
+
+% nested cell array
+save_test('testcellnest', {1, {2, 3, {4, 5}}});
+
+% nested struct
+save_test('teststructnest', struct('one', 1, 'two', ...
+				   struct('three', 'number 3')));
+
+% array of struct
+save_test('teststructarr', [struct('one', 1, 'two', 2) ...
+		    struct('one', 'number 1', 'two', 'number 2')]);
+
+% matlab object
+save_test('testobject', inline('x'))
+
+% array of matlab objects
+%save_test('testobjarr', [inline('x') inline('y')])
+
+% unicode test
+if str2num(mlv) > 7  % native2unicode presumably added in 7.0.1; TODO confirm str2num copes with three-part versions like '6.5.1'
+  fid = fopen([FILEPREFIX 'japanese_utf8.txt']);
+  from_japan = fread(fid, 'uint8')';
+  fclose(fid);
+  save_test('testunicode', native2unicode(from_japan, 'utf-8'));
+end
+  
\ No newline at end of file

Added: trunk/Lib/io/tests/gen_unittests4.m
===================================================================
--- trunk/Lib/io/tests/gen_unittests4.m	2006-09-12 20:49:35 UTC (rev 2197)
+++ trunk/Lib/io/tests/gen_unittests4.m	2006-09-14 09:29:55 UTC (rev 2198)
@@ -0,0 +1,50 @@
+% Generates mat files for loadmat unit tests
+% Uses save_test.m function
+% This is the version for matlab 4
+
+% work out data directory prefix, matlab version and file suffix for test files
+global FILEPREFIX FILESUFFIX
+sepchar = '/';
+if strcmp(computer, 'PCWIN'), sepchar = '\'; end
+FILEPREFIX = [pwd sepchar 'data' sepchar];
+mlv = version;
+FILESUFFIX = ['_' mlv '_' computer '.mat'];
+
+% basic double array
+save_test('testdouble', 0:pi/4:2*pi);
+
+% string
+save_test('teststring', '"Do nine men interpret?" "Nine men," I nod.')
+
+% complex
+theta = 0:pi/4:2*pi;
+save_test('testcomplex', cos(theta) + 1j*sin(theta));
+
+% asymmetric array to check indexing
+a = zeros(3, 5);
+a(:,1) = [1:3]';
+a(1,:) = 1:5;
+
+% 2D matrix
+save_test('testmatrix', a);
+
+% negative number - tests signed int
+save_test('testminus', -1);
+
+% single character
+save_test('testonechar', 'r');
+
+% string array
+save_test('teststringarray', ['one  '; 'two  '; 'three']);
+
+% sparse array
+save_test('testsparse', sparse(a));
+
+% sparse complex array
+b = sparse(a);
+b(1,1) = b(1,1) + j;
+save_test('testsparsecomplex', b);
+
+% Two variables in same file
+save([FILEPREFIX 'testmulti' FILESUFFIX], 'a', 'theta')
+

Added: trunk/Lib/io/tests/save_test.m
===================================================================
--- trunk/Lib/io/tests/save_test.m	2006-09-12 20:49:35 UTC (rev 2197)
+++ trunk/Lib/io/tests/save_test.m	2006-09-14 09:29:55 UTC (rev 2198)
@@ -0,0 +1,6 @@
+function save_test(test_name, v)
+% saves the value v as a variable named test_name in the file [FILEPREFIX test_name FILESUFFIX]
+  
+global FILEPREFIX FILESUFFIX
+eval([test_name ' = v;']);
+save([FILEPREFIX test_name FILESUFFIX], test_name)
\ No newline at end of file

Modified: trunk/Lib/io/tests/test_mio.py
===================================================================
--- trunk/Lib/io/tests/test_mio.py	2006-09-12 20:49:35 UTC (rev 2197)
+++ trunk/Lib/io/tests/test_mio.py	2006-09-14 09:29:55 UTC (rev 2198)
@@ -2,6 +2,7 @@
 
 import os
 from glob import glob
+from tempfile import mkstemp
 from numpy.testing import set_package_path, restore_path, ScipyTestCase, ScipyTest
 from numpy.testing import assert_equal, assert_array_almost_equal
 from numpy import arange, array, eye, pi, cos, exp, sin, sqrt, ndarray,  \
@@ -9,7 +10,9 @@
 import scipy.sparse as SP
 
 set_package_path()
-from scipy.io.mio import loadmat, mat_obj, mat_struct
+from scipy.io.mio import loadmat, savemat
+from scipy.io.mio5 import mat_obj, mat_struct
+from scipy.io.mio4 import MatFile4Writer
 restore_path()
 
 try:  # Python 2.3 support
@@ -17,16 +20,19 @@
 except:
     pass
 
+test_data_path = os.path.join(os.path.dirname(__file__), './data')
+
 class test_mio_array(ScipyTestCase):    
     def __init__(self, *args, **kwargs):
         super(test_mio_array, self).__init__(*args, **kwargs)
-        self.test_data_path = os.path.join(os.path.dirname(__file__), './data')
 
     def _check_level(self, label, expected, actual):
-        """ Check one level of a potentially nested dictionary / list """
+        """ Check one level of a potentially nested object / list """
         # object array is returned from cell array in mat file
+        typex = type(expected)
+        typac = type(actual)
         if isinstance(expected, ndarray) and expected.dtype.hasobject == 1:
-            assert type(expected) is type(actual), "Different types at %s" % label
+            assert typex is typac, "Different types at %s" % label
             assert len(expected) == len(actual), "Different list lengths at %s" % label
             for i, ev in enumerate(expected):
                 level_label = "%s, [%d], " % (label, i)
@@ -34,17 +40,17 @@
             return
         # object, as container for matlab structs and objects
         elif isinstance(expected, mat_struct) or isinstance(expected, mat_obj):
-            assert isinstance(actual, type(expected)), \
-                   "Different types %s and %s at %s" % label
+            assert isinstance(actual, typex), \
+                   "Different types %s and %s at %s" % (typex, typac, label)
             ex_fields = dir(expected)
             ac_fields = dir(actual)
             for k in ex_fields:
                 if k.startswith('__') and k.endswith('__'):
                     continue
-                assert k in ac_fields, "Missing field at %s" % label
+                assert k in ac_fields, "Missing property at %s" % label
                 ev = expected.__dict__[k]
                 v = actual.__dict__[k]
-                level_label = "%s, field %s, " % (label, k)
+                level_label = "%s, property %s, " % (label, k)
                 self._check_level(level_label, ev, v)
             return
         # hoping this is a single value, which might be an array
@@ -55,32 +61,43 @@
                                       err_msg = label,
                                       decimal = 5)
         elif isinstance(expected, ndarray):
-            assert isinstance(actual, ndarray), "Expected ndarray at %s" % label
-            assert_array_almost_equal(actual, expected, err_msg=label, decimal = 5)
+            if expected.shape: # allow scalar and 0d array comparisons
+                assert isinstance(actual, ndarray), "Expected ndarray at %s" % label
+            assert_array_almost_equal(actual, expected, err_msg=label, decimal=5)
         else:
-            assert isinstance(expected, type(actual)), \
-                   "Types %s and %s do not match at %s" % (type(expected), type(actual), label)
+            assert isinstance(expected, typac), \
+                   "Types %s and %s do not match at %s" % (typex, typac, label)
             assert_equal(actual, expected, err_msg=label)
     
-    def _check_case(self, name, case):
-        filt = os.path.join(self.test_data_path, 'test%s_*.mat' % name)
-        files = glob(filt)
-        assert files, "No files for test %s using filter %s" % (name, filt)
-        for f in files:
-            matdict = loadmat(f)
-            label = "Test '%s', file:%s" % (name, f)
+    def _check_case(self, name, files, case):
+        for file_name in files:
+            matdict = loadmat(file_name)
+            label = "test %s; file %s" % (name, file_name)
             for k, expected in case.items():
                 k_label = "%s, variable %s" % (label, k)
                 assert k in matdict, "Missing key at %s" % k_label
                 self._check_level(k_label, expected, matdict[k])
 
-    # Add the actual tests dynamically, with given parameters
-    def _make_check_case(name, expected):
+    # Add the load tests dynamically, with given parameters
+    def _make_check_case(name, files, expected):
         def cc(self):
-            self._check_case(name, expected)
+            self._check_case(name, files, expected)
         cc.__doc__ = "check loadmat case %s" % name
         return cc
 
+    # Add the round trip tests dynamically, with given parameters
+    def _make_rt_check_case(name, expected):
+        def cc(self):
+            (fd, fname) = mkstemp('.mat')
+            file_stream = os.fdopen(fd, 'wb')
+            MW = MatFile4Writer(file_stream)
+            MW.put_variables(expected)
+            file_stream.close()
+            self._check_case(name, [fname], expected)
+            os.remove(fname)
+        cc.__doc__ = "check loadmat case %s" % name
+        return cc
+
     # Define cases to test
     theta = pi/4*arange(9,dtype=float)
     case_table = [
@@ -100,7 +117,7 @@
          'expected': {'testcell':
                       array([u'This cell contains this string and 3 arrays of '+\
                              'increasing length',
-                             array([1]), array([1,2]), array([1,2,3])], 
+                             array(1), array([1,2]), array([1,2,3])], 
                             dtype=object)}
          })
     st = mat_struct()
@@ -133,8 +150,13 @@
          'expected': {'testsparsecomplex': SP.csc_matrix(B)},
          })
     case_table.append(
+        {'name': 'multi',
+         'expected': {'theta': theta,
+                      'a': A},
+         })
+    case_table.append(
         {'name': 'minus',
-         'expected': {'testminus': array([-1])},
+         'expected': {'testminus': array(-1)},
          })
     case_table.append(
         {'name': 'onechar',
@@ -144,58 +166,83 @@
         {'name': 'stringarray',
          'expected': {'teststringarray': array([u'one  ', u'two  ', u'three'], dtype=object)},
          })
+    a = array([array(1),
+               array([array(2), array(3),
+                      array([array(4), array(5)],
+                            dtype=object)],
+                     dtype=object)],
+              dtype=object)
     case_table.append(
         {'name': 'cellnest',
-         'expected': {'testcellnest': array([array([1]),
-                                             array([array([2]), array([3]),
-                                                   array([array([4]), array([5])],
-                                                                dtype=object)],
-                                                          dtype=object)],
-                                             dtype=object)},
+         'expected': {'testcellnest': a},
          })
     st = mat_struct()
-    st.one = array([1])
+    st.one = array(1)
     st.two = mat_struct()
     st.two.three = u'number 3'
     case_table.append(
         {'name': 'structnest',
          'expected': {'teststructnest': st}
          })
-    a = empty((2), dtype=object)
-    a[0], a[1] = mat_struct(), mat_struct()
-    a[0].one = array([1])
-    a[0].two = array([2])
+    a = array([mat_struct(), mat_struct()])
+    a[0].one = array(1)
+    a[0].two = array(2)
     a[1].one = u'number 1'
     a[1].two = u'number 2'
     case_table.append(
         {'name': 'structarr',
          'expected': {'teststructarr': a}
          })
-
     a = mat_obj()
     a._classname = 'inline'
     a.expr = u'x'
     a.inputExpr = u' x = INLINE_INPUTS_{1};'
     a.args = u'x'
-    a.isEmpty = array([0])
-    a.numArgs = array([1])
-    a.version = array([1])
+    a.isEmpty = array(0)
+    a.numArgs = array(1)
+    a.version = array(1)
     case_table.append(
         {'name': 'object',
          'expected': {'testobject': a}
          })
-         
+    u_str = file(
+        os.path.join(test_data_path, 'japanese_utf8.txt'),
+        'rb').read().decode('utf-8')
     case_table.append(
-        {'name': 'vec',
-         'expected': {'fit_params': array([1.27661364061704e+09,7.51130255826677e-03]),
-                      'xdot_filt': array([8.11154474752301e-13,1.28504039006994e-11])}
-         })
-    
-    # add tests
+        {'name': 'unicode',
+        'expected': {'testunicode': u_str}
+        })
+    # add load tests
     for case in case_table:
         name = case['name']
         expected = case['expected']
-        exec 'check_%s = _make_check_case(name, expected)' % name
+        filt = os.path.join(test_data_path, 'test%s_*.mat' % name)
+        files = glob(filt)
+        assert files, "No files for test %s using filter %s" % (name, filt)
+        exec 'check_%s = _make_check_case(name, files, expected)' % name
+    # round trip tests
+    case_table = [
+        {'name': 'double',
+         'expected': {'testdouble': theta}
+         }]
+    case_table.append(
+        {'name': 'string',
+         'expected': {'teststring': u'"Do nine men interpret?" "Nine men," I nod.'},
+         })
+    case_table.append(
+        {'name': 'complex',
+         'expected': {'testcomplex': cos(theta) + 1j*sin(theta)}
+         })
+    case_table.append(
+        {'name': 'multi',
+         'expected': {'theta': theta,
+                      'a': A},
+         })
+    for case in case_table:
+        name = case['name'] + '_round_trip'
+        expected = case['expected']
+        exec 'check_%s = _make_rt_check_case(name, expected)' % name
+        
 
 if __name__ == "__main__":
     ScipyTest().run()



More information about the Scipy-svn mailing list