[Scipy-svn] r5577 - in trunk/scipy/io/matlab: . tests

Thu Feb 19 21:38:01 CST 2009


Author: matthew.brett@gmail.com
Date: 2009-02-19 21:37:55 -0600 (Thu, 19 Feb 2009)
New Revision: 5577

Modified:
   trunk/scipy/io/matlab/mio.py
   trunk/scipy/io/matlab/mio4.py
   trunk/scipy/io/matlab/mio5.py
   trunk/scipy/io/matlab/miobase.py
   trunk/scipy/io/matlab/tests/test_mio.py
Log:
Refactoring of mio5 to move streams out of the writer getter class, thereby allowing compression on write for mat files; multiple doc cleanups; addition of do_compression and oned_as keywords to savemat

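As a quick orientation (not part of the commit), the new savemat keywords can
be exercised much as the added tests below do; the stream round-trip and the
asserted shape mirror test_mio.py:

    # Sketch only: exercises the do_compression and oned_as keywords
    # added to savemat in this revision.
    from StringIO import StringIO
    import numpy as np
    from scipy.io import savemat, loadmat

    arr = np.arange(5)
    stream = StringIO()
    # write 1D arrays as row vectors, and deflate each variable on write
    savemat(stream, {'oned': arr}, oned_as='row', do_compression=True)
    vals = loadmat(stream)
    assert vals['oned'].shape == (1, 5)

Passing oned_as explicitly also avoids the FutureWarning that the mat5 writer
now emits when it falls back to the current default of 'column'.
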
Modified: trunk/scipy/io/matlab/mio.py
===================================================================
--- trunk/scipy/io/matlab/mio.py	2009-02-19 21:45:37 UTC (rev 5576)
+++ trunk/scipy/io/matlab/mio.py	2009-02-20 03:37:55 UTC (rev 5577)
@@ -8,18 +8,18 @@
 import sys
 import warnings
 
-from miobase import get_matfile_version, filldoc
+from miobase import get_matfile_version, docfiller
 from mio4 import MatFile4Reader, MatFile4Writer
 from mio5 import MatFile5Reader, MatFile5Writer
 
 __all__ = ['find_mat_file', 'mat_reader_factory', 'loadmat', 'savemat']
 
-@filldoc
+@docfiller
 def find_mat_file(file_name, appendmat=True):
     ''' Try to find .mat file on system path
 
     file_name : string
-        file name for mat file
+       file name for mat file
     %(append_arg)s
     '''
     warnings.warn('Searching for mat files on python system path will be ' +
@@ -47,7 +47,7 @@
                 pass
     return full_name
 
-@filldoc
+@docfiller
 def mat_reader_factory(file_name, appendmat=True, **kwargs):
     """Create reader for matlab .mat format files
 
@@ -87,7 +87,7 @@
     else:
         raise TypeError('Did not recognize version %s' % mv)
 
-@filldoc
+@docfiller
 def loadmat(file_name,  mdict=None, appendmat=True, **kwargs):
     ''' Load Matlab(tm) file
 
@@ -115,8 +115,13 @@
         mdict = matfile_dict
     return mdict
 
-@filldoc
-def savemat(file_name, mdict, appendmat=True, format='5', long_field_names=False):
+@docfiller
+def savemat(file_name, mdict, 
+            appendmat=True, 
+            format='5', 
+            long_field_names=False,
+            do_compression=False,
+            oned_as=None):
     """Save a dictionary of names and arrays into the MATLAB-style .mat file.
 
     This saves the arrayobjects in the given dictionary to a matlab
@@ -132,6 +137,8 @@
         '5' for matlab 5 (up to matlab 7.2)
         '4' for matlab 4 mat files
     %(long_fields)s
+    %(do_compression)s
+    %(oned_as)s
     """
     file_is_string = isinstance(file_name, basestring)
     if file_is_string:
@@ -152,8 +159,10 @@
         MW = MatFile4Writer(file_stream)
     elif format == '5':
         MW = MatFile5Writer(file_stream,
+                            do_compression=do_compression,
                             unicode_strings=True,
-                            long_field_names=long_field_names)
+                            long_field_names=long_field_names,
+                            oned_as=oned_as)
     else:
         raise ValueError("Format should be '4' or '5'")
     MW.put_variables(mdict)
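
The oned_as keyword only changes how 1D numpy arrays are given a matlab shape;
a rough sketch of the convention (the real logic is matdims in miobase.py, so
the helper name here is illustrative only):

    import numpy as np

    def oned_shape(arr, oned_as='column'):
        # illustrative stand-in for the 1D handling in miobase.matdims
        if arr.ndim == 1:
            if oned_as == 'column':
                return arr.shape + (1,)   # (n,) -> (n, 1)
            if oned_as == 'row':
                return (1,) + arr.shape   # (n,) -> (1, n)
        return arr.shape                  # 2D and above pass through

    assert oned_shape(np.arange(5), 'column') == (5, 1)
    assert oned_shape(np.arange(5), 'row') == (1, 5)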

Modified: trunk/scipy/io/matlab/mio4.py
===================================================================
--- trunk/scipy/io/matlab/mio4.py	2009-02-19 21:45:37 UTC (rev 5576)
+++ trunk/scipy/io/matlab/mio4.py	2009-02-20 03:37:55 UTC (rev 5577)
@@ -7,7 +7,7 @@
 import scipy.sparse
 
 from miobase import MatFileReader, MatArrayReader, MatMatrixGetter, \
-     MatFileWriter, MatStreamWriter, filldoc
+     MatFileWriter, MatStreamWriter, docfiller
 
 
 SYS_LITTLE_ENDIAN = sys.byteorder == 'little'
@@ -187,7 +187,7 @@
 
 class MatFile4Reader(MatFileReader):
     ''' Reader for Mat4 files '''
-    @filldoc
+    @docfiller
     def __init__(self, mat_stream, *args, **kwargs):
         ''' Initialize matlab 4 file reader
 
@@ -340,6 +340,9 @@
 
 
 class MatFile4Writer(MatFileWriter):
+    ''' Class for writing matlab 4 format files '''
+    def __init__(self, file_stream):
+        self.file_stream = file_stream
 
     def put_variables(self, mdict):
         for name, var in mdict.items():

Modified: trunk/scipy/io/matlab/mio5.py
===================================================================
--- trunk/scipy/io/matlab/mio5.py	2009-02-19 21:45:37 UTC (rev 5576)
+++ trunk/scipy/io/matlab/mio5.py	2009-02-20 03:37:55 UTC (rev 5577)
@@ -10,6 +10,8 @@
 # Small fragments of current code adapted from matfile.py by Heiko
 # Henkelmann
 
+import os
+import time
 import sys
 import zlib
 from zlibstreams import TwoShotZlibInputStream
@@ -22,7 +24,7 @@
 import scipy.sparse
 
 from miobase import MatFileReader, MatArrayReader, MatMatrixGetter, \
-     MatFileWriter, MatStreamWriter, filldoc, matdims
+     MatFileWriter, MatStreamWriter, docfiller, matdims
 
 miINT8 = 1
 miUINT8 = 2
@@ -527,7 +529,7 @@
     uint16_codec       - char codec to use for uint16 char arrays
                           (defaults to system default codec)
    '''
-    @filldoc
+    @docfiller
     def __init__(self,
                  mat_stream,
                  byte_order=None,
@@ -649,11 +651,13 @@
                  name,
                  is_global=False,
                  unicode_strings=False,
-                 long_field_names=False):
+                 long_field_names=False,
+                 oned_as='column'):
         super(Mat5MatrixWriter, self).__init__(file_stream, arr, name)
         self.is_global = is_global
         self.unicode_strings = unicode_strings
         self.long_field_names = long_field_names
+        self.oned_as = oned_as
 
     def write_dtype(self, arr):
         self.file_stream.write(arr.tostring())
@@ -706,7 +710,7 @@
         if mclass is None:
             mclass = self.default_mclass
         if shape is None:
-            shape = matdims(self.arr)
+            shape = matdims(self.arr, self.oned_as)
         self._mat_tag_pos = self.file_stream.tell()
         self.write_dtype(self.mat_tag)
         # write array flags (complex, global, logical, class, nzmax)
@@ -733,9 +737,9 @@
 
     def make_writer_getter(self):
         ''' Make writer getter for this stream '''
-        return Mat5WriterGetter(self.file_stream,
-                                self.unicode_strings,
-                                self.long_field_names)
+        return Mat5WriterGetter(self.unicode_strings,
+                                self.long_field_names,
+                                self.oned_as)
 
 
 class Mat5NumericWriter(Mat5MatrixWriter):
@@ -820,7 +824,7 @@
         A = np.atleast_2d(self.arr).flatten('F')
         MWG = self.make_writer_getter()
         for el in A:
-            MW = MWG.matrix_writer_factory(el, '')
+            MW = MWG.matrix_writer_factory(self.file_stream, el)
             MW.write()
         self.update_matrix_tag()
 
@@ -857,7 +861,7 @@
         MWG = self.make_writer_getter()
         for el in A:
             for f in fieldnames:
-                MW = MWG.matrix_writer_factory(el[f], '')
+                MW = MWG.matrix_writer_factory(self.file_stream, el[f])
                 MW.write()
         self.update_matrix_tag()
 
@@ -877,26 +881,25 @@
 
 
 class Mat5WriterGetter(object):
-    ''' Wraps stream and options, provides methods for getting Writer objects '''
-    def __init__(self, stream, unicode_strings, long_field_names=False):
+    ''' Wraps options, provides methods for getting Writer objects '''
+    @docfiller
+    def __init__(self, 
+                 unicode_strings=True, 
+                 long_field_names=False,
+                 oned_as='column'):
         ''' Initialize writer getter
 
         Parameters
         ----------
-        stream : fileobj
-           object to which to write
         unicode_strings : bool
            If True, write unicode strings
-        long_field_names : bool, optional
-           If True, allow writing of long field names (127 bytes)
+        %(long_fields)s
+        %(oned_as)s
         '''
-        self.stream = stream
         self.unicode_strings = unicode_strings
         self.long_field_names = long_field_names
+        self.oned_as = oned_as
 
-    def rewind(self):
-        self.stream.seek(0)
-
     def to_writeable(self, source):
         ''' Convert input object ``source`` to something we can write
 
@@ -910,13 +913,18 @@
 
         Examples
         --------
-        >>> from StringIO import StringIO
-        >>> mwg = Mat5WriterGetter(StringIO(), True)
+        >>> mwg = Mat5WriterGetter()
         >>> mwg.to_writeable(np.array([1])) # pass through ndarrays
         array([1])
         >>> expected = np.array([(1, 2)], dtype=[('a', '|O8'), ('b', '|O8')])
         >>> np.all(mwg.to_writeable({'a':1,'b':2}) == expected)
         True
+        >>> np.all(mwg.to_writeable({'a':1,'b':2, '_c':3}) == expected)
+        True
+        >>> np.all(mwg.to_writeable({'a':1,'b':2, 100:3}) == expected)
+        True
+        >>> np.all(mwg.to_writeable({'a':1,'b':2, '99':3}) == expected)
+        True
         >>> class klass(object): pass
         >>> c = klass
         >>> c.a = 1
@@ -949,8 +957,18 @@
             source = source.__dict__
         # Mappings or object dicts
         if hasattr(source, 'keys'):
-            dtype = [(k,object) for k in source]
-            return np.array( [tuple(source.itervalues())] ,dtype)
+            dtype = []
+            values = []
+            for field, value in source.items():
+                if (isinstance(field, basestring) and 
+                    not field.startswith('_') and
+                    not field[0] in '0123456789'):
+                    dtype.append((field,object))
+                    values.append(value)
+            if dtype:
+                return np.array( [tuple(values)] ,dtype)
+            else:
+                return None
         # Next try and convert to an array
         narr = np.asanyarray(source)
         if narr.dtype.type in (np.object, np.object_) and \
@@ -959,32 +977,40 @@
             return None
         return narr
 
-    def matrix_writer_factory(self, arr, name, is_global=False):
+    def matrix_writer_factory(self, stream, arr, name='', is_global=False):
         ''' Factory function to return matrix writer given variable to write
 
         Parameters
         ----------
+        stream : fileobj
+            stream to write to
         arr : array-like
             array-like object to create writer for
         name : string
             name as it will appear in matlab workspace
+            default is empty string
         is_global : {False, True} optional
             whether variable will be global on load into matlab
+
+        Returns
+        -------
+        writer : matrix writer object
         '''
         # First check if these are sparse
         if scipy.sparse.issparse(arr):
-            return Mat5SparseWriter(self.stream, arr, name, is_global)
+            return Mat5SparseWriter(stream, arr, name, is_global)
         # Try to convert things that aren't arrays
         narr = self.to_writeable(arr)
         if narr is None:
             raise TypeError('Could not convert %s (type %s) to array'
                             % (arr, type(arr)))
-        args = (self.stream,
+        args = (stream,
                 narr,
                 name,
                 is_global,
                 self.unicode_strings,
-                self.long_field_names)
+                self.long_field_names,
+                self.oned_as)
         if isinstance(narr, MatlabFunction):
             return Mat5FunctionWriter(*args)
         if isinstance(narr, MatlabObject):
@@ -1005,28 +1031,48 @@
 
 class MatFile5Writer(MatFileWriter):
     ''' Class for writing mat5 files '''
+    @docfiller
     def __init__(self, file_stream,
                  do_compression=False,
                  unicode_strings=False,
                  global_vars=None,
-                 long_field_names=False):
+                 long_field_names=False,
+                 oned_as=None):
+        ''' Initialize writer for matlab 5 format files 
+
+        Parameters
+        ----------
+        %(do_compression)s
+        %(unicode_strings)s
+        global_vars : None or sequence of strings, optional
+            Names of variables to be marked as global for matlab
+        %(long_fields)s
+        %(oned_as)s
+        '''
         super(MatFile5Writer, self).__init__(file_stream)
         self.do_compression = do_compression
         if global_vars:
             self.global_vars = global_vars
         else:
             self.global_vars = []
+        # deal with deprecations
+        if oned_as is None:
+            warnings.warn("Using oned_as default value ('column')" +
+                          " This will change to 'row' in future versions",
+                          FutureWarning, stacklevel=2)
+            oned_as = 'column'
         self.writer_getter = Mat5WriterGetter(
-            StringIO(),
             unicode_strings,
-            long_field_names)
+            long_field_names,
+            oned_as)
         # write header
-        import os, time
         hdr =  np.zeros((), mdtypes_template['file_header'])
-        hdr['description']='MATLAB 5.0 MAT-file Platform: %s, Created on: %s' % (
-                            os.name,time.asctime())
+        hdr['description']='MATLAB 5.0 MAT-file Platform: %s, Created on: %s' \
+            % (os.name,time.asctime())
         hdr['version']= 0x0100
-        hdr['endian_test']=np.ndarray(shape=(),dtype='S2',buffer=np.uint16(0x4d49))
+        hdr['endian_test']=np.ndarray(shape=(),
+                                      dtype='S2',
+                                      buffer=np.uint16(0x4d49))
         file_stream.write(hdr.tostring())
 
     def get_unicode_strings(self):
@@ -1045,28 +1091,40 @@
     long_field_names = property(get_long_field_names,
                                 set_long_field_names,
                                 None,
-                                'enable writing 32-63 character field names for Matlab 7.6+')
+                                'enable writing 32-63 character field '
+                                'names for Matlab 7.6+')
 
+    def get_oned_as(self):
+        return self.writer_getter.oned_as
+    def set_oned_as(self, oned_as):
+        self.writer_getter.oned_as = oned_as
+    oned_as = property(get_oned_as,
+                       set_oned_as,
+                       None,
+                       'get/set oned_as property')
+
     def put_variables(self, mdict):
         for name, var in mdict.items():
             if name[0] == '_':
                 continue
             is_global = name in self.global_vars
-            self.writer_getter.rewind()
-            mat_writer = self.writer_getter.matrix_writer_factory(
-                var,
-                name,
-                is_global)
-            mat_writer.write()
-            stream = self.writer_getter.stream
-            bytes_written = stream.tell()
-            stream.seek(0)
-            out_str = stream.read(bytes_written)
             if self.do_compression:
-                out_str = zlib.compress(out_str)
+                stream = StringIO()
+                mat_writer = self.writer_getter.matrix_writer_factory(
+                    stream,
+                    var,
+                    name,
+                    is_global)
+                mat_writer.write()
+                out_str = zlib.compress(stream.getvalue())
                 tag = np.empty((), mdtypes_template['tag_full'])
                 tag['mdtype'] = miCOMPRESSED
-                tag['byte_count'] = len(str)
+                tag['byte_count'] = len(out_str)
                 self.file_stream.write(tag.tostring() + out_str)
-            else:
-                self.file_stream.write(out_str)
+            else: # not compressing
+                mat_writer = self.writer_getter.matrix_writer_factory(
+                    self.file_stream,
+                    var,
+                    name,
+                    is_global)
+                mat_writer.write()
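
The compressed branch above serializes the matrix into a StringIO, deflates it
with zlib, and writes it behind a full tag whose mdtype is miCOMPRESSED.
Stripped of the writer machinery, the framing is roughly as follows (the tag
dtype is a simplified, little-endian stand-in for mdtypes_template['tag_full']):

    import zlib
    import numpy as np

    miCOMPRESSED = 15   # matlab 5 type code for a compressed element

    def frame_compressed(matrix_bytes):
        # deflate the already-serialized matrix and prefix a full tag
        # giving the compressed byte count
        payload = zlib.compress(matrix_bytes)
        tag = np.zeros((), dtype=[('mdtype', '<u4'), ('byte_count', '<u4')])
        tag['mdtype'] = miCOMPRESSED
        tag['byte_count'] = len(payload)
        return tag.tostring() + payload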

Modified: trunk/scipy/io/matlab/miobase.py
===================================================================
--- trunk/scipy/io/matlab/miobase.py	2009-02-19 21:45:37 UTC (rev 5576)
+++ trunk/scipy/io/matlab/miobase.py	2009-02-20 03:37:55 UTC (rev 5577)
@@ -6,68 +6,79 @@
 
 import numpy as np
 
+from scipy.ndimage import doccer
+
 import byteordercodes as boc
 
-def filldoc(func):
-    ''' Decorator to put recurring doc elements into mio doc strings '''
-    doc_dict = \
-   {'file_arg':
-    '''file_name : string
-        Name of the mat file (do not need .mat extension if
-        appendmat==True) If name not a full path name, search for the
-        file on the sys.path list and use the first one found (the
-        current directory is searched first).  Can also pass open
-        file-like object''',
-    'append_arg':
-    '''appendmat : {True, False} optional
-        True to append the .mat extension to the end of the given
-        filename, if not already present''',
-    'basename_arg':
-    '''base_name : string, optional, unused
-        base name for unnamed variables.  The code no longer uses
-        this.  We deprecate for this version of scipy, and will remove
-        it in future versions''',
-    'load_args':
-    '''byte_order : {None, string}, optional
-        None by default, implying byte order guessed from mat
-        file. Otherwise can be one of ('native', '=', 'little', '<',
-        'BIG', '>')
-    mat_dtype : {False, True} optional
-         If True, return arrays in same dtype as would be loaded into
-         matlab (instead of the dtype with which they are saved)
-    squeeze_me : {False, True} optional
-         whether to squeeze unit matrix dimensions or not
-    chars_as_strings : {True, False} optional
-         whether to convert char arrays to string arrays
-    matlab_compatible : {False, True}
-         returns matrices as would be loaded by matlab (implies
-         squeeze_me=False, chars_as_strings=False, mat_dtype=True,
-         struct_as_record=True)''',
-    'struct_arg':
-    '''struct_as_record : {False, True} optional
-        Whether to load matlab structs as numpy record arrays, or as
-        old-style numpy arrays with dtype=object.  Setting this flag
-        to False replicates the behaviour of scipy version 0.6
-        (returning numpy object arrays).  The preferred setting is
-        True, because it allows easier round-trip load and save of
-        matlab files.  In a future version of scipy, we will change
-        the default setting to True, and following versions may remove
-        this flag entirely.  For now, we set the default to False, for
-        backwards compatibility, but issue a warning.
-        Note that non-record arrays cannot be exported via savemat.''',
-    'matstream_arg':
-    '''mat_stream : file-like
-        object with file API, open for reading''',
-    'long_fields':
-    '''long_field_names : boolean, optional, default=False
-        False - maximum field name length in a structure is 31 characters
-                which is the documented maximum length
-        True  - maximum field name length in a structure is 63 characters
-                which works for Matlab 7.6'''}
-    func.__doc__ = func.__doc__ % doc_dict
-    return func
 
+doc_dict = \
+    {'file_arg':
+         '''file_name : string
+   Name of the mat file (do not need .mat extension if
+   appendmat==True) If name not a full path name, search for the
+   file on the sys.path list and use the first one found (the
+   current directory is searched first).  Can also pass open
+   file-like object''',
+     'append_arg':
+         '''appendmat : {True, False} optional
+   True to append the .mat extension to the end of the given
+   filename, if not already present''',
+     'basename_arg':
+         '''base_name : string, optional, unused
+   base name for unnamed variables.  The code no longer uses
+   this.  We deprecate for this version of scipy, and will remove
+   it in future versions''',
+     'load_args':
+         '''byte_order : {None, string}, optional
+   None by default, implying byte order guessed from mat
+   file. Otherwise can be one of ('native', '=', 'little', '<',
+   'BIG', '>')
+mat_dtype : {False, True} optional
+   If True, return arrays in same dtype as would be loaded into
+   matlab (instead of the dtype with which they are saved)
+squeeze_me : {False, True} optional
+   whether to squeeze unit matrix dimensions or not
+chars_as_strings : {True, False} optional
+   whether to convert char arrays to string arrays
+matlab_compatible : {False, True}
+   returns matrices as would be loaded by matlab (implies
+   squeeze_me=False, chars_as_strings=False, mat_dtype=True,
+   struct_as_record=True)''',
+     'struct_arg':
+         '''struct_as_record : {False, True} optional
+   Whether to load matlab structs as numpy record arrays, or as
+   old-style numpy arrays with dtype=object.  Setting this flag to
+   False replicates the behaviour of scipy version 0.6 (returning
+   numpy object arrays).  The preferred setting is True, because it
+   allows easier round-trip load and save of matlab files.  In a
+   future version of scipy, we will change the default setting to
+   True, and following versions may remove this flag entirely.  For
+   now, we set the default to False, for backwards compatibility, but
+   issue a warning.  Note that non-record arrays cannot be exported
+   via savemat.''',
+     'matstream_arg':
+         '''mat_stream : file-like
+   object with file API, open for reading''',
+     'long_fields':
+         '''long_field_names : boolean, optional, default=False
+   * False - maximum field name length in a structure is 31 characters
+     which is the documented maximum length
+   * True - maximum field name length in a structure is 63 characters
+     which works for Matlab 7.6''',
+     'do_compression':
+         '''do_compression : {False, True} bool, optional
+   Whether to compress matrices on write. Default is False''',
+     'oned_as':
+         '''oned_as : {'column', 'row'} string, optional
+   If 'column', write 1D numpy arrays as column vectors
+   If 'row', write 1D numpy arrays as row vectors''',
+     'unicode_strings':
+         '''unicode_strings : {True, False} boolean, optional
+   If True, write strings as Unicode, else matlab usual encoding'''}
 
+docfiller = doccer.filldoc(doc_dict)
+
+
 def small_product(arr):
     ''' Faster than product for small arrays '''
     res = 1
@@ -75,6 +86,7 @@
         res *= e
     return res
 
+
 def get_matfile_version(fileobj):
     ''' Return major, minor tuple depending on apparent mat file type
 
@@ -209,6 +221,7 @@
 scipy.io.matlab.byteordercodes module instead.
 """)(ByteOrder)
 
+
 class MatStreamAgent(object):
     ''' Base object for readers / getters from mat file streams
 
@@ -257,7 +270,7 @@
     guess_byte_order        - guesses file byte order from file
     """
 
-    @filldoc
+    @docfiller
     def __init__(self, mat_stream,
                  byte_order=None,
                  mat_dtype=False,
@@ -508,6 +521,9 @@
             self.arr = self.arr.astype(dt.newbyteorder('='))
         self.name = name
 
+    def rewind(self):
+        self.file_stream.seek(0)
+
     def arr_dtype_number(self, num):
         ''' Return dtype for given number of items per element'''
         return np.dtype(self.arr.dtype.str[:2] + str(num))
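
The filldoc decorator is replaced by docfiller, built from a module-level
doc_dict via scipy.ndimage.doccer; functionally it is still string
interpolation into each decorated docstring, roughly as below (simple_filldoc
and demo_loader are illustrative names, not part of the commit):

    def simple_filldoc(docdict):
        # rough stand-in for doccer.filldoc(doc_dict): substitute the shared
        # parameter descriptions into each decorated docstring
        def decorate(func):
            if func.__doc__:
                func.__doc__ = func.__doc__ % docdict
            return func
        return decorate

    demo_filler = simple_filldoc({'append_arg':
        '''appendmat : {True, False} optional
       True to append the .mat extension to the given filename'''})

    @demo_filler
    def demo_loader(file_name, appendmat=True):
        ''' Load Matlab(tm) file

        %(append_arg)s
        '''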

Modified: trunk/scipy/io/matlab/tests/test_mio.py
===================================================================
--- trunk/scipy/io/matlab/tests/test_mio.py	2009-02-19 21:45:37 UTC (rev 5576)
+++ trunk/scipy/io/matlab/tests/test_mio.py	2009-02-20 03:37:55 UTC (rev 5577)
@@ -474,6 +474,31 @@
     arr = np.arange(5)
     stream = StringIO()
     savemat(stream, {'oned':arr})
-    stream.seek(0)
     vals = loadmat(stream)
-    assert_equal(vals['oned'].shape, (5,1))
+    yield assert_equal, vals['oned'].shape, (5,1)
+    # which is the same as 'column' for oned_as
+    stream = StringIO()
+    savemat(stream, {'oned':arr}, oned_as='column')
+    vals = loadmat(stream)
+    yield assert_equal, vals['oned'].shape, (5,1)
+    # but different from 'row'
+    stream = StringIO()
+    savemat(stream, {'oned':arr}, oned_as='row')
+    vals = loadmat(stream)
+    yield assert_equal, vals['oned'].shape, (1,5)
+
+
+def test_compression():
+    arr = np.zeros(100).reshape((5,20))
+    arr[2,10] = 1
+    stream = StringIO()
+    savemat(stream, {'arr':arr})
+    raw_len = len(stream.getvalue())
+    vals = loadmat(stream)
+    yield assert_array_equal, vals['arr'], arr
+    stream = StringIO()
+    savemat(stream, {'arr':arr}, do_compression=True)
+    compressed_len = len(stream.getvalue())
+    vals = loadmat(stream)
+    yield assert_array_equal, vals['arr'], arr
+    yield assert_true, raw_len>compressed_len


