[Scipy-svn] r6879 - in trunk/scipy/io: . arff matlab

Sat Nov 13 23:39:03 CST 2010


Author: rgommers
Date: 2010-11-13 23:39:02 -0600 (Sat, 13 Nov 2010)
New Revision: 6879

Modified:
   trunk/scipy/io/arff/arffread.py
   trunk/scipy/io/matlab/mio.py
   trunk/scipy/io/matlab/miobase.py
   trunk/scipy/io/netcdf.py
Log:
DOC: merge wiki edits for io module.

Modified: trunk/scipy/io/arff/arffread.py
===================================================================
--- trunk/scipy/io/arff/arffread.py	2010-11-14 05:38:10 UTC (rev 6878)
+++ trunk/scipy/io/arff/arffread.py	2010-11-14 05:39:02 UTC (rev 6879)
@@ -455,8 +455,15 @@
 
 
 def loadarff(filename):
-    """Read an arff file.
+    """
+    Read an arff file.
 
+    The data is returned as a record array, which can be accessed much like
+    a dictionary of numpy arrays.  For example, if one of the attributes is
+    called 'pressure', then its first 10 data points can be accessed from the
+    ``data`` record array like so: ``data['pressure'][0:10]``
+
     Parameters
     ----------
     filename : str
@@ -465,22 +472,32 @@
     Returns
     -------
     data : record array
-       the data of the arff file. Each record corresponds to one attribute.
-    meta : MetaData
-       this contains information about the arff file, like type and
-       names of attributes, the relation (name of the dataset), etc...
+       The data of the arff file, accessible by attribute names.
+    meta : `MetaData`
+       Contains information about the arff file such as name and
+       type of attributes, the relation (name of the dataset), etc...
 
+    Raises
+    ------
+    `ParseArffError`
+        This is raised if the given file is not ARFF-formatted.
+    NotImplementedError
+        The ARFF file has an attribute which is not supported yet.
+
     Notes
     -----
 
     This function should be able to read most arff files. Not
-    implemented functionalities include:
+    implemented functionality includes:
 
     * date type attributes
     * string type attributes
 
-    It can read files with numeric and nominal attributes.  It can read
-    files with sparse data (? in the file).
+    It can read files with numeric and nominal attributes.  It cannot read
+    files with sparse data ({} in the file).  However, this function can
+    read files with missing data (? in the file), representing the data
+    points as NaNs.
+
     """
     ofile = open(filename)
 

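A usage sketch of the loadarff interface documented above (the file name
'weather.arff' and the 'pressure' attribute are hypothetical):

    from scipy.io.arff import loadarff

    # loadarff returns the data as a record array plus a MetaData object.
    data, meta = loadarff('weather.arff')

    # Attributes are accessed by name; missing values ('?' in the file)
    # come back as NaN for numeric attributes.
    print data['pressure'][0:10]

    # meta describes the relation name and the attribute names and types.
    print meta
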
Modified: trunk/scipy/io/matlab/mio.py
===================================================================
--- trunk/scipy/io/matlab/mio.py	2010-11-14 05:38:10 UTC (rev 6878)
+++ trunk/scipy/io/matlab/mio.py	2010-11-14 05:39:02 UTC (rev 6879)
@@ -22,7 +22,7 @@
 
     Parameters
     ----------
-    file_name : string
+    file_name : str
        file name for mat file
     %(append_arg)s
 
@@ -113,13 +113,14 @@
 
 @docfiller
 def loadmat(file_name,  mdict=None, appendmat=True, **kwargs):
-    ''' Load Matlab(tm) file
+    """
+    Load MATLAB file
 
     Parameters
     ----------
     %(file_arg)s
     m_dict : dict, optional
-        dictionary in which to insert matfile variables
+        Dictionary in which to insert matfile variables.
     %(append_arg)s
     %(load_args)s
     %(struct_arg)s
@@ -143,7 +144,8 @@
     You will need an HDF5 python library to read matlab 7.3 format mat
     files.  Because scipy does not supply one, we do not implement the
     HDF5 / 7.3 interface here.
-    '''
+
+    """
     variable_names = kwargs.pop('variable_names', None)
     MR = mat_reader_factory(file_name, appendmat, **kwargs)
     matfile_dict = MR.get_variables(variable_names)
@@ -154,31 +156,53 @@
     return mdict
 
 @docfiller
-def savemat(file_name, mdict, 
-            appendmat=True, 
-            format='5', 
+def savemat(file_name, mdict,
+            appendmat=True,
+            format='5',
             long_field_names=False,
             do_compression=False,
             oned_as=None):
-    """Save a dictionary of names and arrays into the MATLAB-style .mat file.
+    """
+    Save a dictionary of names and arrays into a MATLAB-style .mat file.
 
-    This saves the arrayobjects in the given dictionary to a matlab
+    This saves the array objects in the given dictionary to a MATLAB-
     style .mat file.
 
     Parameters
     ----------
-    file_name : {string, file-like object}
-        Name of the mat file (do not need .mat extension if
-        appendmat==True) Can also pass open file-like object
+    file_name : str or file-like object
+        Name of the .mat file (.mat extension not needed if ``appendmat ==
+        True``).
+        Can also pass an open file-like object.
     m_dict : dict
-        dictionary from which to save matfile variables
+        Dictionary from which to save matfile variables.
     %(append_arg)s
-    format : {'5', '4'} string, optional
-        '5' for matlab 5 (up to matlab 7.2)
-        '4' for matlab 4 mat files
+    format : {'5', '4'}, string, optional
+        '5' (the default) for MATLAB 5 and up (to 7.2),
+        '4' for MATLAB 4 .mat files
     %(long_fields)s
     %(do_compression)s
     %(oned_as)s
+
+    See also
+    --------
+    mio4.MatFile4Writer
+    mio5.MatFile5Writer
+
+    Notes
+    -----
+    If ``format == '4'``, `mio4.MatFile4Writer` is called, which sets
+    `oned_as` to 'row' if it had been None.  If ``format == '5'``,
+    `mio5.MatFile5Writer` is called, which sets `oned_as` to 'column' if
+    it had been None, but first it executes:
+
+    ``warnings.warn("Using oned_as default value ('column')" +``
+                  ``" This will change to 'row' in future versions",``
+                  ``FutureWarning, stacklevel=2)``
+
+    without specifying precisely when the change will take place.
+
     """
     file_is_string = isinstance(file_name, basestring)
     if file_is_string:

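A minimal round-trip sketch for the savemat/loadmat interface documented
above; the file name 'example.mat' and the variable name 'vect' are
placeholders, and oned_as is passed explicitly to avoid the FutureWarning
mentioned in the Notes section:

    import numpy as np
    from scipy.io import savemat, loadmat

    vect = np.arange(10)

    # format='5' is the default; oned_as='row' writes 1-D arrays as row vectors.
    savemat('example.mat', {'vect': vect}, format='5', oned_as='row')

    # loadmat returns a dict mapping variable names to arrays (plus a few
    # metadata entries such as '__header__').
    contents = loadmat('example.mat')
    print contents['vect']
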
Modified: trunk/scipy/io/matlab/miobase.py
===================================================================
--- trunk/scipy/io/matlab/miobase.py	2010-11-14 05:38:10 UTC (rev 6878)
+++ trunk/scipy/io/matlab/miobase.py	2010-11-14 05:39:02 UTC (rev 6879)
@@ -24,55 +24,55 @@
 
 doc_dict = \
     {'file_arg':
-         '''file_name : string
+         '''file_name : str
    Name of the mat file (do not need .mat extension if
-   appendmat==True) Can also pass open file-like object''',
+   appendmat==True) Can also pass open file-like object.''',
      'append_arg':
-         '''appendmat : {True, False} optional
+         '''appendmat : bool, optional
    True to append the .mat extension to the end of the given
-   filename, if not already present''',
+   filename, if not already present.''',
      'load_args':
-         '''byte_order : {None, string}, optional
+         '''byte_order : str or None, optional
    None by default, implying byte order guessed from mat
    file. Otherwise can be one of ('native', '=', 'little', '<',
-   'BIG', '>')
-mat_dtype : {False, True} optional
+   'BIG', '>').
+mat_dtype : bool, optional
    If True, return arrays in same dtype as would be loaded into
-   matlab (instead of the dtype with which they are saved)
-squeeze_me : {False, True} optional
-   whether to squeeze unit matrix dimensions or not
-chars_as_strings : {True, False} optional
-   whether to convert char arrays to string arrays
-matlab_compatible : {False, True}
-   returns matrices as would be loaded by matlab (implies
+   MATLAB (instead of the dtype with which they are saved).
+squeeze_me : bool, optional
+   Whether to squeeze unit matrix dimensions or not.
+chars_as_strings : bool, optional
+   Whether to convert char arrays to string arrays.
+matlab_compatible : bool, optional
+   Returns matrices as would be loaded by MATLAB (implies
    squeeze_me=False, chars_as_strings=False, mat_dtype=True,
-   struct_as_record=True)''',
+   struct_as_record=True).''',
      'struct_arg':
-         '''struct_as_record : {True, False} optional
-   Whether to load matlab structs as numpy record arrays, or as
+         '''struct_as_record : bool, optional
+   Whether to load MATLAB structs as numpy record arrays, or as
    old-style numpy arrays with dtype=object.  Setting this flag to
    False replicates the behaviour of scipy version 0.7.x (returning
    numpy object arrays).  The default setting is True, because it
-   allows easier round-trip load and save of matlab files.''',
+   allows easier round-trip load and save of MATLAB files.''',
      'matstream_arg':
          '''mat_stream : file-like
-   object with file API, open for reading''',
+   Object with file API, open for reading.''',
      'long_fields':
-         '''long_field_names : boolean, optional, default=False
+         '''long_field_names : bool, optional
    * False - maximum field name length in a structure is 31 characters
-     which is the documented maximum length
+     which is the documented maximum length. This is the default.
    * True - maximum field name length in a structure is 63 characters
      which works for Matlab 7.6''',
      'do_compression':
-         '''do_compression : {False, True} bool, optional
-   Whether to compress matrices on write. Default is False''',
+         '''do_compression : bool, optional
+   Whether to compress matrices on write. Default is False.''',
      'oned_as':
-         '''oned_as : {'column', 'row'} string, optional
-   If 'column', write 1D numpy arrays as column vectors
-   If 'row', write 1D numpy arrays as row vectors''',
+         '''oned_as : {'column', 'row'}, optional
+   If 'column', write 1-D numpy arrays as column vectors.
+   If 'row', write 1-D numpy arrays as row vectors.''',
      'unicode_strings':
-         '''unicode_strings : {True, False} boolean, optional
-   If True, write strings as Unicode, else matlab usual encoding'''}
+         '''unicode_strings : bool, optional
+   If True, write strings as Unicode, else use MATLAB's usual encoding.'''}
 
 docfiller = doccer.filldoc(doc_dict)
 

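The doc_dict entries above are shared parameter descriptions that get
substituted into function docstrings through the @docfiller decorator
returned by doccer.filldoc. A self-contained sketch of that substitution
pattern (the filldoc defined here is a simplified stand-in, not scipy's
implementation):

    doc_dict = {'append_arg':
                    '''appendmat : bool, optional
       True to append the .mat extension to the end of the given
       filename, if not already present.'''}

    def filldoc(docdict):
        """Return a decorator that fills '%(key)s' docstring placeholders."""
        def decorate(func):
            if func.__doc__:
                func.__doc__ = func.__doc__ % docdict
            return func
        return decorate

    docfiller = filldoc(doc_dict)

    @docfiller
    def load_stub(file_name, appendmat=True):
        """Stub loader.

        Parameters
        ----------
        %(append_arg)s
        """

    print load_stub.__doc__
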
Modified: trunk/scipy/io/netcdf.py
===================================================================
--- trunk/scipy/io/netcdf.py	2010-11-14 05:38:10 UTC (rev 6878)
+++ trunk/scipy/io/netcdf.py	2010-11-14 05:39:02 UTC (rev 6879)
@@ -1,52 +1,57 @@
 """
 NetCDF reader/writer module.
 
-This module implements the Scientific.IO.NetCDF API to read and create
-NetCDF files. The same API is also used in the PyNIO and pynetcdf
-modules, allowing these modules to be used interchangebly when working
-with NetCDF files. The major advantage of ``scipy.io.netcdf`` over other
-modules is that it doesn't require the code to be linked to the NetCDF
-libraries as the other modules do.
+This module is used to read and create NetCDF files. NetCDF files are
+accessed through the `netcdf_file` object. Data written to and from NetCDF
+files are contained in `netcdf_variable` objects. Attributes are given
+as member variables of the `netcdf_file` and `netcdf_variable` objects.
 
-The code is based on the `NetCDF file format specification
-<http://www.unidata.ucar.edu/software/netcdf/docs/netcdf.html>`_. A
-NetCDF file is a self-describing binary format, with a header followed
-by data. The header contains metadata describing dimensions, variables
-and the position of the data in the file, so access can be done in an
-efficient manner without loading unnecessary data into memory. We use
-the ``mmap`` module to create Numpy arrays mapped to the data on disk,
-for the same purpose.
+Notes
+-----
+NetCDF files are a self-describing binary data format. The file contains
+metadata that describes the dimensions and variables in the file. More
+details about NetCDF files can be found `here
+<http://www.unidata.ucar.edu/software/netcdf/docs/netcdf.html>`_. There
+are three main sections to a NetCDF data structure:
 
-The structure of a NetCDF file is as follows:
+1. Dimensions
+2. Variables
+3. Attributes
 
-    C D F <VERSION BYTE> <NUMBER OF RECORDS>
-    <DIMENSIONS> <GLOBAL ATTRIBUTES> <VARIABLES METADATA>
-    <NON-RECORD DATA> <RECORD DATA>
+The dimensions section records the name and length of each dimension used
+by the variables. The variables then indicate which dimensions they use
+and any attributes such as data units, in addition to containing the data
+values for the variable. It is good practice to include a
+variable with the same name as a dimension to provide the values for
+that axis. Lastly, the attributes section contains additional
+information such as the name of the file creator or the instrument used to
+collect the data.
 
-Record data refers to data where the first axis can be expanded at
-will. All record variables share a same dimension at the first axis,
-and they are stored at the end of the file per record, ie
+When writing data to a NetCDF file, there is often the need to indicate the
+'record dimension'. A record dimension is the unbounded dimension for a
+variable. For example, a temperature variable may have dimensions of
+latitude, longitude and time. If one wants to add more temperature data to
+the NetCDF file as time progresses, then the temperature variable should
+have the time dimension flagged as the record dimension.
 
-    A[0], B[0], ..., A[1], B[1], ..., etc,
+This module implements the Scientific.IO.NetCDF API to read and create
+NetCDF files. The same API is also used in the PyNIO and pynetcdf
+modules, allowing these modules to be used interchangeably when working
+with NetCDF files. The major advantage of this module over other
+modules is that it doesn't require the code to be linked to the NetCDF
+libraries.
 
-so that new data can be appended to the file without changing its original
-structure. Non-record data are padded to a 4n bytes boundary. Record data
-are also padded, unless there is exactly one record variable in the file,
-in which case the padding is dropped.  All data is stored in big endian
-byte order.
+In addition, the NetCDF file header contains the position of the data in
+the file, so access can be done in an efficient manner without loading
+unnecessary data into memory. It uses the ``mmap`` module to create
+Numpy arrays mapped to the data on disk, for the same purpose.
 
-The Scientific.IO.NetCDF API allows attributes to be added directly to
-instances of ``netcdf_file`` and ``netcdf_variable``. To differentiate
-between user-set attributes and instance attributes, user-set attributes
-are automatically stored in the ``_attributes`` attribute by overloading
-``__setattr__``. This is the reason why the code sometimes uses
-``obj.__dict__['key'] = value``, instead of simply ``obj.key = value``;
-otherwise the key would be inserted into userspace attributes.
+Examples
+--------
+To create a NetCDF file:
 
-To create a NetCDF file::
-
-    >>> import time
-    >>> f = netcdf_file('simple.nc', 'w')
+    >>> from scipy.io import netcdf
+    >>> f = netcdf.netcdf_file('simple.nc', 'w')
     >>> f.history = 'Created for a test'
     >>> f.createDimension('time', 10)
     >>> time = f.createVariable('time', 'i', ('time',))
@@ -54,9 +59,14 @@
     >>> time.units = 'days since 2008-01-01'
     >>> f.close()
 
-To read the NetCDF file we just created::
+Note the assignment of ``range(10)`` to ``time[:]``.  Exposing the slice
+of the time variable allows the data to be set in the object, rather
+than letting ``range(10)`` overwrite the ``time`` variable.
 
-    >>> f = netcdf_file('simple.nc', 'r')
+To read the NetCDF file we just created:
+
+    >>> from scipy.io import netcdf
+    >>> f = netcdf.netcdf_file('simple.nc', 'r')
     >>> print f.history
     Created for a test
     >>> time = f.variables['time']
@@ -68,13 +78,23 @@
     9
     >>> f.close()
 
-TODO:
- * properly implement ``_FillValue``.
- * implement Jeff Whitaker's patch for masked variables.
- * fix character variables.
- * implement PAGESIZE for Python 2.6?
 """
 
+#TODO:
+# * properly implement ``_FillValue``.
+# * implement Jeff Whitaker's patch for masked variables.
+# * fix character variables.
+# * implement PAGESIZE for Python 2.6?
+
+#The Scientific.IO.NetCDF API allows attributes to be added directly to
+#instances of ``netcdf_file`` and ``netcdf_variable``. To differentiate
+#between user-set attributes and instance attributes, user-set attributes
+#are automatically stored in the ``_attributes`` attribute by overloading
+#``__setattr__``. This is the reason why the code sometimes uses
+#``obj.__dict__['key'] = value``, instead of simply ``obj.key = value``;
+#otherwise the key would be inserted into userspace attributes.
+
+
 __all__ = ['netcdf_file', 'netcdf_variable']
 
 
@@ -122,35 +142,37 @@
 
 class netcdf_file(object):
     """
-    A ``netcdf_file`` object has two standard attributes: ``dimensions`` and
-    ``variables``. The values of both are dictionaries, mapping dimension
+    A file object for NetCDF data.
+
+    A `netcdf_file` object has two standard attributes: `dimensions` and
+    `variables`. The values of both are dictionaries, mapping dimension
     names to their associated lengths and variable names to variables,
     respectively. Application programs should never modify these
     dictionaries.
 
     All other attributes correspond to global attributes defined in the
     NetCDF file. Global file attributes are created by assigning to an
-    attribute of the ``netcdf_file`` object.
+    attribute of the `netcdf_file` object.
 
+    Parameters
+    ----------
+    filename : string or file-like
+        string -> filename
+    mode : {'r', 'w'}, optional
+        read-write mode, default is 'r'
+    mmap : None or bool, optional
+        Whether to mmap `filename` when reading.  Default is True
+        when `filename` is a file name, False when `filename` is a
+        file-like object
+    version : {1, 2}, optional
+        version of netcdf to read / write, where 1 means *Classic
+        format* and 2 means *64-bit offset format*.  Default is 1.  See
+        `here <http://www.unidata.ucar.edu/software/netcdf/docs/netcdf/Which-Format.html>`_
+        for more info.
+
     """
     def __init__(self, filename, mode='r', mmap=None, version=1):
-        ''' Initialize netcdf_file from fileobj (string or file-like)
-
-        Parameters
-        ----------
-        filename : string or file-like
-           string -> filename
-        mode : {'r', 'w'}, optional
-           read-write mode, default is 'r'
-        mmap : None or bool, optional
-           Whether to mmap `filename` when reading.  Default is True
-           when `filename` is a file name, False when `filename` is a
-           file-like object
-        version : {1, 2}, optional
-           version of netcdf to read / write, where 1 means *Classic
-           format* and 2 means *64-bit offset format*.  Default is 1.  See
-           http://www.unidata.ucar.edu/software/netcdf/docs/netcdf/Which-Format.html#Which-Format
-        '''
+        """Initialize netcdf_file from fileobj (str or file-like)."""
         if hasattr(filename, 'seek'): # file-like
             self.fp = filename
             self.filename = 'None'
@@ -192,6 +214,7 @@
         self.__dict__[attr] = value
 
     def close(self):
+        """Closes the NetCDF file."""
         if not self.fp.closed:
             try:
                self.flush()
@@ -200,10 +223,59 @@
     __del__ = close
 
     def createDimension(self, name, length):
+        """
+        Adds a dimension to the Dimension section of the NetCDF data structure.
+
+        Note that this function merely adds a new dimension that the variables can
+        reference.  The values for the dimension, if desired, should be added as
+        a variable using `createVariable`, referring to this dimension.
+
+        Parameters
+        ----------
+        name : str
+            Name of the dimension (e.g., 'lat' or 'time').
+        length : int
+            Length of the dimension.
+
+        See Also
+        --------
+        createVariable
+
+        """
         self.dimensions[name] = length
         self._dims.append(name)
 
     def createVariable(self, name, type, dimensions):
+        """
+        Create an empty variable for the `netcdf_file` object, specifying its data
+        type and the dimensions it uses.
+
+        Parameters
+        ----------
+        name : str
+            Name of the new variable.
+        type : dtype or str
+            Data type of the variable.
+        dimensions : sequence of str
+            List of the dimension names used by the variable, in the desired order.
+
+        Returns
+        -------
+        variable : netcdf_variable
+            The newly created ``netcdf_variable`` object.
+            This object has also been added to the `netcdf_file` object.
+
+        See Also
+        --------
+        createDimension
+
+        Notes
+        -----
+        Any dimensions to be used by the variable should already exist in the
+        NetCDF data structure or should be created by `createDimension` prior to
+        creating the NetCDF variable.
+
+        """
         shape = tuple([self.dimensions[dim] for dim in dimensions])
         shape_ = tuple([dim or 0 for dim in shape])  # replace None with 0 for numpy
 
@@ -217,6 +289,14 @@
         return self.variables[name]
 
     def flush(self):
+        """
+        Perform a sync-to-disk flush if the `netcdf_file` object is in write mode.
+
+        See Also
+        --------
+        sync : Identical function
+
+        """
         if hasattr(self, 'mode') and self.mode is 'w':
             self._write()
     sync = flush
@@ -649,12 +729,47 @@
     shape = property(shape)
 
     def getValue(self):
+        """
+        Retrieve a scalar value from a `netcdf_variable` of length one.
+
+        Raises
+        ------
+        ValueError
+            If the netcdf variable is an array of length greater than one,
+            this exception will be raised.
+
+        """
         return self.data.item()
 
     def assignValue(self, value):
+        """
+        Assign a scalar value to a `netcdf_variable` of length one.
+
+        Parameters
+        ----------
+        value : scalar
+            Scalar value (of compatible type) to assign to a length-one netcdf
+            variable. This value will be written to file.
+
+        Raises
+        ------
+        ValueError
+            If the input is not a scalar, or if the destination is not a length-one
+            netcdf variable.
+
+        """
         self.data.itemset(value)
 
     def typecode(self):
+        """
+        Return the typecode of the variable.
+
+        Returns
+        -------
+        typecode : char
+            The character typecode of the variable (e.g., 'i' for int).
+
+        """
         return self._typecode
 
     def __getitem__(self, index):

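A condensed sketch combining the create and read examples from the netcdf
module docstring above, with a typecode() call to exercise the documented
variable-level API ('simple.nc' is a throwaway file name):

    from scipy.io import netcdf

    # Write a small file: the dimension must exist before a variable uses it.
    f = netcdf.netcdf_file('simple.nc', 'w')
    f.history = 'Created for a test'        # stored as a global attribute
    f.createDimension('time', 10)
    time = f.createVariable('time', 'i', ('time',))
    time[:] = range(10)                     # assign through the slice
    time.units = 'days since 2008-01-01'    # stored as a variable attribute
    f.close()

    # Read it back.
    f = netcdf.netcdf_file('simple.nc', 'r')
    print f.history                         # Created for a test
    time = f.variables['time']
    print time.typecode()                   # i
    print time[time.shape[0] - 1]           # 9
    f.close()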

