[Numpy-svn] r6329 - in trunk/numpy/lib: . tests

numpy-svn@scip... numpy-svn@scip...
Mon Jan 19 15:22:59 CST 2009


Author: pierregm
Date: 2009-01-19 15:22:52 -0600 (Mon, 19 Jan 2009)
New Revision: 6329

Added:
   trunk/numpy/lib/_iotools.py
   trunk/numpy/lib/tests/test__iotools.py
Modified:
   trunk/numpy/lib/io.py
   trunk/numpy/lib/tests/test_io.py
Log:
* lib     : introduced _iotools
* lib.io : introduced genfromtxt, ndfromtxt, mafromtxt, recfromtxt, recfromcsv.


Added: trunk/numpy/lib/_iotools.py
===================================================================
--- trunk/numpy/lib/_iotools.py	2009-01-19 09:04:20 UTC (rev 6328)
+++ trunk/numpy/lib/_iotools.py	2009-01-19 21:22:52 UTC (rev 6329)
@@ -0,0 +1,469 @@
+"""
+A collection of functions designed to help I/O with ascii file.
+
+"""
+__docformat__ = "restructuredtext en"
+
+import numpy as np
+import numpy.core.numeric as nx
+from __builtin__ import bool, int, long, float, complex, object, unicode, str
+
+
+def _is_string_like(obj):
+    """Tell whether `obj` can be concatenated with the empty string."""
+    # Duck-typing test: only the concatenation itself is attempted.
+    stringlike = True
+    try:
+        obj + ''
+    except (TypeError, ValueError):
+        stringlike = False
+    return stringlike
+
+
+def _to_filehandle(fname, flag='r', return_opened=False):
+    """
+    Returns the filehandle corresponding to a string or a file.
+    If the string ends in '.gz' or '.bz2', the file is decompressed on the fly.
+
+    Parameters
+    ----------
+    fname : string, filehandle
+        Name of the file to open, or object with a `seek` attribute (which
+        is returned as is). Anything else raises a ValueError.
+    flag : string, optional
+        Flag indicating the status of the file ('r' for read, 'w' for write).
+    return_opened : boolean, optional
+        Whether to also return True if the file was opened here, else False.
+    """
+    if _is_string_like(fname):
+        if fname.endswith('.gz'):
+            import gzip
+            fhd = gzip.open(fname, flag)
+        elif fname.endswith('.bz2'):
+            import bz2
+            fhd = bz2.BZ2File(fname, flag)  # honor `flag`, as for gzip
+        else:
+            fhd = file(fname, flag)
+        opened = True
+    elif hasattr(fname, 'seek'):
+        (fhd, opened) = (fname, False)
+    else:
+        raise ValueError('fname must be a string or file handle')
+    if return_opened:
+        return fhd, opened
+    return fhd
+
+
+def flatten_dtype(ndtype):
+    """
+    Return the list of basic dtypes contained in `ndtype`.
+
+    A dtype without named fields is returned as a one-element list; the
+    fields of a structured dtype are expanded recursively, in field order.
+    """
+    if ndtype.names is None:
+        return [ndtype]
+    flattened = []
+    for fieldname in ndtype.names:
+        # An entry is (dtype, offset) or (dtype, offset, title): use item 0
+        subtype = ndtype.fields[fieldname][0]
+        flattened += flatten_dtype(subtype)
+    return flattened
+
+
+
+class LineSplitter:
+    """
+    Defines a function to split a string at a given delimiter or at given places.
+
+    Parameters
+    ----------
+    delimiter : var, optional
+        If a string, character used to delimit consecutive fields.
+        If an integer or a sequence of integers, width(s) of each field.
+    comments : {'#', string}
+        Character used to mark the beginning of a comment.
+    autostrip : boolean, optional
+        Whether to strip each individual fields
+    """
+
+    def autostrip(self, method):
+        "Wrapper to strip each member of the output of `method`."
+        return lambda input: [_.strip() for _ in method(input)]
+    #
+    def __init__(self, delimiter=None, comments='#', autostrip=True):
+        self.comments = comments
+        # Delimiter is a character
+        if (delimiter is None) or _is_string_like(delimiter):
+            delimiter = delimiter or None
+            _handyman = self._delimited_splitter
+        # Delimiter is a list of field widths
+        elif hasattr(delimiter, '__iter__'):
+            _handyman = self._variablewidth_splitter
+            idx = np.cumsum([0]+list(delimiter))
+            delimiter = [slice(i,j) for (i,j) in zip(idx[:-1], idx[1:])]
+        # Delimiter is a single integer
+        elif int(delimiter):
+            (_handyman, delimiter) = (self._fixedwidth_splitter, int(delimiter))
+        else:
+            (_handyman, delimiter) = (self._delimited_splitter, None)
+        self.delimiter = delimiter
+        if autostrip:
+            self._handyman = self.autostrip(_handyman)
+        else:
+            self._handyman = _handyman
+    # Trim blanks and line ends only: an edge tab may delimit an empty field
+    def _delimited_splitter(self, line):
+        line = line.split(self.comments)[0].strip(" \r\n")
+        if not line:
+            return []
+        return line.split(self.delimiter)
+    # The line end is dropped, so that a blank line gives an empty list
+    def _fixedwidth_splitter(self, line):
+        line = line.split(self.comments)[0].strip("\r\n")
+        if not line:
+            return []
+        fixed = self.delimiter
+        slices = [slice(i, i+fixed) for i in range(len(line))[::fixed]]
+        return [line[s] for s in slices]
+    # The line end is dropped, so that it never ends up in the last field
+    def _variablewidth_splitter(self, line):
+        line = line.split(self.comments)[0].strip("\r\n")
+        if not line:
+            return []
+        slices = self.delimiter
+        return [line[s] for s in slices]
+    #
+    def __call__(self, line):
+        return self._handyman(line)
+
+
+
+class NameValidator:
+    """
+    Validates a list of strings to use as field names.
+    The strings are stripped of any non alphanumeric character, and spaces
+    are replaced by `_`. If the optional input parameter `case_sensitive`
+    is False, the strings are set to upper case.
+
+    During instantiation, the user can define a list of names to exclude, as
+    well as a list of invalid characters. Names in the exclusion list
+    are appended a '_' character.
+
+    Once an instance has been created, it can be called with a list of names
+    and a list of valid names will be created.
+    The `__call__` method accepts an optional keyword, `default`, that sets
+    the default name in case of ambiguity. By default, `default = 'f'`, so
+    that names will default to `f0`, `f1`
+
+    Parameters
+    ----------
+    excludelist : sequence, optional
+        A list of names to exclude, to which the default names
+        ['return','file','print'] are added. Excluded names are appended an
+        underscore: for example, `file` would become `file_`.
+    deletechars : string, optional
+        A string combining invalid characters that must be deleted from the names.
+    case_sensitive : {True, False, 'upper', 'lower'}, optional
+        If True, field names are case_sensitive.
+        If False or 'upper', field names are converted to upper case.
+        If 'lower', field names are converted to lower case.
+    """
+    #
+    defaultexcludelist = ['return','file','print']
+    defaultdeletechars = set(r"""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")
+    #
+    def __init__(self, excludelist=None, deletechars=None, case_sensitive=None):
+        #
+        if excludelist is None:
+            excludelist = []
+        # Work on a copy: the list given by the caller must not be modified
+        self.excludelist = list(excludelist) + self.defaultexcludelist
+        #
+        if deletechars is None:
+            delete = set(self.defaultdeletechars)
+        else:
+            delete = set(deletechars)
+        delete.add('"')
+        self.deletechars = delete
+
+        if (case_sensitive is None) or (case_sensitive is True):
+            self.case_converter = lambda x: x
+        elif (case_sensitive is False) or ('u' in case_sensitive):
+            self.case_converter = lambda x: x.upper()
+        elif 'l' in case_sensitive:
+            self.case_converter = lambda x: x.lower()
+        else:
+            self.case_converter = lambda x: x
+    #
+    def validate(self, names, default='f'):
+        "Return the validated version of `names` (None stays None)."
+        if names is None:
+            return
+        #
+        validatednames = []
+        seen = dict()
+        #
+        deletechars = self.deletechars
+        excludelist = self.excludelist
+        #
+        case_converter = self.case_converter
+        #
+        for i, item in enumerate(names):
+            item = case_converter(item)
+            item = item.strip().replace(' ', '_')
+            item = ''.join([c for c in item if c not in deletechars])
+            if not len(item):
+                item = '%s%d' % (default, i)
+            elif item in excludelist:
+                item += '_'
+            cnt = seen.get(item, 0)
+            if cnt > 0:
+                validatednames.append(item + '_%d' % cnt)
+            else:
+                validatednames.append(item)
+            seen[item] = cnt+1
+        return validatednames
+    #
+    def __call__(self, names, default='f'):
+        return self.validate(names, default)
+
+
+
+def str2bool(value):
+    """
+    Convert the string `value` to the boolean it spells out.
+
+    The comparison ignores case: 'true' gives True and 'false' gives False.
+
+    Raises
+    ------
+    ValueError
+        If the string is not 'True' or 'False' (case independent)
+    """
+    uppercased = value.upper()
+    # Only the two literal spellings are recognized
+    if uppercased not in ('TRUE', 'FALSE'):
+        raise ValueError("Invalid boolean")
+    return uppercased == 'TRUE'
+
+
+
+class StringConverter:
+    """
+    Factory class for function transforming a string into another object (int,
+    float).
+
+    After initialization, an instance can be called to transform a string
+    into another object. If the string is recognized as representing a missing
+    value, a default value is returned.
+
+    Parameters
+    ----------
+    dtype_or_func : {None, dtype, function}, optional
+        Input data type, used to define a basic function and a default value
+        for missing data. For example, when `dtype` is float, the :attr:`func`
+        attribute is set to ``float`` and the default value to `np.nan`.
+        Alternatively, function used to convert a string to another object.
+        In that later case, it is recommended to give an associated default
+        value as input.
+    default : {None, var}, optional
+        Value to return by default, that is, when the string to be converted
+        is flagged as missing.
+    missing_values : {sequence}, optional
+        Sequence of strings indicating a missing value.
+    locked : {boolean}, optional
+        Whether the StringConverter should be locked to prevent automatic
+        upgrade or not.
+
+    Attributes
+    ----------
+    func : function
+        Function used for the conversion
+    default : var
+        Default value to return when the input corresponds to a missing value.
+    type : type
+        Type of the output.
+    _status : integer
+        Integer representing the order of the conversion.
+    _mapper : sequence of tuples
+        Sequence of tuples (dtype, function, default value) to evaluate in order.
+    _locked : boolean
+        Whether the StringConverter is locked, thereby preventing any automatic
+        upgrade or not.
+
+    """
+    #
+    _mapper = [(nx.bool_, str2bool, None),
+               (nx.integer, int, -1),
+               (nx.floating, float, nx.nan),
+               (complex, complex, nx.nan+0j),
+               (nx.string_, str, '???')]
+    (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper)
+    #
+    @classmethod
+    def _getsubdtype(cls, val):
+        """Returns the type of the dtype of the input variable."""
+        return np.array(val).dtype.type
+    #
+    @classmethod
+    def upgrade_mapper(cls, func, default=None):
+        """
+    Upgrade the mapper of a StringConverter by adding a new function and its
+    corresponding default.
+
+    The input function (or sequence of functions) and its associated default
+    value (if any) is inserted in penultimate position of the mapper.
+    The corresponding type is estimated from the dtype of the default value.
+
+    Parameters
+    ----------
+    func : var
+        Function, or sequence of functions
+
+    Examples
+    --------
+    >>> import dateutil.parser
+    >>> import datetime
+    >>> dateparser = dateutil.parser.parse
+    >>> defaultdate = datetime.date(2000, 1, 1)
+    >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
+        """
+        # Func is a single function
+        if hasattr(func, '__call__'):
+            cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
+            return
+        elif hasattr(func, '__iter__'):
+            if isinstance(func[0], (tuple, list)):
+                for _ in func:
+                    cls._mapper.insert(-1, _)
+                return
+            if default is None:
+                default = [None] * len(func)
+            else:
+                default = list(default)
+                default.extend([None] * (len(func)-len(default)))  # pad
+            for (fct, dft) in zip(func, default):
+                cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))
+    #
+    def __init__(self, dtype_or_func=None, default=None, missing_values=None,
+                 locked=False):
+        # Defines a lock for upgrade
+        self._locked = bool(locked)
+        # No input dtype: minimal initialization
+        if dtype_or_func is None:
+            self.func = str2bool
+            self._status = 0
+            self.default = default
+            ttype = np.bool
+        else:
+            # Is the input a np.dtype ?
+            try:
+                self.func = None
+                ttype = np.dtype(dtype_or_func).type
+            except TypeError:
+                # dtype_or_func must be a function, then
+                if not hasattr(dtype_or_func, '__call__'):
+                    errmsg = "The input argument `dtype` is neither a function"\
+                             " or a dtype (got '%s' instead)"
+                    raise TypeError(errmsg % type(dtype_or_func))
+                # Set the function
+                self.func = dtype_or_func
+                # If we don't have a default, try to guess it or set it to None
+                if default is None:
+                    try:
+                        default = self.func('0')
+                    except ValueError:
+                        default = None
+                ttype = self._getsubdtype(default)
+            # Set the status according to the dtype (a falsy default is kept)
+            for (i, (deftype, func, default_def)) in enumerate(self._mapper):
+                if np.issubdtype(ttype, deftype):
+                    self._status = i
+                    self.default = default_def if default is None else default
+                    break
+            # If the input was a dtype, set the function to the last we saw
+            if self.func is None:
+                self.func = func
+            # If the status is 1 (int), change the function to smthg more robust
+            if self.func == self._mapper[1][1]:
+                self.func = lambda x : int(float(x))
+        # Store the list of strings corresponding to missing values.
+        if missing_values is None:
+            self.missing_values = set([''])
+        else:
+            self.missing_values = set(list(missing_values) + [''])
+        #
+        self._callingfunction = self._strict_call
+        self.type = ttype
+    # Lenient conversion: any ValueError gives the default value
+    def _loose_call(self, value):
+        try:
+            return self.func(value)
+        except ValueError:
+            return self.default
+    # Strict conversion: only a registered missing value gives the default
+    def _strict_call(self, value):
+        try:
+            return self.func(value)
+        except ValueError:
+            if value.strip() in self.missing_values:
+                return self.default
+            raise ValueError("Cannot convert string '%s'" % value)
+    #
+    def __call__(self, value):
+        return self._callingfunction(value)
+    #
+    def upgrade(self, value):
+        """
+    Tries to find the best converter for `value`, by testing different
+    converters in order.
+    The order in which the converters are tested is read from the
+    :attr:`_status` attribute of the instance.
+        """
+        try:
+            self._strict_call(value)
+        except ValueError:
+            # Raise an exception if we locked the converter...
+            if self._locked:
+                raise ValueError("Converter is locked and cannot be upgraded")
+            _statusmax = len(self._mapper)
+            # Complains if we try to upgrade by the maximum
+            if self._status == _statusmax:
+                raise ValueError("Could not find a valid conversion function")
+            elif self._status < _statusmax - 1:
+                self._status += 1
+            (self.type, self.func, self.default) = self._mapper[self._status]
+            self.upgrade(value)
+    #
+    def update(self, func, default=None, missing_values='', locked=False):
+        """
+    Sets the :attr:`func` and :attr:`default` attributes directly.
+
+    Parameters
+    ----------
+    func : function
+        Conversion function.
+    default : {var}, optional
+        Default value to return when a missing value is encountered.
+    missing_values : {var}, optional
+        Sequence of strings representing missing values.
+    locked : {False, True}, optional
+        Whether the status should be locked to prevent automatic upgrade.
+        """
+        self.func = func
+        self._locked = locked
+        # Don't reset the default to None if we can avoid it
+        if default is not None:
+            self.default = default
+        # Add the missing values to the existing set
+        if missing_values is not None:
+            if _is_string_like(missing_values):
+                self.missing_values.add(missing_values)
+            elif hasattr(missing_values, '__iter__'):
+                for val in missing_values:
+                    self.missing_values.add(val)
+        else:
+            self.missing_values = set()
+        self.type = self._getsubdtype(func('0'))   # Update the type
+


Property changes on: trunk/numpy/lib/_iotools.py
___________________________________________________________________
Name: svn:mime-type
   + text/plain

Modified: trunk/numpy/lib/io.py
===================================================================
--- trunk/numpy/lib/io.py	2009-01-19 09:04:20 UTC (rev 6328)
+++ trunk/numpy/lib/io.py	2009-01-19 21:22:52 UTC (rev 6329)
@@ -1,4 +1,5 @@
 __all__ = ['savetxt', 'loadtxt',
+           'genfromtxt', 'ndfromtxt', 'mafromtxt', 'recfromtxt', 'recfromcsv',
            'load', 'loads',
            'save', 'savez',
            'packbits', 'unpackbits',
@@ -15,7 +16,11 @@
 from _datasource import DataSource
 from _compiled_base import packbits, unpackbits
 
+from _iotools import LineSplitter, NameValidator, StringConverter, \
+                     _is_string_like, flatten_dtype
+
 _file = file
+_string_like = _is_string_like
 
 class BagObj(object):
     """A simple class that converts attribute lookups to
@@ -264,10 +269,6 @@
         return str
 
 
-def _string_like(obj):
-    try: obj + ''
-    except (TypeError, ValueError): return 0
-    return 1
 
 def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None,
             skiprows=0, usecols=None, unpack=False):
@@ -342,7 +343,7 @@
     if usecols is not None:
         usecols = list(usecols)
 
-    if _string_like(fname):
+    if _is_string_like(fname):
         if fname.endswith('.gz'):
             import gzip
             fh = gzip.open(fname)
@@ -520,7 +521,7 @@
 
     """
 
-    if _string_like(fname):
+    if _is_string_like(fname):
         if fname.endswith('.gz'):
             import gzip
             fh = gzip.open(fname,'wb')
@@ -608,3 +609,466 @@
         seq = [(x,) for x in seq]
     output = np.array(seq, dtype=dtype)
     return output
+
+
+
+
+#####--------------------------------------------------------------------------
+#---- --- ASCII functions ---
+#####--------------------------------------------------------------------------
+
+
+
+def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
+               converters=None, missing='', missing_values=None, usecols=None,
+               names=None, excludelist=None, deletechars=None,
+               case_sensitive=True, unpack=None, usemask=False, loose=True):
+    """
+    Load data from a text file.
+
+    Each line past the first `skiprows` ones is split at the `delimiter`
+    character, and characters following the `comments` character are discarded.
+    
+
+
+    Parameters
+    ----------
+    fname : file or string
+        File or filename to read.  If the filename extension is `.gz` or `.bz2`,
+        the file is first decompressed.
+    dtype : data-type
+        Data type of the resulting array.  If this is a flexible data-type,
+        the resulting array will be 1-dimensional, and each row will be
+        interpreted as an element of the array. In this case, the number
+        of columns used must match the number of fields in the data-type,
+        and the names of each field will be set by the corresponding name
+        of the dtype.
+        If None, the dtypes will be determined by the contents of each
+        column, individually.
+    comments : {string}, optional
+        The character used to indicate the start of a comment.
+        All the characters occurring on a line after a comment are discarded
+    delimiter : {string}, optional
+        The string used to separate values.  By default, any consecutive
+        whitespace act as delimiter.
+    skiprows : {int}, optional
+        Numbers of lines to skip at the beginning of the file.
+    converters : {None, dictionary}, optional
+        A dictionary mapping column number to a function that will convert
+        values in the column to a number. Converters can also be used to
+        provide a default value for missing data:
+        ``converters = {3: lambda s: float(s or 0)}``.
+    missing : {string}, optional
+        A string representing a missing value, irrespective of the column where
+        it appears (e.g., `'missing'` or `'unused'`).
+    missing_values : {None, dictionary}, optional
+        A dictionary mapping a column number to a string indicating whether the
+        corresponding field should be masked.
+    usecols : {None, sequence}, optional
+        Which columns to read, with 0 being the first.  For example,
+        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
+    names : {None, True, string, sequence}, optional
+        If `names` is True, the field names are read from the first valid line
+        after the first `skiprows` lines.
+        If `names` is a sequence or a single-string of comma-separated names,
+        the names will be used to define the field names in a flexible dtype.
+        If `names` is None, the names of the dtype fields will be used, if any.
+    excludelist : {sequence}, optional
+        A list of names to exclude. This list is appended to the default list
+        ['return','file','print']. Excluded names are appended an underscore:
+        for example, `file` would become `file_`.
+    deletechars : {string}, optional
+        A string combining invalid characters that must be deleted from the names.
+    case_sensitive : {True, False, 'upper', 'lower'}, optional
+        If True, field names are case_sensitive.
+        If False or 'upper', field names are converted to upper case.
+        If 'lower', field names are converted to lower case.
+    unpack : {bool}, optional
+        If True, the returned array is transposed, so that arguments may be
+        unpacked using ``x, y, z = loadtxt(...)``
+    usemask : {bool}, optional
+        If True, returns a masked array.
+        If False, return a regular standard array.
+
+    Returns
+    -------
+    out : MaskedArray
+        Data read from the text file.
+
+    Notes
+    --------
+    * When spaces are used as delimiters, or when no delimiter has been given
+      as input, there should not be any missing data between two fields.
+    * When the variable are named (either by a flexible dtype or with `names`,
+      there must not be any header in the file (else a :exc:ValueError exception
+      is raised).
+
+    See Also
+    --------
+    numpy.loadtxt : equivalent function when no data is missing.
+
+    """
+    #
+    if usemask:
+        from numpy.ma import MaskedArray, make_mask_descr
+    # Check the input dictionary of converters
+    user_converters = converters or {}
+    if not isinstance(user_converters, dict):
+        errmsg = "The input argument 'converter' should be a valid dictionary "\
+                 "(got '%s' instead)"
+        raise TypeError(errmsg % type(user_converters))
+    # Check the input dictionary of missing values
+    user_missing_values = missing_values or {}
+    if not isinstance(user_missing_values, dict):
+        errmsg = "The input argument 'missing_values' should be a valid "\
+                 "dictionary (got '%s' instead)"
+        raise TypeError(errmsg % type(missing_values))
+    defmissing = [_.strip() for _ in missing.split(',')] + ['']
+
+    # Initialize the filehandle, the LineSplitter and the NameValidator
+#    fhd = _to_filehandle(fname)
+    if isinstance(fname, basestring):
+        fhd = np.lib._datasource.open(fname)
+    elif not hasattr(fname, 'read'):
+        raise TypeError("The input should be a string or a filehandle. "\
+                        "(got %s instead)" % type(fname))
+    else:
+        fhd = fname
+    split_line = LineSplitter(delimiter=delimiter, comments=comments, 
+                              autostrip=False)._handyman
+    validate_names = NameValidator(excludelist=excludelist,
+                                   deletechars=deletechars,
+                                   case_sensitive=case_sensitive)
+
+    # Get the first valid lines after the first skiprows ones
+    for i in xrange(skiprows):
+        fhd.readline()
+    first_values = None
+    while not first_values:
+        first_line = fhd.readline()
+        if first_line == '':
+            raise IOError('End-of-file reached before encountering data.')
+        first_values = split_line(first_line)
+
+    # Check the columns to use
+    if usecols is not None:
+        usecols = list(usecols)
+    nbcols = len(usecols or first_values)
+
+    # Check the names and overwrite the dtype.names if needed
+    if dtype is not None:
+        dtype = np.dtype(dtype)
+    dtypenames = getattr(dtype, 'names', None)
+    if names is True:
+        names = validate_names([_.strip() for _ in first_values])
+        first_line =''
+    elif _is_string_like(names):
+        names = validate_names([_.strip() for _ in names.split(',')])
+    elif names:
+        names = validate_names(names)
+    elif dtypenames:
+        dtype.names = validate_names(dtypenames)
+    if names and dtypenames:
+        dtype.names = names
+
+    # If usecols is a list of names, convert to a list of indices
+    if usecols:
+        for (i, current) in enumerate(usecols):
+            if _is_string_like(current):
+                usecols[i] = names.index(current)
+
+    # If user_missing_values has names as keys, transform them to indices
+    missing_values = {}
+    for (key, val) in user_missing_values.iteritems():
+        # If val is a list, flatten it. In any case, add missing &'' to the list
+        if isinstance(val, (list, tuple)):
+            val = [str(_) for _ in val]
+        else:
+            val = [str(val),]
+        val.extend(defmissing)
+        if _is_string_like(key):
+            try:
+                missing_values[names.index(key)] = val
+            except ValueError:
+                pass
+        else:
+            missing_values[key] = val
+
+
+    # Initialize the default converters
+    if dtype is None:
+        # Note: we can't use a [...]*nbcols, as we would have 3 times the same
+        # ... converter, instead of 3 different converters.
+        converters = [StringConverter(None,
+                              missing_values=missing_values.get(_, defmissing))
+                      for _ in range(nbcols)]
+    else:
+        flatdtypes = flatten_dtype(dtype)
+        # Initialize the converters
+        if len(flatdtypes) > 1:
+            # Flexible type : get a converter from each dtype
+            converters = [StringConverter(dt,
+                              missing_values=missing_values.get(i, defmissing),
+                              locked=True)
+                          for (i, dt) in enumerate(flatdtypes)]
+        else:
+            # Set to a default converter (but w/ different missing values)
+            converters = [StringConverter(dtype,
+                              missing_values=missing_values.get(_, defmissing),
+                              locked=True)
+                          for _ in range(nbcols)]
+    missing_values = [_.missing_values for _ in converters]
+
+    # Update the converters to use the user-defined ones
+    for (i, conv) in user_converters.iteritems():
+        # If the converter is specified by column names, use the index instead
+        if _is_string_like(i):
+            i = names.index(i)
+        if usecols:
+            try:
+                i = usecols.index(i)
+            except ValueError:
+                # Unused converter specified
+                continue
+        converters[i].update(conv, default=None, 
+                             missing_values=missing_values[i],
+                             locked=True)
+
+    # Reset the names to match the usecols
+    if (not first_line) and usecols:
+        names = [names[_] for _ in usecols]
+
+    rows = []
+    append_to_rows = rows.append
+    if usemask:
+        masks = []
+        append_to_masks = masks.append
+    # Parse each line
+    for line in itertools.chain([first_line,], fhd):
+        values = split_line(line)
+        # Skip an empty line
+        if len(values) == 0:
+            continue
+        # Select only the columns we need
+        if usecols:
+            values = [values[_] for _ in usecols]
+        # Check whether we need to update the converter
+        if dtype is None:
+            for (converter, item) in zip(converters, values):
+                converter.upgrade(item)
+        # Store the values
+        append_to_rows(tuple(values))
+        if usemask:
+            append_to_masks(tuple([val.strip() in mss 
+                                   for (val, mss) in zip(values,
+                                                         missing_values)]))
+
+    # Convert each value according to the converter:
+    # We want to modify the list in place to avoid creating a new one...
+    if loose:
+        conversionfuncs = [conv._loose_call for conv in converters]
+    else:
+        conversionfuncs = [conv._strict_call for conv in converters]
+    for (i, vals) in enumerate(rows):
+        rows[i] = tuple([convert(val)
+                         for (convert, val) in zip(conversionfuncs, vals)])
+
+    # Reset the dtype
+    data = rows
+    if dtype is None:
+        # Get the dtypes from the first row
+        coldtypes = [np.array(val).dtype for val in data[0]]
+        # Find the columns with strings, and take the largest number of chars.
+        strcolidx = [i for (i, v) in enumerate(coldtypes) if v.char == 'S']
+        for i in strcolidx:
+            coldtypes[i] = "|S%i" % max(len(row[i]) for row in data)
+        #
+        if names is None:
+            # If the dtype is uniform, don't define names, else use ''
+            base = coldtypes[0]
+            if np.all([(dt == base) for dt in coldtypes]):
+                (ddtype, mdtype) = (base, np.bool)
+            else:
+                ddtype = [('', dt) for dt in coldtypes]
+                mdtype = [('', np.bool) for dt in coldtypes]
+        else:
+            ddtype = zip(names, coldtypes)
+            mdtype = zip(names, [np.bool] * len(coldtypes))
+        output = np.array(data, dtype=ddtype)
+        if usemask:
+            outputmask = np.array(masks, dtype=mdtype)
+    else:
+        # Overwrite the initial dtype names if needed
+        if names and dtype.names:
+            dtype.names = names
+        flatdtypes = flatten_dtype(dtype)
+        # Case 1. We have a structured type
+        if len(flatdtypes) > 1:
+            # Nested dtype, eg  [('a', int), ('b', [('b0', int), ('b1', 'f4')])]
+            # First, create the array using a flattened dtype:
+            # [('', int), ('', int), ('', 'f4')]  (one unnamed field per leaf)
+            # Then, view the array using the specified dtype.
+            rows = np.array(data, dtype=[('', t) for t in flatdtypes])
+            output = rows.view(dtype)
+            # Now, process the rowmasks the same way
+            if usemask:
+                rowmasks = np.array(masks,
+                                    dtype=np.dtype([('', np.bool)
+                                                    for t in flatdtypes]))
+                # Construct the new dtype
+                mdtype = make_mask_descr(dtype)
+                outputmask = rowmasks.view(mdtype)
+        # Case #2. We have a basic dtype
+        else:
+            # We used some user-defined converters
+            if user_converters:
+                ishomogeneous = True
+                descr = []
+                for (i, ttype) in enumerate([conv.type for conv in converters]):
+                    # Keep the dtype of the current converter
+                    if i in user_converters:
+                        ishomogeneous &= (ttype == dtype.type)
+                        if ttype == np.string_:
+                            ttype = "|S%i" % max(len(row[i]) for row in data)
+                        descr.append(('', ttype))
+                    else:
+                        descr.append(('', dtype))
+                if not ishomogeneous:
+                    dtype = np.dtype(descr)
+            #
+            output = np.array(data, dtype)
+            if usemask:
+                if dtype.names:
+                    mdtype = [(_, np.bool) for _ in dtype.names]
+                else:
+                    mdtype = np.bool
+                outputmask = np.array(masks, dtype=mdtype)
+    # Try to take care of the missing data we missed
+    if usemask and output.dtype.names:
+        for (name, conv) in zip(names or (), converters):
+            missing_values = [conv(_) for _ in conv.missing_values if _ != '']
+            for mval in missing_values:
+                outputmask[name] |= (output[name] == mval)
+    # Construct the final array
+    if usemask:
+        output = output.view(MaskedArray)
+        output._mask = outputmask
+    if unpack:
+        return output.squeeze().T
+    return output.squeeze()
+
+
+
+def ndfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
+             converters=None, missing='', missing_values=None,
+             usecols=None, unpack=None, names=None,
+             excludelist=None, deletechars=None, case_sensitive=True,):
+    """
+    Load ASCII data stored in fname and return an ndarray.
+    
+    Complete description of all the optional input parameters is available in
+    the docstring of the `genfromtxt` function.
+    
+    See Also
+    --------
+    numpy.genfromtxt : generic function.
+    
+    """
+    kwargs = dict(dtype=dtype, comments=comments, delimiter=delimiter, 
+                  skiprows=skiprows, converters=converters,
+                  missing=missing, missing_values=missing_values,
+                  usecols=usecols, unpack=unpack, names=names, 
+                  excludelist=excludelist, deletechars=deletechars,
+                  case_sensitive=case_sensitive, usemask=False)
+    return genfromtxt(fname, **kwargs)
+
+def mafromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
+              converters=None, missing='', missing_values=None,
+              usecols=None, unpack=None, names=None,
+              excludelist=None, deletechars=None, case_sensitive=True,):
+    """
+    Load ASCII data stored in fname and return a MaskedArray.
+    
+    Complete description of all the optional input parameters is available in
+    the docstring of the `genfromtxt` function.
+    
+    See Also
+    --------
+    numpy.genfromtxt : generic function.
+    """
+    kwargs = dict(dtype=dtype, comments=comments, delimiter=delimiter, 
+                  skiprows=skiprows, converters=converters,
+                  missing=missing, missing_values=missing_values,
+                  usecols=usecols, unpack=unpack, names=names, 
+                  excludelist=excludelist, deletechars=deletechars,
+                  case_sensitive=case_sensitive,
+                  usemask=True)
+    return genfromtxt(fname, **kwargs)
+
+
+def recfromtxt(fname, dtype=None, comments='#', delimiter=None, skiprows=0,
+               converters=None, missing='', missing_values=None,
+               usecols=None, unpack=None, names=None,
+               excludelist=None, deletechars=None, case_sensitive=True,
+               usemask=False):
+    """
+    Load ASCII data stored in fname and return a standard recarray (if
+    `usemask=False`) or a MaskedRecords (if `usemask=True`).
+    
+    Complete description of all the optional input parameters is available in
+    the docstring of the `genfromtxt` function.
+    
+    See Also
+    --------
+    numpy.genfromtxt : generic function
+
+    Warnings
+    --------
+    * by default, `dtype=None`, which means that the dtype of the output array
+      will be determined from the data.
+    """
+    kwargs = dict(dtype=dtype, comments=comments, delimiter=delimiter, 
+                  skiprows=skiprows, converters=converters,
+                  missing=missing, missing_values=missing_values,
+                  usecols=usecols, unpack=unpack, names=names, 
+                  excludelist=excludelist, deletechars=deletechars,
+                  case_sensitive=case_sensitive, usemask=usemask)
+    output = genfromtxt(fname, **kwargs)
+    if usemask:
+        from numpy.ma.mrecords import MaskedRecords
+        output = output.view(MaskedRecords)
+    else:
+        output = output.view(np.recarray)
+    return output
+
+
+def recfromcsv(fname, dtype=None, comments='#', skiprows=0,
+               converters=None, missing='', missing_values=None,
+               usecols=None, unpack=None, names=True,
+               excludelist=None, deletechars=None, case_sensitive='lower',
+               usemask=False):
+    """
+    Load ASCII data stored in a comma-separated file and return a recarray (if
+    `usemask=False`) or a MaskedRecords (if `usemask=True`).
+    
+    Complete description of all the optional input parameters is available in
+    the docstring of the `genfromtxt` function.
+    
+    See Also
+    --------
+    numpy.genfromtxt : generic function
+    """
+    kwargs = dict(dtype=dtype, comments=comments, delimiter=",", 
+                  skiprows=skiprows, converters=converters,
+                  missing=missing, missing_values=missing_values,
+                  usecols=usecols, unpack=unpack, names=names, 
+                  excludelist=excludelist, deletechars=deletechars,
+                  case_sensitive=case_sensitive, usemask=usemask)
+    output = genfromtxt(fname, **kwargs)
+    if usemask:
+        from numpy.ma.mrecords import MaskedRecords
+        output = output.view(MaskedRecords)
+    else:
+        output = output.view(np.recarray)
+    return output
+

Added: trunk/numpy/lib/tests/test__iotools.py
===================================================================
--- trunk/numpy/lib/tests/test__iotools.py	2009-01-19 09:04:20 UTC (rev 6328)
+++ trunk/numpy/lib/tests/test__iotools.py	2009-01-19 21:22:52 UTC (rev 6329)
@@ -0,0 +1,140 @@
+
+import StringIO
+
+import numpy as np
+from numpy.lib._iotools import LineSplitter, NameValidator, StringConverter
+from numpy.testing import *
+
+class TestLineSplitter(TestCase):
+    "Tests the LineSplitter class."
+    #
+    def test_no_delimiter(self):
+        "Test LineSplitter w/o delimiter"
+        strg = " 1 2 3 4  5 # test"
+        test = LineSplitter()(strg)
+        assert_equal(test, ['1', '2', '3', '4', '5'])
+        test = LineSplitter('')(strg)
+        assert_equal(test, ['1', '2', '3', '4', '5'])
+
+    def test_space_delimiter(self):
+        "Test space delimiter"
+        strg = " 1 2 3 4  5 # test"
+        test = LineSplitter(' ')(strg)
+        assert_equal(test, ['1', '2', '3', '4', '', '5'])
+        test = LineSplitter('  ')(strg)
+        assert_equal(test, ['1 2 3 4', '5'])
+
+    def test_tab_delimiter(self):
+        "Test tab delimiter"
+        strg= " 1\t 2\t 3\t 4\t 5  6"
+        test = LineSplitter('\t')(strg)
+        assert_equal(test, ['1', '2', '3', '4', '5  6'])
+        strg= " 1  2\t 3  4\t 5  6"
+        test = LineSplitter('\t')(strg)
+        assert_equal(test, ['1  2', '3  4', '5  6'])
+
+    def test_other_delimiter(self):
+        "Test LineSplitter on delimiter"
+        strg = "1,2,3,4,,5"
+        test = LineSplitter(',')(strg)
+        assert_equal(test, ['1', '2', '3', '4', '', '5'])
+        #
+        strg = " 1,2,3,4,,5 # test"
+        test = LineSplitter(',')(strg)
+        assert_equal(test, ['1', '2', '3', '4', '', '5'])
+
+    def test_constant_fixed_width(self):
+        "Test LineSplitter w/ fixed-width fields"
+        strg = "  1  2  3  4     5   # test"
+        test = LineSplitter(3)(strg)
+        assert_equal(test, ['1', '2', '3', '4', '', '5', ''])
+        #
+        strg = "  1     3  4  5  6# test"
+        test = LineSplitter(20)(strg)
+        assert_equal(test, ['1     3  4  5  6'])
+        #
+        strg = "  1     3  4  5  6# test"
+        test = LineSplitter(30)(strg)
+        assert_equal(test, ['1     3  4  5  6'])
+
+    def test_variable_fixed_width(self):
+        strg = "  1     3  4  5  6# test"
+        test = LineSplitter((3,6,6,3))(strg)
+        assert_equal(test, ['1', '3', '4  5', '6'])
+        #
+        strg = "  1     3  4  5  6# test"
+        test = LineSplitter((6,6,9))(strg)
+        assert_equal(test, ['1', '3  4', '5  6'])
+
+
+#-------------------------------------------------------------------------------
+
+class TestNameValidator(TestCase):
+    #
+    def test_case_sensitivity(self):
+        "Test case sensitivity"
+        names = ['A', 'a', 'b', 'c']
+        test = NameValidator().validate(names)
+        assert_equal(test, ['A', 'a', 'b', 'c'])
+        test = NameValidator(case_sensitive=False).validate(names)
+        assert_equal(test, ['A', 'A_1', 'B', 'C'])
+        test = NameValidator(case_sensitive='upper').validate(names)
+        assert_equal(test, ['A', 'A_1', 'B', 'C'])
+        test = NameValidator(case_sensitive='lower').validate(names)
+        assert_equal(test, ['a', 'a_1', 'b', 'c'])
+    #
+    def test_excludelist(self):
+        "Test excludelist"
+        names = ['dates', 'data', 'Other Data', 'mask']
+        validator = NameValidator(excludelist = ['dates', 'data', 'mask'])
+        test = validator.validate(names)
+        assert_equal(test, ['dates_', 'data_', 'Other_Data', 'mask_'])
+
+
+#-------------------------------------------------------------------------------
+
+class TestStringConverter(TestCase):
+    "Test StringConverter"
+    #
+    def test_creation(self):
+        "Test creation of a StringConverter"
+        converter = StringConverter(int, -99999)
+        assert_equal(converter._status, 1)
+        assert_equal(converter.default, -99999)
+    #
+    def test_upgrade(self):
+        "Tests the upgrade method."
+        converter = StringConverter()
+        assert_equal(converter._status, 0)
+        converter.upgrade('0')
+        assert_equal(converter._status, 1)
+        converter.upgrade('0.')
+        assert_equal(converter._status, 2)
+        converter.upgrade('0j')
+        assert_equal(converter._status, 3)
+        converter.upgrade('a')
+        assert_equal(converter._status, len(converter._mapper)-1)
+    #
+    def test_missing(self):
+        "Tests the use of missing values."
+        converter = StringConverter(missing_values=('missing','missed'))
+        converter.upgrade('0')
+        assert_equal(converter('0'), 0)
+        assert_equal(converter(''), converter.default)
+        assert_equal(converter('missing'), converter.default)
+        assert_equal(converter('missed'), converter.default)
+        try:
+            converter('miss')
+        except ValueError:
+            pass
+    #
+    def test_upgrademapper(self):
+        "Tests upgrade_mapper"
+        import dateutil.parser
+        import datetime
+        dateparser = dateutil.parser.parse
+        StringConverter.upgrade_mapper(dateparser, datetime.date(2000,1,1))
+        convert = StringConverter(dateparser, datetime.date(2000, 1, 1))
+        test = convert('2001-01-01')
+        assert_equal(test, datetime.datetime(2001, 01, 01, 00, 00, 00))
+


Property changes on: trunk/numpy/lib/tests/test__iotools.py
___________________________________________________________________
Name: svn:mime-type
   + text/plain

Modified: trunk/numpy/lib/tests/test_io.py
===================================================================
--- trunk/numpy/lib/tests/test_io.py	2009-01-19 09:04:20 UTC (rev 6328)
+++ trunk/numpy/lib/tests/test_io.py	2009-01-19 21:22:52 UTC (rev 6329)
@@ -1,5 +1,8 @@
-from numpy.testing import *
+
 import numpy as np
+import numpy.ma as ma
+from numpy.ma.testutils import *
+
 import StringIO
 
 from tempfile import NamedTemporaryFile
@@ -355,5 +358,358 @@
         assert_array_equal(x, a)
 
 
+#####--------------------------------------------------------------------------
+
+
+class TestFromTxt(TestCase):
+    #
+    def test_record(self):
+        "Test w/ explicit dtype"
+        data = StringIO.StringIO('1 2\n3 4')
+#        data.seek(0)
+        test = np.ndfromtxt(data, dtype=[('x', np.int32), ('y', np.int32)])
+        control = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
+        assert_equal(test, control)
+        #
+        data = StringIO.StringIO('M 64.0 75.0\nF 25.0 60.0')
+#        data.seek(0)
+        descriptor = {'names': ('gender','age','weight'),
+                      'formats': ('S1', 'i4', 'f4')}
+        control = np.array([('M', 64.0, 75.0), ('F', 25.0, 60.0)],
+                           dtype=descriptor)
+        test = np.ndfromtxt(data, dtype=descriptor)
+        assert_equal(test, control)
+
+    def test_array(self):
+        "Test outputting a standard ndarray"
+        data = StringIO.StringIO('1 2\n3 4')
+        control = np.array([[1,2],[3,4]], dtype=int)
+        test = np.ndfromtxt(data, dtype=int)
+        assert_array_equal(test, control)
+        #
+        data.seek(0)
+        control = np.array([[1,2],[3,4]], dtype=float)
+        test = np.loadtxt(data, dtype=float)
+        assert_array_equal(test, control)
+
+    def test_1D(self):
+        "Test squeezing to 1D"
+        control = np.array([1, 2, 3, 4], int)
+        #
+        data = StringIO.StringIO('1\n2\n3\n4\n')
+        test = np.ndfromtxt(data, dtype=int)
+        assert_array_equal(test, control)
+        #
+        data = StringIO.StringIO('1,2,3,4\n')
+        test = np.ndfromtxt(data, dtype=int, delimiter=',')
+        assert_array_equal(test, control)
+
+    def test_comments(self):
+        "Test the stripping of comments"
+        control = np.array([1, 2, 3, 5], int)
+        # Comment on its own line
+        data = StringIO.StringIO('# comment\n1,2,3,5\n')
+        test = np.ndfromtxt(data, dtype=int, delimiter=',', comments='#')
+        assert_equal(test, control)
+        # Comment at the end of a line
+        data = StringIO.StringIO('1,2,3,5# comment\n')
+        test = np.ndfromtxt(data, dtype=int, delimiter=',', comments='#')
+        assert_equal(test, control)
+
+    def test_skiprows(self):
+        "Test row skipping"
+        control = np.array([1, 2, 3, 5], int)
+        #
+        data = StringIO.StringIO('comment\n1,2,3,5\n')
+        test = np.ndfromtxt(data, dtype=int, delimiter=',', skiprows=1)
+        assert_equal(test, control)
+        #
+        data = StringIO.StringIO('# comment\n1,2,3,5\n')
+        test = np.loadtxt(data, dtype=int, delimiter=',', skiprows=1)
+        assert_equal(test, control)
+
+    def test_header(self):
+        "Test retrieving a header"
+        data = StringIO.StringIO('gender age weight\nM 64.0 75.0\nF 25.0 60.0')
+        test = np.ndfromtxt(data, dtype=None, names=True)
+        control = {'gender': np.array(['M', 'F']),
+                   'age': np.array([64.0, 25.0]),
+                   'weight': np.array([75.0, 60.0])}
+        assert_equal(test['gender'], control['gender'])
+        assert_equal(test['age'], control['age'])
+        assert_equal(test['weight'], control['weight'])
+
+    def test_auto_dtype(self):
+        "Test the automatic definition of the output dtype"
+        data = StringIO.StringIO('A 64 75.0 3+4j True\nBCD 25 60.0 5+6j False')
+        test = np.ndfromtxt(data, dtype=None)
+        control = [np.array(['A', 'BCD']),
+                   np.array([64, 25]),
+                   np.array([75.0, 60.0]),
+                   np.array([3+4j, 5+6j]),
+                   np.array([True, False]),]
+        assert_equal(test.dtype.names, ['f0','f1','f2','f3','f4'])
+        for (i, ctrl) in enumerate(control):
+            assert_equal(test['f%i' % i], ctrl)
+
+
+    def test_auto_dtype_uniform(self):
+        "Tests whether the output dtype can be uniformized"
+        data = StringIO.StringIO('1 2 3 4\n5 6 7 8\n')
+        test = np.ndfromtxt(data, dtype=None)
+        control = np.array([[1,2,3,4],[5,6,7,8]])
+        assert_equal(test, control)
+
+
+    def test_fancy_dtype(self):
+        "Check that a nested dtype isn't MIA"
+        data = StringIO.StringIO('1,2,3.0\n4,5,6.0\n')
+        fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])])
+        test = np.ndfromtxt(data, dtype=fancydtype, delimiter=',')
+        control = np.array([(1,(2,3.0)),(4,(5,6.0))], dtype=fancydtype)
+        assert_equal(test, control)
+
+
+    def test_names_overwrite(self):
+        "Test overwriting the names of the dtype"
+        descriptor = {'names': ('g','a','w'),
+                      'formats': ('S1', 'i4', 'f4')}
+        data = StringIO.StringIO('M 64.0 75.0\nF 25.0 60.0')
+        names = ('gender','age','weight')
+        test = np.ndfromtxt(data, dtype=descriptor, names=names)
+        descriptor['names'] = names
+        control = np.array([('M', 64.0, 75.0),
+                            ('F', 25.0, 60.0)], dtype=descriptor)
+        assert_equal(test, control)
+
+
+    def test_autonames_and_usecols(self):
+        "Tests names and usecols"
+        data = StringIO.StringIO('A B C D\n aaaa 121 45 9.1')
+        test = np.ndfromtxt(data, usecols=('A', 'C', 'D'),
+                            names=True, dtype=None)
+        control = np.array(('aaaa', 45, 9.1),
+                           dtype=[('A', '|S4'), ('C', int), ('D', float)])
+        assert_equal(test, control)
+
+
+    def test_converters_with_usecols(self):
+        "Test the combination of user-defined converters and usecols"
+        data = StringIO.StringIO('1,2,3,,5\n6,7,8,9,10\n')
+        test = np.ndfromtxt(data, dtype=int, delimiter=',',
+                            converters={3:lambda s: int(s or -999)},
+                            usecols=(1, 3, ))
+        control = np.array([[2,  -999], [7, 9]], int)
+        assert_equal(test, control)
+
+    def test_converters_with_usecols_and_names(self):
+        "Tests converters combined with names and usecols"
+        data = StringIO.StringIO('A B C D\n aaaa 121 45 9.1')
+        test = np.ndfromtxt(data, usecols=('A', 'C', 'D'), names=True,
+                            dtype=None, converters={'C':lambda s: 2 * int(s)})
+        control = np.array(('aaaa', 90, 9.1),
+            dtype=[('A', '|S4'), ('C', int), ('D', float)])
+        assert_equal(test, control)
+
+
+    def test_unused_converter(self):
+        "Test whether unused converters are forgotten"
+        data = StringIO.StringIO("1 21\n  3 42\n")
+        test = np.ndfromtxt(data, usecols=(1,),
+                            converters={0: lambda s: int(s, 16)})
+        assert_equal(test, [21, 42])
+        #
+        data.seek(0)
+        test = np.ndfromtxt(data, usecols=(1,),
+                            converters={1: lambda s: int(s, 16)})
+        assert_equal(test, [33, 66])
+
+
+    def test_dtype_with_converters(self):
+        dstr = "2009; 23; 46"
+        test = np.ndfromtxt(StringIO.StringIO(dstr,),
+                            delimiter=";", dtype=float, converters={0:str})
+        control = np.array([('2009', 23., 46)],
+                           dtype=[('f0','|S4'), ('f1', float), ('f2', float)])
+        assert_equal(test, control)
+        test = np.ndfromtxt(StringIO.StringIO(dstr,),
+                            delimiter=";", dtype=float, converters={0:float})
+        control = np.array([2009., 23., 46],)
+        assert_equal(test, control)
+
+
+    def test_spacedelimiter(self):
+        "Test space delimiter"
+        data = StringIO.StringIO("1  2  3  4   5\n6  7  8  9  10")
+        test = np.ndfromtxt(data)
+        control = np.array([[ 1., 2., 3., 4., 5.],
+                            [ 6., 7., 8., 9.,10.]])
+        assert_equal(test, control)
+
+
+    def test_missing(self):
+        data = StringIO.StringIO('1,2,3,,5\n')
+        test = np.ndfromtxt(data, dtype=int, delimiter=',', \
+                            converters={3:lambda s: int(s or -999)})
+        control = np.array([1, 2, 3, -999, 5], int)
+        assert_equal(test, control)
+
+
+    def test_usecols(self):
+        "Test the selection of columns"
+        # Select 1 column
+        control = np.array( [[1, 2], [3, 4]], float)
+        data = StringIO.StringIO()
+        np.savetxt(data, control)
+        data.seek(0)
+        test = np.ndfromtxt(data, dtype=float, usecols=(1,))
+        assert_equal(test, control[:, 1])
+        #
+        control = np.array( [[1, 2, 3], [3, 4, 5]], float)
+        data = StringIO.StringIO()
+        np.savetxt(data, control)
+        data.seek(0)
+        test = np.ndfromtxt(data, dtype=float, usecols=(1, 2))
+        assert_equal(test, control[:, 1:])
+        # Testing with arrays instead of tuples.
+        data.seek(0)
+        test = np.ndfromtxt(data, dtype=float, usecols=np.array([1, 2]))
+        assert_equal(test, control[:, 1:])
+        # Checking with dtypes defined converters.
+        data = StringIO.StringIO("""JOE 70.1 25.3\nBOB 60.5 27.9""")
+        names = ['stid', 'temp']
+        dtypes = ['S4', 'f8']
+        test = np.ndfromtxt(data, usecols=(0, 2), dtype=zip(names, dtypes))
+        assert_equal(test['stid'],  ["JOE",  "BOB"])
+        assert_equal(test['temp'],  [25.3,  27.9])
+
+
+    def test_empty_file(self):
+        "Test that an empty file raises the proper exception"
+        data = StringIO.StringIO()
+        assert_raises(IOError, np.ndfromtxt, data)
+
+
+    def test_fancy_dtype_alt(self):
+        "Check that a nested dtype isn't MIA in a masked array"
+        data = StringIO.StringIO('1,2,3.0\n4,5,6.0\n')
+        fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])])
+        test = np.mafromtxt(data, dtype=fancydtype, delimiter=',')
+        control = ma.array([(1,(2,3.0)),(4,(5,6.0))], dtype=fancydtype)
+        assert_equal(test, control)
+
+
+    def test_withmissing(self):
+        data = StringIO.StringIO('A,B\n0,1\n2,N/A')
+        test = np.mafromtxt(data, dtype=None, delimiter=',', missing='N/A',
+                            names=True)
+        control = ma.array([(0, 1), (2, -1)],
+                           mask=[(False, False), (False, True)],
+                           dtype=[('A', np.int), ('B', np.int)])
+        assert_equal(test, control)
+        assert_equal(test.mask, control.mask)
+        #
+        data.seek(0)
+        test = np.mafromtxt(data, delimiter=',', missing='N/A', names=True)
+        control = ma.array([(0, 1), (2, -1)],
+                           mask=[[False, False], [False, True]],)
+        assert_equal(test, control)
+        assert_equal(test.mask, control.mask)
+
+
+    def test_user_missing_values(self):
+        datastr ="A, B, C\n0, 0., 0j\n1, N/A, 1j\n-9, 2.2, N/A\n3, -99, 3j" 
+        data = StringIO.StringIO(datastr)
+        basekwargs = dict(dtype=None, delimiter=',', names=True, missing='N/A')
+        mdtype = [('A', int), ('B', float), ('C', complex)]
+        #
+        test = np.mafromtxt(data, **basekwargs)
+        control = ma.array([(   0, 0.0,    0j), (1, -999, 1j),
+                            (  -9, 2.2, -999j), (3,  -99, 3j)],
+                            mask=[(0, 0, 0), (0, 1, 0), (0, 0, 1), (0, 0, 0)],
+                            dtype=mdtype)
+        assert_equal(test, control)
+        #
+        data.seek(0)
+        test = np.mafromtxt(data, 
+                            missing_values={0:-9, 1:-99, 2:-999j}, **basekwargs)
+        control = ma.array([(   0, 0.0,    0j), (1, -999, 1j),
+                            (  -9, 2.2, -999j), (3,  -99, 3j)],
+                            mask=[(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)],
+                            dtype=mdtype)
+        assert_equal(test, control)
+        #
+        data.seek(0)
+        test = np.mafromtxt(data, 
+                            missing_values={0:-9, 'B':-99, 'C':-999j},
+                            **basekwargs)
+        control = ma.array([(   0, 0.0,    0j), (1, -999, 1j),
+                            (  -9, 2.2, -999j), (3,  -99, 3j)],
+                            mask=[(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)],
+                            dtype=mdtype)
+        assert_equal(test, control)
+
+
+    def test_withmissing_float(self):
+        data = StringIO.StringIO('A,B\n0,1.5\n2,-999.00')
+        test = np.mafromtxt(data, dtype=None, delimiter=',', missing='-999.0',
+                            names=True,)
+        control = ma.array([(0, 1.5), (2, -1.)],
+                           mask=[(False, False), (False, True)],
+                           dtype=[('A', np.int), ('B', np.float)])
+        assert_equal(test, control)
+        assert_equal(test.mask, control.mask)
+
+
+    def test_recfromtxt(self):
+        #
+        data = StringIO.StringIO('A,B\n0,1\n2,3')
+        test = np.recfromtxt(data, delimiter=',', missing='N/A', names=True)
+        control = np.array([(0, 1), (2, 3)],
+                           dtype=[('A', np.int), ('B', np.int)])
+        self.failUnless(isinstance(test, np.recarray))
+        assert_equal(test, control)
+        #
+        data = StringIO.StringIO('A,B\n0,1\n2,N/A')
+        test = np.recfromtxt(data, dtype=None, delimiter=',', missing='N/A',
+                             names=True, usemask=True)
+        control = ma.array([(0, 1), (2, -1)],
+                           mask=[(False, False), (False, True)],
+                           dtype=[('A', np.int), ('B', np.int)])
+        assert_equal(test, control)
+        assert_equal(test.mask, control.mask)
+        assert_equal(test.A, [0, 2])
+
+
+    def test_recfromcsv(self):
+        #
+        data = StringIO.StringIO('A,B\n0,1\n2,3')
+        test = np.recfromcsv(data, missing='N/A',
+                             names=True, case_sensitive=True)
+        control = np.array([(0, 1), (2, 3)],
+                           dtype=[('A', np.int), ('B', np.int)])
+        self.failUnless(isinstance(test, np.recarray))
+        assert_equal(test, control)
+        #
+        data = StringIO.StringIO('A,B\n0,1\n2,N/A')
+        test = np.recfromcsv(data, dtype=None, missing='N/A',
+                             names=True, case_sensitive=True, usemask=True)
+        control = ma.array([(0, 1), (2, -1)],
+                           mask=[(False, False), (False, True)],
+                           dtype=[('A', np.int), ('B', np.int)])
+        assert_equal(test, control)
+        assert_equal(test.mask, control.mask)
+        assert_equal(test.A, [0, 2])
+        #
+        data = StringIO.StringIO('A,B\n0,1\n2,3')
+        test = np.recfromcsv(data, missing='N/A',)
+        control = np.array([(0, 1), (2, 3)],
+                           dtype=[('a', np.int), ('b', np.int)])
+        self.failUnless(isinstance(test, np.recarray))
+        assert_equal(test, control)
+        
+
+
+
 if __name__ == "__main__":
     run_module_suite()



More information about the Numpy-svn mailing list