[Numpy-svn] r5500 - branches/1.1.x/numpy/lib

numpy-svn@scip...
Tue Jul 22 11:40:26 CDT 2008


Author: charris
Date: 2008-07-22 11:40:24 -0500 (Tue, 22 Jul 2008)
New Revision: 5500

Modified:
   branches/1.1.x/numpy/lib/io.py
Log:
Backport r5498 fixes to loadtxt.
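
For context, a minimal sketch (Python 2, with the file contents and field names
invented for illustration) of the structured-dtype usage this reworked loadtxt
is meant to support:

    import numpy as np
    from StringIO import StringIO   # Python 2, matching the module below

    # Two whitespace-delimited columns mapped onto a structured dtype;
    # the data and field names here are made up for illustration only.
    f = StringIO("1 2.5\n3 4.5\n")
    dt = np.dtype([('n', int), ('x', float)])
    a = np.loadtxt(f, dtype=dt)
    # expected: a['n'] == array([1, 3]) and a['x'] == array([2.5, 4.5])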

Modified: branches/1.1.x/numpy/lib/io.py
===================================================================
--- branches/1.1.x/numpy/lib/io.py	2008-07-22 16:37:54 UTC (rev 5499)
+++ branches/1.1.x/numpy/lib/io.py	2008-07-22 16:40:24 UTC (rev 5500)
@@ -10,6 +10,7 @@
 import cStringIO
 import tempfile
 import os
+import itertools
 
 from cPickle import load as _cload, loads
 from _datasource import DataSource
@@ -273,6 +274,7 @@
 
     SeeAlso: scipy.io.loadmat to read and write matfiles.
     """
+    user_converters = converters
 
     if _string_like(fname):
         if fname.endswith('.gz'):
@@ -286,42 +288,89 @@
         raise ValueError('fname must be a string or file handle')
     X = []
 
+    def flatten_dtype(dt):
+        """Unpack a structured data-type."""
+        if dt.names is None:
+            return [dt]
+        else:
+            types = []
+            for field in dt.names:
+                tp, bytes = dt.fields[field]
+                flat_dt = flatten_dtype(tp)
+                types.extend(flat_dt)
+            return types
+
+    def split_line(line):
+        """Chop off comments, strip, and split at delimiter."""
+        line = line.split(comments)[0].strip()
+        if line:
+            return line.split(delimiter)
+        else:
+            return []
+
+    # Make sure we're dealing with a proper dtype
     dtype = np.dtype(dtype)
     defconv = _getconv(dtype)
-    converterseq = None
-    if converters is None:
-        converters = {}
-        if dtype.names is not None:
-            converterseq = [_getconv(dtype.fields[name][0]) \
-                            for name in dtype.names]
 
-    for i,line in enumerate(fh):
-        if i<skiprows: continue
-        comment_start = line.find(comments)
-        if comment_start != -1:
-            line = line[:comment_start].strip()
-        else:
-            line = line.strip()
-        if not len(line): continue
-        vals = line.split(delimiter)
-        if converterseq is None:
-            converterseq = [converters.get(j,defconv) \
-                            for j in xrange(len(vals))]
-        if usecols is not None:
-            row = [converterseq[j](vals[j]) for j in usecols]
-        else:
-            row = [converterseq[j](val) for j,val in enumerate(vals)]
-        if dtype.names is not None:
-            row = tuple(row)
-        X.append(row)
+    # Skip the first `skiprows` lines
+    for i in xrange(skiprows):
+        fh.readline()
 
-    X = np.array(X, dtype)
+    # Read until we find a line with some values, and use
+    # it to estimate the number of columns, N.
+    first_vals = None
+    while not first_vals:
+        first_line = fh.readline()
+        first_vals = split_line(first_line)
+    N = len(usecols or first_vals)
+
+    dtype_types = flatten_dtype(dtype)
+    if len(dtype_types) > 1:
+        # We're dealing with a structured array; each field of
+        # the dtype matches a column
+        converters = [_getconv(dt) for dt in dtype_types]
+    else:
+        # All fields have the same dtype
+        converters = [defconv for i in xrange(N)]
+
+    # By preference, use the converters specified by the user
+    for i, conv in (user_converters or {}).iteritems():
+        if usecols:
+            i = usecols.index(i)
+        converters[i] = conv
+
+    # Parse each line, including the first
+    for i, line in enumerate(itertools.chain([first_line], fh)):
+        vals = split_line(line)
+        if len(vals) == 0:
+            continue
+
+        if usecols:
+            vals = [vals[i] for i in usecols]
+
+        # Convert each value according to its column and store
+        X.append(tuple([conv(val) for (conv, val) in zip(converters, vals)]))
+
+    if len(dtype_types) > 1:
+        # We're dealing with a structured array, with a dtype such as
+        # [('x', int), ('y', [('s', int), ('t', float)])]
+        #
+        # First, create the array using a flattened dtype:
+        # [('x', int), ('s', int), ('t', float)]
+        #
+        # Then, view the array using the specified dtype.
+        X = np.array(X, dtype=np.dtype([('', t) for t in dtype_types]))
+        X = X.view(dtype)
+    else:
+        X = np.array(X, dtype)
+
     X = np.squeeze(X)
-    if unpack: return X.T
-    else:  return X
+    if unpack:
+        return X.T
+    else:
+        return X
 
 
-
 def savetxt(fname, X, fmt='%.18e',delimiter=' '):
     """
     Save the data in X to file fname using fmt string to convert the
@@ -344,9 +393,9 @@
 
     Examples
     --------
-    >>> savetxt('test.out', x, delimiter=',') # X is an array
-    >>> savetxt('test.out', (x,y,z)) # x,y,z equal sized 1D arrays
-    >>> savetxt('test.out', x, fmt='%1.4e') # use exponential notation
+    >>> np.savetxt('test.out', x, delimiter=',') # X is an array
+    >>> np.savetxt('test.out', (x,y,z)) # x,y,z equal sized 1D arrays
+    >>> np.savetxt('test.out', x, fmt='%1.4e') # use exponential notation
 
     Notes on fmt
     ------------
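
The comment in the loadtxt hunk above describes building the array with a
flattened dtype and then viewing it with the requested nested dtype. A
standalone sketch of that technique, using the same example dtype as the
comment (field names are illustrative only):

    import numpy as np

    # The nested dtype the caller asked for.
    dt = np.dtype([('x', int), ('y', [('s', int), ('t', float)])])

    # Each parsed row is a flat tuple, one entry per leaf field.
    rows = [(1, 2, 3.0), (4, 5, 6.0)]

    # Build the array with a flattened dtype of unnamed fields first ...
    flat = np.dtype([('', int), ('', int), ('', float)])
    a = np.array(rows, dtype=flat)

    # ... then reinterpret the same memory with the nested dtype.
    a = a.view(dt)
    # expected: a['y']['t'] == array([3., 6.])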


