[Scipy-svn] r6528 - trunk/scipy/io/arff

scipy-svn@scip... scipy-svn@scip...
Fri Jun 18 08:03:59 CDT 2010


Author: rgommers
Date: 2010-06-18 08:03:56 -0500 (Fri, 18 Jun 2010)
New Revision: 6528

Modified:
   trunk/scipy/io/arff/arffread.py
Log:
ENH: Add ARFF reader speed improvements by Benjamin Root.

Modified: trunk/scipy/io/arff/arffread.py
===================================================================
--- trunk/scipy/io/arff/arffread.py	2010-06-18 13:03:39 UTC (rev 6527)
+++ trunk/scipy/io/arff/arffread.py	2010-06-18 13:03:56 UTC (rev 6528)
@@ -343,7 +343,7 @@
     >>> safe_float('?\\n')
     nan
     """
-    if x.strip() == '?':
+    if '?' in x:
         return np.nan
     else:
         return np.float(x)
@@ -574,15 +574,20 @@
         while r_comment.match(raw):
             raw = row_iter.next()
 
+        # 'compiling' the range since it does not change
+        # Note, I have already tried zipping the converters and
+        # row elements and got slightly worse performance.
+        elems = range(ni)
+
         row = raw.split(delim)
-        yield tuple([convertors[i](row[i]) for i in range(ni)])
+        yield tuple([convertors[i](row[i]) for i in elems])
         for raw in row_iter:
             while r_comment.match(raw):
                 raw = row_iter.next()
             while r_empty.match(raw):
                 raw = row_iter.next()
             row = raw.split(delim)
-            yield tuple([convertors[i](row[i]) for i in range(ni)])
+            yield tuple([convertors[i](row[i]) for i in elems])
 
     a = generator(ofile, delim = delim)
     # No error should happen here: it is a bug otherwise



More information about the Scipy-svn mailing list