[Scipy-svn] r5303 - trunk/scipy/io/matlab

scipy-svn@scip... scipy-svn@scip...
Tue Dec 30 17:54:54 CST 2008


Author: matthew.brett@gmail.com
Date: 2008-12-30 17:54:50 -0600 (Tue, 30 Dec 2008)
New Revision: 5303

Added:
   trunk/scipy/io/matlab/gzipstreams.py
Modified:
   trunk/scipy/io/matlab/mio5.py
   trunk/scipy/io/matlab/miobase.py
Log:
Fixed bug reading empty strings; added theoretically more satisfying as-needed gzip stream reader for compressed arrays

Added: trunk/scipy/io/matlab/gzipstreams.py
===================================================================
--- trunk/scipy/io/matlab/gzipstreams.py	2008-12-30 23:30:23 UTC (rev 5302)
+++ trunk/scipy/io/matlab/gzipstreams.py	2008-12-30 23:54:50 UTC (rev 5303)
@@ -0,0 +1,220 @@
+''' Object for reading from gzipped file-like object
+
+Edited by Matthew Brett, with thanks, from
+http://effbot.org/librarybook/zlib-example-4.py
+
+The copyright and license for that code is:
+
+Copyright 1995-2008 by Fredrik Lundh
+
+By obtaining, using, and/or copying this software and/or its
+associated documentation, you agree that you have read, understood,
+and will comply with the following terms and conditions:
+
+Permission to use, copy, modify, and distribute this software and its
+associated documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appears in all
+copies, and that both that copyright notice and this permission notice
+appear in supporting documentation, and that the name of Secret Labs
+AB or the author not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
+THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+'''
+
+from zlib import decompressobj
+
+
+class GzipInputStream(object):
+    ''' Fileobject to wrap zlib compressed stream for reading
+
+    >>> from StringIO import StringIO
+    >>> from zlib import compress
+    >>> S = 'A handy module for reading compressed streams'
+    >>> F = StringIO(compress(S))
+    >>> ZF = GzipInputStream(F)
+    >>> ZF.read()
+    'A handy module for reading compressed streams'
+    >>> ZF.tell() == len(S)
+    True
+    >>> F = StringIO(compress(S))
+    >>> ZF = GzipInputStream(F)
+    >>> ZF.tell()
+    0
+    >>> ZF.read(6)
+    'A hand'
+    >>> ZF.tell()
+    6
+    >>> F = StringIO(compress(S))
+    >>> ZF = GzipInputStream(F, 6) # with length
+    >>> ZF.read()
+    'A hand'
+    >>> ZF.read()
+    ''
+    >>> ZF.tell()
+    6
+    >>> 
+    '''
+    
+    blocksize = 16384 # 16K
+    def __init__(self, fileobj, length=None):
+        ''' Initialize GzipInputStream
+
+        Parameters
+        ----------
+        fileobj : file-like object
+            Object only need implement ``read`` method
+        length : None or int, optional
+            Uncompressed length of input stream in bytes
+        '''
+        self.fileobj = fileobj
+        self.length=length
+        self.exhausted = False
+        self.unzipped_pos = 0
+        self.data = ""
+        self._unzipper = decompressobj()
+        self._bytes_read = 0
+
+    def __fill(self, bytes):
+        ''' Fill self.data with at least *bytes* number of bytes
+        If bytes == -1, continue until the end of the stream
+
+        Returns ``None``
+        '''
+        if self.exhausted:
+            return
+        # read until we have enough bytes in the buffer
+        read_to_end = bytes == -1
+        n_to_fetch = self.blocksize
+        while read_to_end or len(self.data) < bytes:
+            if self.length: # do not read beyond specified length
+                n_to_fetch = min(self.length-self._bytes_read,
+                                 self.blocksize)
+                if n_to_fetch == 0:
+                    self.exhausted = True
+                    break
+            data = self.fileobj.read(n_to_fetch)
+            if data:
+                self.__add_data(self._unzipper.decompress(data))
+            if len(data) < n_to_fetch: # hit end of file
+                self.__add_data(self._unzipper.flush())
+                self.exhausted = True
+                break
+
+    def __add_data(self, data):
+        self.data += data
+        self._bytes_read += len(data)
+
+    def seek(self, offset, whence=0):
+        ''' Set position in uncompressed stream
+
+        Parameters
+        ----------
+        offset : int
+            byte offset relative to position given by *whence*
+            offsets are in terms of uncompressed bytes from stream
+        whence : {0,1}
+            0 signifies *offset* is relative to beginning of file
+            1 means *offset* is relative to current position
+
+        Returns
+        -------
+        None
+        '''
+        if whence == 0:
+            position = offset
+        elif whence == 1:
+            position = self.unzipped_pos + offset
+        else:
+            raise IOError, "Illegal argument"
+        if position < self.unzipped_pos:
+            raise IOError, "Cannot seek backwards"
+
+        # skip forward, in blocks
+        while position > self.unzipped_pos:
+            if not self.read(min(position - self.unzipped_pos,
+                                 self.blocksize)):
+                break
+
+    def tell(self):
+        ''' Return current position in terms of uncompressed bytes '''
+        return self.unzipped_pos
+
+    def read(self, bytes = -1):
+        ''' Read bytes from file
+
+        Parameters
+        ----------
+        bytes : int, optional
+            If *bytes* is a positive integer, read this many bytes
+            from file. If *bytes* == -1 (the default), read all bytes
+            to the end of file, where the end of the file is detected
+            by running out of read data, or by the ``length``
+            attribute.
+
+        Returns
+        -------
+        data : string
+            string containing read data
+            
+        '''
+        self.__fill(bytes)
+        if bytes == -1:
+            data = self.data
+            self.data = ""
+        else:
+            data = self.data[:bytes]
+            self.data = self.data[bytes:]
+        self.unzipped_pos += len(data)
+        return data
+
+    def readline(self):
+        ''' Read text line from data
+
+        Examples
+        --------
+        >>> from StringIO import StringIO
+        >>> from zlib import compress
+        >>> S = 'A handy module\\nfor reading\\ncompressed streams'
+        >>> F = StringIO(compress(S))
+        >>> ZF = GzipInputStream(F)
+        >>> ZF.readline()
+        'A handy module\\n'
+        >>> ZF.readline()
+        'for reading\\n'
+        '''
+        # make sure we have an entire line
+        while not self.exhausted and "\n" not in self.data:
+            self.__fill(len(self.data) + 512)
+        i = self.data.find("\n") + 1
+        if i <= 0:
+            return self.read()
+        return self.read(i)
+
+    def readlines(self):
+        ''' Read all data broken up into list of text lines
+        >>> from StringIO import StringIO
+        >>> from zlib import compress
+        >>> S = 'A handy module\\nfor reading\\ncompressed streams'
+        >>> F = StringIO(compress(S))
+        >>> ZF = GzipInputStream(F)
+        >>> ZF.readlines()
+        ['A handy module\\n', 'for reading\\n', 'compressed streams']
+        >>>
+        '''
+        lines = []
+        while 1:
+            s = self.readline()
+            if not s:
+                break
+            lines.append(s)
+        return lines
+

Modified: trunk/scipy/io/matlab/mio5.py
===================================================================
--- trunk/scipy/io/matlab/mio5.py	2008-12-30 23:30:23 UTC (rev 5302)
+++ trunk/scipy/io/matlab/mio5.py	2008-12-30 23:54:50 UTC (rev 5303)
@@ -12,6 +12,7 @@
 
 import sys
 import zlib
+from gzipstreams import GzipInputStream
 from StringIO import StringIO
 from copy import copy as pycopy
 import warnings
@@ -322,14 +323,12 @@
 class Mat5ZArrayReader(Mat5ArrayReader):
     ''' Getter for compressed arrays
 
-    Reads and uncompresses gzipped stream on init, providing wrapper
+    Sets up reader for gzipped stream on init, providing wrapper
     for this new sub-stream.
     '''
     def __init__(self, array_reader, byte_count):
-        '''Reads and uncompresses gzipped stream'''
-        data = array_reader.mat_stream.read(byte_count)
         super(Mat5ZArrayReader, self).__init__(
-            StringIO(zlib.decompress(data)),
+            GzipInputStream(array_reader.mat_stream, byte_count),
             array_reader.dtypes,
             array_reader.processor_func,
             array_reader.codecs,

Modified: trunk/scipy/io/matlab/miobase.py
===================================================================
--- trunk/scipy/io/matlab/miobase.py	2008-12-30 23:30:23 UTC (rev 5302)
+++ trunk/scipy/io/matlab/miobase.py	2008-12-30 23:54:50 UTC (rev 5303)
@@ -308,12 +308,13 @@
                 # Convert char array to string or array of strings
                 dims = arr.shape
                 if len(dims) >= 2: # return array of strings
-                    dtt = self.order_code + 'U'
                     n_dims = dims[:-1]
+                    last_dim = dims[-1]
                     str_arr = arr.reshape(
                         (small_product(n_dims),
-                         dims[-1]))
-                    arr = np.empty(n_dims, dtype='U%d' % dims[-1])
+                         last_dim))
+                    dtstr = 'U%d' % (last_dim and last_dim or 1)
+                    arr = np.empty(n_dims, dtype=dtstr)
                     for i in range(0, n_dims[-1]):
                         arr[...,i] = self.chars_to_str(str_arr[i])
                 else: # return string



More information about the Scipy-svn mailing list