[Scipy-svn] r4473 - branches/mb_mio_refactor/matlab

scipy-svn@scip... scipy-svn@scip...
Tue Jun 24 03:55:24 CDT 2008


Author: matthew.brett@gmail.com
Date: 2008-06-24 03:55:06 -0500 (Tue, 24 Jun 2008)
New Revision: 4473

Added:
   branches/mb_mio_refactor/matlab/c_python.pxd
   branches/mb_mio_refactor/matlab/cython_setup.py
   branches/mb_mio_refactor/matlab/tagreader.pyx
Modified:
   branches/mb_mio_refactor/matlab/mio.py
   branches/mb_mio_refactor/matlab/mio4.py
   branches/mb_mio_refactor/matlab/mio5.py
   branches/mb_mio_refactor/matlab/miobase.py
Log:
Scribbling at cython, checking for HDF5 format

Added: branches/mb_mio_refactor/matlab/c_python.pxd
===================================================================
--- branches/mb_mio_refactor/matlab/c_python.pxd	2008-06-24 08:02:32 UTC (rev 4472)
+++ branches/mb_mio_refactor/matlab/c_python.pxd	2008-06-24 08:55:06 UTC (rev 4473)
@@ -0,0 +1,75 @@
+# -*- python -*-
+# :Author:    Robert Kern 
+# :Copyright: 2004, Enthought, Inc.
+# :License:   BSD Style
+
+
+cdef extern from "Python.h":
+    # Not part of the Python API, but we might as well define it here.
+    # Note that the exact type doesn't actually matter for Pyrex.
+    ctypedef int size_t
+
+    # Some type declarations we need
+    ctypedef int Py_intptr_t
+
+
+    # String API
+    char* PyString_AsString(object string)
+    char* PyString_AS_STRING(object string)
+    object PyString_FromString(char* c_string)
+    object PyString_FromStringAndSize(char* c_string, int length)
+    object PyString_InternFromString(char *v)
+
+    # Float API
+    object PyFloat_FromDouble(double v)
+    double PyFloat_AsDouble(object ob)
+    long PyInt_AsLong(object ob)
+
+
+    # Memory API
+    void* PyMem_Malloc(size_t n)
+    void* PyMem_Realloc(void* buf, size_t n)
+    void PyMem_Free(void* buf)
+
+    void Py_DECREF(object obj)
+    void Py_XDECREF(object obj)
+    void Py_INCREF(object obj)
+    void Py_XINCREF(object obj)
+
+    # CObject API
+    ctypedef void (*destructor1)(void* cobj)
+    ctypedef void (*destructor2)(void* cobj, void* desc)
+    int PyCObject_Check(object p)
+    object PyCObject_FromVoidPtr(void* cobj, destructor1 destr)
+    object PyCObject_FromVoidPtrAndDesc(void* cobj, void* desc, 
+        destructor2 destr)
+    void* PyCObject_AsVoidPtr(object self)
+    void* PyCObject_GetDesc(object self)
+    int PyCObject_SetVoidPtr(object self, void* cobj)  
+
+    # TypeCheck API
+    int PyFloat_Check(object obj)
+    int PyInt_Check(object obj)
+
+    # Error API
+    int PyErr_Occurred()
+    void PyErr_Clear()
+    int  PyErr_CheckSignals()
+
+    # File API
+    ctypedef struct FILE
+    FILE* PyFile_AsFile(object)
+
+cdef extern from "stdio.h":
+    size_t fread(void *ptr, size_t size, size_t n, FILE *file)
+
+cdef extern from "string.h":
+    void *memcpy(void *s1, void *s2, int n)
+
+cdef extern from "math.h":
+    double fabs(double x)
+
+cdef extern from "fileobject.h":
+    ctypedef class __builtin__.file [object PyFileObject]:
+        pass
+

Added: branches/mb_mio_refactor/matlab/cython_setup.py
===================================================================
--- branches/mb_mio_refactor/matlab/cython_setup.py	2008-06-24 08:02:32 UTC (rev 4472)
+++ branches/mb_mio_refactor/matlab/cython_setup.py	2008-06-24 08:55:06 UTC (rev 4473)
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+"""Install file for example on how to use Cython with Numpy.
+
+Note: Cython is the successor project to Pyrex.  For more information, see
+http://cython.org.
+"""
+
+from distutils.core import setup
+from distutils.extension import Extension
+
+import numpy
+
+# We detect whether Cython is available, so that below, we can eventually ship
+# pre-generated C for users to compile the extension without having Cython
+# installed on their systems.
+try:
+    from Cython.Distutils import build_ext
+    has_cython = True
+except ImportError:
+    has_cython = False
+
+# Define a cython-based extension module, using the generated sources if cython
+# is not available.
+if has_cython:
+    pyx_sources = ['tagreader.pyx']
+    cmdclass    = {'build_ext': build_ext}
+else:
+    # In production work, you can ship the auto-generated C source yourself to
+    # your users.  In this case, we do NOT ship the .c file as part of scipy,
+    # so you'll need to actually have cython installed at least the first
+    # time.  Since this is really just an example to show you how to use
+    # *Cython*, it makes more sense NOT to ship the C sources so you can edit
+    # the pyx at will with fewer chances for source update conflicts when you
+    # update scipy.
+    pyx_sources = ['tagreader.c']
+    cmdclass    = {}
+
+
+# Declare the extension object
+pyx_ext = Extension('tagreader',
+                    pyx_sources,
+                    include_dirs = [numpy.get_include()])
+
+# Call the routine which does the real work
+setup(name        = 'tagreader',
+      description = 'tagreader extension',
+      ext_modules = [pyx_ext],
+      cmdclass    = cmdclass,
+      )

Modified: branches/mb_mio_refactor/matlab/mio.py
===================================================================
--- branches/mb_mio_refactor/matlab/mio.py	2008-06-24 08:02:32 UTC (rev 4472)
+++ branches/mb_mio_refactor/matlab/mio.py	2008-06-24 08:55:06 UTC (rev 4473)
@@ -7,6 +7,7 @@
 import os
 import sys
 
+from miobase import get_matfile_version
 from mio4 import MatFile4Reader, MatFile4Writer
 from mio5 import MatFile5Reader, MatFile5Writer
 
@@ -57,11 +58,16 @@
             raise IOError, 'Reader needs file name or open file-like object'
         byte_stream = file_name
 
-    MR = MatFile4Reader(byte_stream, **kwargs)
-    if MR.format_looks_right():
-        return MR
-    return MatFile5Reader(byte_stream, **kwargs)
-
+    mv = get_matfile_version(byte_stream)
+    if mv == '4':
+        return MatFile4Reader(byte_stream, **kwargs)
+    elif mv == '5':
+        return MatFile5Reader(byte_stream, **kwargs)
+    elif mv == '7':
+        raise NotImplementedError('Please use PyTables for matlab HDF files')
+    else:
+        raise TypeError('Did not recognize version %s' % mv)
+    
 def loadmat(file_name,  mdict=None, appendmat=True, basename='raw', **kwargs):
     ''' Load Matlab(tm) file
 

Modified: branches/mb_mio_refactor/matlab/mio4.py
===================================================================
--- branches/mb_mio_refactor/matlab/mio4.py	2008-06-24 08:02:32 UTC (rev 4472)
+++ branches/mb_mio_refactor/matlab/mio4.py	2008-06-24 08:55:06 UTC (rev 4473)
@@ -198,15 +198,6 @@
     def matrix_getter_factory(self):
         return self._array_reader.matrix_getter_factory()
 
-    def format_looks_right(self):
-        # Mat4 files have a zero somewhere in first 4 bytes
-        self.mat_stream.seek(0)
-        mopt_bytes = N.ndarray(shape=(4,),
-                             dtype=N.uint8,
-                             buffer = self.mat_stream.read(4))
-        self.mat_stream.seek(0)
-        return 0 in mopt_bytes
-
     def guess_byte_order(self):
         self.mat_stream.seek(0)
         mopt = self.read_dtype(N.dtype('i4'))

Modified: branches/mb_mio_refactor/matlab/mio5.py
===================================================================
--- branches/mb_mio_refactor/matlab/mio5.py	2008-06-24 08:02:32 UTC (rev 4472)
+++ branches/mb_mio_refactor/matlab/mio5.py	2008-06-24 08:55:06 UTC (rev 4473)
@@ -188,46 +188,27 @@
         self.class_dtypes = class_dtypes
 
     def read_element(self, copy=True):
-        raw_tag = self.mat_stream.read(8)
-        tag = N.ndarray(shape=(),
-                        dtype=self.dtypes['tag_full'],
-                        buffer=raw_tag)
-        mdtype = tag['mdtype'].item()
-
-        byte_count = mdtype >> 16
-        if byte_count: # small data element format
-            if byte_count > 4:
-                raise ValueError, 'Too many bytes for sde format'
-            mdtype = mdtype & 0xFFFF
-            dt = self.dtypes[mdtype]
-            el_count = byte_count // dt.itemsize
-            return N.ndarray(shape=(el_count,),
-                             dtype=dt,
-                             buffer=raw_tag[4:])
-
-        byte_count = tag['byte_count'].item()
+        mdtype, byte_count, buf = tagparse(self.mat_stream, swapf)
         if mdtype == miMATRIX:
+            # Can this use buf or not?
             return self.current_getter(byte_count).get_array()
         elif mdtype in self.codecs: # encoded char data
-            raw_str = self.mat_stream.read(byte_count)
             codec = self.codecs[mdtype]
             if not codec:
                 raise TypeError, 'Do not support encoding %d' % mdtype
-            el = raw_str.decode(codec)
+            el = buf.decode(codec)
         else: # numeric data
             dt = self.dtypes[mdtype]
             el_count = byte_count // dt.itemsize
             el = N.ndarray(shape=(el_count,),
                          dtype=dt,
-                         buffer=self.mat_stream.read(byte_count))
+                         buffer=buf)
             if copy:
                 el = el.copy()
-
         # Seek to next 64-bit boundary
         mod8 = byte_count % 8
         if mod8:
             self.mat_stream.seek(8 - mod8, 1)
-
         return el
 
     def matrix_getter_factory(self):
@@ -460,7 +441,6 @@
     uint16_codec       - char codec to use for uint16 char arrays
                           (defaults to system default codec)
    '''
-
     def __init__(self,
                  mat_stream,
                  byte_order=None,
@@ -533,6 +513,8 @@
         return self._array_reader.matrix_getter_factory()
 
     def guess_byte_order(self):
+        ''' Guess byte order.
+        Sets stream pointer to 0 '''
         self.mat_stream.seek(126)
         mi = self.mat_stream.read(2)
         self.mat_stream.seek(0)
@@ -548,16 +530,7 @@
         hdict['__version__'] = '%d.%d' % (v_major, v_minor)
         return hdict
 
-    def format_looks_right(self):
-        # Mat4 files have a zero somewhere in first 4 bytes
-        self.mat_stream.seek(0)
-        mopt_bytes = N.ndarray(shape=(4,),
-                             dtype=N.uint8,
-                             buffer = self.mat_stream.read(4))
-        self.mat_stream.seek(0)
-        return 0 not in mopt_bytes
 
-
 class Mat5MatrixWriter(MatStreamWriter):
 
     mat_tag = N.zeros((), mdtypes_template['tag_full'])

Modified: branches/mb_mio_refactor/matlab/miobase.py
===================================================================
--- branches/mb_mio_refactor/matlab/miobase.py	2008-06-24 08:02:32 UTC (rev 4472)
+++ branches/mb_mio_refactor/matlab/miobase.py	2008-06-24 08:55:06 UTC (rev 4473)
@@ -22,6 +22,38 @@
         res *= e
     return res
 
+def get_matfile_version(fileobj):
+    ''' Return '4', '5', or '7' depending on apparent mat file type
+    Inputs
+    fileobj       - file object implementing seek() and read()
+    Outputs
+    version_str   - one of (strings) 4, 5, or 7
+    
+    Has the side effect of setting the file read pointer to 0
+    '''
+    # Mat4 files have a zero somewhere in first 4 bytes
+    fileobj.seek(0)
+    mopt_bytes = N.ndarray(shape=(4,),
+                           dtype=N.uint8,
+                           buffer = fileobj.read(4))
+    if 0 in mopt_bytes:
+        fileobj.seek(0)
+        return '4'
+    # For 5 or 7 we need to read an integer in the header.
+    # The 4 bytes at offsets 124-127 hold a 2-byte version integer
+    # followed by a 2-byte endian test string
+    fileobj.seek(124)
+    tst_str = fileobj.read(4)
+    fileobj.seek(0)
+    maj_ind = int(tst_str[2] == 'I')
+    verb = ord(tst_str[maj_ind])
+    if verb == 1:
+        return '5'
+    elif verb == 2:
+        return '7'
+    raise ValueError('Unknown mat file type, version %d' % verb)
+
+
 class ByteOrder(object):
     ''' Namespace for byte ordering '''
     little_endian = sys.byteorder == 'little'
@@ -50,7 +82,7 @@
     Attaches to initialized stream
 
     Base class for "getters" - which do store state of what they are
-    reading on itialization, and therefore need to be initialized
+    reading on initialization, and therefore need to be initialized
     before each read, and "readers" which do not store state, and only
     need to be initialized once on object creation
 
@@ -102,11 +134,8 @@
 
     set_dtypes              - sets data types defs from byte order
     matrix_getter_factory   - gives object to fetch next matrix from stream
-    format_looks_right      - returns True if format looks correct for
-                              this file type (Mat4, Mat5)
     guess_byte_order        - guesses file byte order from file
     """
-
     def __init__(self, mat_stream,
                  byte_order=None,
                  mat_dtype=False,
@@ -177,7 +206,8 @@
                           'get/set order code')
 
     def set_dtypes(self):
-        assert False, 'Not implemented'
+        ''' Set dtype endianness. In this case we have no dtypes '''
+        pass
 
     def convert_dtypes(self, dtype_template):
         dtypes = dtype_template.copy()
@@ -188,16 +218,13 @@
 
     def matrix_getter_factory(self):
         assert False, 'Not implemented'
-
-    def format_looks_right(self):
-        "Return True if the format looks right for this object"
-        assert False, 'Not implemented'
-
+    
     def file_header(self):
         return {}
 
     def guess_byte_order(self):
-        assert 0, 'Not implemented'
+        ''' As we do not know what file type we have, assume native '''
+        return ByteOrder.native_code
 
     def get_processor_func(self):
         ''' Processing to apply to read matrices

Added: branches/mb_mio_refactor/matlab/tagreader.pyx
===================================================================
--- branches/mb_mio_refactor/matlab/tagreader.pyx	2008-06-24 08:02:32 UTC (rev 4472)
+++ branches/mb_mio_refactor/matlab/tagreader.pyx	2008-06-24 08:55:06 UTC (rev 4473)
@@ -0,0 +1,19 @@
+# -*- python -*- 
+''' Extension to parse matlab 5 tags '''
+
+# Import the pieces of the Python C API we need to use (from c_python.pxd):
+cimport c_python as py
+
+def parse(fileobj, int swapf):
+    ''' Read in the tag
+    The tag can be normal format (mdtype=u4, byte_count=u4)
+    or small element format (mdtype=u2, byte_count=u2, data in last 4 bytes)
+    Small element format is where mdtype (u4) has non-zero high bytes
+    '''
+    cdef py.size_t n_out
+    cdef char raw_tag[8]
+    cdef py.FILE* infile
+    infile = py.PyFile_AsFile(fileobj)
+    n_out = py.fread(raw_tag, 8, 1, infile)
+    # TODO: raise an exception if n_out < 1 (fread got no complete 8-byte tag)
+    return mdtype, byte_count, buf



More information about the Scipy-svn mailing list