[Numpy-svn] r8240 - in trunk/numpy/core: . src/multiarray tests

numpy-svn@scip... numpy-svn@scip...
Sun Feb 21 09:36:18 CST 2010


Author: ptvirtan
Date: 2010-02-21 09:36:18 -0600 (Sun, 21 Feb 2010)
New Revision: 8240

Modified:
   trunk/numpy/core/_internal.py
   trunk/numpy/core/src/multiarray/buffer.c
   trunk/numpy/core/tests/test_multiarray.py
Log:
ENH: core: better support for native vs. standard sizes and alignments in the PEP 3118 interface

Also, try to produce minimal buffer format strings without unnecessary
alignment characters.

Modified: trunk/numpy/core/_internal.py
===================================================================
--- trunk/numpy/core/_internal.py	2010-02-21 12:25:57 UTC (rev 8239)
+++ trunk/numpy/core/_internal.py	2010-02-21 15:36:18 UTC (rev 8240)
@@ -350,7 +350,7 @@
 # Given a string containing a PEP 3118 format specifier,
 # construct a Numpy dtype
 
-_pep3118_map = {
+_pep3118_native_map = {
     '?': '?',
     'b': 'b',
     'B': 'B',
@@ -365,7 +365,6 @@
     'f': 'f',
     'd': 'd',
     'g': 'g',
-    'Q': 'Q',
     'Zf': 'F',
     'Zd': 'D',
     'Zg': 'G',
@@ -374,9 +373,32 @@
     'O': 'O',
     'x': 'V', # padding
 }
-_pep3118_typechars = ''.join(_pep3118_map.keys())
+_pep3118_native_typechars = ''.join(_pep3118_native_map.keys())
 
-def _dtype_from_pep3118(spec, byteorder='=', is_subdtype=False):
+_pep3118_standard_map = {
+    '?': '?',
+    'b': 'b',
+    'B': 'B',
+    'h': 'i2',
+    'H': 'u2',
+    'i': 'i4',
+    'I': 'u4',
+    'l': 'i4',
+    'L': 'u4',
+    'q': 'i8',
+    'Q': 'u8',
+    'f': 'f',
+    'd': 'd',
+    'Zf': 'F',
+    'Zd': 'D',
+    's': 'S',
+    'w': 'U',
+    'O': 'O',
+    'x': 'V', # padding
+}
+_pep3118_standard_typechars = ''.join(_pep3118_standard_map.keys())
+
+def _dtype_from_pep3118(spec, byteorder='@', is_subdtype=False):
     from numpy.core.multiarray import dtype
 
     fields = {}
@@ -400,10 +422,18 @@
             spec = spec[j+1:]
 
         # Byte order
-        if spec[0] in ('=', '<', '>'):
+        if spec[0] in ('@', '=', '<', '>', '^'):
             byteorder = spec[0]
             spec = spec[1:]
 
+        # Byte order characters also control native vs. standard type sizes
+        if byteorder in ('@', '^'):
+            type_map = _pep3118_native_map
+            type_map_chars = _pep3118_native_typechars
+        else:
+            type_map = _pep3118_standard_map
+            type_map_chars = _pep3118_standard_typechars
+
         # Item sizes
         itemsize = 1
         if spec[0].isdigit():
@@ -423,22 +453,41 @@
             if itemsize != 1:
                 # Not supported
                 raise ValueError("Non item-size 1 structures not supported")
-        elif spec[0] in _pep3118_typechars:
+        elif spec[0] in type_map_chars:
             j = 1
             for j in xrange(1, len(spec)):
-                if spec[j] not in _pep3118_typechars:
+                if spec[j] not in type_map_chars:
                     break
             typechar = spec[:j]
             spec = spec[j:]
             is_padding = (typechar == 'x')
-            dtypechar = _pep3118_map[typechar]
+            dtypechar = type_map[typechar]
             if dtypechar in 'USV':
                 dtypechar += '%d' % itemsize
                 itemsize = 1
-            value = dtype(byteorder + dtypechar)
+            numpy_byteorder = {'@': '=', '^': '='}.get(byteorder, byteorder)
+            value = dtype(numpy_byteorder + dtypechar)
         else:
             raise ValueError("Unknown PEP 3118 data type specifier %r" % spec)
 
+        # Native alignment may require padding
+        #
+        # XXX: here we assume that the presence of a '@' character implies
+        #      that the start of the array is *also* aligned.
+        extra_offset = 0
+        if byteorder == '@':
+            start_padding = offset % value.alignment
+            intra_padding = value.itemsize % value.alignment
+
+            offset += start_padding
+
+            if intra_padding != 0:
+                if itemsize > 1 or shape is not None:
+                    value = dtype([('f0', value),
+                                   ('pad', '%dV' % intra_padding)])
+                else:
+                    extra_offset += intra_padding
+
         # Convert itemsize to sub-array
         if itemsize != 1:
             value = dtype((value, (itemsize,)))
@@ -462,8 +511,8 @@
         if not is_padding or this_explicit_name:
             fields[name] = (value, offset)
         offset += value.itemsize
+        offset += extra_offset
 
-
     if len(fields.keys()) == 1 and not explicit_name and fields['f0'][1] == 0:
         ret = fields['f0'][0]
     else:

Modified: trunk/numpy/core/src/multiarray/buffer.c
===================================================================
--- trunk/numpy/core/src/multiarray/buffer.c	2010-02-21 12:25:57 UTC (rev 8239)
+++ trunk/numpy/core/src/multiarray/buffer.c	2010-02-21 15:36:18 UTC (rev 8240)
@@ -142,12 +142,56 @@
     return 0;
 }
 
+/*
+ * Return non-zero if `descr` items at byte `offset` in `arr` are natively
+ * aligned: the data pointer, the offset, descr->elsize, and each stride of
+ * a dimension longer than 1 must all be multiples of descr->alignment.
+ */
 static int
+_is_natively_aligned_at(PyArray_Descr *descr,
+                        PyArrayObject *arr, Py_ssize_t offset)
+{
+    int k;
+
+    if ((Py_ssize_t)(arr->data) % descr->alignment != 0) {
+        return 0;
+    }
+
+    if (offset % descr->alignment != 0) {
+        return 0;
+    }
+
+    if (descr->elsize % descr->alignment) {
+        return 0;
+    }
+
+    for (k = 0; k < arr->nd; ++k) {
+        if (arr->dimensions[k] > 1) {
+            if (arr->strides[k] % descr->alignment != 0) {
+                return 0;
+            }
+        }
+    }
+
+    return 1;
+}
+
+static int
 _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str,
-                      Py_ssize_t *offset)
+                      PyArrayObject* arr, Py_ssize_t *offset,
+                      char *active_byteorder)
 {
     int k;
+    char _active_byteorder = '@';
+    Py_ssize_t _offset = 0;
 
+    if (active_byteorder == NULL) {
+        active_byteorder = &_active_byteorder;
+    }
+    if (offset == NULL) {
+        offset = &_offset;
+    }
+
     if (descr->subarray) {
         PyObject *item, *repr;
         Py_ssize_t total_count = 1;
@@ -170,7 +214,8 @@
         }
         _append_char(str, ')');
         old_offset = *offset;
-        ret = _buffer_format_string(descr->subarray->base, str, offset);
+        ret = _buffer_format_string(descr->subarray->base, str, arr, offset,
+                                    active_byteorder);
         *offset = old_offset + (*offset - old_offset) * total_count;
         return ret;
     }
@@ -198,7 +243,8 @@
             *offset += child->elsize;
 
             /* Insert child item */
-            _buffer_format_string(child, str, offset);
+            _buffer_format_string(child, str, arr, offset,
+                                  active_byteorder);
 
             /* Insert field name */
 #if defined(NPY_PY3K)
@@ -232,11 +278,50 @@
         _append_char(str, '}');
     }
     else {
-        if (descr->byteorder == '<' || descr->byteorder == '>' ||
-            descr->byteorder == '=') {
-            _append_char(str, descr->byteorder);
+        int is_standard_size = 1;
+        int is_native_only_type = (descr->type_num == NPY_LONGDOUBLE ||
+                                   descr->type_num == NPY_CLONGDOUBLE);
+#if NPY_SIZEOF_LONG_LONG != 8
+        is_native_only_type = is_native_only_type || (
+            descr->type_num == NPY_LONGLONG ||
+            descr->type_num == NPY_ULONGLONG);
+#endif
+
+        if (descr->byteorder == '=' &&
+                _is_natively_aligned_at(descr, arr, *offset)) {
+            /* Prefer native types, to cater for Cython */
+            is_standard_size = 0;
+            if (*active_byteorder != '@') {
+                _append_char(str, '@');
+                *active_byteorder = '@';
+            }
         }
+        else if (descr->byteorder == '=' && is_native_only_type) {
+            /* Data types that have no standard size */
+            is_standard_size = 0;
+            if (*active_byteorder != '^') {
+                _append_char(str, '^');
+                *active_byteorder = '^';
+            }
+        }
+        else if (descr->byteorder == '<' || descr->byteorder == '>' ||
+                 descr->byteorder == '=') {
+            is_standard_size = 1;
+            if (*active_byteorder != descr->byteorder) {
+                _append_char(str, descr->byteorder);
+                *active_byteorder = descr->byteorder;
+            }
 
+            if (is_native_only_type) {
+                /* It's not possible to express native-only data types
+                   in non-native byte orders */
+                PyErr_Format(PyExc_ValueError,
+                             "cannot expose native-only dtype '%c' in "
+                             "non-native byte order '%c' via buffer interface",
+                             descr->type, descr->byteorder);
+            }
+        }
+
         switch (descr->type_num) {
         case NPY_BOOL:         if (_append_char(str, '?')) return -1; break;
         case NPY_BYTE:         if (_append_char(str, 'b')) return -1; break;
@@ -245,8 +330,22 @@
         case NPY_USHORT:       if (_append_char(str, 'H')) return -1; break;
         case NPY_INT:          if (_append_char(str, 'i')) return -1; break;
         case NPY_UINT:         if (_append_char(str, 'I')) return -1; break;
-        case NPY_LONG:         if (_append_char(str, 'l')) return -1; break;
-        case NPY_ULONG:        if (_append_char(str, 'L')) return -1; break;
+        case NPY_LONG:
+            if (is_standard_size && (NPY_SIZEOF_LONG == 8)) {
+                if (_append_char(str, 'q')) return -1;
+            }
+            else {
+                if (_append_char(str, 'l')) return -1;
+            }
+            break;
+        case NPY_ULONG:
+            if (is_standard_size && (NPY_SIZEOF_LONG == 8)) {
+                if (_append_char(str, 'Q')) return -1;
+            }
+            else {
+                if (_append_char(str, 'L')) return -1;
+            }
+            break;
         case NPY_LONGLONG:     if (_append_char(str, 'q')) return -1; break;
         case NPY_ULONGLONG:    if (_append_char(str, 'Q')) return -1; break;
         case NPY_FLOAT:        if (_append_char(str, 'f')) return -1; break;
@@ -280,8 +379,9 @@
             break;
         }
         default:
-            PyErr_Format(PyExc_ValueError, "cannot convert dtype %d to buffer",
-                         descr->type_num);
+            PyErr_Format(PyExc_ValueError,
+                         "cannot include dtype '%c' in a buffer",
+                         descr->type);
             return -1;
         }
     }
@@ -322,14 +422,13 @@
 _buffer_info_new(PyArrayObject *arr)
 {
     _buffer_info_t *info;
-    Py_ssize_t offset = 0;
     _tmp_string_t fmt = {0,0,0};
     int k;
 
     info = (_buffer_info_t*)malloc(sizeof(_buffer_info_t));
 
     /* Fill in format */
-    if (_buffer_format_string(PyArray_DESCR(arr), &fmt, &offset) != 0) {
+    if (_buffer_format_string(PyArray_DESCR(arr), &fmt, arr, NULL, NULL) != 0) {
         free(info);
         return NULL;
     }

Modified: trunk/numpy/core/tests/test_multiarray.py
===================================================================
--- trunk/numpy/core/tests/test_multiarray.py	2010-02-21 12:25:57 UTC (rev 8239)
+++ trunk/numpy/core/tests/test_multiarray.py	2010-02-21 15:36:18 UTC (rev 8240)
@@ -1504,16 +1504,35 @@
             x = np.array(([[1,2],[3,4]],), dtype=[('a', (int, (2,2)))])
             self._check_roundtrip(x)
 
+            x = np.array([1,2,3], dtype='>i2')
+            self._check_roundtrip(x)
+
+            x = np.array([1,2,3], dtype='<i2')
+            self._check_roundtrip(x)
+
             x = np.array([1,2,3], dtype='>i4')
             self._check_roundtrip(x)
 
             x = np.array([1,2,3], dtype='<i4')
             self._check_roundtrip(x)
 
+            # Native-only data types can be passed through the buffer interface
+            # only in native byte order
+            if sys.byteorder == 'little':
+                x = np.array([1,2,3], dtype='>i8')
+                assert_raises(ValueError, self._check_roundtrip, x)
+                x = np.array([1,2,3], dtype='<i8')
+                self._check_roundtrip(x)
+            else:
+                x = np.array([1,2,3], dtype='>i8')
+                self._check_roundtrip(x)
+                x = np.array([1,2,3], dtype='<i8')
+                assert_raises(ValueError, self._check_roundtrip, x)
+
         def test_export_simple_1d(self):
             x = np.array([1,2,3,4,5], dtype='i')
             y = memoryview(x)
-            assert_equal(y.format, '=i')
+            assert_equal(y.format, 'i')
             assert_equal(y.shape, (5,))
             assert_equal(y.ndim, 1)
             assert_equal(y.strides, (4,))
@@ -1523,7 +1542,7 @@
         def test_export_simple_nd(self):
             x = np.array([[1,2],[3,4]], dtype=np.float64)
             y = memoryview(x)
-            assert_equal(y.format, '=d')
+            assert_equal(y.format, 'd')
             assert_equal(y.shape, (2, 2))
             assert_equal(y.ndim, 2)
             assert_equal(y.strides, (16, 8))
@@ -1533,7 +1552,7 @@
         def test_export_discontiguous(self):
             x = np.zeros((3,3,3), dtype=np.float32)[:,0,:]
             y = memoryview(x)
-            assert_equal(y.format, '=f')
+            assert_equal(y.format, 'f')
             assert_equal(y.shape, (3, 3))
             assert_equal(y.ndim, 2)
             assert_equal(y.strides, (36, 4))
@@ -1562,7 +1581,7 @@
                            asbytes('aaaa'), 'bbbb', asbytes('   '), True)],
                          dtype=dt)
             y = memoryview(x)
-            assert_equal(y.format, 'T{b:a:=h:b:=i:c:=l:d:=q:dx:B:e:=H:f:=I:g:=L:h:=Q:hx:=d:i:=d:j:=g:k:4s:l:=4w:m:3x:n:?:o:}')
+            assert_equal(y.format, 'T{b:a:=h:b:i:c:l:d:^q:dx:B:e:@H:f:=I:g:L:h:^Q:hx:=d:i:d:j:^g:k:4s:l:=4w:m:3x:n:?:o:}')
             assert_equal(y.shape, (1,))
             assert_equal(y.ndim, 1)
             assert_equal(y.suboffsets, None)
@@ -1576,7 +1595,7 @@
         def test_export_subarray(self):
             x = np.array(([[1,2],[3,4]],), dtype=[('a', ('i', (2,2)))])
             y = memoryview(x)
-            assert_equal(y.format, 'T{(2,2)=i:a:}')
+            assert_equal(y.format, 'T{(2,2)i:a:}')
             assert_equal(y.shape, None)
             assert_equal(y.ndim, 0)
             assert_equal(y.strides, None)
@@ -1589,12 +1608,12 @@
             if sys.byteorder == 'little':
                 assert_equal(y.format, '>l')
             else:
-                assert_equal(y.format, '=l')
+                assert_equal(y.format, 'l')
 
             x = np.array([1,2,3], dtype='<l')
             y = memoryview(x)
             if sys.byteorder == 'little':
-                assert_equal(y.format, '=l')
+                assert_equal(y.format, 'l')
             else:
                 assert_equal(y.format, '<l')
 



More information about the Numpy-svn mailing list