[Scipy-svn] r3568 - in trunk/scipy/sandbox/numexpr: . tests

scipy-svn@scip... scipy-svn@scip...
Fri Nov 23 01:20:10 CST 2007


Author: cookedm
Date: 2007-11-23 01:20:02 -0600 (Fri, 23 Nov 2007)
New Revision: 3568

Added:
   trunk/scipy/sandbox/numexpr/boolean_timing.py
Modified:
   trunk/scipy/sandbox/numexpr/__init__.py
   trunk/scipy/sandbox/numexpr/compiler.py
   trunk/scipy/sandbox/numexpr/complex_functions.inc
   trunk/scipy/sandbox/numexpr/expressions.py
   trunk/scipy/sandbox/numexpr/info.py
   trunk/scipy/sandbox/numexpr/interp_body.c
   trunk/scipy/sandbox/numexpr/interpreter.c
   trunk/scipy/sandbox/numexpr/tests/test_numexpr.py
   trunk/scipy/sandbox/numexpr/timing.py
Log:
[numexpr] Merge improvements from PyTables into numexpr (#529), thanks to Francesc Altet.
This includes:
 - support for long long int datatype
 - support for string datatype
 - optimization of the unidimensional strided array case
 - optimization of the unidimensional unaligned array case


Modified: trunk/scipy/sandbox/numexpr/__init__.py
===================================================================
--- trunk/scipy/sandbox/numexpr/__init__.py	2007-11-22 12:38:12 UTC (rev 3567)
+++ trunk/scipy/sandbox/numexpr/__init__.py	2007-11-23 07:20:02 UTC (rev 3568)
@@ -1,6 +1,6 @@
-from info import __doc__
-from expressions import E
-from compiler import numexpr, disassemble, evaluate
+from numexpr.info import __doc__
+from numexpr.expressions import E
+from numexpr.compiler import numexpr, disassemble, evaluate
 
 def test(level=1, verbosity=1):
     from numpy.testing import NumpyTest

Added: trunk/scipy/sandbox/numexpr/boolean_timing.py
===================================================================
--- trunk/scipy/sandbox/numexpr/boolean_timing.py	2007-11-22 12:38:12 UTC (rev 3567)
+++ trunk/scipy/sandbox/numexpr/boolean_timing.py	2007-11-23 07:20:02 UTC (rev 3568)
@@ -0,0 +1,139 @@
+import sys
+import timeit
+import numpy
+
+array_size = 1000*1000
+iterations = 10
+
+numpy_ttime = 0
+numpy_sttime = 0
+numpy_nttime = 0
+numexpr_ttime = 0
+numexpr_sttime = 0
+numexpr_nttime = 0
+
+def compare_times(expr, nexpr):
+    global numpy_ttime
+    global numpy_sttime
+    global numpy_nttime
+    global numexpr_ttime
+    global numexpr_sttime
+    global numexpr_nttime
+
+    print "******************* Expression:", expr
+
+    setup_contiguous = setupNP_contiguous
+    setup_strided = setupNP_strided
+    setup_unaligned = setupNP_unaligned
+
+    numpy_timer = timeit.Timer(expr, setup_contiguous)
+    numpy_time = round(numpy_timer.timeit(number=iterations), 4)
+    numpy_ttime += numpy_time
+    print 'numpy:', numpy_time / iterations
+
+    numpy_timer = timeit.Timer(expr, setup_strided)
+    numpy_stime = round(numpy_timer.timeit(number=iterations), 4)
+    numpy_sttime += numpy_stime
+    print 'numpy strided:', numpy_stime / iterations
+
+    numpy_timer = timeit.Timer(expr, setup_unaligned)
+    numpy_ntime = round(numpy_timer.timeit(number=iterations), 4)
+    numpy_nttime += numpy_ntime
+    print 'numpy unaligned:', numpy_ntime / iterations
+
+    evalexpr = 'evaluate("%s", optimization="aggressive")' % expr
+    numexpr_timer = timeit.Timer(evalexpr, setup_contiguous)
+    numexpr_time = round(numexpr_timer.timeit(number=iterations), 4)
+    numexpr_ttime += numexpr_time
+    print "numexpr:", numexpr_time/iterations,
+    print "Speed-up of numexpr over numpy:", round(numpy_time/numexpr_time, 4)
+
+    evalexpr = 'evaluate("%s", optimization="aggressive")' % expr
+    numexpr_timer = timeit.Timer(evalexpr, setup_strided)
+    numexpr_stime = round(numexpr_timer.timeit(number=iterations), 4)
+    numexpr_sttime += numexpr_stime
+    print "numexpr strided:", numexpr_stime/iterations,
+    print "Speed-up of numexpr strided over numpy:", \
+          round(numpy_stime/numexpr_stime, 4)
+
+    evalexpr = 'evaluate("%s", optimization="aggressive")' % expr
+    numexpr_timer = timeit.Timer(evalexpr, setup_unaligned)
+    numexpr_ntime = round(numexpr_timer.timeit(number=iterations), 4)
+    numexpr_nttime += numexpr_ntime
+    print "numexpr unaligned:", numexpr_ntime/iterations,
+    print "Speed-up of numexpr unaligned over numpy:", \
+          round(numpy_ntime/numexpr_ntime, 4)
+
+
+
+setupNP = """\
+from numpy import arange, where, arctan2, sqrt
+from numpy import rec as records
+from numexpr import evaluate
+
+# Initialize a recarray of 16 MB in size
+r=records.array(None, formats='a%s,i4,f8', shape=%s)
+c1 = r.field('f0')%s
+i2 = r.field('f1')%s
+f3 = r.field('f2')%s
+c1[:] = "a"
+i2[:] = arange(%s)/1000
+f3[:] = i2/2.
+"""
+
+setupNP_contiguous = setupNP % (4, array_size,
+                                ".copy()", ".copy()", ".copy()",
+                                array_size)
+setupNP_strided = setupNP % (4, array_size, "", "", "", array_size)
+setupNP_unaligned = setupNP % (1, array_size, "", "", "", array_size)
+
+
+expressions = []
+expressions.append('i2 > 0')
+expressions.append('i2 < 0')
+expressions.append('i2 < f3')
+expressions.append('i2-10 < f3')
+expressions.append('i2*f3+f3*f3 > i2')
+expressions.append('0.1*i2 > arctan2(i2, f3)')
+expressions.append('i2%2 > 3')
+expressions.append('i2%10 < 4')
+expressions.append('i2**2 + (f3+1)**-2.5 < 3')
+expressions.append('(f3+1)**50 > i2')
+expressions.append('sqrt(i2**2 + f3**2) > 1')
+expressions.append('(i2>2) | ((f3**2>3) & ~(i2*f3<2))')
+
+def compare(expression=False):
+    if expression:
+        compare_times(expression, 1)
+        sys.exit(0)
+    nexpr = 0
+    for expr in expressions:
+        nexpr += 1
+        compare_times(expr, nexpr)
+    print
+
+if __name__ == '__main__':
+
+    print 'Python version:    %s' % sys.version
+    print "NumPy version:     %s" % numpy.__version__
+
+    if len(sys.argv) > 1:
+        expression = sys.argv[1]
+        print "expression-->", expression
+        compare(expression)
+    else:
+        compare()
+
+    print "*************** TOTALS **************************"
+    print "numpy total:", numpy_ttime/iterations
+    print "numpy strided total:", numpy_sttime/iterations
+    print "numpy unaligned total:", numpy_nttime/iterations
+    print "numexpr total:", numexpr_ttime/iterations
+    print "Speed-up of numexpr over numpy:", \
+          round(numpy_ttime/numexpr_ttime, 3)
+    print "numexpr strided total:", numexpr_sttime/iterations
+    print "Speed-up of numexpr strided over numpy:", \
+          round(numpy_sttime/numexpr_sttime, 3)
+    print "numexpr unaligned total:", numexpr_nttime/iterations
+    print "Speed-up of numexpr unaligned over numpy:", \
+          round(numpy_nttime/numexpr_nttime, 3)

Modified: trunk/scipy/sandbox/numexpr/compiler.py
===================================================================
--- trunk/scipy/sandbox/numexpr/compiler.py	2007-11-22 12:38:12 UTC (rev 3567)
+++ trunk/scipy/sandbox/numexpr/compiler.py	2007-11-23 07:20:02 UTC (rev 3568)
@@ -1,12 +1,12 @@
 import sys
 import numpy
 
-import interpreter, expressions
+from numexpr import interpreter, expressions
 
-typecode_to_kind = {'b': 'bool', 'i': 'int', 'f': 'float',
-                    'c': 'complex', 'n' : 'none'}
-kind_to_typecode = {'bool': 'b', 'int': 'i', 'float': 'f',
-                    'complex': 'c', 'none' : 'n'}
+typecode_to_kind = {'b': 'bool', 'i': 'int', 'l': 'long', 'f': 'float',
+                    'c': 'complex', 's': 'str', 'n' : 'none'}
+kind_to_typecode = {'bool': 'b', 'int': 'i', 'long': 'l', 'float': 'f',
+                    'complex': 'c', 'str': 's', 'none' : 'n'}
 type_to_kind = expressions.type_to_kind
 kind_to_type = expressions.kind_to_type
 
@@ -91,7 +91,7 @@
     """Generate all possible signatures derived by upcasting the given
     signature.
     """
-    codes = 'bifc'
+    codes = 'bilfc'
     if not s:
         yield ''
     elif s[0] in codes:
@@ -99,6 +99,9 @@
         for x in codes[start:]:
             for y in sigPerms(s[1:]):
                 yield x + y
+    elif s[0] == 's':  # numbers shall not be cast to strings
+        for y in sigPerms(s[1:]):
+            yield 's' + y
     else:
         yield s
 
@@ -198,6 +201,10 @@
         for name in c.co_names:
             if name == "None":
                 names[name] = None
+            elif name == "True":
+                names[name] = True
+            elif name == "False":
+                names[name] = False
             else:
                 t = types.get(name, float)
                 names[name] = expressions.VariableNode(name, type_to_kind[t])
@@ -206,6 +213,8 @@
         ex = eval(c, names)
         if expressions.isConstant(ex):
             ex = expressions.ConstantNode(ex, expressions.getKind(ex))
+        elif not isinstance(ex, expressions.ExpressionNode):
+            raise TypeError("unsupported expression type: %s" % type(ex))
     finally:
         expressions._context.ctx = old_ctx
     return ex
@@ -308,8 +317,8 @@
         for c in n.children:
             if c.reg.temporary:
                 users_of[c.reg].add(n)
-    unused = {'bool' : set(), 'int' : set(),
-              'float' : set(), 'complex' : set()}
+    unused = {'bool' : set(), 'int' : set(), 'long': set(),
+              'float' : set(), 'complex' : set(), 'str': set()}
     for n in nodes:
         for reg, users in users_of.iteritems():
             if n in users:
@@ -435,7 +444,7 @@
 
     ast = expressionToAST(ex)
 
-    # Add a copy for strided or unaligned arrays
+    # Add a copy for strided or unaligned unidimensional arrays
     for a in ast.postorderWalk():
         if a.astType == "variable" and a.value in copy_args:
             newVar = ASTNode(*a.key())
@@ -536,15 +545,19 @@
 
 
 def getType(a):
-    t = a.dtype.type
-    if issubclass(t, numpy.bool_):
+    kind = a.dtype.kind
+    if kind == 'b':
         return bool
-    if issubclass(t, numpy.integer):
+    if kind in 'iu':
+        if a.dtype.itemsize > 4:
+            return long  # ``long`` is for integers of more than 32 bits
         return int
-    if issubclass(t, numpy.floating):
+    if kind == 'f':
         return float
-    if issubclass(t, numpy.complexfloating):
+    if kind == 'c':
         return complex
+    if kind == 'S':
+        return str
     raise ValueError("unkown type %s" % a.dtype.name)
 
 
@@ -589,12 +602,29 @@
             a = local_dict[name]
         except KeyError:
             a = global_dict[name]
-        # byteswapped arrays are taken care of in the extension.
-        arguments.append(numpy.asarray(a)) # don't make a data copy, if possible
-        if (hasattr(a, "flags") and            # numpy object
-            (not a.flags.contiguous or
-             not a.flags.aligned)):
-            copy_args.append(name)    # do a copy to temporary
+        b = numpy.asarray(a)
+        # Byteswapped arrays are dealt with in the extension
+        # All the opcodes can deal with strided arrays directly as
+        # long as they are undimensional (strides in other
+        # dimensions are dealt within the extension), so we don't
+        # need a copy for the strided case.
+        if not b.flags.aligned:
+            # For the unaligned case, we have two cases:
+            if b.ndim == 1:
+                # For unidimensional arrays we can use the copy opcode
+                # because it can deal with unaligned arrays as long
+                # as they are unidimensionals with a possible stride
+                # (very common case for recarrays).  This can be up to
+                # 2x faster than doing a copy using NumPy.
+                copy_args.append(name)
+            else:
+                # For multimensional unaligned arrays do a plain copy.
+                # We could refine more this and do a plain copy only
+                # in the case that strides doesn't exist in dimensions
+                # other than the last one (whose case is supported by
+                # the copy opcode).
+                b = b.copy()
+        arguments.append(b)
 
     # Create a signature
     signature = [(name, getType(arg)) for (name, arg) in zip(names, arguments)]

Modified: trunk/scipy/sandbox/numexpr/complex_functions.inc
===================================================================
--- trunk/scipy/sandbox/numexpr/complex_functions.inc	2007-11-22 12:38:12 UTC (rev 3567)
+++ trunk/scipy/sandbox/numexpr/complex_functions.inc	2007-11-23 07:20:02 UTC (rev 3568)
@@ -365,3 +365,34 @@
 	r->imag = (is*rc-rs*ic)/d;
 	return;
 }
+
+
+/* The next nowarn1() and nowarn2() are defined in order to avoid
+   compiler warnings about unused functions.  Just add to nowarn1() a
+   call to each function the compiler complains about to make it look
+   like it gets used somewhere.
+
+   Of course, you shouldn't directly invoke neither of these two
+   functions, since they cause a stack overflow. */
+
+void nowarn2(cdouble *x, cdouble *r, cdouble *b);
+
+void
+nowarn1(cdouble *x, cdouble *r, cdouble *b)
+{
+	nc_floor_quot(x, b, r);
+	nc_log1p(x, r);
+	nc_expm1(x, r);
+	nc_log10(x, r);
+	nc_asinh(x, r);
+	nc_acosh(x, r);
+	nc_atanh(x, r);
+	/* Append more calls here. */
+	nowarn2(x, r, b);
+}
+
+void
+nowarn2(cdouble *x, cdouble *r, cdouble *b)
+{
+	nowarn1(x, r, b);
+}

Modified: trunk/scipy/sandbox/numexpr/expressions.py
===================================================================
--- trunk/scipy/sandbox/numexpr/expressions.py	2007-11-22 12:38:12 UTC (rev 3567)
+++ trunk/scipy/sandbox/numexpr/expressions.py	2007-11-23 07:20:02 UTC (rev 3568)
@@ -6,7 +6,7 @@
 
 import numpy
 
-import interpreter
+from numexpr import interpreter
 
 class Expression(object):
     def __init__(self):
@@ -55,25 +55,53 @@
 
 def isConstant(ex):
     "Returns True if ex is a constant scalar of an allowed type."
-    return isinstance(ex, (bool, int, float, complex))
+    return isinstance(ex, (bool, int, long, float, complex, str))
 
-type_to_kind = {bool: 'bool', int: 'int', float: 'float', complex: 'complex'}
-kind_to_type = {'bool': bool, 'int': int, 'float': float, 'complex': complex}
-kind_rank = ['bool', 'int', 'float', 'complex', 'none']
+type_to_kind = {bool: 'bool', int: 'int', long: 'long', float: 'float', complex: 'complex', str: 'str'}
+kind_to_type = {'bool': bool, 'int': int, 'long': long, 'float': float, 'complex': complex, 'str': str}
+kind_rank = ['bool', 'int', 'long', 'float', 'complex', 'none']
 
 def commonKind(nodes):
+    node_kinds = [node.astKind for node in nodes]
+    str_count = node_kinds.count('str')
+    if 0 < str_count < len(node_kinds):  # some args are strings, but not all
+        raise TypeError("strings can only be operated with strings")
+    if str_count > 0:  # if there are some, all of them must be
+        return 'str'
     n = -1
     for x in nodes:
         n = max(n, kind_rank.index(x.astKind))
     return kind_rank[n]
 
+max_int32 = 2147483647
+min_int32 = -max_int32 - 1
 def bestConstantType(x):
-    for converter in bool, int, float, complex:
+    if isinstance(x, str):  # ``numpy.string_`` is a subclass of ``str``
+        return str
+    # ``long`` objects are kept as is to allow the user to force
+    # promotion of results by using long constants, e.g. by operating
+    # a 32-bit array with a long (64-bit) constant.
+    if isinstance(x, (long, numpy.int64)):
+        return long
+    # Numeric conversion to boolean values is not tried because
+    # ``bool(1) == True`` (same for 0 and False), so 0 and 1 would be
+    # interpreted as booleans when ``False`` and ``True`` are already
+    # supported.
+    if isinstance(x, (bool, numpy.bool_)):
+        return bool
+    # ``long`` is not explicitly needed since ``int`` automatically
+    # returns longs when needed (since Python 2.3).
+    for converter in int, float, complex:
         try:
             y = converter(x)
         except StandardError, err:
             continue
         if x == y:
+            # Constants needing more than 32 bits are always
+            # considered ``long``, *regardless of the platform*, so we
+            # can clearly tell 32- and 64-bit constants apart.
+            if converter is int and not (min_int32 <= x <= max_int32):
+                return long
             return converter
 
 def getKind(x):
@@ -94,7 +122,7 @@
             return OpNode(opname, (self, other), kind=kind)
     return operation
 
-def func(func, minkind=None):
+def func(func, minkind=None, maxkind=None):
     @ophelper
     def function(*args):
         if allConstantNodes(args):
@@ -102,6 +130,8 @@
         kind = commonKind(args)
         if minkind and kind_rank.index(minkind) > kind_rank.index(kind):
             kind = minkind
+        if maxkind and kind_rank.index(maxkind) < kind_rank.index(kind):
+            kind = maxkind
         return FuncNode(func.__name__, args, kind)
     return function
 
@@ -129,23 +159,17 @@
     axis = encode_axis(axis)
     if isinstance(a, ConstantNode):
         return a
-    if isinstance(a, (bool, int, float, complex)):
+    if isinstance(a, (bool, int, long, float, complex)):
         a = ConstantNode(a)
-    kind = a.astKind
-    if kind == 'bool':
-        kind = 'int'
-    return FuncNode('sum', [a, axis], kind=kind)
+    return FuncNode('sum', [a, axis], kind=a.astKind)
 
 def prod_func(a, axis=-1):
     axis = encode_axis(axis)
-    if isinstance(a, (bool, int, float, complex)):
+    if isinstance(a, (bool, int, long, float, complex)):
         a = ConstantNode(a)
     if isinstance(a, ConstantNode):
         return a
-    kind = a.astKind
-    if kind == 'bool':
-        kind = 'int'
-    return FuncNode('prod', [a, axis], kind=kind)
+    return FuncNode('prod', [a, axis], kind=a.astKind)
 
 @ophelper
 def div_op(a, b):
@@ -182,7 +206,7 @@
                     p = OpNode('mul', [p,p])
                 if ishalfpower:
                     kind = commonKind([a])
-                    if kind == 'int': kind = 'float'
+                    if kind in ('int', 'long'): kind = 'float'
                     r = multiply(r, OpNode('sqrt', [a], kind))
                 if r is None:
                     r = OpNode('ones_like', [a])
@@ -196,7 +220,7 @@
                 return FuncNode('ones_like', [a])
             if x == 0.5:
                 kind = a.astKind
-                if kind == 'int': kind = 'float'
+                if kind in ('int', 'long'): kind = 'float'
                 return FuncNode('sqrt', [a], kind=kind)
             if x == 1:
                 return a
@@ -224,6 +248,8 @@
 
     'where' : where_func,
 
+    'real': func(numpy.real, 'float', 'float'),
+    'imag': func(numpy.imag, 'float', 'float'),
     'complex' : func(complex, 'complex'),
 
     'sum' : sum_func,
@@ -306,7 +332,7 @@
 class VariableNode(LeafNode):
     astType = 'variable'
     def __init__(self, value=None, kind=None, children=None):
-        ExpressionNode.__init__(self, value=value, kind=kind)
+        LeafNode.__init__(self, value=value, kind=kind)
 
 class RawNode(object):
     """Used to pass raw integers to interpreter.

Modified: trunk/scipy/sandbox/numexpr/info.py
===================================================================
--- trunk/scipy/sandbox/numexpr/info.py	2007-11-22 12:38:12 UTC (rev 3567)
+++ trunk/scipy/sandbox/numexpr/info.py	2007-11-23 07:20:02 UTC (rev 3568)
@@ -2,25 +2,55 @@
 
 Usage:
 
-Build an expression up by using E.<variable> for the variables:
+The easiest way is to use the ``evaluate`` function:
 
->>> ex = 2.0 * E.a + 3 * E.b * E.c
+>>> a = numpy.array([1., 2, 3])
+>>> b = numpy.array([4, 5, 6])
+>>> c = numpy.array([7., 8, 9])
+>>> numexpr.evaluate("2.0 * a + 3 * b * c")
+array([  86.,  124.,  168.])
 
-then compile it to a function:
+This works for every datatype defined in NumPy and the next operators
+are supported:
 
->>> func = numexpr(ex, input_names=('a', 'b', 'c'))
+    Logical operators: &, |, ~
+    Comparison operators: <, <=, ==, !=, >=, >
+    Unary arithmetic operators: -
+    Binary arithmetic operators: +, -, *, /, **, %
 
-(if input_names is not given to compile, the variables are sort lexically.)
+Note: Types do not support all operators. Boolean values only support
+    logical and strict (in)equality comparison operators, while
+    strings only support comparisons, numbers do not work with logical
+    operators, and complex comparisons can only check for strict
+    (in)equality. Unsupported operations (including invalid castings)
+    raise NotImplementedError exceptions.
 
-Then, you can use that function for elementwise operations:
+The next functions are also supported:
 
->>> func(array([1., 2, 3]), array([4., 5, 6]), array([7., 8, 9]))
-array([  86.,  124.,  168.])
+    where(bool, number1, number2): number - number1 if the bool
+    condition is true, number2 otherwise.
 
-Currently, this is only implemented for arrays of float64, and only
-for the simple operations +, -, *, and /.
+    {sin,cos,tan}(float|complex): float|complex - trigonometric sinus,
+    cosinus or tangent.
 
-Copyright 2006 David M. Cooke <cookedm@physics.mcmaster.ca>
+    {arcsin,arccos,arctan}(float|complex): float|complex -
+    trigonometric inverse sinus, cosinus or tangent.
+
+    arctan2(float1, float2): float - trigonometric inverse tangent of
+    float1/float2.
+
+    {sinh,cosh,tanh}(float|complex): float|complex - hyperbolic sinus,
+    cosinus or tangent.
+
+    sqrt(float|complex): float|complex - square root.
+
+    {real,imag}(complex): float - real or imaginary part of complex.
+
+    complex(float, float): complex - complex from real and imaginary
+    parts.
+
+
+Copyright 2006,2007 David M. Cooke <cookedm@physics.mcmaster.ca>
 Licenced under a BSD-style license. See LICENSE.txt in the scipy source
 directory.
 """

Modified: trunk/scipy/sandbox/numexpr/interp_body.c
===================================================================
--- trunk/scipy/sandbox/numexpr/interp_body.c	2007-11-22 12:38:12 UTC (rev 3567)
+++ trunk/scipy/sandbox/numexpr/interp_body.c	2007-11-23 07:20:02 UTC (rev 3568)
@@ -8,9 +8,13 @@
     {                                           \
         char *dest = params.mem[store_in];      \
         char *x1 = params.mem[arg1];            \
+        intp ss1 = params.memsizes[arg1];       \
         intp sb1 = params.memsteps[arg1];       \
-        intp si1 = sb1 / sizeof(long);          \
-        intp sf1 = sb1 / sizeof(double);        \
+        /* nowarns is defined and used so as to \
+        avoid compiler warnings about unused    \
+        variables */                            \
+        intp nowarns = ss1+sb1+*x1;             \
+        nowarns += 1;                           \
         VEC_LOOP(expr);                         \
     } break
 #define VEC_ARG2(expr)                          \
@@ -20,14 +24,16 @@
     {                                           \
         char *dest = params.mem[store_in];      \
         char *x1 = params.mem[arg1];            \
+        intp ss1 = params.memsizes[arg1];       \
         intp sb1 = params.memsteps[arg1];       \
-        intp si1 = sb1 / sizeof(long);          \
-        intp sf1 = sb1 / sizeof(double);        \
+        /* nowarns is defined and used so as to \
+        avoid compiler warnings about unused    \
+        variables */                            \
+        intp nowarns = ss1+sb1+*x1;             \
         char *x2 = params.mem[arg2];            \
+        intp ss2 = params.memsizes[arg2];       \
         intp sb2 = params.memsteps[arg2];       \
-        intp si2 = sb2 / sizeof(long);          \
-        intp sf2 = sb2 / sizeof(double);        \
-        intp s2 = params.memsteps[arg2];        \
+        nowarns += ss2+sb2+*x2;                 \
         VEC_LOOP(expr);                         \
     } break
 
@@ -39,19 +45,20 @@
     {                                           \
         char *dest = params.mem[store_in];      \
         char *x1 = params.mem[arg1];            \
+        intp ss1 = params.memsizes[arg1];       \
         intp sb1 = params.memsteps[arg1];       \
-        intp si1 = sb1 / sizeof(long);          \
-        intp sf1 = sb1 / sizeof(double);        \
+        /* nowarns is defined and used so as to \
+        avoid compiler warnings about unused    \
+        variables */                            \
+        intp nowarns = ss1+sb1+*x1;             \
         char *x2 = params.mem[arg2];            \
-        intp s2 = params.memsteps[arg2];        \
+        intp ss2 = params.memsizes[arg2];       \
         intp sb2 = params.memsteps[arg2];       \
-        intp si2 = sb2 / sizeof(long);          \
-        intp sf2 = sb2 / sizeof(double);        \
         char *x3 = params.mem[arg3];            \
-        intp s3 = params.memsteps[arg3];        \
+        intp ss3 = params.memsizes[arg3];       \
         intp sb3 = params.memsteps[arg3];       \
-        intp si3 = sb3 / sizeof(long);          \
-        intp sf3 = sb3 / sizeof(double);        \
+        nowarns += ss2+sb2+*x2;                 \
+        nowarns += ss3+sb3+*x3;                 \
         VEC_LOOP(expr);                         \
     } break
 
@@ -82,6 +89,11 @@
             params.mem[1+r] = params.inputs[r] + index * params.memsteps[1+r];
         }
     }
+
+    /* WARNING: From now on, only do references to params.mem[arg[123]]
+       & params.memsteps[arg[123]] inside the VEC_ARG[123] macros,
+       or you will risk accessing invalid addresses.  */
+
     for (pc = 0; pc < params.prog_len; pc += 4) {
         unsigned char op = params.program[pc];
         unsigned int store_in = params.program[pc+1];
@@ -89,37 +101,41 @@
         unsigned int arg2 = params.program[pc+3];
         #define      arg3   params.program[pc+5]
         #define store_index params.index_data[store_in]
-        
-       /* WARNING: From now on, only do references to params.mem[arg[123]]
-         & params.memsteps[arg[123]] inside the VEC_ARG[123] macros,
-          or you will risk accessing invalid addresses.  */
-        
         #define reduce_ptr  (dest + flat_index(&store_index, j))
-        #define i_reduce    *(long *)reduce_ptr
+        #define i_reduce    *(int *)reduce_ptr
+        #define l_reduce    *(long long *)reduce_ptr
         #define f_reduce    *(double *)reduce_ptr
         #define cr_reduce   *(double *)ptr
         #define ci_reduce   *((double *)ptr+1)
         #define b_dest ((char *)dest)[j]
-        #define i_dest ((long *)dest)[j]
+        #define i_dest ((int *)dest)[j]
+        #define l_dest ((long long *)dest)[j]
         #define f_dest ((double *)dest)[j]
         #define cr_dest ((double *)dest)[2*j]
         #define ci_dest ((double *)dest)[2*j+1]
-        #define b1    ((char   *)x1)[j*sb1]
-        #define i1    ((long   *)x1)[j*si1]
-        #define f1    ((double *)x1)[j*sf1]
-        #define c1r   ((double *)x1)[j*sf1]
-        #define c1i   ((double *)x1)[j*sf1+1]
-        #define b2    ((char   *)x2)[j*sb2]
-        #define i2    ((long   *)x2)[j*si2]
-        #define f2    ((double *)x2)[j*sf2]
-        #define c2r   ((double *)x2)[j*sf2]
-        #define c2i   ((double *)x2)[j*sf2+1]
-        #define b3    ((char   *)x3)[j*sb3]
-        #define i3    ((long   *)x3)[j*si3]
-        #define f3    ((double *)x3)[j*sf3]
-        #define c3r   ((double *)x3)[j*sf3]
-        #define c3i   ((double *)x3)[j*sf3+1]
-
+        #define s_dest ((char *)dest + j*params.memsteps[store_in])
+        #define b1    ((char   *)(x1+j*sb1))[0]
+        #define i1    ((int    *)(x1+j*sb1))[0]
+        #define l1    ((long long *)(x1+j*sb1))[0]
+        #define f1    ((double *)(x1+j*sb1))[0]
+        #define c1r   ((double *)(x1+j*sb1))[0]
+        #define c1i   ((double *)(x1+j*sb1))[1]
+        #define s1    ((char   *)x1+j*sb1)
+        #define b2    ((char   *)(x2+j*sb2))[0]
+        #define i2    ((int    *)(x2+j*sb2))[0]
+        #define l2    ((long long *)(x2+j*sb2))[0]
+        #define f2    ((double *)(x2+j*sb2))[0]
+        #define c2r   ((double *)(x2+j*sb2))[0]
+        #define c2i   ((double *)(x2+j*sb2))[1]
+        #define s2    ((char   *)x2+j*sb2)
+        #define b3    ((char   *)(x3+j*sb3))[0]
+        #define i3    ((int    *)(x3+j*sb3))[0]
+        #define l3    ((long long *)(x3+j*sb3))[0]
+        #define f3    ((double *)(x3+j*sb3))[0]
+        #define c3r   ((double *)(x3+j*sb3))[0]
+        #define c3i   ((double *)(x3+j*sb3))[1]
+        #define s3    ((char   *)x3+j*sb3)
+	/* Some temporaries */
         double fa, fb;
         cdouble ca, cb;
         char *ptr;
@@ -129,27 +145,42 @@
         case OP_NOOP: break;
 
         case OP_COPY_BB: VEC_ARG1(b_dest = b1);
-        case OP_COPY_II: VEC_ARG1(i_dest = i1);
-        case OP_COPY_FF: VEC_ARG1(f_dest = f1);
-        case OP_COPY_CC: VEC_ARG1(cr_dest = c1r;
-                                  ci_dest = c1i);
+        case OP_COPY_SS: VEC_ARG1(memcpy(s_dest, s1, ss1));
+        /* The next versions of copy opcodes can cope with unaligned
+           data even on platforms that crash while accessing it
+           (like the Sparc architecture under Solaris). */
+        case OP_COPY_II: VEC_ARG1(memcpy(&i_dest, s1, sizeof(int)));
+        case OP_COPY_LL: VEC_ARG1(memcpy(&f_dest, s1, sizeof(long long)));
+        case OP_COPY_FF: VEC_ARG1(memcpy(&f_dest, s1, sizeof(double)));
+        case OP_COPY_CC: VEC_ARG1(memcpy(&cr_dest, s1, sizeof(double)*2));
 
         case OP_INVERT_BB: VEC_ARG1(b_dest = !b1);
+        case OP_AND_BBB: VEC_ARG2(b_dest = (b1 && b2));
+        case OP_OR_BBB: VEC_ARG2(b_dest = (b1 || b2));
 
-        case OP_AND_BBB: VEC_ARG2(b_dest = b1 && b2);
-        case OP_OR_BBB: VEC_ARG2(b_dest = b1 || b2);
+        case OP_EQ_BBB: VEC_ARG2(b_dest = (b1 == b2));
+        case OP_NE_BBB: VEC_ARG2(b_dest = (b1 != b2));
 
-        case OP_GT_BII: VEC_ARG2(b_dest = (i1 > i2) ? 1 : 0);
-        case OP_GE_BII: VEC_ARG2(b_dest = (i1 >= i2) ? 1 : 0);
-        case OP_EQ_BII: VEC_ARG2(b_dest = (i1 == i2) ? 1 : 0);
-        case OP_NE_BII: VEC_ARG2(b_dest = (i1 != i2) ? 1 : 0);
+        case OP_GT_BII: VEC_ARG2(b_dest = (i1 > i2));
+        case OP_GE_BII: VEC_ARG2(b_dest = (i1 >= i2));
+        case OP_EQ_BII: VEC_ARG2(b_dest = (i1 == i2));
+        case OP_NE_BII: VEC_ARG2(b_dest = (i1 != i2));
 
-        case OP_GT_BFF: VEC_ARG2(b_dest = (f1 > f2) ? 1 : 0);
-        case OP_GE_BFF: VEC_ARG2(b_dest = (f1 >= f2) ? 1 : 0);
-        case OP_EQ_BFF: VEC_ARG2(b_dest = (f1 == f2) ? 1 : 0);
-        case OP_NE_BFF: VEC_ARG2(b_dest = (f1 != f2) ? 1 : 0);
+        case OP_GT_BLL: VEC_ARG2(b_dest = (l1 > l2));
+        case OP_GE_BLL: VEC_ARG2(b_dest = (l1 >= l2));
+        case OP_EQ_BLL: VEC_ARG2(b_dest = (l1 == l2));
+        case OP_NE_BLL: VEC_ARG2(b_dest = (l1 != l2));
 
-        case OP_CAST_IB: VEC_ARG1(i_dest = (long)b1);
+        case OP_GT_BFF: VEC_ARG2(b_dest = (f1 > f2));
+        case OP_GE_BFF: VEC_ARG2(b_dest = (f1 >= f2));
+        case OP_EQ_BFF: VEC_ARG2(b_dest = (f1 == f2));
+        case OP_NE_BFF: VEC_ARG2(b_dest = (f1 != f2));
+
+        case OP_GT_BSS: VEC_ARG2(b_dest = (stringcmp(s1, s2, ss1, ss2) > 0));
+        case OP_GE_BSS: VEC_ARG2(b_dest = (stringcmp(s1, s2, ss1, ss2) >= 0));
+        case OP_EQ_BSS: VEC_ARG2(b_dest = (stringcmp(s1, s2, ss1, ss2) == 0));
+        case OP_NE_BSS: VEC_ARG2(b_dest = (stringcmp(s1, s2, ss1, ss2) != 0));
+
         case OP_ONES_LIKE_II: VEC_ARG1(i_dest = 1);
         case OP_NEG_II: VEC_ARG1(i_dest = -i1);
 
@@ -157,13 +188,26 @@
         case OP_SUB_III: VEC_ARG2(i_dest = i1 - i2);
         case OP_MUL_III: VEC_ARG2(i_dest = i1 * i2);
         case OP_DIV_III: VEC_ARG2(i_dest = i1 / i2);
-        case OP_POW_III: VEC_ARG2(i_dest = (i2 < 0) ? (1 / i1) : (long)pow(i1, i2));
+        case OP_POW_III: VEC_ARG2(i_dest = (i2 < 0) ? (1 / i1) : (int)pow(i1, i2));
         case OP_MOD_III: VEC_ARG2(i_dest = i1 % i2);
 
-        case OP_WHERE_IFII: VEC_ARG3(i_dest = f1 ? i2 : i3);
+        case OP_WHERE_IBII: VEC_ARG3(i_dest = b1 ? i2 : i3);
 
-        case OP_CAST_FB: VEC_ARG1(f_dest = (long)b1);
+        case OP_CAST_LI: VEC_ARG1(l_dest = (long long)(i1));
+        case OP_ONES_LIKE_LL: VEC_ARG1(l_dest = 1);
+        case OP_NEG_LL: VEC_ARG1(l_dest = -i1);
+
+        case OP_ADD_LLL: VEC_ARG2(l_dest = l1 + l2);
+        case OP_SUB_LLL: VEC_ARG2(l_dest = l1 - l2);
+        case OP_MUL_LLL: VEC_ARG2(l_dest = l1 * l2);
+        case OP_DIV_LLL: VEC_ARG2(l_dest = l1 / l2);
+        case OP_POW_LLL: VEC_ARG2(l_dest = (l2 < 0) ? (1 / l1) : (long long)pow(l1, l2));
+        case OP_MOD_LLL: VEC_ARG2(l_dest = l1 % l2);
+
+        case OP_WHERE_LBLL: VEC_ARG3(l_dest = b1 ? l2 : l3);
+
         case OP_CAST_FI: VEC_ARG1(f_dest = (double)(i1));
+        case OP_CAST_FL: VEC_ARG1(f_dest = (double)(l1));
         case OP_ONES_LIKE_FF: VEC_ARG1(f_dest = 1.0);
         case OP_NEG_FF: VEC_ARG1(f_dest = -f1);
 
@@ -180,15 +224,15 @@
         case OP_SQRT_FF: VEC_ARG1(f_dest = sqrt(f1));
         case OP_ARCTAN2_FFF: VEC_ARG2(f_dest = atan2(f1, f2));
 
-        case OP_WHERE_FFFF: VEC_ARG3(f_dest = f1 ? f2 : f3);
+        case OP_WHERE_FBFF: VEC_ARG3(f_dest = b1 ? f2 : f3);
 
         case OP_FUNC_FF: VEC_ARG1(f_dest = functions_f[arg2](f1));
         case OP_FUNC_FFF: VEC_ARG2(f_dest = functions_ff[arg3](f1, f2));
 
-        case OP_CAST_CB: VEC_ARG1(cr_dest = (double)b1;
-                                  ci_dest = 0);
         case OP_CAST_CI: VEC_ARG1(cr_dest = (double)(i1);
                                   ci_dest = 0);
+        case OP_CAST_CL: VEC_ARG1(cr_dest = (double)(l1);
+                                  ci_dest = 0);
         case OP_CAST_CF: VEC_ARG1(cr_dest = f1;
                                   ci_dest = 0);
         case OP_ONES_LIKE_CC: VEC_ARG1(cr_dest = 1;
@@ -208,11 +252,11 @@
                                   ci_dest = (c1i*c2r - c1r*c2i) / fa;
                                   cr_dest = fb);
 
-        case OP_EQ_BCC: VEC_ARG2(b_dest = (c1r == c2r && c1i == c2i) ? 1 : 0);
-        case OP_NE_BCC: VEC_ARG2(b_dest = (c1r != c2r || c1i != c2i) ? 1 : 0);
+        case OP_EQ_BCC: VEC_ARG2(b_dest = (c1r == c2r && c1i == c2i));
+        case OP_NE_BCC: VEC_ARG2(b_dest = (c1r != c2r || c1i != c2i));
 
-        case OP_WHERE_CFCC: VEC_ARG3(cr_dest = f1 ? c2r : c3r;
-                                     ci_dest = f1 ? c2i : c3i);
+        case OP_WHERE_CBCC: VEC_ARG3(cr_dest = b1 ? c2r : c3r;
+                                     ci_dest = b1 ? c2i : c3i);
         case OP_FUNC_CC: VEC_ARG1(ca.real = c1r;
                                   ca.imag = c1i;
                                   functions_cc[arg2](&ca, &ca);
@@ -232,12 +276,14 @@
                                       ci_dest = f2);
 
         case OP_SUM_IIN: VEC_ARG1(i_reduce += i1);
+        case OP_SUM_LLN: VEC_ARG1(l_reduce += l1);
         case OP_SUM_FFN: VEC_ARG1(f_reduce += f1);
         case OP_SUM_CCN: VEC_ARG1(ptr = reduce_ptr;
                                   cr_reduce += c1r;
                                   ci_reduce += c1i);
 
         case OP_PROD_IIN: VEC_ARG1(i_reduce *= i1);
+        case OP_PROD_LLN: VEC_ARG1(l_reduce *= l1);
         case OP_PROD_FFN: VEC_ARG1(f_reduce *= f1);
         case OP_PROD_CCN: VEC_ARG1(ptr = reduce_ptr;
                                    fa = cr_reduce*c1r - ci_reduce*c1i;
@@ -258,22 +304,30 @@
 
 #undef b_dest
 #undef i_dest
+#undef l_dest
 #undef f_dest
 #undef cr_dest
 #undef ci_dest
+#undef s_dest
 #undef b1
 #undef i1
+#undef l1
 #undef f1
 #undef c1r
 #undef c1i
+#undef s1
 #undef b2
 #undef i2
+#undef l2
 #undef f2
 #undef c2r
 #undef c2i
+#undef s2
 #undef b3
 #undef i3
+#undef l3
 #undef f3
 #undef c3r
 #undef c3i
+#undef s3
 }

Modified: trunk/scipy/sandbox/numexpr/interpreter.c
===================================================================
--- trunk/scipy/sandbox/numexpr/interpreter.c	2007-11-22 12:38:12 UTC (rev 3567)
+++ trunk/scipy/sandbox/numexpr/interpreter.c	2007-11-23 07:20:02 UTC (rev 3568)
@@ -2,6 +2,8 @@
 #include "structmember.h"
 #include "numpy/noprefix.h"
 #include "math.h"
+#include "string.h"
+#include "assert.h"
 
 #include "complex_functions.inc"
 
@@ -25,17 +27,29 @@
     OP_AND_BBB,
     OP_OR_BBB,
 
+    OP_EQ_BBB,
+    OP_NE_BBB,
+
     OP_GT_BII,
     OP_GE_BII,
     OP_EQ_BII,
     OP_NE_BII,
 
+    OP_GT_BLL,
+    OP_GE_BLL,
+    OP_EQ_BLL,
+    OP_NE_BLL,
+
     OP_GT_BFF,
     OP_GE_BFF,
     OP_EQ_BFF,
     OP_NE_BFF,
 
-    OP_CAST_IB,
+    OP_GT_BSS,
+    OP_GE_BSS,
+    OP_EQ_BSS,
+    OP_NE_BSS,
+
     OP_COPY_II,
     OP_ONES_LIKE_II,
     OP_NEG_II,
@@ -45,10 +59,22 @@
     OP_DIV_III,
     OP_POW_III,
     OP_MOD_III,
-    OP_WHERE_IFII,
+    OP_WHERE_IBII,
 
-    OP_CAST_FB,
+    OP_CAST_LI,
+    OP_COPY_LL,
+    OP_ONES_LIKE_LL,
+    OP_NEG_LL,
+    OP_ADD_LLL,
+    OP_SUB_LLL,
+    OP_MUL_LLL,
+    OP_DIV_LLL,
+    OP_POW_LLL,
+    OP_MOD_LLL,
+    OP_WHERE_LBLL,
+
     OP_CAST_FI,
+    OP_CAST_FL,
     OP_COPY_FF,
     OP_ONES_LIKE_FF,
     OP_NEG_FF,
@@ -63,15 +89,15 @@
     OP_TAN_FF,
     OP_SQRT_FF,
     OP_ARCTAN2_FFF,
-    OP_WHERE_FFFF,
+    OP_WHERE_FBFF,
     OP_FUNC_FF,
     OP_FUNC_FFF,
 
     OP_EQ_BCC,
     OP_NE_BCC,
 
-    OP_CAST_CB,
     OP_CAST_CI,
+    OP_CAST_CL,
     OP_CAST_CF,
     OP_ONES_LIKE_CC,
     OP_COPY_CC,
@@ -80,7 +106,7 @@
     OP_SUB_CCC,
     OP_MUL_CCC,
     OP_DIV_CCC,
-    OP_WHERE_CFCC,
+    OP_WHERE_CBCC,
     OP_FUNC_CC,
     OP_FUNC_CCC,
 
@@ -88,15 +114,19 @@
     OP_IMAG_FC,
     OP_COMPLEX_CFF,
 
+    OP_COPY_SS,
+
     OP_REDUCTION,
 
     OP_SUM,
     OP_SUM_IIN,
+    OP_SUM_LLN,
     OP_SUM_FFN,
     OP_SUM_CCN,
 
     OP_PROD,
     OP_PROD_IIN,
+    OP_PROD_LLN,
     OP_PROD_FFN,
     OP_PROD_CCN
 
@@ -116,6 +146,8 @@
             break;
         case OP_AND_BBB:
         case OP_OR_BBB:
+        case OP_EQ_BBB:
+        case OP_NE_BBB:
             if (n == 0 || n == 1 || n == 2) return 'b';
             break;
         case OP_GT_BII:
@@ -125,6 +157,13 @@
             if (n == 0) return 'b';
             if (n == 1 || n == 2) return 'i';
             break;
+        case OP_GT_BLL:
+        case OP_GE_BLL:
+        case OP_EQ_BLL:
+        case OP_NE_BLL:
+            if (n == 0) return 'b';
+            if (n == 1 || n == 2) return 'l';
+            break;
         case OP_GT_BFF:
         case OP_GE_BFF:
         case OP_EQ_BFF:
@@ -132,9 +171,12 @@
             if (n == 0) return 'b';
             if (n == 1 || n == 2) return 'f';
             break;
-        case OP_CAST_IB:
-            if (n == 0) return 'i';
-            if (n == 1) return 'b';
+        case OP_GT_BSS:
+        case OP_GE_BSS:
+        case OP_EQ_BSS:
+        case OP_NE_BSS:
+            if (n == 0) return 'b';
+            if (n == 1 || n == 2) return 's';
             break;
         case OP_COPY_II:
         case OP_ONES_LIKE_II:
@@ -149,18 +191,39 @@
         case OP_POW_III:
             if (n == 0 || n == 1 || n == 2) return 'i';
             break;
-        case OP_WHERE_IFII:
+        case OP_WHERE_IBII:
             if (n == 0 || n == 2 || n == 3) return 'i';
-            if (n == 1) return 'f';
+            if (n == 1) return 'b';
             break;
-        case OP_CAST_FB:
-            if (n == 0) return 'f';
+        case OP_CAST_LI:
+            if (n == 0) return 'l';
+            if (n == 1) return 'i';
+            break;
+        case OP_COPY_LL:
+        case OP_ONES_LIKE_LL:
+        case OP_NEG_LL:
+            if (n == 0 || n == 1) return 'l';
+            break;
+        case OP_ADD_LLL:
+        case OP_SUB_LLL:
+        case OP_MUL_LLL:
+        case OP_DIV_LLL:
+        case OP_MOD_LLL:
+        case OP_POW_LLL:
+            if (n == 0 || n == 1 || n == 2) return 'l';
+            break;
+        case OP_WHERE_LBLL:
+            if (n == 0 || n == 2 || n == 3) return 'l';
             if (n == 1) return 'b';
             break;
         case OP_CAST_FI:
             if (n == 0) return 'f';
             if (n == 1) return 'i';
             break;
+        case OP_CAST_FL:
+            if (n == 0) return 'f';
+            if (n == 1) return 'l';
+            break;
         case OP_COPY_FF:
         case OP_ONES_LIKE_FF:
         case OP_NEG_FF:
@@ -179,8 +242,9 @@
         case OP_ARCTAN2_FFF:
             if (n == 0 || n == 1 || n == 2) return 'f';
             break;
-        case OP_WHERE_FFFF:
-            if (n == 0 || n == 1 || n == 2 || n == 3) return 'f';
+        case OP_WHERE_FBFF:
+            if (n == 0 || n == 2 || n == 3) return 'f';
+            if (n == 1) return 'b';
             break;
         case OP_FUNC_FF:
             if (n == 0 || n == 1) return 'f';
@@ -195,14 +259,14 @@
             if (n == 0) return 'b';
             if (n == 1 || n == 2) return 'c';
             break;
-        case OP_CAST_CB:
-            if (n == 0) return 'c';
-            if (n == 1) return 'b';
-            break;
         case OP_CAST_CI:
             if (n == 0) return 'c';
             if (n == 1) return 'i';
             break;
+        case OP_CAST_CL:
+            if (n == 0) return 'c';
+            if (n == 1) return 'l';
+            break;
         case OP_CAST_CF:
             if (n == 0) return 'c';
             if (n == 1) return 'f';
@@ -218,9 +282,9 @@
         case OP_DIV_CCC:
             if (n == 0 || n == 1 || n == 2) return 'c';
             break;
-        case OP_WHERE_CFCC:
+        case OP_WHERE_CBCC:
             if (n == 0 || n == 2 || n == 3) return 'c';
-            if (n == 1) return 'f';
+            if (n == 1) return 'b';
             break;
         case OP_FUNC_CC:
             if (n == 0 || n == 1) return 'c';
@@ -239,11 +303,19 @@
             if (n == 0) return 'c';
             if (n == 1 || n == 2) return 'f';
             break;
+        case OP_COPY_SS:
+            if (n == 0 || n == 1) return 's';
+            break;
         case OP_PROD_IIN:
         case OP_SUM_IIN:
             if (n == 0 || n == 1) return 'i';
             if (n == 2) return 'n';
             break;
+        case OP_PROD_LLN:
+        case OP_SUM_LLN:
+            if (n == 0 || n == 1) return 'l';
+            if (n == 2) return 'n';
+            break;
         case OP_PROD_FFN:
         case OP_SUM_FFN:
             if (n == 0 || n == 1) return 'f';
@@ -383,6 +455,7 @@
     char **mem;             /* pointers to registers */
     char *rawmem;           /* a chunks of raw memory for storing registers */
     intp *memsteps;
+    intp *memsizes;
     int  rawmemsize;
 } NumExprObject;
 
@@ -399,6 +472,7 @@
     PyMem_Del(self->mem);
     PyMem_Del(self->rawmem);
     PyMem_Del(self->memsteps);
+    PyMem_Del(self->memsizes);
     self->ob_type->tp_free((PyObject*)self);
 }
 
@@ -425,6 +499,7 @@
         self->mem = NULL;
         self->rawmem = NULL;
         self->memsteps = NULL;
+        self->memsizes = NULL;
         self->rawmemsize = 0;
 #undef INIT_WITH
     }
@@ -454,11 +529,13 @@
 {
     switch (c) {
         case 'b': return sizeof(char);
-        case 'i': return sizeof(long);
+        case 'i': return sizeof(int);
+        case 'l': return sizeof(long long);
         case 'f': return sizeof(double);
         case 'c': return 2*sizeof(double);
+        case 's': return 0;  /* strings are ok but size must be computed */
         default:
-            PyErr_SetString(PyExc_TypeError, "signature value not in 'bifc'");
+            PyErr_SetString(PyExc_TypeError, "signature value not in 'bilfcs'");
             return -1;
     }
 }
@@ -482,11 +559,13 @@
 {
     switch (c) {
         case 'b': return PyArray_BOOL;
-        case 'i': return PyArray_LONG;
+        case 'i': return PyArray_INT;
+        case 'l': return PyArray_LONGLONG;
         case 'f': return PyArray_DOUBLE;
         case 'c': return PyArray_CDOUBLE;
+        case 's': return PyArray_STRING;
         default:
-            PyErr_SetString(PyExc_TypeError, "signature value not in 'ifc'");
+            PyErr_SetString(PyExc_TypeError, "signature value not in 'bilfcs'");
             return -1;
     }
 }
@@ -502,7 +581,6 @@
 
 static int
 get_reduction_axis(PyObject* program) {
-    char last_opcode, sig;
     int end = PyString_Size(program);
     int axis = ((unsigned char *)PyString_AS_STRING(program))[end-1];
     if (axis != 255 && axis >= MAX_DIMS)
@@ -579,7 +657,7 @@
             }
             arg = program[argloc];
 
-            if (sig != 'n' && (arg >= n_buffers) || (arg < 0)) {
+            if (sig != 'n' && ((arg >= n_buffers) || (arg < 0))) {
                 PyErr_Format(PyExc_RuntimeError, "invalid program: buffer out of range (%i) at %i", arg, argloc);
                 return -1;
             }
@@ -630,8 +708,10 @@
     PyObject *signature = NULL, *tempsig = NULL, *constsig = NULL;
     PyObject *fullsig = NULL, *program = NULL, *constants = NULL;
     PyObject *input_names = NULL, *o_constants = NULL;
+    int *itemsizes = NULL;
     char **mem = NULL, *rawmem = NULL;
     intp *memsteps;
+    intp *memsizes;
     int rawmemsize;
     static char *kwlist[] = {"signature", "tempsig",
                              "program",  "constants",
@@ -660,33 +740,53 @@
             Py_DECREF(constants);
             return -1;
         }
+        if (!(itemsizes = PyMem_New(int, n_constants))) {
+            Py_DECREF(constants);
+            return -1;
+        }
         for (i = 0; i < n_constants; i++) {
             PyObject *o;
             if (!(o = PySequence_GetItem(o_constants, i))) { /* new reference */
                 Py_DECREF(constants);
                 Py_DECREF(constsig);
+                PyMem_Del(itemsizes);
                 return -1;
             }
             PyTuple_SET_ITEM(constants, i, o); /* steals reference */
             if (PyBool_Check(o)) {
                 PyString_AS_STRING(constsig)[i] = 'b';
+                itemsizes[i] = size_from_char('b');
                 continue;
             }
             if (PyInt_Check(o)) {
                 PyString_AS_STRING(constsig)[i] = 'i';
+                itemsizes[i] = size_from_char('i');
                 continue;
             }
+            if (PyLong_Check(o)) {
+                PyString_AS_STRING(constsig)[i] = 'l';
+                itemsizes[i] = size_from_char('l');
+                continue;
+            }
             if (PyFloat_Check(o)) {
                 PyString_AS_STRING(constsig)[i] = 'f';
+                itemsizes[i] = size_from_char('f');
                 continue;
             }
             if (PyComplex_Check(o)) {
                 PyString_AS_STRING(constsig)[i] = 'c';
+                itemsizes[i] = size_from_char('c');
                 continue;
             }
-            PyErr_SetString(PyExc_TypeError, "constants must be of type int/float/complex");
+            if (PyString_Check(o)) {
+                PyString_AS_STRING(constsig)[i] = 's';
+                itemsizes[i] = PyString_GET_SIZE(o);
+                continue;
+            }
+            PyErr_SetString(PyExc_TypeError, "constants must be of type bool/int/long/float/complex/str");
             Py_DECREF(constsig);
             Py_DECREF(constants);
+            PyMem_Del(itemsizes);
             return -1;
         }
     } else {
@@ -705,23 +805,34 @@
     if (!fullsig) {
         Py_DECREF(constants);
         Py_DECREF(constsig);
+        PyMem_Del(itemsizes);
+        return -1;
     }
 
     if (!input_names) {
         input_names = Py_None;
     }
 
-    rawmemsize = BLOCK_SIZE1 * (size_from_sig(constsig) + size_from_sig(tempsig));
+    /* Compute the size of registers. */
+    rawmemsize = 0;
+    for (i = 0; i < n_constants; i++)
+        rawmemsize += itemsizes[i];
+    rawmemsize += size_from_sig(tempsig);  /* no string temporaries */
+    rawmemsize *= BLOCK_SIZE1;
+
     mem = PyMem_New(char *, 1 + n_inputs + n_constants + n_temps);
     rawmem = PyMem_New(char, rawmemsize);
-    memsteps = PyMem_New(int, 1 + n_inputs + n_constants + n_temps);
-    if (!mem || !rawmem || !memsteps) {
+    memsteps = PyMem_New(intp, 1 + n_inputs + n_constants + n_temps);
+    memsizes = PyMem_New(intp, 1 + n_inputs + n_constants + n_temps);
+    if (!mem || !rawmem || !memsteps || !memsizes) {
         Py_DECREF(constants);
         Py_DECREF(constsig);
         Py_DECREF(fullsig);
+        PyMem_Del(itemsizes);
         PyMem_Del(mem);
         PyMem_Del(rawmem);
         PyMem_Del(memsteps);
+        PyMem_Del(memsizes);
         return -1;
     }
     /*
@@ -734,10 +845,10 @@
     mem_offset = 0;
     for (i = 0; i < n_constants; i++) {
         char c = PyString_AS_STRING(constsig)[i];
-        int size = size_from_char(c);
+        int size = itemsizes[i];
         mem[i+n_inputs+1] = rawmem + mem_offset;
         mem_offset += BLOCK_SIZE1 * size;
-        memsteps[i+n_inputs+1] = size;
+        memsteps[i+n_inputs+1] = memsizes[i+n_inputs+1] = size;
         /* fill in the constants */
         if (c == 'b') {
             char *bmem = (char*)mem[i+n_inputs+1];
@@ -746,11 +857,17 @@
                 bmem[j] = value;
             }
         } else if (c == 'i') {
-            long *imem = (long*)mem[i+n_inputs+1];
-            long value = PyInt_AS_LONG(PyTuple_GET_ITEM(constants, i));
+            int *imem = (int*)mem[i+n_inputs+1];
+            int value = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(constants, i));
             for (j = 0; j < BLOCK_SIZE1; j++) {
                 imem[j] = value;
             }
+        } else if (c == 'l') {
+            long long *lmem = (long long*)mem[i+n_inputs+1];
+            long long value = PyLong_AsLongLong(PyTuple_GET_ITEM(constants, i));
+            for (j = 0; j < BLOCK_SIZE1; j++) {
+                lmem[j] = value;
+            }
         } else if (c == 'f') {
             double *dmem = (double*)mem[i+n_inputs+1];
             double value = PyFloat_AS_DOUBLE(PyTuple_GET_ITEM(constants, i));
@@ -764,14 +881,32 @@
                 cmem[j] = value.real;
                 cmem[j+1] = value.imag;
             }
+        } else if (c == 's') {
+            char *smem = (char*)mem[i+n_inputs+1];
+            char *value = PyString_AS_STRING(PyTuple_GET_ITEM(constants, i));
+            for (j = 0; j < size*BLOCK_SIZE1; j+=size) {
+                memcpy(smem + j, value, size);
+            }
         }
     }
+    /* This is no longer needed since no unusual item sizes appear
+       in temporaries (there are no string temporaries). */
+    PyMem_Del(itemsizes);
+
     /* Fill in 'mem' for temps */
     for (i = 0; i < n_temps; i++) {
-        int size = size_from_char(PyString_AS_STRING(tempsig)[i]);
+        char c = PyString_AS_STRING(tempsig)[i];
+        int size = size_from_char(c);
+        /* XXX: This check is quite useless, since using a string temporary
+           still causes a crash when freeing rawmem.  Why? */
+        if (c == 's') {
+            PyErr_SetString(PyExc_NotImplementedError,
+                            "string temporaries are not supported");
+            break;
+        }
         mem[i+n_inputs+n_constants+1] = rawmem + mem_offset;
         mem_offset += BLOCK_SIZE1 * size;
-        memsteps[i+n_inputs+n_constants+1] = size;
+        memsteps[i+n_inputs+n_constants+1] = memsizes[i+n_inputs+n_constants+1] = size;
     }
     /* See if any errors occured (e.g., in size_from_char) or if mem_offset is wrong */
     if (PyErr_Occurred() || mem_offset != rawmemsize) {
@@ -784,6 +919,7 @@
         PyMem_Del(mem);
         PyMem_Del(rawmem);
         PyMem_Del(memsteps);
+        PyMem_Del(memsizes);
         return -1;
     }
 
@@ -805,6 +941,7 @@
     REPLACE_MEM(mem);
     REPLACE_MEM(rawmem);
     REPLACE_MEM(memsteps);
+    REPLACE_MEM(memsizes);
     self->rawmemsize = rawmemsize;
 
     #undef REPLACE_OBJ
@@ -832,8 +969,8 @@
     int count;
     int size;
     int findex;
-    int *shape;
-    int *strides;
+    intp *shape;
+    intp *strides;
     int *index;
     char *buffer;
 };
@@ -847,6 +984,7 @@
     char **inputs;
     char **mem;
     intp *memsteps;
+    intp *memsizes;
     struct index_data *index_data;
 };
 
@@ -886,6 +1024,26 @@
 #define BOUNDS_CHECK(arg)
 #endif
 
+int
+stringcmp(const char *s1, const char *s2, intp maxlen1, intp maxlen2)
+{
+    intp maxlen, nextpos;
+    /* Point to this when the end of a string is found,
+       to simulate infinte trailing NUL characters. */
+    const char null = 0;
+
+    maxlen = (maxlen1 > maxlen2) ? maxlen1 : maxlen2;
+    for (nextpos = 1;  nextpos <= maxlen;  nextpos++) {
+        if (*s1 < *s2)
+            return -1;
+        if (*s1 > *s2)
+            return +1;
+        s1 = (nextpos >= maxlen1) ? &null : s1+1;
+        s2 = (nextpos >= maxlen2) ? &null : s2+1;
+    }
+    return 0;
+}
+
 static inline int
 vm_engine_1(int start, int blen, struct vm_params params, int *pc_error)
 {
@@ -943,6 +1101,7 @@
     params.index_data = index_data;
     params.mem = self->mem;
     params.memsteps = self->memsteps;
+    params.memsizes = self->memsizes;
     params.r_end = PyString_Size(self->fullsig);
     blen1 = len - len % BLOCK_SIZE1;
     r = vm_engine_1(0, blen1, params, pc_error);
@@ -966,7 +1125,7 @@
     PyObject *output = NULL, *a_inputs = NULL;
     struct index_data *inddata = NULL;
     unsigned int n_inputs, n_dimensions = 0;
-    int shape[MAX_DIMS];
+    intp shape[MAX_DIMS];
     int i, j, size, r, pc_error;
     char **inputs = NULL;
     intp strides[MAX_DIMS]; /* clean up XXX */
@@ -1004,7 +1163,12 @@
         int typecode = typecode_from_char(c);
         if (typecode == -1) goto cleanup_and_exit;
         /* Convert it just in case of a non-swapped array */
-        a = PyArray_FROM_OTF(o, typecode, NOTSWAPPED);
+        if (!PyArray_Check(o) || PyArray_TYPE(o) != PyArray_STRING) {
+            a = PyArray_FROM_OTF(o, typecode, NOTSWAPPED);
+        } else {
+            Py_INCREF(PyArray_DESCR(o));  /* typecode is not enough */
+            a = PyArray_FromAny(o, PyArray_DESCR(o), 0, 0, NOTSWAPPED, NULL);
+        }
         if (!a) goto cleanup_and_exit;
         PyTuple_SET_ITEM(a_inputs, i, a);  /* steals reference */
         if (PyArray_NDIM(a) > n_dimensions)
@@ -1042,7 +1206,7 @@
     for (i = 0; i < n_inputs; i++) {
         PyObject *a = PyTuple_GET_ITEM(a_inputs, i);
         PyObject *b;
-        int strides[MAX_DIMS];
+        intp strides[MAX_DIMS];
         int delta = n_dimensions - PyArray_NDIM(a);
         if (PyArray_NDIM(a)) {
             for (j = 0; j < n_dimensions; j++)
@@ -1071,15 +1235,25 @@
         if (PyArray_NDIM(a) == 0) {
             /* Broadcast scalars */
             intp dims[1] = {BLOCK_SIZE1};
-            b = PyArray_SimpleNew(1, dims, typecode);
+            Py_INCREF(PyArray_DESCR(a));
+            b = PyArray_SimpleNewFromDescr(1, dims, PyArray_DESCR(a));
             if (!b) goto cleanup_and_exit;
             self->memsteps[i+1] = 0;
+            self->memsizes[i+1] = PyArray_ITEMSIZE(a);
             PyTuple_SET_ITEM(a_inputs, i+2*n_inputs, b);  /* steals reference */
             inputs[i] = PyArray_DATA(b);
-            if (typecode == PyArray_LONG) {
-                long value = ((long*)PyArray_DATA(a))[0];
+            if (typecode == PyArray_BOOL) {
+                char value = ((char*)PyArray_DATA(a))[0];
                 for (j = 0; j < BLOCK_SIZE1; j++)
-                    ((long*)PyArray_DATA(b))[j] = value;
+                    ((char*)PyArray_DATA(b))[j] = value;
+            } else if (typecode == PyArray_INT) {
+                int value = ((int*)PyArray_DATA(a))[0];
+                for (j = 0; j < BLOCK_SIZE1; j++)
+                    ((int*)PyArray_DATA(b))[j] = value;
+            } else if (typecode == PyArray_LONGLONG) {
+                long long value = ((long long*)PyArray_DATA(a))[0];
+                for (j = 0; j < BLOCK_SIZE1; j++)
+                    ((long long*)PyArray_DATA(b))[j] = value;
             } else if (typecode == PyArray_DOUBLE) {
                 double value = ((double*)PyArray_DATA(a))[0];
                 for (j = 0; j < BLOCK_SIZE1; j++)
@@ -1091,17 +1265,25 @@
                     ((double*)PyArray_DATA(b))[j] = rvalue;
                     ((double*)PyArray_DATA(b))[j+1] = ivalue;
                 }
+            } else if (typecode == PyArray_STRING) {
+                int itemsize = PyArray_ITEMSIZE(a);
+                char *value = (char*)(PyArray_DATA(a));
+                for (j = 0; j < itemsize*BLOCK_SIZE1; j+=itemsize)
+                    memcpy((char*)(PyArray_DATA(b)) + j, value, itemsize);
             } else {
                 PyErr_SetString(PyExc_RuntimeError, "illegal typecode value");
                 goto cleanup_and_exit;
             }
         } else {
-            PyObject *origA = a;
-            int inner_size = -1;
-            /* Check array is contiguous */
-            for (j = PyArray_NDIM(a)-1; j >= 0; j--) {
-                if ((inner_size == -1 && PyArray_STRIDE(a, j) % PyArray_ITEMSIZE(a)) ||
-                    (inner_size != -1 && PyArray_STRIDE(a, j) != inner_size)) {
+            /* Check that discontiguous strides appear only on the last
+               dimension. If not, the arrays should be copied.
+               Furthermore, such arrays can appear when doing
+               broadcasting above, so this check really needs to be
+               here, and not in Python space. */
+            intp inner_size;
+            for (j = PyArray_NDIM(a)-2; j >= 0; j--) {
+                inner_size = PyArray_STRIDE(a, j) * PyArray_DIM(a, j);
+                if (PyArray_STRIDE(a, j+1) != inner_size) {
                     intp dims[1] = {BLOCK_SIZE1};
                     inddata[i+1].count = PyArray_NDIM(a);
                     inddata[i+1].findex = -1;
@@ -1112,20 +1294,24 @@
                     inddata[i+1].index = PyMem_New(int, inddata[i+1].count);
                     for (j = 0; j < inddata[i+1].count; j++)
                         inddata[i+1].index[j] = 0;
-                    a = PyArray_SimpleNew(1, dims, typecode);
-                    PyTuple_SET_ITEM(a_inputs, i+2*n_inputs, a);  /* steals reference */
+                    Py_INCREF(PyArray_DESCR(a));
+                    a = PyArray_SimpleNewFromDescr(1, dims, PyArray_DESCR(a));
+                    /* steals reference below */
+                    PyTuple_SET_ITEM(a_inputs, i+2*n_inputs, a);
                     break;
                 }
-                inner_size = PyArray_STRIDE(a, j) * PyArray_DIM(a, j);
             }
 
             self->memsteps[i+1] = PyArray_STRIDE(a, PyArray_NDIM(a)-1);
+            self->memsizes[i+1] = PyArray_ITEMSIZE(a);
             inputs[i] = PyArray_DATA(a);
 
         }
     }
 
     if (last_opcode(self->program) > OP_REDUCTION) {
+        /* A reduction can not result in a string,
+           so we don't need to worry about item sizes here. */
         char retsig = get_return_sig(self->program);
         int axis = get_reduction_axis(self->program);
         self->memsteps[0] = 0; /*size_from_char(retsig);*/
@@ -1188,10 +1374,26 @@
     }
     else {
         char retsig = get_return_sig(self->program);
-        self->memsteps[0] = size_from_char(retsig);
-        output = PyArray_SimpleNew(n_dimensions,
-                                   shape,
-                                   typecode_from_char(retsig));
+        if (retsig != 's') {
+            self->memsteps[0] = self->memsizes[0] = size_from_char(retsig);
+            output = PyArray_SimpleNew(
+                n_dimensions, shape, typecode_from_char(retsig));
+        } else {
+            /* Since the *only* supported operation returning a string
+             * is a copy, the size of returned strings
+             * can be directly gotten from the first (and only)
+             * input/constant/temporary. */
+            PyArray_Descr *descr;
+            if (n_inputs > 0) {  /* input, like in 'a' where a -> 'foo' */
+                descr = PyArray_DESCR(PyTuple_GET_ITEM(a_inputs, 1));
+            Py_INCREF(descr);
+            } else {  /* constant, like in '"foo"' */
+                descr = PyArray_DescrFromType(PyArray_STRING);
+                descr->elsize = self->memsizes[1];
+            }  /* no string temporaries, so no third case  */
+            self->memsteps[0] = self->memsizes[0] = self->memsizes[1];
+            output = PyArray_SimpleNewFromDescr(n_dimensions, shape, descr);
+        }
         if (!output) goto cleanup_and_exit;
     }
 
@@ -1311,17 +1513,30 @@
     add_op("invert_bb", OP_INVERT_BB);
     add_op("and_bbb", OP_AND_BBB);
     add_op("or_bbb", OP_OR_BBB);
+
+    add_op("eq_bbb", OP_EQ_BBB);
+    add_op("ne_bbb", OP_NE_BBB);
+
     add_op("gt_bii", OP_GT_BII);
     add_op("ge_bii", OP_GE_BII);
     add_op("eq_bii", OP_EQ_BII);
     add_op("ne_bii", OP_NE_BII);
 
+    add_op("gt_bll", OP_GT_BLL);
+    add_op("ge_bll", OP_GE_BLL);
+    add_op("eq_bll", OP_EQ_BLL);
+    add_op("ne_bll", OP_NE_BLL);
+
     add_op("gt_bff", OP_GT_BFF);
     add_op("ge_bff", OP_GE_BFF);
     add_op("eq_bff", OP_EQ_BFF);
     add_op("ne_bff", OP_NE_BFF);
 
-    add_op("cast_ib", OP_CAST_IB);
+    add_op("gt_bss", OP_GT_BSS);
+    add_op("ge_bss", OP_GE_BSS);
+    add_op("eq_bss", OP_EQ_BSS);
+    add_op("ne_bss", OP_NE_BSS);
+
     add_op("ones_like_ii", OP_ONES_LIKE_II);
     add_op("copy_ii", OP_COPY_II);
     add_op("neg_ii", OP_NEG_II);
@@ -1331,10 +1546,22 @@
     add_op("div_iii", OP_DIV_III);
     add_op("pow_iii", OP_POW_III);
     add_op("mod_iii", OP_MOD_III);
-    add_op("where_ifii", OP_WHERE_IFII);
+    add_op("where_ibii", OP_WHERE_IBII);
 
-    add_op("cast_fb", OP_CAST_FB);
+    add_op("cast_li", OP_CAST_LI);
+    add_op("ones_like_ll", OP_ONES_LIKE_LL);
+    add_op("copy_ll", OP_COPY_LL);
+    add_op("neg_ll", OP_NEG_LL);
+    add_op("add_lll", OP_ADD_LLL);
+    add_op("sub_lll", OP_SUB_LLL);
+    add_op("mul_lll", OP_MUL_LLL);
+    add_op("div_lll", OP_DIV_LLL);
+    add_op("pow_lll", OP_POW_LLL);
+    add_op("mod_lll", OP_MOD_LLL);
+    add_op("where_lbll", OP_WHERE_LBLL);
+
     add_op("cast_fi", OP_CAST_FI);
+    add_op("cast_fl", OP_CAST_FL);
     add_op("copy_ff", OP_COPY_FF);
     add_op("ones_like_ff", OP_ONES_LIKE_FF);
     add_op("neg_cc", OP_NEG_CC);
@@ -1350,15 +1577,15 @@
     add_op("tan_ff", OP_TAN_FF);
     add_op("sqrt_ff", OP_SQRT_FF);
     add_op("arctan2_fff", OP_ARCTAN2_FFF);
-    add_op("where_ffff", OP_WHERE_FFFF);
+    add_op("where_fbff", OP_WHERE_FBFF);
     add_op("func_ff", OP_FUNC_FF);
     add_op("func_fff", OP_FUNC_FFF);
 
     add_op("eq_bcc", OP_EQ_BCC);
     add_op("ne_bcc", OP_NE_BCC);
 
-    add_op("cast_cb", OP_CAST_CB);
     add_op("cast_ci", OP_CAST_CI);
+    add_op("cast_cl", OP_CAST_CL);
     add_op("cast_cf", OP_CAST_CF);
     add_op("copy_cc", OP_COPY_CC);
     add_op("ones_like_cc", OP_ONES_LIKE_CC);
@@ -1367,7 +1594,7 @@
     add_op("sub_ccc", OP_SUB_CCC);
     add_op("mul_ccc", OP_MUL_CCC);
     add_op("div_ccc", OP_DIV_CCC);
-    add_op("where_cfcc", OP_WHERE_CFCC);
+    add_op("where_cbcc", OP_WHERE_CBCC);
     add_op("func_cc", OP_FUNC_CC);
     add_op("func_ccc", OP_FUNC_CCC);
 
@@ -1375,11 +1602,15 @@
     add_op("imag_fc", OP_IMAG_FC);
     add_op("complex_cff", OP_COMPLEX_CFF);
 
+    add_op("copy_ss", OP_COPY_SS);
+
     add_op("sum_iin", OP_SUM_IIN);
+    add_op("sum_lln", OP_SUM_LLN);
     add_op("sum_ffn", OP_SUM_FFN);
     add_op("sum_ccn", OP_SUM_CCN);
 
     add_op("prod_iin", OP_PROD_IIN);
+    add_op("prod_lln", OP_PROD_LLN);
     add_op("prod_ffn", OP_PROD_FFN);
     add_op("prod_ccn", OP_PROD_CCN);
 

Modified: trunk/scipy/sandbox/numexpr/tests/test_numexpr.py
===================================================================
--- trunk/scipy/sandbox/numexpr/tests/test_numexpr.py	2007-11-22 12:38:12 UTC (rev 3567)
+++ trunk/scipy/sandbox/numexpr/tests/test_numexpr.py	2007-11-23 07:20:02 UTC (rev 3568)
@@ -6,7 +6,7 @@
 from numexpr import E, numexpr, evaluate, disassemble
 restore_path()
 
-class TestNumExpr(NumpyTestCase):
+class test_numexpr(NumpyTestCase):
     def check_simple(self):
         ex = 2.0 * E.a + 3.0 * E.b * E.c
         func = numexpr(ex, signature=[('a', float), ('b', float), ('c', float)])
@@ -63,14 +63,14 @@
         x = x.astype(int)
         assert_equal(evaluate("sum(x**2+2,axis=0)"), sum(x**2+2,axis=0))
         assert_equal(evaluate("prod(x**2+2,axis=0)"), prod(x**2+2,axis=0))
+        # Check longs
+        x = x.astype(long)
+        assert_equal(evaluate("sum(x**2+2,axis=0)"), sum(x**2+2,axis=0))
+        assert_equal(evaluate("prod(x**2+2,axis=0)"), prod(x**2+2,axis=0))
         # Check complex
         x = x + 5j
         assert_equal(evaluate("sum(x**2+2,axis=0)"), sum(x**2+2,axis=0))
         assert_equal(evaluate("prod(x**2+2,axis=0)"), prod(x**2+2,axis=0))
-        # Check boolean (should cast to integer)
-        x = (arange(10) % 2).astype(bool)
-        assert_equal(evaluate("prod(x,axis=0)"), prod(x,axis=0))
-        assert_equal(evaluate("sum(x,axis=0)"), sum(x,axis=0))
 
     def check_axis(self):
         y = arange(9.0).reshape(3,3)
@@ -95,7 +95,7 @@
                     [('mul_fff', 'r0', 'r1[x]', 'r1[x]'),
                      ('add_fff', 'r0', 'r0', 'c2[2.0]')])
 
-class TestEvaluate(NumpyTestCase):
+class test_evaluate(NumpyTestCase):
     def check_simple(self):
         a = array([1., 2., 3.])
         b = array([4., 5., 6.])
@@ -187,8 +187,8 @@
           '2*a + (cos(3)+5)*sinh(cos(b))',
           '2*a + arctan2(a, b)',
           'arcsin(0.5)',
-          'where(a, 2, b)',
-          'where((a-10).real, a, 2)',
+          'where(a != 0.0, 2, b)',
+          'where((a-10).real != 0.0, a, 2)',
           'cos(1+1)',
           '1+1',
           '1',
@@ -239,7 +239,7 @@
 
 class Skip(Exception): pass
 
-class TestExpressions(NumpyTestCase):
+class test_expressions(NumpyTestCase):
     pass
 
 def generate_check_expressions():
@@ -274,7 +274,7 @@
                 new.instancemethod(method, None, test_expressions))
     x = None
     for test_scalar in [0,1,2]:
-        for dtype in [int, float, complex]:
+        for dtype in [int, long, float, complex]:
             array_size = 100
             a = arange(2*array_size, dtype=dtype)[::2]
             a2 = zeros([array_size, array_size], dtype=dtype)
@@ -298,7 +298,7 @@
                                '<' in expr or '>' in expr or '%' in expr
                                or "arctan2" in expr or "fmod" in expr):
                             continue # skip complex comparisons
-                        if dtype == int and test_scalar and expr == '(a+1) ** -1':
+                        if dtype in (int, long) and test_scalar and expr == '(a+1) ** -1':
                             continue
                         make_check_method(a, a2, b, c, d, e, x,
                                           expr, test_scalar, dtype,
@@ -306,5 +306,145 @@
 
 generate_check_expressions()
 
+class test_int32_int64(NumpyTestCase):
+    def check_small_long(self):
+        # Small longs should not be downgraded to ints.
+        res = evaluate('42L')
+        assert_array_equal(res, 42)
+        self.assertEqual(res.dtype.name, 'int64')
+
+    def check_big_int(self):
+        # Big ints should be promoted to longs.
+        # This test may only fail under 64-bit platforms.
+        res = evaluate('2**40')
+        assert_array_equal(res, 2**40)
+        self.assertEqual(res.dtype.name, 'int64')
+
+    def check_long_constant_promotion(self):
+        int32array = arange(100, dtype='int32')
+        res = int32array * 2
+        res32 = evaluate('int32array * 2')
+        res64 = evaluate('int32array * 2L')
+        assert_array_equal(res, res32)
+        assert_array_equal(res, res64)
+        self.assertEqual(res32.dtype.name, 'int32')
+        self.assertEqual(res64.dtype.name, 'int64')
+
+    def check_int64_array_promotion(self):
+        int32array = arange(100, dtype='int32')
+        int64array = arange(100, dtype='int64')
+        respy = int32array * int64array
+        resnx = evaluate('int32array * int64array')
+        assert_array_equal(respy, resnx)
+        self.assertEqual(resnx.dtype.name, 'int64')
+
+class test_strings(NumpyTestCase):
+    BLOCK_SIZE1 = 128
+    BLOCK_SIZE2 = 8
+    str_list1 = ['foo', 'bar', '', '  ']
+    str_list2 = ['foo', '', 'x', ' ']
+    str_nloops = len(str_list1) * (BLOCK_SIZE1 + BLOCK_SIZE2 + 1)
+    str_array1 = array(str_list1 * str_nloops)
+    str_array2 = array(str_list2 * str_nloops)
+    str_constant = 'doodoo'
+
+    def check_null_chars(self):
+        str_list = [
+            '\0\0\0', '\0\0foo\0', '\0\0foo\0b', '\0\0foo\0b\0',
+            'foo\0', 'foo\0b', 'foo\0b\0', 'foo\0bar\0baz\0\0' ]
+        for s in str_list:
+            r = evaluate('s')
+            self.assertEqual(s, r.tostring())  # check *all* stored data
+
+    def check_compare_copy(self):
+        sarr = self.str_array1
+        expr = 'sarr'
+        res1 = eval(expr)
+        res2 = evaluate(expr)
+        assert_array_equal(res1, res2)
+
+    def check_compare_array(self):
+        sarr1 = self.str_array1
+        sarr2 = self.str_array2
+        expr = 'sarr1 >= sarr2'
+        res1 = eval(expr)
+        res2 = evaluate(expr)
+        assert_array_equal(res1, res2)
+
+    def check_compare_variable(self):
+        sarr = self.str_array1
+        svar = self.str_constant
+        expr = 'sarr >= svar'
+        res1 = eval(expr)
+        res2 = evaluate(expr)
+        assert_array_equal(res1, res2)
+
+    def check_compare_constant(self):
+        sarr = self.str_array1
+        expr = 'sarr >= %r' % self.str_constant
+        res1 = eval(expr)
+        res2 = evaluate(expr)
+        assert_array_equal(res1, res2)
+
+    def check_add_string_array(self):
+        sarr1 = self.str_array1
+        sarr2 = self.str_array2
+        expr = 'sarr1 + sarr2'
+        self.assert_missing_op('add_sss', expr, locals())
+
+    def check_add_numeric_array(self):
+        sarr = self.str_array1
+        narr = arange(len(sarr), dtype='int32')
+        expr = 'sarr >= narr'
+        self.assert_missing_op('ge_bsi', expr, locals())
+
+    def assert_missing_op(self, op, expr, local_dict):
+        msg = "expected NotImplementedError regarding '%s'" % op
+        try:
+            evaluate(expr, local_dict)
+        except NotImplementedError, nie:
+            if "'%s'" % op not in nie.args[0]:
+                self.fail(msg)
+        else:
+            self.fail(msg)
+
+    def check_compare_prefix(self):
+        # Check comparing two strings where one is a prefix of the
+        # other.
+        for s1, s2 in [ ('foo', 'foobar'), ('foo', 'foo\0bar'),
+                        ('foo\0a', 'foo\0bar') ]:
+            self.assert_(evaluate('s1 < s2'))
+            self.assert_(evaluate('s1 <= s2'))
+            self.assert_(evaluate('~(s1 == s2)'))
+            self.assert_(evaluate('~(s1 >= s2)'))
+            self.assert_(evaluate('~(s1 > s2)'))
+
+        # Check for NumPy array-style semantics in string equality.
+        s1, s2 = 'foo', 'foo\0\0'
+        self.assert_(evaluate('s1 == s2'))
+
+# Case for testing selections in fields which are aligned but whose
+# data length is not an exact multiple of the length of the record.
+# The following test exposes the problem only in 32-bit machines,
+# because in 64-bit machines 'c2' is unaligned.  However, this should
+# check most platforms where, while not unaligned, 'len(datatype) >
+# boundary_alignment' is fullfilled.
+class test_irregular_stride(NumpyTestCase):
+    def check_select(self):
+        f0 = arange(10, dtype=int32)
+        f1 = arange(10, dtype=float64)
+
+        irregular = rec.fromarrays([f0, f1])
+
+        f0 = irregular['f0']
+        f1 = irregular['f1']
+
+        i0 = evaluate('f0 < 5')
+        i1 = evaluate('f1 < 5')
+
+        assert_array_equal(f0[i0], arange(5, dtype=int32))
+        assert_array_equal(f1[i1], arange(5, dtype=float64))
+
+
 if __name__ == '__main__':
     NumpyTest().run()

Modified: trunk/scipy/sandbox/numexpr/timing.py
===================================================================
--- trunk/scipy/sandbox/numexpr/timing.py	2007-11-22 12:38:12 UTC (rev 3567)
+++ trunk/scipy/sandbox/numexpr/timing.py	2007-11-23 07:20:02 UTC (rev 3568)
@@ -88,13 +88,13 @@
 """ % ((array_size,)*3)
 expr5 = 'where(0.1*a > arctan2(a, b), 2*a, arctan2(a,b))'
 
-expr6 = 'where(a, 2, b)'
+expr6 = 'where(a != 0.0, 2, b)'
 
-expr7 = 'where(a-10, a, 2)'
+expr7 = 'where(a-10 != 0.0, a, 2)'
 
-expr8 = 'where(a%2, b+5, 2)'
+expr8 = 'where(a%2 != 0.0, b+5, 2)'
 
-expr9 = 'where(a%2, 2, b+5)'
+expr9 = 'where(a%2 != 0.0, 2, b+5)'
 
 expr10 = 'a**2 + (b+1)**-2.5'
 
@@ -131,6 +131,6 @@
 
 if __name__ == '__main__':
     averages = []
-    for i in range(10):
+    for i in range(iterations):
         averages.append(compare())
     print "Averages:", ', '.join("%.2f" % x for x in averages)



More information about the Scipy-svn mailing list