[Numpy-svn] r3377 - trunk/numpy/f2py/lib/parser

numpy-svn at scipy.org numpy-svn at scipy.org
Sat Oct 21 04:55:30 CDT 2006


Author: pearu
Date: 2006-10-21 04:55:23 -0500 (Sat, 21 Oct 2006)
New Revision: 3377

Modified:
   trunk/numpy/f2py/lib/parser/base_classes.py
   trunk/numpy/f2py/lib/parser/expressions.py
   trunk/numpy/f2py/lib/parser/pattern_tools.py
Log:
F2PY G3: fixed bugs (ticket 361), impl expression parser.

Modified: trunk/numpy/f2py/lib/parser/base_classes.py
===================================================================
--- trunk/numpy/f2py/lib/parser/base_classes.py	2006-10-21 05:48:11 UTC (rev 3376)
+++ trunk/numpy/f2py/lib/parser/base_classes.py	2006-10-21 09:55:23 UTC (rev 3377)
@@ -366,7 +366,7 @@
 
     def is_deferred_shape_array(self):
         if not self.is_array(): return False
-        return self.is_allocatable() or self.is_pointer():
+        return self.is_allocatable() or self.is_pointer()
 
     def is_assumed_size_array(self):
         if not self.is_array(): return False

Modified: trunk/numpy/f2py/lib/parser/expressions.py
===================================================================
--- trunk/numpy/f2py/lib/parser/expressions.py	2006-10-21 05:48:11 UTC (rev 3376)
+++ trunk/numpy/f2py/lib/parser/expressions.py	2006-10-21 09:55:23 UTC (rev 3377)
@@ -13,6 +13,8 @@
 """
 
 import re
+from splitline import string_replace_map
+import pattern_tools as pattern
 
 class DefinedOp:
     def __new__(cls, letters):
@@ -35,262 +37,410 @@
 class NoMatchError(Exception):
     pass
 
-is_name = re.compile(r'\A[a-z]\w*\Z',re.I).match
 
+## class Designator(Primary):
+##     """
+##     <designator> = <object-name>
+##                    | <array-element>
+##                    | <array-section>
+##                    | <structure-component>
+##                    | <substring>
+##     <array-element> = <data-ref>
+##     <array-section> = <data-ref> [ ( <substring-range> ) ]
+##     <data-ref> = <part-ref> [ % <part-ref> ]...
+##     <part-ref> = <part-name> [ ( <section-subscript-list> ) ]
+##     <substring> = <parent-string> ( <substring-range> )
+##     <parent-string> = <scalar-variable-name>
+##                       | <array-element>
+##                       | <scalar-structure-component>
+##                       | <scalar-constant>
+##     <substring-range> = [ <scalar-int-expr> ] : [ <scalar-int-expr> ]
+##     <structure-component> = <data-ref>
+##     """
+
+
+
+## class LiteralConstant(Constant):
+##     """
+##     <constant> = <int-literal-constant>
+##                  | <real-literal-constant>
+##                  | <complex-literal-constant>
+##                  | <logical-literal-constant>
+##                  | <char-literal-constant>
+##                  | <boz-literal-constant>
+##     """
+
+## class SignedIntLiteralConstant(LiteralConstant):
+##     """
+##     <signed-int-literal-constant> = [ <sign> ] <int-literal-constant>
+##     <sign> = + | -
+##     """
+##     match = re.compile(r'\A[+-]\s*\d+\Z').match
+
+##     def init(self, string):
+##         Base.init(self, string)
+##         self.content = [string[0], IntLiteralConstant(string[1:].lstrip())]
+##         return
+##     def tostr(self):
+##         return '%s%s' % tuple(self.content)
+
+## class NamedConstant(Constant):
+##     """
+##     <named-constant> = <name>
+##     """
+
+## class Name(Designator, NamedConstant, NoChildAllowed):
+##     """
+##     <name> = <letter> [ <alpha-numeric-character> ]...
+##     """
+##     match = re.compile(r'\A'+name_pat+r'\Z',re.I).match
+
+## class IntLiteralConstant(SignedIntLiteralConstant, NoChildAllowed):
+##     """
+##     <int-literal-constant> = <digit-string> [ _ <kind-param> ]
+##     <kind-param> = <digit-string>
+##                  | <scalar-int-constant-name>
+##     <digit-string> = <digit> [ <digit> ]...
+##     """
+##     match = compose_pattern([digit_string_pat, '_', kind_param_pat],r'\s*')
+
+##     compose_pattern('int-literal-constant','digit-string','_','kind-param')
+
+## class DigitString(IntLiteralConstant, NoChildAllowed):
+##     """
+##     <digit-string> = <digit> [ <digit> ]...
+##     """
+##     match = re.compile(r'\A\d+\Z').match
+
+
 class Base(object):
 
     subclasses = {}
 
     def __new__(cls, string):
-        match = getattr(cls,'match',None)
-        if match is not None:
-            if match(string):
-                obj = object.__new__(cls)
-                init = cls.__dict__.get('init', Base.init)
-                init(obj, string)
-                return obj
-        for c in Base.subclasses.get(cls.__name__,[]):
-            try:
-                return c(string)
-            except NoMatchError:
-                pass
+        if hasattr(cls,'match'):
+            match = cls.match
+            result = match(string)
+        else:
+            result = None
+        if isinstance(result, tuple):
+            obj = object.__new__(cls)
+            obj.string = string
+            obj.init(*result)
+            return obj
+        elif isinstance(result, Base):
+            return result
+        elif result is None:
+            for subcls in Base.subclasses.get(cls.__name__,[]):
+                try:
+                    return subcls(string)
+                except NoMatchError:
+                    pass
+        else:
+            raise AssertionError,`result`
         raise NoMatchError,'%s: %r' % (cls.__name__, string)
 
-    def init(self, string):
-        self.string = string
-        return
+    findall = staticmethod(re.compile(r'(_F2PY_STRING_CONSTANT_\d+_|F2PY_EXPR_TUPLE_\d+)').findall)
     
-    def __str__(self):
-        str_func = self.__class__.__dict__.get('tostr', None)
-        if str_func is not None:
-            return str_func(self)
-        return self.string
-    def __repr__(self): return '%s(%r)' % (self.__class__.__name__, self.string)
+    def match_binary_operand_right(lhs_cls, op_pattern, rhs_cls, string):
+        line, repmap = string_replace_map(string)
+        t = op_pattern.rsplit(line)
+        if t is None: return
+        lhs, op, rhs = t
+        for k in Base.findall(lhs):
+            lhs = lhs.replace(k, repman[k])
+        for k in Base.findall(rhs):
+            rhs = rhs.replace(k, repman[k])
+        lhs_obj = lhs_cls(lhs)
+        rhs_obj = rhs_cls(rhs)
+        return lhs_obj, t[1], rhs_obj
+    match_binary_operand_right = staticmethod(match_binary_operand_right)
 
+    def match_binary_unary_operand_right(lhs_cls, op_pattern, rhs_cls, string):
+        line, repmap = string_replace_map(string)
+        t = op_pattern.rsplit(line)
+        if t is None: return
+        lhs, op, rhs = t
+        if lhs: 
+            for k in Base.findall(lhs):
+                lhs = lhs.replace(k, repman[k])
+        for k in Base.findall(rhs):
+            rhs = rhs.replace(k, repman[k])
+        rhs_obj = rhs_cls(rhs)
+        if lhs:
+            lhs_obj = lhs_cls(lhs)
+            return lhs_obj, t[1], rhs_obj
+        else:
+            return None, t[1], rhs_obj
+    match_binary_unary_operand_right = staticmethod(match_binary_unary_operand_right)
 
+    def match_binary_operand_left(lhs_cls, op_pattern, rhs_cls, string):
+        line, repmap = string_replace_map(string)
+        t = op_pattern.lsplit(line)
+        if t is None: return
+        lhs, op, rhs = t
+        for k in Base.findall(lhs):
+            lhs = lhs.replace(k, repman[k])
+        for k in Base.findall(rhs):
+            rhs = rhs.replace(k, repman[k])
+        lhs_obj = lhs_cls(lhs)
+        rhs_obj = rhs_cls(rhs)
+        return lhs_obj, t[1], rhs_obj
+    match_binary_operand_left = staticmethod(match_binary_operand_left)
 
-class Primary(Base):
-    """
-    <primary> = <constant>
-                | <designator>
-                | <array-constructor>
-                | <structure-constructor>
-                | <function-reference>
-                | <type-param-inquiry>
-                | <type-param-name>
-                | ( <expr> )
-    <type-param-inquiry> = <designator> % <type-param-name>
-    """
+    def match_unary_operand(op_pattern, rhs_cls, string):
+        line, repmap = string_replace_map(string)
+        t = op_pattern.lsplit(line)
+        if t is None: return
+        lhs, op, rhs = t
+        for k in Base.findall(rhs):
+            rhs = rhs.replace(k, repman[k])
+        assert not lhs,`lhs`
+        rhs_obj = rhs_cls(rhs)
+        return t[1], rhs_obj
+    match_unary_operand = staticmethod(match_unary_operand)
 
-class Constant(Primary):
-    """
-    <constant> = <literal-constant>
-                 | <named-constant>
-    """
+    def init_binary_operand(self, lhs, op, rhs):
+        self.lhs = lhs
+        self.op = op
+        self.rhs = rhs
+        return
 
-class Designator(Primary):
-    """
-    <designator> = <object-name>
-                   | <array-element>
-                   | <array-section>
-                   | <structure-component>
-                   | <substring>
-    <array-element> = <data-ref>
-    <array-section> = <data-ref> [ ( <substring-range> ) ]
-    <data-ref> = <part-ref> [ % <part-ref> ]...
-    <part-ref> = <part-name> [ ( <section-subscript-list> ) ]
-    <substring> = <parent-string> ( <substring-range> )
-    <parent-string> = <scalar-variable-name>
-                      | <array-element>
-                      | <scalar-structure-component>
-                      | <scalar-constant>
-    <substring-range> = [ <scalar-int-expr> ] : [ <scalar-int-expr> ]
-    <structure-component> = <data-ref>
-    """
+    def init_unary_operand(self, op, rhs):
+        self.op = op
+        self.rhs = rhs
+        return
 
+    def init_primary(self, primary):
+        self.primary = primary
+        return
 
+    def tostr_binary_operand(self):
+        return '%s %s %s' % (self.lhs, self.op, self.rhs)
 
-class LiteralConstant(Constant):
-    """
-    <constant> = <int-literal-constant>
-                 | <real-literal-constant>
-                 | <complex-literal-constant>
-                 | <logical-literal-constant>
-                 | <char-literal-constant>
-                 | <boz-literal-constant>
-    """
+    def tostr_binary_unary_operand(self):
+        if self.lhs is None:
+            return '%s %s' % (self.op, self.rhs)
+        return '%s %s %s' % (self.lhs, self.op, self.rhs)
 
-class SignedIntLiteralConstant(LiteralConstant):
-    """
-    <signed-int-literal-constant> = [ <sign> ] <int-literal-constant>
-    <sign> = + | -
-    """
-    match = re.compile(r'\A[+-]\s*\d+\Z').match
+    def tostr_unary_operand(self):
+        return '%s %s' % (self.op, self.rhs)
 
-    def init(self, string):
-        Base.init(self, string)
-        self.content = [string[0], IntLiteralConstant(string[1:].lstrip())]
-        return
-    def tostr(self):
-        return '%s%s' % tuple(self.content)
+    def tostr_primary(self):
+        return str(self.primary)
 
-class NamedConstant(Constant):
-    """
-    <named-constant> = <name>
-    """
+    def torepr_binary_operand(self):
+        return '%s(%r, %r, %r)' % (self.__class__.__name__,self.lhs, self.op, self.rhs)
 
-def compose_patterns(pattern_list, names join=''):
-    return join.join(pattern_list)
+    def torepr_binary_unary_operand(self):
+        return '%s(%r, %r, %r)' % (self.__class__.__name__,self.lhs, self.op, self.rhs)
 
-def add_pattern(pattern_name, *pat_list):
-    p = ''
-    for pat in pat_list:
-        if isinstance(pat, PatternOptional):
-            p += '(%s|)' % (add_pattern(None, pat.args))
-        elif isinstance(pat, PatternOr):
-            p += '(%s)' % ('|'.join([add_pattern(None, p1) for p1 in par.args]))
-        else:
-            subpat = pattern_map.get(pat,None)
-            if subpat is None:
-                p += pat
-            else:
-                p += '(?P<%s>%s)' % (pat, subpat)
-    if pattern_map is not None:
-        pattern_map[pattern_name] = p
-    return p
+    def torepr_unary_operand(self):
+        return '%s(%r, %r)' % (self.__class__.__name__,self.op, self.rhs)
 
+    def torepr_primary(self):
+        return '%s(%r)' % (self.__class__.__name__,self.primary)
 
+    def __str__(self):
+        if self.__class__.__dict__.has_key('tostr'):
+            return self.tostr()
+        return repr(self)
 
-class PatternBase:
-    def __init__(self,*args):
-        self.args = args
-        return
+    def __repr__(self):
+        if self.__class__.__dict__.has_key('torepr'):
+            return self.torepr()
+        return '%s(%r)' % (self.__class__.__name__, self.string)
 
-class PatternOptional(PatternBase):
-    pass
-class PatternOr(PatternBase):
-    pass
-class PatternJoin(PatternBase):
-    join = ''
-
-pattern_map = {
-    'name': r'[a-zA-Z]\w+'
-    'digit-string': r'\d+'
-    }
-add_pattern('kind-param',
-            PatternOr('digit-string','name'))
-add_pattern('int-literal-constant',
-            'digit-string',PatternOptional('_','kind-param'))
-
-name_pat = r'[a-z]\w*'
-digit_pat = r'\d'
-digit_string_pat = r'\d+'
-kind_param_pat = '(%s|%s)' % (digit_string_pat, name_pat)
-
-class Name(Designator, NamedConstant, NoChildAllowed):
+class Expr(Base):
     """
-    <name> = <letter> [ <alpha-numeric-character> ]...
+    <expr> = [ <expr> <defined-binary-op> ] <level-5-expr>
+    <defined-binary-op> = . <letter> [ <letter> ]... .
     """
-    match = re.compile(r'\A'+name_pat+r'\Z',re.I).match
+    def match(string):
+        return Base.match_binary_operand_right(\
+            Expr,pattern.defined_binary_op.named(),Level_5_Expr,string)
+    match = staticmethod(match)
+    init = Base.init_binary_operand
+    tostr = Base.tostr_binary_operand
+    torepr = Base.torepr_binary_operand
 
-class IntLiteralConstant(SignedIntLiteralConstant, NoChildAllowed):
+class Level_5_Expr(Expr):
     """
-    <int-literal-constant> = <digit-string> [ _ <kind-param> ]
-    <kind-param> = <digit-string>
-                 | <scalar-int-constant-name>
-    <digit-string> = <digit> [ <digit> ]...
+    <level-5-expr> = [ <level-5-expr> <equiv-op> ] <equiv-operand>
+    <equiv-op> = .EQV.
+               | .NEQV.
     """
-    match = compose_pattern([digit_string_pat, '_', kind_param_pat],r'\s*')
-
-    compose_pattern('int-literal-constant','digit-string','_','kind-param')
-
-class DigitString(IntLiteralConstant, NoChildAllowed):
+    def match(string):
+        return Base.match_binary_operand_right(\
+            Level_5_Expr,pattern.equiv_op.named(),Equiv_Operand,string)
+    match = staticmethod(match)
+    init = Base.init_binary_operand
+    tostr = Base.tostr_binary_operand
+    torepr = Base.torepr_binary_operand
+    
+class Equiv_Operand(Level_5_Expr):
     """
-    <digit-string> = <digit> [ <digit> ]...
+    <equiv-operand> = [ <equiv-operand> <or-op> ] <or-operand>
+    <or-op>  = .OR.
     """
-    match = re.compile(r'\A\d+\Z').match
+    def match(string):
+        return Base.match_binary_operand_right(\
+            Equiv_Operand,pattern.or_op.named(),Or_Operand,string)
+    match = staticmethod(match)
+    init = Base.init_binary_operand
+    tostr = Base.tostr_binary_operand
+    torepr = Base.torepr_binary_operand
+    
+class Or_Operand(Equiv_Operand):
+    """
+    <or-operand> = [ <or-operand> <and-op> ] <and-operand>    
+    <and-op> = .AND.
 
-################# Setting up Base.subclasses #####################
-
-def set_subclasses(cls):
     """
-    Append cls to cls base classes attribute lists `_subclasses`
-    so that all classes derived from Base know their subclasses
-    one level down.
+    def match(string):
+        return Base.match_binary_operand_right(\
+            Or_Operand,pattern.and_op.named(),And_Operand,string)
+    match = staticmethod(match)
+    init = Base.init_binary_operand
+    tostr = Base.tostr_binary_operand
+    torepr = Base.torepr_binary_operand
+    
+class And_Operand(Or_Operand):
     """
-    for basecls in cls.__bases__:
-        if issubclass(basecls, Base):
-            if issubclass(basecls, NoChildAllowed):
-                raise NoChildAllowedError,'%s while adding %s' % (basecls.__name__,cls.__name__)
-            try:
-                Base.subclasses[basecls.__name__].append(cls)
-            except KeyError:
-                Base.subclasses[basecls.__name__] = [cls]
-    return
-ClassType = type(Base)
-for clsname in dir():
-    cls = eval(clsname)
-    if isinstance(cls, ClassType) and issubclass(cls, Base):
-        set_subclasses(cls)
-
-####################################################################
-
-class Level1Expression:#(Primary):
+    <and-operand> = [ <not-op> ] <level-4-expr>
+    <not-op> = .NOT.
     """
-    <level-1-expr> = [ <defined-unary-op> ] <primary>
-    <defined-unary-op> = . <letter> [ <letter> ]... .
+    def match(string):
+        return Base.match_unary_operand(\
+            pattern.not_op.named(),Level_4_Expr,string)
+    match = staticmethod(match)
+    init = Base.init_unary_operand
+    tostr = Base.tostr_unary_operand
+    torepr = Base.torepr_unary_operand
+    
+class Level_4_Expr(And_Operand):
     """
-    def __new__(cls, primary, defined_unary_op = None):
-        obj = object.__new__(cls)
-        
-        return obj
-
-class Level2Expression:
+    <level-4-expr> = [ <level-3-expr> <rel-op> ] <level-3-expr>
+    <rel-op> = .EQ. | .NE. | .LT. | .LE. | .GT. | .GE. | == | /= | < | <= | > | >=
     """
+    def match(string):
+        return Base.match_binary_operand_right(\
+            Level_3_Expr,pattern.rel_op.named(),Level_3_Expr,string)
+    match = staticmethod(match)
+    init = Base.init_binary_operand
+    tostr = Base.tostr_binary_operand
+    torepr = Base.torepr_binary_operand
+    
+class Level_3_Expr(Level_4_Expr):
+    """
+    <level-3-expr> = [ <level-3-expr> <concat-op> ] <level-2-expr>
+    <concat-op>    = //
+    """
+    def match(string):
+        return Base.match_binary_operand_right(\
+            Level_3_Expr,pattern.concat_op.named(),Level_2_Expr,string)
+    match = staticmethod(match)
+    init = Base.init_binary_operand
+    tostr = Base.tostr_binary_operand
+    torepr = Base.torepr_binary_operand
+    
+class Level_2_Expr(Level_3_Expr):
+    """
     <level-2-expr> = [ [ <level-2-expr> ] <add-op> ] <add-operand>
-    <add-operand> = [ <add-operand> <mult-op> ] <mult-operand>
-    <mult-operand> = <level-1-expr> [ <power-op> <mult-operand> ]
-    <power-op> = **
-    <mult-op>  = *
-                 | /
     <add-op>   = +
                  | -
     """
 
-class Level3Expression:
+    def match(string):
+        return Base.match_binary_unary_operand_right(\
+            Level_2_Expr,pattern.add_op.named(),Add_Operand,string)
+    match = staticmethod(match)
+    init = Base.init_binary_operand
+    tostr = Base.tostr_binary_unary_operand
+    torepr = Base.torepr_binary_operand
+    
+class Add_Operand(Level_2_Expr):
     """
-    <level-3-expr> = [ <level-3-expr> <concat-op> ] <level-2-expr>
-    <concat-op>    = //
+    <add-operand> = [ <add-operand> <mult-op> ] <mult-operand>
+    <mult-op>  = *
+                 | /
     """
 
-class Level4Expression:
+    def match(string):
+        return Base.match_binary_operand_right(\
+            Add_Operand,pattern.mult_op.named(),Mult_Operand,string)
+    match = staticmethod(match)
+    init = Base.init_binary_operand
+    tostr = Base.tostr_binary_operand
+    torepr = Base.torepr_binary_operand
+    
+class Mult_Operand(Add_Operand):
     """
-    <level-4-expr> = [ <level-3-expr> <rel-op> ] <level-3-expr>
-    <rel-op> = .EQ. | .NE. | .LT. | .LE. | .GT. | .GE. | == | /= | < | <= | > | >=
+    <mult-operand> = <level-1-expr> [ <power-op> <mult-operand> ]
+    <power-op> = **
     """
 
-class Level5Expression:
+    def match(string):
+        return Base.match_binary_operand_left(\
+            Level_1_Expr,pattern.power_op.named(),Mult_Operand,string)
+    match = staticmethod(match)
+    init = Base.init_binary_operand
+    tostr = Base.tostr_binary_operand
+    torepr = Base.torepr_binary_operand
+    
+class Level_1_Expr(Mult_Operand):
     """
-    <level-5-expr> = [ <level-5-expr> <equiv-op> ] <equiv-operand>
-    <equiv-operand> = [ <equiv-operand> <or-op> ] <or-operand>
-    <or-operand> = [ <or-operand> <and-op> ] <and-operand>
-    <and-operand> = [ <not-op> ] <level-4-expr>
-    <not-op> = .NOT.
-    <and-op> = .AND.
-    <or-op>  = .OR.
-    <equiv-op> = .EQV.
-               | .NEQV.
+    <level-1-expr> = [ <defined-unary-op> ] <primary>
+    <defined-unary-op> = . <letter> [ <letter> ]... .
     """
+    def match(string):
+        return Base.match_unary_operand(\
+            pattern.defined_unary_op.named(),Primary,string)
+    match = staticmethod(match)
+    init = Base.init_unary_operand
+    tostr = Base.tostr_unary_operand
+    torepr = Base.torepr_unary_operand
+    
+class Primary(Level_1_Expr):
+    """
+    <primary> = <constant>
+                | <designator>
+                | <array-constructor>
+                | <structure-constructor>
+                | <function-reference>
+                | <type-param-inquiry>
+                | <type-param-name>
+                | ( <expr> )
+    <type-param-inquiry> = <designator> % <type-param-name>
+    """
 
-class Expression:
+class Constant(Primary):
     """
-    <expr> = [ <expr> <defined-binary-op> ] <level-5-expr>
-    <defined-binary-op> = . <letter> [ <letter> ]... .
+    <constant> = <literal-constant>
+                 | <named-constant>
     """
+    def match(string):
+        if pattern.abs_constant.match(string):
+            return (string,)
+        return
+    match = staticmethod(match)
+    init = Base.init_primary
+    tostr = Base.tostr_primary
+    torepr = Base.torepr_primary
 
-from splitline import string_replace_map
+ClassType = type(Base)
+for clsname in dir():
+    cls = eval(clsname)
+    if isinstance(cls, ClassType) and issubclass(cls, Base):
+        for basecls in cls.__bases__:
+            if issubclass(basecls, Base):
+                try:
+                    Base.subclasses[basecls.__name__].append(cls)
+                except KeyError:
+                    Base.subclasses[basecls.__name__] = [cls]
 
-def parse_expr(line, lower=False):
-    newline, repmap = string_replace_map(line, lower=lower)
-    if repmap:
-        raise NotImplementedError,`newline,repmap`
+
+print Constant('a')
+print `Constant('1')`
+print `Base('+1')`
+print `Base('c-1*a/b')`

Modified: trunk/numpy/f2py/lib/parser/pattern_tools.py
===================================================================
--- trunk/numpy/f2py/lib/parser/pattern_tools.py	2006-10-21 05:48:11 UTC (rev 3376)
+++ trunk/numpy/f2py/lib/parser/pattern_tools.py	2006-10-21 09:55:23 UTC (rev 3377)
@@ -1,4 +1,16 @@
+"""
+Tools for constructing patterns.
 
+-----
+Permission to use, modify, and distribute this software is given under the
+terms of the NumPy License. See http://scipy.org.
+
+NO WARRANTY IS EXPRESSED OR IMPLIED.  USE AT YOUR OWN RISK.
+Author: Pearu Peterson <pearu at cens.ioc.ee>
+Created: Oct 2006
+-----
+"""
+
 import re
 
 class Pattern:
@@ -20,6 +32,15 @@
                            '|': '[|]',
                            '(': r'\(',
                            ')': r'\)',
+                           '[': r'\[',
+                           ']': r'\]',
+                           '^': '[^]',
+                           '$': '[$]',
+                           '?': '[?]',
+                           '{': '\{',
+                           '}': '\}',
+                           '>': '[>]',
+                           '<': '[<]',
                            }
 
     def __init__(self, label, pattern, optional=0):
@@ -28,12 +49,48 @@
         self.optional = optional
         return
 
+    def get_compiled(self):
+        try:
+            return self._compiled_pattern
+        except AttributeError:
+            self._compiled_pattern = compiled = re.compile(self.pattern)
+            return compiled
+
     def match(self, string):
-        if hasattr(self, '_compiled_match'):
-            return self._compiled.match(string)
-        self._compiled = compiled = re.compile(self.pattern)
-        return compiled.match(string)
+        return self.get_compiled().match(string)
 
+    def rsplit(self, string):
+        """
+        Return (<lhs>, <pattern_match>, <rhs>) where
+          string = lhs + pattern_match + rhs
+        and rhs does not contain pattern_match.
+        If no pattern_match is found in string, return None.
+        """
+        compiled = self.get_compiled()
+        t = compiled.split(string)
+        if len(t) < 3: return
+        rhs = t[-1]
+        pattern_match = t[-2]
+        assert abs(self).match(pattern_match),`pattern_match`
+        lhs = ''.join(t[:-2])
+        return lhs, pattern_match, rhs
+
+    def lsplit(self, string):
+        """
+        Return (<lhs>, <pattern_match>, <rhs>) where
+          string = lhs + pattern_match + rhs
+        and rhs does not contain pattern_match.
+        If no pattern_match is found in string, return None.
+        """
+        compiled = self.get_compiled()
+        t = compiled.split(string) # can be optimized
+        if len(t) < 3: return
+        lhs = t[0]
+        pattern_match = t[1]
+        rhs = ''.join(t[2:])
+        assert abs(self).match(pattern_match),`pattern_match`
+        return lhs, pattern_match, rhs
+
     def __abs__(self):
         return Pattern(self.label, r'\A' + self.pattern+ r'\Z')
 
@@ -42,7 +99,10 @@
 
     def __or__(self, other):
         label = '( %s OR %s )' % (self.label, other.label)
-        pattern = '(%s|%s)' % (self.pattern, other.pattern)
+        if self.pattern==other.pattern:
+            pattern = self.pattern
+        else:
+            pattern = '(%s|%s)' % (self.pattern, other.pattern)
         return Pattern(label, pattern)
 
     def __and__(self, other):
@@ -99,21 +159,106 @@
         pattern = '(?P%s%s)' % (label.replace('-','_'), self.pattern)
         return Pattern(label, pattern)
 
-name = Pattern('<name>', r'[a-z]\w*')
+    def rename(self, label):
+        if label[0]+label[-1]!='<>':
+            label = '<%s>' % (label)
+        return Pattern(label, self.pattern, self.optional)
+
+# Predefined patterns
+
+letter = Pattern('<letter>','[a-zA-Z]')
+name = Pattern('<name>', r'[a-zA-Z]\w*')
+digit = Pattern('<digit>',r'\d')
+underscore = Pattern('<underscore>', '_')
+hex_digit = Pattern('<hex-digit>',r'[\da-fA-F]')
+
 digit_string = Pattern('<digit-string>',r'\d+')
+hex_digit_string = Pattern('<hex-digit-string>',r'[\da-fA-F]+')
+
 sign = Pattern('<sign>',r'[+-]')
 exponent_letter = Pattern('<exponent-letter>',r'[ED]')
 
+alphanumeric_character = Pattern('<alphanumeric-character>','\w') # [a-z0-9_]
+special_character = Pattern('<special-character>',r'[ =+-*/\()[\]{},.:;!"%&~<>?,\'`^|$#@]')
+character = alphanumeric_character | special_character
+
 kind_param = digit_string | name
 signed_digit_string = ~sign + digit_string
 int_literal_constant = digit_string + ~('_' + kind_param)
 signed_int_literal_constant = ~sign + int_literal_constant
 
+binary_constant = '[Bb]' + ("'" & digit_string & "'" | '"' & digit_string & '"')
+octal_constant = '[Oo]' + ("'" & digit_string & "'" | '"' & digit_string & '"')
+hex_constant = '[Zz]' + ("'" & hex_digit_string & "'" | '"' & hex_digit_string & '"')
+boz_literal_constant = binary_constant | octal_constant | hex_constant
+
 exponent = signed_digit_string
 significand = digit_string + '.' + ~digit_string | '.' + digit_string
 real_literal_constant = significand + ~(exponent_letter + exponent) + ~ ('_' + kind_param) | \
                         digit_string + exponent_letter + exponent + ~ ('_' + kind_param)
 signed_real_literal_constant = ~sign + real_literal_constant
 
+named_constant = name
+real_part = signed_int_literal_constant | signed_real_literal_constant | named_constant
+imag_part = real_part
+complex_literal_constant = '(' + real_part + ',' + imag_part + ')'
 
-print signed_real_literal_constant
+char_literal_constant = ~( kind_param + '_') + "'.*'" | ~( kind_param + '_') + '".*"'
+
+logical_literal_constant = '[.](TRUE|FALSE)[.]' + ~ ('_' + kind_param)
+literal_constant = int_literal_constant | real_literal_constant | complex_literal_constant | logical_literal_constant | char_literal_constant | boz_literal_constant
+constant = literal_constant | named_constant
+int_constant = int_literal_constant | boz_literal_constant | named_constant
+char_constant = char_literal_constant | named_constant
+abs_constant = abs(constant)
+
+power_op = Pattern('<power-op>','[*]{2}')
+mult_op = Pattern('<mult-op>','[*/]')
+add_op = Pattern('<add-op>','[+-]')
+concat_op = Pattern('<concat-op>','[/]{}')
+rel_op = Pattern('<rel-op>','([.](EQ|NE|LT|LE|GT|GE)[.])|[=]{2}|/[=]|[<]|[<][=]|[>]|[=][>]')
+not_op = Pattern('<not-op>','[.]NOT[.]')
+and_op = Pattern('<and-op>','[.]AND[.]')
+or_op = Pattern('<or-op>','[.]OR[.]')
+equiv_op = Pattern('<equiv-op>','[.](EQV|NEQV)[.]')
+intrinsic_operator = power_op | mult_op | add_op | concat_op | rel_op | not_op | and_op | or_op | equiv_op
+extended_intrinsic_operator = intrinsic_operator
+
+defined_unary_op = Pattern('<defined-unary-op>','[.][a-zA-Z]+[.]')
+defined_binary_op = Pattern('<defined-binary-op>','[.][a-zA-Z]+[.]')
+defined_operator = defined_unary_op | defined_binary_op | extended_intrinsic_operator
+
+label = Pattern('<label>','\d{1,5}')
+
+def _test():
+    assert name.match('a1_a')
+    assert abs(name).match('a1_a')
+    assert not abs(name).match('a1_a[]')
+
+    m = abs(kind_param)
+    assert m.match('23')
+    assert m.match('SHORT')
+
+    m = abs(signed_digit_string)
+    assert m.match('23')
+    assert m.match('+ 23')
+    assert m.match('- 23')
+    assert m.match('-23')
+    assert not m.match('+n')
+
+    m = ~sign.named() + digit_string.named('number')
+    r = m.match('23')
+    assert r.groupdict()=={'number': '23', 'sign': None}
+    r = m.match('- 23')
+    assert r.groupdict()=={'number': '23', 'sign': '-'}
+
+    m = abs(char_literal_constant)
+    assert m.match('"adadfa"')
+    assert m.match('"adadfa""adad"')
+    assert m.match('HEY_"adadfa"')
+    assert m.match('HEY _ "ad\tadfa"')
+    assert not m.match('adadfa')
+    print 'ok'
+
+if __name__ == '__main__':
+    _test()



More information about the Numpy-svn mailing list