python/parser1.py

# walter's erster versuch mit Python
#
# starten mit python -i basics.py
def err(*e):
    print('error:', *e)
    exit()

import re;
if False:
    class wk:
        def __init__(self):
            if False:
                self.eins = 'null'
        def __repr__ ( self):
            return 'wk' + repr(self.__dict__)
    o = wk()
    print('o', o)
    o.eins = 'eins'
    print('o', o)
eof = 'eof'
dflt = 'dflt'
ruleStr = ["s = s '+' p", "s = p", "p = p '*' i", "p = i", "i = '(' s           ')'", "i = '0'", "i = '1'", "i = '2'"]
print("ruleStr", ruleStr);
tb = [re.split(r'\s+', r) for r in ruleStr]
tb.insert(0, ['star t ', '=', tb[0][0]])
rules = tb.copy()
accX = len(tb)
failX = accX+1
rulesetX = failX + 1
tb += ['accept', 'fail']
print("rules", tb)
ruleset4name = {}
ruleset4rule = []
for i in range(accX):
    r = tb[i]
    if r[1] != '=':
        err("r[1] not =", r)
    if r[0] in ruleset4name:
        s = ruleset4name[r[0]]
        tb[s].append(i)
    else:
        s = len(tb)
        ruleset4name[r[0]] = s
        tb.append(['ruleset', r[0], s, i])
    r[0:2] = ['rule', r[0], i]
    assert len(ruleset4rule) == i
    ruleset4rule.append(s)
handleX = len(tb)
print("ruleset4name", ruleset4name, 'ruleset4rule', ruleset4rule)
print('tb', tb)
for r in rules:
    for i in range(3, len(r)):
        v = r[i]
        if v[0] == "'" and v[-1] == "'":
            r[i] = v[1: len(v)-1]
        elif v in ruleset4name:
            r[i] = ruleset4name[v]
        elif v != eof:
            err(f"rule {v} in {r} not in", ruleD)
print("tb after rulemap", tb)

class Handle:
    byStart = {}
    def __init__(self, s):
        self.start = frozenset(s)
        assert self.start not in Handle.byStart
        self.tbX = len(tb)
        tb.append(self)
        self.byStart[self.start] = self.tbX
        self.map = {}
        self.go = {}
        self.goDef = None
    def __repr__ ( self):
        return 'handle' + repr(self.__dict__)

    def mapAdd(self, l):
        rx, px = l[-1]
        r = tb[rx]
        if px > len(r):
            return
        nx = (rx, px+1)
        c = r[px] if px < len(r) else dflt
        #print(f'mapadd enter c {c}, l {l}, self {self}');
        if isinstance(c, str):
            if c in self.map:
                self.map[c].add(nx)
            else:
                self.map[c] = {nx}
        elif isinstance(c, int):
            assert c >= rulesetX and c < handleX
            rs = tb[c]
            
            # forward: search tree for next terminal avoid infinite recursion        
            for cx in rs[3:]:        
                if (cx, 3) not in l:
                    self.mapAdd(l + [(cx, 3)])
            # afterward next step
            if c in self.map:
                self.map[c].add(nx)
            else:
                self.map[c] = {nx}
        #print(f'handleadd exit  l {l}, handle {self}');

    @staticmethod
    def mapExp():
        hx = handleX
        print('mapExp0', handleX, 'tb', len(tb))
        while hx < len(tb):
            h = tb[hx]
            for k in h.map:
                strt = frozenset(h.map[k])
                h.map[k] = strt
                if strt not in Handle.byStart:
                    n = None 
                    for s in strt:
                        if s[1] <= len(tb[s[0]]):
                            if n == None:
                                n = Handle(strt)
                            n.mapAdd([s])
            hx += 1
        print('mapExp', handleX, 'tb', len(tb))

    @classmethod
    def genGo(cl):
        print('genGo 0')
        for hx in Handle.byStart.values():
            h = tb[hx]
            # h.go = {}
            for k in h.map:
                v = h.map[k]
                if len(v) != 1:
                    r = cl.byStart[v]    
                else:
                    for e in v: 
                        if e[1] > len(tb[e[0]]) :
                            r = e[0]
                        else:
                            r = cl.byStart[v]            
                if k == dflt: 
                    h.goDef = r
                else:
                    h.go[k] = r
        print('genGo 9')
                

h = Handle({(0, 3)})
print('handle', h, 'tb', tb);
h.mapAdd([(0, 3)])
print('Handle.byStart', Handle.byStart, tb);
Handle.mapExp()
print('Handle.mapExp', Handle.byStart, tb);
accRS = ruleset4rule[0]
print(f"accRS {accRS}, {handleX} {tb[handleX]} accX={accX}")
accH = 0, len(tb[0])
print('accH', accH)
cnt = 0
for h in [h for h in tb[handleX:] if accH in h.start]:
    cnt += 1
    print(f'accH {h}')
    assert eof not in h.map
    h.map[eof] = frozenset([(accX, 999)])
assert cnt >= 1
assert accRS not in tb[handleX].map
#tb[handleX].map[accRS] = frozenset([(accX,99)])
print(f"accRS2 {accRS}, {handleX} {tb[handleX]}")
Handle.genGo()
print('Handle.genGo', Handle.byStart);

def run(inp):
    inI = iter(inp + [eof])
    st = [[handleX, 'st 0']]
    lah = next(inI)
    while True:
        tos = st[-1]
        print(f'run1 lah={lah} tos={tos}, st={len(st)} {st}')
        m = tb[tos[0]]
        go = m.go[lah] if lah in m.go else m.goDef 
        if go == None or go == failX:
            err(f"bad lah={lah} at {[tb[s[0]][1] + '.' + str(s[1]) for s in m.start]}, "
                f"expected {[k for k in m.map.keys() if isinstance(k, str)]}, "
                f"go {go}, st={len(st)} {st}")
        elif go >= handleX: #shift
            st.append([go, lah])
            lah = next(inI)
        elif go < accX:    
            r = tb[go]
            n = [r[1], go]
            nx = len(st) - len(r) + 3
            for i in range(nx, len(st)):
                n.append(st[i][1])
            pa = tb[st[nx-1][0] ]
            #assert pa[0] == 'handle'
            rs = ruleset4rule[go]
            if rs not in pa.go:
                err(f'reduce {rs} missing in pa', pa, 'stack', st)
            st[nx] = [pa.go[rs], n]            
            del st[nx+1:] 
        elif go == accX:
            assert lah == eof
            assert len(st) == 2
            return st[1][1]
        else:
            err(f"bad go {go}, lah={lah}, st={st}")

def treeSplit(nd, pr):
    if isinstance(nd, str):
        return pr + nd
    r = pr + nd[0] + '.' + str(nd[1])
    for ch in nd[2:]:
        r += treeSplit(ch, pr + '  ')
    return r

def treeSpli2(nd, of):
    if isinstance(nd, str):
        return nd
    r = nd[0] + '.' + str(nd[1]) + ' '
    of += len(r)
    r += treeSpli2(nd[2], of)
    for ch in nd[3:]:
        r += "\n" + of * ' ' + treeSpli2(ch, of)
    return r

for s in ['2', '1  +  2 * 0', '2 * ( 1 * 2 + 0 )']:
    ast = run(re.split(r'\s+', s))
    print('run', s , ' ==>', treeSplit(ast, "\n  "))
    print('run', s , ' ==2\n  ' + treeSpli2(ast, 2))
print('end', __file__)