python/parser1.py
# walter's erster versuch mit Python
#
# starten mit python -i basics.py
def err(*e):
print('error:', *e)
exit()
import re;
if False:
class wk:
def __init__(self):
if False:
self.eins = 'null'
def __repr__ ( self):
return 'wk' + repr(self.__dict__)
o = wk()
print('o', o)
o.eins = 'eins'
print('o', o)
eof = 'eof'
dflt = 'dflt'
ruleStr = ["s = s '+' p", "s = p", "p = p '*' i", "p = i", "i = '(' s ')'", "i = '0'", "i = '1'", "i = '2'"]
print("ruleStr", ruleStr);
tb = [re.split(r'\s+', r) for r in ruleStr]
tb.insert(0, ['star t ', '=', tb[0][0]])
rules = tb.copy()
accX = len(tb)
failX = accX+1
rulesetX = failX + 1
tb += ['accept', 'fail']
print("rules", tb)
ruleset4name = {}
ruleset4rule = []
for i in range(accX):
r = tb[i]
if r[1] != '=':
err("r[1] not =", r)
if r[0] in ruleset4name:
s = ruleset4name[r[0]]
tb[s].append(i)
else:
s = len(tb)
ruleset4name[r[0]] = s
tb.append(['ruleset', r[0], s, i])
r[0:2] = ['rule', r[0], i]
assert len(ruleset4rule) == i
ruleset4rule.append(s)
handleX = len(tb)
print("ruleset4name", ruleset4name, 'ruleset4rule', ruleset4rule)
print('tb', tb)
for r in rules:
for i in range(3, len(r)):
v = r[i]
if v[0] == "'" and v[-1] == "'":
r[i] = v[1: len(v)-1]
elif v in ruleset4name:
r[i] = ruleset4name[v]
elif v != eof:
err(f"rule {v} in {r} not in", ruleD)
print("tb after rulemap", tb)
class Handle:
byStart = {}
def __init__(self, s):
self.start = frozenset(s)
assert self.start not in Handle.byStart
self.tbX = len(tb)
tb.append(self)
self.byStart[self.start] = self.tbX
self.map = {}
self.go = {}
self.goDef = None
def __repr__ ( self):
return 'handle' + repr(self.__dict__)
def mapAdd(self, l):
rx, px = l[-1]
r = tb[rx]
if px > len(r):
return
nx = (rx, px+1)
c = r[px] if px < len(r) else dflt
#print(f'mapadd enter c {c}, l {l}, self {self}');
if isinstance(c, str):
if c in self.map:
self.map[c].add(nx)
else:
self.map[c] = {nx}
elif isinstance(c, int):
assert c >= rulesetX and c < handleX
rs = tb[c]
# forward: search tree for next terminal avoid infinite recursion
for cx in rs[3:]:
if (cx, 3) not in l:
self.mapAdd(l + [(cx, 3)])
# afterward next step
if c in self.map:
self.map[c].add(nx)
else:
self.map[c] = {nx}
#print(f'handleadd exit l {l}, handle {self}');
@staticmethod
def mapExp():
hx = handleX
print('mapExp0', handleX, 'tb', len(tb))
while hx < len(tb):
h = tb[hx]
for k in h.map:
strt = frozenset(h.map[k])
h.map[k] = strt
if strt not in Handle.byStart:
n = None
for s in strt:
if s[1] <= len(tb[s[0]]):
if n == None:
n = Handle(strt)
n.mapAdd([s])
hx += 1
print('mapExp', handleX, 'tb', len(tb))
@classmethod
def genGo(cl):
print('genGo 0')
for hx in Handle.byStart.values():
h = tb[hx]
# h.go = {}
for k in h.map:
v = h.map[k]
if len(v) != 1:
r = cl.byStart[v]
else:
for e in v:
if e[1] > len(tb[e[0]]) :
r = e[0]
else:
r = cl.byStart[v]
if k == dflt:
h.goDef = r
else:
h.go[k] = r
print('genGo 9')
h = Handle({(0, 3)})
print('handle', h, 'tb', tb);
h.mapAdd([(0, 3)])
print('Handle.byStart', Handle.byStart, tb);
Handle.mapExp()
print('Handle.mapExp', Handle.byStart, tb);
accRS = ruleset4rule[0]
print(f"accRS {accRS}, {handleX} {tb[handleX]} accX={accX}")
accH = 0, len(tb[0])
print('accH', accH)
cnt = 0
for h in [h for h in tb[handleX:] if accH in h.start]:
cnt += 1
print(f'accH {h}')
assert eof not in h.map
h.map[eof] = frozenset([(accX, 999)])
assert cnt >= 1
assert accRS not in tb[handleX].map
#tb[handleX].map[accRS] = frozenset([(accX,99)])
print(f"accRS2 {accRS}, {handleX} {tb[handleX]}")
Handle.genGo()
print('Handle.genGo', Handle.byStart);
def run(inp):
inI = iter(inp + [eof])
st = [[handleX, 'st 0']]
lah = next(inI)
while True:
tos = st[-1]
print(f'run1 lah={lah} tos={tos}, st={len(st)} {st}')
m = tb[tos[0]]
go = m.go[lah] if lah in m.go else m.goDef
if go == None or go == failX:
err(f"bad lah={lah} at {[tb[s[0]][1] + '.' + str(s[1]) for s in m.start]}, "
f"expected {[k for k in m.map.keys() if isinstance(k, str)]}, "
f"go {go}, st={len(st)} {st}")
elif go >= handleX: #shift
st.append([go, lah])
lah = next(inI)
elif go < accX:
r = tb[go]
n = [r[1], go]
nx = len(st) - len(r) + 3
for i in range(nx, len(st)):
n.append(st[i][1])
pa = tb[st[nx-1][0] ]
#assert pa[0] == 'handle'
rs = ruleset4rule[go]
if rs not in pa.go:
err(f'reduce {rs} missing in pa', pa, 'stack', st)
st[nx] = [pa.go[rs], n]
del st[nx+1:]
elif go == accX:
assert lah == eof
assert len(st) == 2
return st[1][1]
else:
err(f"bad go {go}, lah={lah}, st={st}")
def treeSplit(nd, pr):
if isinstance(nd, str):
return pr + nd
r = pr + nd[0] + '.' + str(nd[1])
for ch in nd[2:]:
r += treeSplit(ch, pr + ' ')
return r
def treeSpli2(nd, of):
if isinstance(nd, str):
return nd
r = nd[0] + '.' + str(nd[1]) + ' '
of += len(r)
r += treeSpli2(nd[2], of)
for ch in nd[3:]:
r += "\n" + of * ' ' + treeSpli2(ch, of)
return r
for s in ['2', '1 + 2 * 0', '2 * ( 1 * 2 + 0 )']:
ast = run(re.split(r'\s+', s))
print('run', s , ' ==>', treeSplit(ast, "\n "))
print('run', s , ' ==2\n ' + treeSpli2(ast, 2))
print('end', __file__)