.data

	.align 2
heap:
	.space	0x80000	# 512KiB

	.align 1
_cINT:
	.byte	0	# pointer arity
	.byte	1	# basic value arity
	.byte	0	# number of arguments that still have to be curried in

	.align 1
_cCHAR:
	.byte	0	# pointer arity
	.byte	1	# basic value arity
	.byte	0	# number of arguments that still have to be curried in

	.align 2
PRINTROOT:
	.word	_nroot
	.word	0	# to be filled in
EVALROOT:
	.word	_nroot
	.word	0	# to be filled in

	.text
main:
	la	$gp,heap

	la	$t0,__main_ntest
	sw	$t0,($gp)

	la	$s0,PRINTROOT
	ori	$s1,$gp,0
	sw	$s1,4($s0)

	jal	driver
	addi	$gp,$gp,8

	li	$a0,'\n'
	li	$v0,11	# print_char
	syscall
	li	$v0,10	# exit
	syscall

driver:
	addi	$sp,$sp,-4
	sw	$ra,($sp)
	la	$s7,0x10000000	# end of text section / begin of data section
print:
	lw	$t0,($s1)
	sltu	$t0,$t0,$s7
	beq	$t0,$0,print_hnf
	nop
	la	$s2,EVALROOT
	addi	$s3,$s1,0
	sw	$s3,4($s2)
	jal	eval
	nop
	addi	$s1,$s3,0
print_hnf:
	lw	$t0,($s1)
	la	$t1,_cINT
	beq	$t0,$t1,print_int
	la	$t1,_cCHAR
	beq	$t0,$t1,print_char
	li	$a0,'('
	li	$v0,11	# print_char
	syscall
	lbu	$t3,($t0)	# arity
	beq	$t3,$0,print_rewind
	# continue with printing the arguments
	addi	$t2,$s0,1
	addi	$s0,$s1,0
	lw	$s1,4($s1)
	sw	$t2,4($s0)
	b	print
	nop

print_int:
	lw	$a0,4($s1)
	li	$v0,1	# print_int
	syscall
	b	print_rewind
	nop

print_char:
	li	$a0,39	# '
	li	$v0,11	# print_char
	syscall
	lw	$a0,4($s1)
	li	$v0,11	# print_char
	syscall
	li	$a0,39	# '
	li	$v0,11	# print_char
	syscall
	b	print_rewind
	nop

print_rewind:
	la	$t5,PRINTROOT
	beq	$s0,$t5,print_rewind_root
	addi	$t0,$s0,0
	addi	$t1,$s0,0	# argument pointer
	lw	$t2,($t0)
	lbu	$t2,($t2)	# arity
print_rewind_loop:
	addi	$t1,$t1,4
	lw	$t3,($t1)
	andi	$t4,$t3,1
	beq	$t4,$0,print_rewind_loop
	addi	$t2,$t2,-1
	# we have found the argument that was being printed
	beq	$t2,$0,print_rewind_done
	# there is a next argument to print
	li	$a0,','
	li	$v0,11	# print_char
	syscall
	sw	$s1,($t1)
	lw	$s1,4($t1)
	sw	$t3,4($t1)
	b	print
	nop
print_rewind_done:
	# this node is fully printed; go up to the parent
	li	$a0,')'
	li	$v0,11	# print_char
	syscall
	xori	$s0,$t3,1
	sw	$s1,($t1)
	addi	$s1,$t0,0
	j	print_rewind
	nop
print_rewind_root:
	lw	$ra,($sp)
	jr	$ra
	addi	$sp,$sp,4

eval:
	lw	$t0,($s3)
	# Exit early in case of hnf
	sltu	$t1,$t0,$s7
	beq	$t1,$0,eval_rewind
	# Walk through indirections
	la	$t1,_nindir
	bne	$t0,$t1,eval_indir_done
	addi	$t2,$s3,0
eval_indir:
	lw	$s3,4($s3)
	lw	$t0,($s3)
	beq	$t0,$t1,eval_indir
	sw	$s3,4($t2)	# update original indir; optimizes chained indirs
	# Check again for hnf
	sltu	$t1,$t0,$s7
	beq	$t1,$0,eval_rewind
	nop
eval_indir_done:
	lbu	$t1,-1($t0)	# load arity
	lbu	$t2,-2($t0)	# strictness
	addi	$t3,$s3,4	# argument pointer
eval_arg_loop:
	beq	$t1,$0,eval_args_done
	andi	$t4,$t2,1
	bne	$t4,$0,eval_arg_strict
	srl	$t2,$t2,1
	addi	$t1,$t1,-1
	j	eval_arg_loop
	addi	$t3,$t3,4
eval_arg_strict:
	lw	$t4,($t3)
	lw	$t4,($t4)
	sltu	$t4,$t4,$s7
	bne	$t4,$0,eval_arg
	addi	$t1,$t1,-1
	j	eval_arg_loop
	addi	$t3,$t3,4
eval_arg:
	lw	$t4,($t3)	# reverse pointers
	addi	$s2,$s2,1
	sw	$s2,($t3)
	addi	$s2,$s3,0
	addi	$s3,$t4,0
	j	eval		# continue evaluating argument
	nop
eval_args_done:
	jr	$t0
	nop

eval_rewind:
	la	$t4,EVALROOT
	beq	$s2,$t4,eval_rewind_root
	addi	$t0,$s2,0
	addi	$t1,$s2,0
eval_rewind_loop:
	addi	$t1,$t1,4
	lw	$t2,($t1)
	andi	$t3,$t2,1
	beq	$t3,$0,eval_rewind_loop
	nop
	xori	$s2,$t2,1
	sw	$s3,($t1)
	addi	$s3,$t0,0
	j	eval
	nop
eval_rewind_root:
	jr	$ra
	nop

	.data
cycle_error:
	.asciiz	"error: cycle in spine detected\n"
	.text
	#.align	1
	#.byte	0	# strictness
	#.byte	1	# arity
	# TODO: we cannot use the above because SPIM does not accept .byte in text section
	.align 2
	.word	0x0001	# strictness, arity
_nroot:
	la	$a0,cycle_error
	li	$v0,4	# print_string
	syscall
	break	0

	.data
indir_error:
	.asciiz	"error: indirection evaluated\n"
	.text
	#.align	1
	#.byte	0x1	# strictness
	#.byte	1	# arity
	# TODO: we cannot use the above because SPIM does not accept .byte in text section
	.align 2
	.word	0x0101	# strictness, arity
_nindir:
	# indirections are handled in eval
	la	$a0,indir_error
	li	$v0,4	# print_string
	syscall
	break	0