diff options
author | Camil Staps | 2018-12-23 23:55:56 +0100 |
---|---|---|
committer | Camil Staps | 2018-12-23 23:55:56 +0100 |
commit | 9ccf8a561345641ad083d90b5f301ebcc7a61f47 (patch) | |
tree | b758e6c987151b2f346ca8820f300a1ed40ab4c9 |
Initial commit
-rw-r--r-- | .gitignore | 4 | ||||
-rw-r--r-- | Makefile | 20 | ||||
-rw-r--r-- | README.md | 27 | ||||
-rw-r--r-- | sjit.icl | 178 | ||||
-rw-r--r-- | sjit_c.c | 218 |
5 files changed, 447 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7310f09 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.abc +*.o + +sjit diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..c4dbdac --- /dev/null +++ b/Makefile @@ -0,0 +1,20 @@ +CLM:=clm +CLMFLAGS:=-IL Platform + +BIN:=sjit + +all: $(BIN) + +$(BIN): %: %.icl Clean\ System\ Files/sjit_c.o .FORCE + $(CLM) $(CLMFLAGS) $@ -o $@ + +Clean\ System\ Files/sjit_c.o: sjit_c.c + mkdir -p Clean\ System\ Files + $(CC) $(CFLAGS) -c $< -o '$@' + +clean: + $(RM) -r $(BIN) Clean\ System\ Files + +.PHONY: all clean + +.FORCE: diff --git a/README.md b/README.md new file mode 100644 index 0000000..0bdef99 --- /dev/null +++ b/README.md @@ -0,0 +1,27 @@ +# Sjit + +Sjit is a stupid just in time compiler. +It does almost nothing, and what it does, it does badly and is not useful. + +- There is no parser, you have to write your program in the internal Clean + representation (see `Start` in [`sjit.icl`](/sjit.icl)). +- There is no type checker, you have to guess the implicit rules (such as that + a `fun_expr` must always be an `Abstr`). +- There is no register allocation, everything is done on the stack. +- There is no code optimisation, not even to eliminate `push rbx` followed by + `pop rbx`. +- JIT generated code is for the x64 instruction set only. +- Because we use `mmap` and `mprotect` this only works on POSIX systems. + +## Usage + +```bash +git clone https://gitlab.science.ru.nl/cstaps/sjit-compiler +cd sjit-compiler +make +./sjit +``` + +## Colophon + +Copyright © 2018–present [Camil Staps](https://camilstaps.nl). diff --git a/sjit.icl b/sjit.icl new file mode 100644 index 0000000..e0c1bef --- /dev/null +++ b/sjit.icl @@ -0,0 +1,178 @@ +module sjit + +import StdEnv +import StdGeneric +import StdMaybe +from Data.Func import mapSt, $ +from Data.Map import :: Map(..), get, put, newMap, fromList + +import code from "sjit_c." + +:: Expr + = Int !Int + | Var !String + | Abstr ![String] !Expr + | App !String ![Expr] + +:: Function = + { fun_name :: !String + , fun_expr :: !Expr + } + +:: Instr + = PushRef !Int + | PushI !Int + | Put !Int + | Pop !Int + + | Call !Int + | Ret + | Halt + + | IAddRet + | IMulRet + | ISubRet + | IDivRet + +:: Program :== {!Instr} + +:: CompileState = + { vars :: !Map String Int + , funs :: !Map String Int + , sp :: !Int + , pc :: !Int + } + +compile :: ![Function] -> Program +compile funs +# (len_bs, bs_funs) = bootstrap_funs +# (is,cs) = mapSt fun funs {vars=newMap, funs=fromList bs_funs, sp=0, pc=len_bs} += case get "main" cs.funs of + Nothing -> abort "no main function\n" + Just m + # bs = bootstrap m + -> {i \\ i <- flatten [is \\ (_,is) <- bs] ++ flatten is} +where + fun :: !Function !CompileState -> (![Instr], !CompileState) + fun f cs + # cs & funs = put f.fun_name cs.pc cs.funs + # (is,cs) = expr f.fun_expr cs + = (reverse [Ret:is], {cs & pc=cs.pc+1}) + + expr :: !Expr !CompileState -> (![Instr], !CompileState) + expr (Int i) cs = ([PushI i], {cs & sp=cs.sp+1, pc=cs.pc+1}) + expr (Var v) cs = case get v cs.vars of + Just i -> ([PushRef (i-cs.sp)], {cs & sp=cs.sp+1, pc=cs.pc+1}) + Nothing -> abort "undefined variable\n" + expr (App f args) cs + # (iss,cs) = mapSt expr args cs + = case get f cs.funs of + Just f -> ([Pop (length args-1):Call f:flatten iss], {cs & sp=cs.sp+1, pc=cs.pc+2}) + Nothing -> abort "undefined function\n" + expr (Abstr vs e) cs + # cs & vars = foldr (uncurry put) cs.vars [(v,sp) \\ v <- vs & sp <- [cs.sp+1..]] + # (is,cs) = expr e cs + = ([Put (max 1 (length vs)+1):is], {cs & sp=cs.sp-1, pc=cs.pc+1}) + + bootstrap_funs :: (!Int, ![(String, Int)]) + bootstrap_funs = iter 0 (bootstrap 0) + where + iter :: !Int ![(String, [Instr])] -> (!Int, ![(String, Int)]) + iter pc [] = (pc, []) + iter pc [(name,is):rest] + # fun = (name,pc) + # (pc,funs) = iter (pc+length is) rest + = (pc,[fun:funs]) + + bootstrap :: !Int -> [(String, [Instr])] + bootstrap main = + [ ("_", [PushI 0,Call main,Halt]) + , ("+", [IAddRet]) + , ("*", [IMulRet]) + , ("-", [ISubRet]) + , ("/", [IDivRet]) + ] + +exec :: !Program -> Int +exec prog = exec 0 [] +where + sz = size prog + + exec :: !Int ![Int] -> Int + exec i stack + | i < 0 || i >= sz = abort "out of bounds\n" + | otherwise = case prog.[i] of + PushI n -> exec (i+1) [n:stack] + PushRef r -> exec (i+1) [stack!!r:stack] + Put n -> case stack of + [val:stack] -> exec (i+1) (take (n-1) stack ++ [val:drop n stack]) + Pop n -> exec (i+1) (drop n stack) + Call f -> exec f [i+1:stack] + Ret -> case stack of + [ret:stack] -> exec ret stack + Halt -> case stack of + [r] -> r + _ -> abort (toString (length stack) +++ " values left on stack\n") + + IAddRet -> case stack of + [ret:a:b:stack] -> exec ret [a:a+b:stack] + IMulRet -> case stack of + [ret:a:b:stack] -> exec ret [a:a*b:stack] + ISubRet -> case stack of + [ret:a:b:stack] -> exec ret [a:a-b:stack] + IDivRet -> case stack of + [ret:a:b:stack] -> exec ret [a:a/b:stack] + +generic gEncodedSize a :: !a -> Int +gEncodedSize{|Int|} _ = 1 +gEncodedSize{|{!}|} fx xs = 1 + sum [fx x \\ x <-: xs] +gEncodedSize{|UNIT|} _ = 0 +gEncodedSize{|PAIR|} fx fy (PAIR x y) = fx x + fy y +gEncodedSize{|EITHER|} fl _ (LEFT l) = fl l +gEncodedSize{|EITHER|} _ fr (RIGHT r) = fr r +gEncodedSize{|CONS|} fx (CONS x) = fx x + 1 +gEncodedSize{|OBJECT|} fx (OBJECT x) = fx x + +derive gEncodedSize Instr + +generic gEncode a :: !a !Int !*{#Int} -> (!Int, !*{#Int}) +gEncode{|Int|} n i arr = (i+1, {arr & [i]=n}) +gEncode{|{!}|} fx xs i arr = walk 0 (i+1) {arr & [i]=sz} +where + sz = size xs + + walk ai i arr + | ai >= sz = (i,arr) + # (i,arr) = fx xs.[ai] i arr + = walk (ai+1) i arr +gEncode{|UNIT|} _ i arr = (i,arr) +gEncode{|PAIR|} fx fy (PAIR x y) i arr + # (i,arr) = fx x i arr + = fy y i arr +gEncode{|EITHER|} fl _ (LEFT l) i arr = fl l i arr +gEncode{|EITHER|} _ fr (RIGHT r) i arr = fr r i arr +gEncode{|CONS of {gcd_index}|} fx (CONS x) i arr = fx x (i+1) {arr & [i]=gcd_index} +gEncode{|OBJECT|} fx (OBJECT x) i arr = fx x i arr + +derive gEncode Instr + +encode :: !a -> *{#Int} | gEncodedSize{|*|}, gEncode{|*|} a +encode x +# (_,arr) = gEncode{|*|} x 0 (createArray (gEncodedSize{|*|} x) -1) += arr + +jit :: !Program -> Int +jit prog = jit (encode prog) +where + jit :: !*{#Int} -> Int + jit _ = code { + ccall jit "A:I" + } + +Start = (exec prog, jit prog) + +prog =: compile + [ {fun_name="id", fun_expr=Abstr ["x"] (Var "x")} + , {fun_name="const", fun_expr=Abstr ["x","y"] (Var "x")} + , {fun_name="main", fun_expr=Abstr [] (App "+" [App "const" [Int 37, Int 10], App "const" [Int 5, Int 10]])} + ] diff --git a/sjit_c.c b/sjit_c.c new file mode 100644 index 0000000..85702fc --- /dev/null +++ b/sjit_c.c @@ -0,0 +1,218 @@ +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> + +enum instr { + PushRef, + PushI, + Put, + Pop, + + Call, + Ret, + Halt, + + IAddRet, + IMulRet, + ISubRet, + IDivRet +}; + +static uint32_t instr_size(enum instr instr) { + switch (instr) { + case PushRef: return 5+1; + case PushI: return 7+1; + case Put: return 1+5; + case Pop: return 4; + + case Call: return 5; + case Ret: return 1; + case Halt: return 1+1; + + case IAddRet: + case ISubRet: return 5+5+3+5+1; + case IMulRet: return 5+5+4+5+1; + case IDivRet: return 5+5+3+3+5+1; + } +} + +static inline void gen_instr(char *full_code, char **code_p, uint64_t **pgm_p, uint32_t *mapping) { + uint64_t arg; + + char *code=*code_p; + uint64_t *pgm=*pgm_p; + + switch (*pgm) { + case PushRef: + arg=pgm[1]; +#ifdef DEBUG_JIT_INSTRUCTIONS + fprintf(stderr,"PushRef %lu\n",arg); +#endif + code[0]='\x48'; /* mov rbx,[rsp+ARG*8] */ + code[1]='\x8b'; + code[2]='\x5c'; + code[3]='\x24'; + code[4]=(unsigned char)arg*8; + code[5]='\x53'; /* push rbx */ + pgm+=2; + code+=6; + break; + case PushI: + arg=pgm[1]; +#ifdef DEBUG_JIT_INSTRUCTIONS + fprintf(stderr,"PushI %lu\n",arg); +#endif + code[0]='\x48'; /* mov rbx,ARG */ + code[1]='\xc7'; + code[2]='\xc3'; + *(uint32_t*)&code[3]=(uint32_t)arg; + code[7]='\x53'; /* push rbx */ + pgm+=2; + code+=8; + break; + case Put: + arg=pgm[1]; +#ifdef DEBUG_JIT_INSTRUCTIONS + fprintf(stderr,"Put %lu\n",arg); +#endif + code[0]='\x5b'; /* pop rbx */ + code[1]='\x48'; /* mov [rsp+ARG*8],rbx */ + code[2]='\x89'; + code[3]='\x5c'; + code[4]='\x24'; + code[5]=(unsigned char)(arg-1)*8; + pgm+=2; + code+=6; + break; + case Pop: + arg=pgm[1]; +#ifdef DEBUG_JIT_INSTRUCTIONS + fprintf(stderr,"Pop %lu\n",arg); +#endif + code[0]='\x48'; /* add rsp,ARG*8 */ + code[1]='\x83'; + code[2]='\xc4'; + code[3]=(unsigned char)arg*8; + pgm+=2; + code+=4; + break; + + case Call: + arg=pgm[1]; +#ifdef DEBUG_JIT_INSTRUCTIONS + fprintf(stderr,"Call %lu -> %d\n",arg,mapping[arg]-(&code[5]-full_code)); +#endif + code[0]='\xe8'; /* callq ARG */ + *(uint32_t*)&code[1]=mapping[arg]-(&code[5]-full_code); + pgm+=2; + code+=5; + break; + case Ret: +#ifdef DEBUG_JIT_INSTRUCTIONS + fprintf(stderr,"Ret\n"); +#endif + code[0]='\xc3'; /* retq */ + pgm++; + code+=1; + break; + case Halt: +#ifdef DEBUG_JIT_INSTRUCTIONS + fprintf(stderr,"Halt\n"); +#endif + code[0]='\x58'; /* pop rax */ + code[1]='\xc3'; /* retq */ + pgm++; + code+=2; + break; + + case IAddRet: + case IMulRet: + case ISubRet: + case IDivRet: +#ifdef DEBUG_JIT_INSTRUCTIONS + fprintf(stderr,"I<Op>Ret\n"); +#endif + /* mov rax,[rsp+8] */ + code[0]='\x48'; code[1]='\x8b'; code[2]='\x44'; code[3]='\x24'; code[4]='\x08'; + /* mov rbx,[rsp+16] */ + code[5]='\x48'; code[6]='\x8b'; code[7]='\x5c'; code[8]='\x24'; code[9]='\x10'; + switch (*pgm) { + case IAddRet: + case ISubRet: + /* {add,sub} rax,rbx */ + code[10]='\x48'; + code[11]=*pgm==IAddRet ? '\x01' : '\x29'; + code[12]='\xd8'; + code+=13; + break; + case IMulRet: + /* imul rax,rbx */ + code[10]='\x48'; code[11]='\x0f'; code[12]='\xaf'; code[13]='\xc3'; + code+=14; + break; + case IDivRet: + /* xor rdx,rdx */ + code[10]='\x48'; code[11]='\x31'; code[12]='\xd2'; + /* idiv rbx */ + code[13]='\x48'; code[14]='\xf7'; code[15]='\xfb'; + code+=16; + break; + } + /* mov [rsp+16],rax */ + code[0]='\x48'; code[1]='\x89'; code[2]='\x44'; code[3]='\x24'; code[4]='\x10'; + /* retq */ + code[5]='\xc3'; + pgm++; + code+=6; + break; + + default: + fprintf(stderr,"unknown instruction %lu\n",pgm[-1]); + exit(1); + } + + *code_p=code; + *pgm_p=pgm; +} + +unsigned int jit(uint64_t *pgm) { + uint32_t len=*pgm++; + uint32_t i; + + uint32_t *mapping=malloc(len*sizeof(uint32_t)); + uint32_t code_i=0; + + uint64_t *pgm_p=pgm; + for (i=0; i<len; i++) { + enum instr instr=(enum instr)*pgm_p; + + mapping[i]=code_i; + code_i+=instr_size(instr); + + switch (instr) { + case PushRef: + case PushI: + case Put: + case Pop: + case Call: + pgm_p+=2; + break; + default: + pgm_p+=1; + break; + } + } + + char *code=mmap(NULL, code_i, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + char *code_wr=code; + pgm_p=pgm; + for (i=0; i<len; i++) + gen_instr(code, &code_wr, &pgm_p, mapping); + + mprotect(code, code_i, PROT_READ | PROT_EXEC); + int (*fun)(void) = (int(*)(void)) code; + + return fun(); +} |