aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCamil Staps2018-12-23 23:55:56 +0100
committerCamil Staps2018-12-23 23:55:56 +0100
commit9ccf8a561345641ad083d90b5f301ebcc7a61f47 (patch)
treeb758e6c987151b2f346ca8820f300a1ed40ab4c9
Initial commit
-rw-r--r--.gitignore4
-rw-r--r--Makefile20
-rw-r--r--README.md27
-rw-r--r--sjit.icl178
-rw-r--r--sjit_c.c218
5 files changed, 447 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7310f09
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+*.abc
+*.o
+
+sjit
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..c4dbdac
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,20 @@
+CLM:=clm
+CLMFLAGS:=-IL Platform
+
+BIN:=sjit
+
+all: $(BIN)
+
+$(BIN): %: %.icl Clean\ System\ Files/sjit_c.o .FORCE
+ $(CLM) $(CLMFLAGS) $@ -o $@
+
+Clean\ System\ Files/sjit_c.o: sjit_c.c
+ mkdir -p Clean\ System\ Files
+ $(CC) $(CFLAGS) -c $< -o '$@'
+
+clean:
+ $(RM) -r $(BIN) Clean\ System\ Files
+
+.PHONY: all clean
+
+.FORCE:
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0bdef99
--- /dev/null
+++ b/README.md
@@ -0,0 +1,27 @@
+# Sjit
+
+Sjit is a stupid just in time compiler.
+It does almost nothing, and what it does, it does badly and is not useful.
+
+- There is no parser, you have to write your program in the internal Clean
+ representation (see `Start` in [`sjit.icl`](/sjit.icl)).
+- There is no type checker, you have to guess the implicit rules (such as that
+ a `fun_expr` must always be an `Abstr`).
+- There is no register allocation, everything is done on the stack.
+- There is no code optimisation, not even to eliminate `push rbx` followed by
+ `pop rbx`.
+- JIT generated code is for the x64 instruction set only.
+- Because we use `mmap` and `mprotect` this only works on POSIX systems.
+
+## Usage
+
+```bash
+git clone https://gitlab.science.ru.nl/cstaps/sjit-compiler
+cd sjit-compiler
+make
+./sjit
+```
+
+## Colophon
+
+Copyright &copy; 2018&ndash;present [Camil Staps](https://camilstaps.nl).
diff --git a/sjit.icl b/sjit.icl
new file mode 100644
index 0000000..e0c1bef
--- /dev/null
+++ b/sjit.icl
@@ -0,0 +1,178 @@
+module sjit
+
+import StdEnv
+import StdGeneric
+import StdMaybe
+from Data.Func import mapSt, $
+from Data.Map import :: Map(..), get, put, newMap, fromList
+
+import code from "sjit_c."
+
+:: Expr
+ = Int !Int
+ | Var !String
+ | Abstr ![String] !Expr
+ | App !String ![Expr]
+
+:: Function =
+ { fun_name :: !String
+ , fun_expr :: !Expr
+ }
+
+:: Instr
+ = PushRef !Int
+ | PushI !Int
+ | Put !Int
+ | Pop !Int
+
+ | Call !Int
+ | Ret
+ | Halt
+
+ | IAddRet
+ | IMulRet
+ | ISubRet
+ | IDivRet
+
+:: Program :== {!Instr}
+
+:: CompileState =
+ { vars :: !Map String Int
+ , funs :: !Map String Int
+ , sp :: !Int
+ , pc :: !Int
+ }
+
+compile :: ![Function] -> Program
+compile funs
+# (len_bs, bs_funs) = bootstrap_funs
+# (is,cs) = mapSt fun funs {vars=newMap, funs=fromList bs_funs, sp=0, pc=len_bs}
+= case get "main" cs.funs of
+ Nothing -> abort "no main function\n"
+ Just m
+ # bs = bootstrap m
+ -> {i \\ i <- flatten [is \\ (_,is) <- bs] ++ flatten is}
+where
+ fun :: !Function !CompileState -> (![Instr], !CompileState)
+ fun f cs
+ # cs & funs = put f.fun_name cs.pc cs.funs
+ # (is,cs) = expr f.fun_expr cs
+ = (reverse [Ret:is], {cs & pc=cs.pc+1})
+
+ expr :: !Expr !CompileState -> (![Instr], !CompileState)
+ expr (Int i) cs = ([PushI i], {cs & sp=cs.sp+1, pc=cs.pc+1})
+ expr (Var v) cs = case get v cs.vars of
+ Just i -> ([PushRef (i-cs.sp)], {cs & sp=cs.sp+1, pc=cs.pc+1})
+ Nothing -> abort "undefined variable\n"
+ expr (App f args) cs
+ # (iss,cs) = mapSt expr args cs
+ = case get f cs.funs of
+ Just f -> ([Pop (length args-1):Call f:flatten iss], {cs & sp=cs.sp+1, pc=cs.pc+2})
+ Nothing -> abort "undefined function\n"
+ expr (Abstr vs e) cs
+ # cs & vars = foldr (uncurry put) cs.vars [(v,sp) \\ v <- vs & sp <- [cs.sp+1..]]
+ # (is,cs) = expr e cs
+ = ([Put (max 1 (length vs)+1):is], {cs & sp=cs.sp-1, pc=cs.pc+1})
+
+ bootstrap_funs :: (!Int, ![(String, Int)])
+ bootstrap_funs = iter 0 (bootstrap 0)
+ where
+ iter :: !Int ![(String, [Instr])] -> (!Int, ![(String, Int)])
+ iter pc [] = (pc, [])
+ iter pc [(name,is):rest]
+ # fun = (name,pc)
+ # (pc,funs) = iter (pc+length is) rest
+ = (pc,[fun:funs])
+
+ bootstrap :: !Int -> [(String, [Instr])]
+ bootstrap main =
+ [ ("_", [PushI 0,Call main,Halt])
+ , ("+", [IAddRet])
+ , ("*", [IMulRet])
+ , ("-", [ISubRet])
+ , ("/", [IDivRet])
+ ]
+
+exec :: !Program -> Int
+exec prog = exec 0 []
+where
+ sz = size prog
+
+ exec :: !Int ![Int] -> Int
+ exec i stack
+ | i < 0 || i >= sz = abort "out of bounds\n"
+ | otherwise = case prog.[i] of
+ PushI n -> exec (i+1) [n:stack]
+ PushRef r -> exec (i+1) [stack!!r:stack]
+ Put n -> case stack of
+ [val:stack] -> exec (i+1) (take (n-1) stack ++ [val:drop n stack])
+ Pop n -> exec (i+1) (drop n stack)
+ Call f -> exec f [i+1:stack]
+ Ret -> case stack of
+ [ret:stack] -> exec ret stack
+ Halt -> case stack of
+ [r] -> r
+ _ -> abort (toString (length stack) +++ " values left on stack\n")
+
+ IAddRet -> case stack of
+ [ret:a:b:stack] -> exec ret [a:a+b:stack]
+ IMulRet -> case stack of
+ [ret:a:b:stack] -> exec ret [a:a*b:stack]
+ ISubRet -> case stack of
+ [ret:a:b:stack] -> exec ret [a:a-b:stack]
+ IDivRet -> case stack of
+ [ret:a:b:stack] -> exec ret [a:a/b:stack]
+
+generic gEncodedSize a :: !a -> Int
+gEncodedSize{|Int|} _ = 1
+gEncodedSize{|{!}|} fx xs = 1 + sum [fx x \\ x <-: xs]
+gEncodedSize{|UNIT|} _ = 0
+gEncodedSize{|PAIR|} fx fy (PAIR x y) = fx x + fy y
+gEncodedSize{|EITHER|} fl _ (LEFT l) = fl l
+gEncodedSize{|EITHER|} _ fr (RIGHT r) = fr r
+gEncodedSize{|CONS|} fx (CONS x) = fx x + 1
+gEncodedSize{|OBJECT|} fx (OBJECT x) = fx x
+
+derive gEncodedSize Instr
+
+generic gEncode a :: !a !Int !*{#Int} -> (!Int, !*{#Int})
+gEncode{|Int|} n i arr = (i+1, {arr & [i]=n})
+gEncode{|{!}|} fx xs i arr = walk 0 (i+1) {arr & [i]=sz}
+where
+ sz = size xs
+
+ walk ai i arr
+ | ai >= sz = (i,arr)
+ # (i,arr) = fx xs.[ai] i arr
+ = walk (ai+1) i arr
+gEncode{|UNIT|} _ i arr = (i,arr)
+gEncode{|PAIR|} fx fy (PAIR x y) i arr
+ # (i,arr) = fx x i arr
+ = fy y i arr
+gEncode{|EITHER|} fl _ (LEFT l) i arr = fl l i arr
+gEncode{|EITHER|} _ fr (RIGHT r) i arr = fr r i arr
+gEncode{|CONS of {gcd_index}|} fx (CONS x) i arr = fx x (i+1) {arr & [i]=gcd_index}
+gEncode{|OBJECT|} fx (OBJECT x) i arr = fx x i arr
+
+derive gEncode Instr
+
+encode :: !a -> *{#Int} | gEncodedSize{|*|}, gEncode{|*|} a
+encode x
+# (_,arr) = gEncode{|*|} x 0 (createArray (gEncodedSize{|*|} x) -1)
+= arr
+
+jit :: !Program -> Int
+jit prog = jit (encode prog)
+where
+ jit :: !*{#Int} -> Int
+ jit _ = code {
+ ccall jit "A:I"
+ }
+
+Start = (exec prog, jit prog)
+
+prog =: compile
+ [ {fun_name="id", fun_expr=Abstr ["x"] (Var "x")}
+ , {fun_name="const", fun_expr=Abstr ["x","y"] (Var "x")}
+ , {fun_name="main", fun_expr=Abstr [] (App "+" [App "const" [Int 37, Int 10], App "const" [Int 5, Int 10]])}
+ ]
diff --git a/sjit_c.c b/sjit_c.c
new file mode 100644
index 0000000..85702fc
--- /dev/null
+++ b/sjit_c.c
@@ -0,0 +1,218 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+enum instr {
+ PushRef,
+ PushI,
+ Put,
+ Pop,
+
+ Call,
+ Ret,
+ Halt,
+
+ IAddRet,
+ IMulRet,
+ ISubRet,
+ IDivRet
+};
+
+static uint32_t instr_size(enum instr instr) {
+ switch (instr) {
+ case PushRef: return 5+1;
+ case PushI: return 7+1;
+ case Put: return 1+5;
+ case Pop: return 4;
+
+ case Call: return 5;
+ case Ret: return 1;
+ case Halt: return 1+1;
+
+ case IAddRet:
+ case ISubRet: return 5+5+3+5+1;
+ case IMulRet: return 5+5+4+5+1;
+ case IDivRet: return 5+5+3+3+5+1;
+ }
+}
+
+static inline void gen_instr(char *full_code, char **code_p, uint64_t **pgm_p, uint32_t *mapping) {
+ uint64_t arg;
+
+ char *code=*code_p;
+ uint64_t *pgm=*pgm_p;
+
+ switch (*pgm) {
+ case PushRef:
+ arg=pgm[1];
+#ifdef DEBUG_JIT_INSTRUCTIONS
+ fprintf(stderr,"PushRef %lu\n",arg);
+#endif
+ code[0]='\x48'; /* mov rbx,[rsp+ARG*8] */
+ code[1]='\x8b';
+ code[2]='\x5c';
+ code[3]='\x24';
+ code[4]=(unsigned char)arg*8;
+ code[5]='\x53'; /* push rbx */
+ pgm+=2;
+ code+=6;
+ break;
+ case PushI:
+ arg=pgm[1];
+#ifdef DEBUG_JIT_INSTRUCTIONS
+ fprintf(stderr,"PushI %lu\n",arg);
+#endif
+ code[0]='\x48'; /* mov rbx,ARG */
+ code[1]='\xc7';
+ code[2]='\xc3';
+ *(uint32_t*)&code[3]=(uint32_t)arg;
+ code[7]='\x53'; /* push rbx */
+ pgm+=2;
+ code+=8;
+ break;
+ case Put:
+ arg=pgm[1];
+#ifdef DEBUG_JIT_INSTRUCTIONS
+ fprintf(stderr,"Put %lu\n",arg);
+#endif
+ code[0]='\x5b'; /* pop rbx */
+ code[1]='\x48'; /* mov [rsp+ARG*8],rbx */
+ code[2]='\x89';
+ code[3]='\x5c';
+ code[4]='\x24';
+ code[5]=(unsigned char)(arg-1)*8;
+ pgm+=2;
+ code+=6;
+ break;
+ case Pop:
+ arg=pgm[1];
+#ifdef DEBUG_JIT_INSTRUCTIONS
+ fprintf(stderr,"Pop %lu\n",arg);
+#endif
+ code[0]='\x48'; /* add rsp,ARG*8 */
+ code[1]='\x83';
+ code[2]='\xc4';
+ code[3]=(unsigned char)arg*8;
+ pgm+=2;
+ code+=4;
+ break;
+
+ case Call:
+ arg=pgm[1];
+#ifdef DEBUG_JIT_INSTRUCTIONS
+ fprintf(stderr,"Call %lu -> %d\n",arg,mapping[arg]-(&code[5]-full_code));
+#endif
+ code[0]='\xe8'; /* callq ARG */
+ *(uint32_t*)&code[1]=mapping[arg]-(&code[5]-full_code);
+ pgm+=2;
+ code+=5;
+ break;
+ case Ret:
+#ifdef DEBUG_JIT_INSTRUCTIONS
+ fprintf(stderr,"Ret\n");
+#endif
+ code[0]='\xc3'; /* retq */
+ pgm++;
+ code+=1;
+ break;
+ case Halt:
+#ifdef DEBUG_JIT_INSTRUCTIONS
+ fprintf(stderr,"Halt\n");
+#endif
+ code[0]='\x58'; /* pop rax */
+ code[1]='\xc3'; /* retq */
+ pgm++;
+ code+=2;
+ break;
+
+ case IAddRet:
+ case IMulRet:
+ case ISubRet:
+ case IDivRet:
+#ifdef DEBUG_JIT_INSTRUCTIONS
+ fprintf(stderr,"I<Op>Ret\n");
+#endif
+ /* mov rax,[rsp+8] */
+ code[0]='\x48'; code[1]='\x8b'; code[2]='\x44'; code[3]='\x24'; code[4]='\x08';
+ /* mov rbx,[rsp+16] */
+ code[5]='\x48'; code[6]='\x8b'; code[7]='\x5c'; code[8]='\x24'; code[9]='\x10';
+ switch (*pgm) {
+ case IAddRet:
+ case ISubRet:
+ /* {add,sub} rax,rbx */
+ code[10]='\x48';
+ code[11]=*pgm==IAddRet ? '\x01' : '\x29';
+ code[12]='\xd8';
+ code+=13;
+ break;
+ case IMulRet:
+ /* imul rax,rbx */
+ code[10]='\x48'; code[11]='\x0f'; code[12]='\xaf'; code[13]='\xc3';
+ code+=14;
+ break;
+ case IDivRet:
+ /* xor rdx,rdx */
+ code[10]='\x48'; code[11]='\x31'; code[12]='\xd2';
+ /* idiv rbx */
+ code[13]='\x48'; code[14]='\xf7'; code[15]='\xfb';
+ code+=16;
+ break;
+ }
+ /* mov [rsp+16],rax */
+ code[0]='\x48'; code[1]='\x89'; code[2]='\x44'; code[3]='\x24'; code[4]='\x10';
+ /* retq */
+ code[5]='\xc3';
+ pgm++;
+ code+=6;
+ break;
+
+ default:
+ fprintf(stderr,"unknown instruction %lu\n",pgm[-1]);
+ exit(1);
+ }
+
+ *code_p=code;
+ *pgm_p=pgm;
+}
+
+unsigned int jit(uint64_t *pgm) {
+ uint32_t len=*pgm++;
+ uint32_t i;
+
+ uint32_t *mapping=malloc(len*sizeof(uint32_t));
+ uint32_t code_i=0;
+
+ uint64_t *pgm_p=pgm;
+ for (i=0; i<len; i++) {
+ enum instr instr=(enum instr)*pgm_p;
+
+ mapping[i]=code_i;
+ code_i+=instr_size(instr);
+
+ switch (instr) {
+ case PushRef:
+ case PushI:
+ case Put:
+ case Pop:
+ case Call:
+ pgm_p+=2;
+ break;
+ default:
+ pgm_p+=1;
+ break;
+ }
+ }
+
+ char *code=mmap(NULL, code_i, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ char *code_wr=code;
+ pgm_p=pgm;
+ for (i=0; i<len; i++)
+ gen_instr(code, &code_wr, &pgm_p, mapping);
+
+ mprotect(code, code_i, PROT_READ | PROT_EXEC);
+ int (*fun)(void) = (int(*)(void)) code;
+
+ return fun();
+}